@guidekit/core 0.1.0-beta.1 → 0.1.0-beta.2

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1091,7 +1091,7 @@ var DOMScanner = class {
1091
1091
  if (el.closest("[data-guidekit-ignore]")) return;
1092
1092
  const style = window.getComputedStyle(el);
1093
1093
  const position = style.position;
1094
- const zIndex = parseInt(style.zIndex, 10);
1094
+ const zIndex = parseInt(style.zIndex, 10) || 0;
1095
1095
  if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
1096
1096
  const visible = isElementVisible(el);
1097
1097
  if (!visible) return;
@@ -1748,7 +1748,9 @@ var ErrorCodes = {
1748
1748
  // Content
1749
1749
  CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
1750
1750
  // Privacy
1751
- PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
1751
+ PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
1752
+ // General
1753
+ UNKNOWN: "UNKNOWN"
1752
1754
  };
1753
1755
  var GuideKitError = class extends Error {
1754
1756
  code;
@@ -1841,13 +1843,27 @@ function isGuideKitError(error) {
1841
1843
  var DEFAULT_OPENAI_MODEL = "gpt-4o";
1842
1844
  var DEFAULT_TIMEOUT_MS = 15e3;
1843
1845
  var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
1846
+ function emptyUsage() {
1847
+ return { prompt: 0, completion: 0, total: 0 };
1848
+ }
1844
1849
  var OpenAIAdapter = class {
1845
1850
  apiKey;
1846
1851
  model;
1852
+ /** Tracks whether the last extractChunks call emitted a done chunk. */
1853
+ lastExtractEmittedDone = false;
1854
+ /**
1855
+ * Token usage extracted from the most recent `parseResponse` call.
1856
+ * Updated as each SSE chunk is parsed.
1857
+ */
1858
+ _lastUsage = emptyUsage();
1847
1859
  constructor(config) {
1848
1860
  this.apiKey = config.apiKey;
1849
1861
  this.model = config.model ?? DEFAULT_OPENAI_MODEL;
1850
1862
  }
1863
+ /** Token usage from the most recent parseResponse call. */
1864
+ get lastUsage() {
1865
+ return this._lastUsage;
1866
+ }
1851
1867
  // -----------------------------------------------------------------------
1852
1868
  // LLMProviderAdapter implementation
1853
1869
  // -----------------------------------------------------------------------
@@ -1862,7 +1878,11 @@ var OpenAIAdapter = class {
1862
1878
  function: {
1863
1879
  name: tool.name,
1864
1880
  description: tool.description,
1865
- parameters: tool.parameters
1881
+ parameters: {
1882
+ type: "object",
1883
+ properties: { ...tool.parameters },
1884
+ required: tool.required ?? []
1885
+ }
1866
1886
  }
1867
1887
  }));
1868
1888
  }
@@ -1884,11 +1904,17 @@ var OpenAIAdapter = class {
1884
1904
  * prefixed by `data: `. The final line is `data: [DONE]`.
1885
1905
  * Text content arrives in `choices[0].delta.content` and tool calls
1886
1906
  * arrive in `choices[0].delta.tool_calls`.
1907
+ *
1908
+ * This method also:
1909
+ * - Detects content filtering and throws `ContentFilterError`.
1910
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
1887
1911
  */
1888
1912
  async *parseResponse(stream) {
1889
1913
  const reader = stream.getReader();
1890
1914
  const decoder = new TextDecoder();
1891
1915
  let buffer = "";
1916
+ let doneEmitted = false;
1917
+ this._lastUsage = emptyUsage();
1892
1918
  const pendingToolCalls = /* @__PURE__ */ new Map();
1893
1919
  try {
1894
1920
  while (true) {
@@ -1904,7 +1930,10 @@ var OpenAIAdapter = class {
1904
1930
  if (jsonStr === "" || jsonStr === "[DONE]") {
1905
1931
  if (jsonStr === "[DONE]") {
1906
1932
  yield* this.flushPendingToolCalls(pendingToolCalls);
1907
- yield { text: "", done: true };
1933
+ if (!doneEmitted) {
1934
+ doneEmitted = true;
1935
+ yield { text: "", done: true };
1936
+ }
1908
1937
  }
1909
1938
  continue;
1910
1939
  }
@@ -1914,19 +1943,53 @@ var OpenAIAdapter = class {
1914
1943
  } catch {
1915
1944
  continue;
1916
1945
  }
1917
- yield* this.extractChunks(parsed, pendingToolCalls);
1946
+ if (this.isContentFiltered(parsed)) {
1947
+ throw new ContentFilterError({
1948
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1949
+ message: "Response was blocked by provider content safety filter.",
1950
+ provider: "openai",
1951
+ suggestion: "Rephrase your question or adjust safety settings."
1952
+ });
1953
+ }
1954
+ const chunkUsage = this.extractUsage(parsed);
1955
+ if (chunkUsage) {
1956
+ this._lastUsage = chunkUsage;
1957
+ }
1958
+ yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
1959
+ if (!doneEmitted && this.lastExtractEmittedDone) {
1960
+ doneEmitted = true;
1961
+ }
1918
1962
  }
1919
1963
  }
1920
1964
  if (buffer.trim().startsWith("data:")) {
1921
1965
  const jsonStr = buffer.trim().slice(5).trim();
1922
1966
  if (jsonStr === "[DONE]") {
1923
1967
  yield* this.flushPendingToolCalls(pendingToolCalls);
1924
- yield { text: "", done: true };
1968
+ if (!doneEmitted) {
1969
+ doneEmitted = true;
1970
+ yield { text: "", done: true };
1971
+ }
1925
1972
  } else if (jsonStr !== "") {
1926
1973
  try {
1927
1974
  const parsed = JSON.parse(jsonStr);
1928
- yield* this.extractChunks(parsed, pendingToolCalls);
1929
- } catch {
1975
+ if (this.isContentFiltered(parsed)) {
1976
+ throw new ContentFilterError({
1977
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1978
+ message: "Response was blocked by provider content safety filter.",
1979
+ provider: "openai",
1980
+ suggestion: "Rephrase your question or adjust safety settings."
1981
+ });
1982
+ }
1983
+ const chunkUsage = this.extractUsage(parsed);
1984
+ if (chunkUsage) {
1985
+ this._lastUsage = chunkUsage;
1986
+ }
1987
+ yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
1988
+ if (!doneEmitted && this.lastExtractEmittedDone) {
1989
+ doneEmitted = true;
1990
+ }
1991
+ } catch (error) {
1992
+ if (error instanceof ContentFilterError) throw error;
1930
1993
  }
1931
1994
  }
1932
1995
  }
@@ -1955,10 +2018,14 @@ var OpenAIAdapter = class {
1955
2018
  * the raw Response object.
1956
2019
  */
1957
2020
  async streamRequest(params) {
2021
+ const contentsArray = params.contents;
1958
2022
  const messages = [
1959
2023
  { role: "system", content: params.systemPrompt },
1960
- ...params.contents
2024
+ ...contentsArray
1961
2025
  ];
2026
+ if (params.userMessage) {
2027
+ messages.push({ role: "user", content: params.userMessage });
2028
+ }
1962
2029
  const body = {
1963
2030
  model: this.model,
1964
2031
  messages,
@@ -2041,7 +2108,8 @@ var OpenAIAdapter = class {
2041
2108
  * yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
2042
2109
  * or when flushed.
2043
2110
  */
2044
- *extractChunks(parsed, pendingToolCalls) {
2111
+ *extractChunks(parsed, pendingToolCalls, doneEmitted) {
2112
+ this.lastExtractEmittedDone = false;
2045
2113
  const choices = parsed.choices;
2046
2114
  if (!choices || choices.length === 0) return;
2047
2115
  for (const choice of choices) {
@@ -2075,7 +2143,8 @@ var OpenAIAdapter = class {
2075
2143
  if (finishReason === "tool_calls") {
2076
2144
  yield* this.flushPendingToolCalls(pendingToolCalls);
2077
2145
  }
2078
- if (finishReason === "stop") {
2146
+ if (finishReason === "stop" && !doneEmitted && !this.lastExtractEmittedDone) {
2147
+ this.lastExtractEmittedDone = true;
2079
2148
  yield { text: "", done: true };
2080
2149
  }
2081
2150
  }
@@ -2091,7 +2160,8 @@ var OpenAIAdapter = class {
2091
2160
  let args = {};
2092
2161
  try {
2093
2162
  args = JSON.parse(tc.argumentsJson);
2094
- } catch {
2163
+ } catch (_e) {
2164
+ console.warn("[GuideKit:LLM] Failed to parse tool call arguments:", tc.argumentsJson);
2095
2165
  }
2096
2166
  yield {
2097
2167
  id: tc.id,
@@ -2194,16 +2264,26 @@ var DEFAULT_SAFETY_SETTINGS = [
2194
2264
  { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_ONLY_HIGH" },
2195
2265
  { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_ONLY_HIGH" }
2196
2266
  ];
2197
- function emptyUsage() {
2267
+ function emptyUsage2() {
2198
2268
  return { prompt: 0, completion: 0, total: 0 };
2199
2269
  }
2200
2270
  var GeminiAdapter = class {
2201
2271
  apiKey;
2202
2272
  model;
2273
+ /**
2274
+ * Token usage extracted from the most recent `parseResponse` call.
2275
+ * Updated as each SSE chunk is parsed; the final value reflects the
2276
+ * cumulative usage metadata sent by Gemini (typically in the last chunk).
2277
+ */
2278
+ _lastUsage = emptyUsage2();
2203
2279
  constructor(config) {
2204
2280
  this.apiKey = config.apiKey;
2205
2281
  this.model = config.model ?? DEFAULT_GEMINI_MODEL;
2206
2282
  }
2283
+ /** Token usage from the most recent parseResponse call. */
2284
+ get lastUsage() {
2285
+ return this._lastUsage;
2286
+ }
2207
2287
  // -----------------------------------------------------------------------
2208
2288
  // LLMProviderAdapter implementation
2209
2289
  // -----------------------------------------------------------------------
@@ -2218,7 +2298,11 @@ var GeminiAdapter = class {
2218
2298
  functionDeclarations: tools.map((tool) => ({
2219
2299
  name: tool.name,
2220
2300
  description: tool.description,
2221
- parameters: tool.parameters
2301
+ parameters: {
2302
+ type: "object",
2303
+ properties: { ...tool.parameters },
2304
+ required: tool.required ?? []
2305
+ }
2222
2306
  }))
2223
2307
  }
2224
2308
  ];
@@ -2240,11 +2324,16 @@ var GeminiAdapter = class {
2240
2324
  * The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
2241
2325
  * as a JSON object prefixed by `data: `. We parse line-by-line, extract
2242
2326
  * text parts and function call parts, and yield the appropriate types.
2327
+ *
2328
+ * This method also:
2329
+ * - Detects content filtering and throws `ContentFilterError`.
2330
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
2243
2331
  */
2244
2332
  async *parseResponse(stream) {
2245
2333
  const reader = stream.getReader();
2246
2334
  const decoder = new TextDecoder();
2247
2335
  let buffer = "";
2336
+ this._lastUsage = emptyUsage2();
2248
2337
  try {
2249
2338
  while (true) {
2250
2339
  const { done, value } = await reader.read();
@@ -2263,6 +2352,18 @@ var GeminiAdapter = class {
2263
2352
  } catch {
2264
2353
  continue;
2265
2354
  }
2355
+ if (this.isContentFiltered(parsed)) {
2356
+ throw new ContentFilterError({
2357
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2358
+ message: "Response was blocked by provider content safety filter.",
2359
+ provider: "gemini",
2360
+ suggestion: "Rephrase your question or adjust safety settings."
2361
+ });
2362
+ }
2363
+ const chunkUsage = this.extractUsage(parsed);
2364
+ if (chunkUsage) {
2365
+ this._lastUsage = chunkUsage;
2366
+ }
2266
2367
  yield* this.extractChunks(parsed);
2267
2368
  }
2268
2369
  }
@@ -2271,8 +2372,21 @@ var GeminiAdapter = class {
2271
2372
  if (jsonStr !== "" && jsonStr !== "[DONE]") {
2272
2373
  try {
2273
2374
  const parsed = JSON.parse(jsonStr);
2375
+ if (this.isContentFiltered(parsed)) {
2376
+ throw new ContentFilterError({
2377
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2378
+ message: "Response was blocked by provider content safety filter.",
2379
+ provider: "gemini",
2380
+ suggestion: "Rephrase your question or adjust safety settings."
2381
+ });
2382
+ }
2383
+ const chunkUsage = this.extractUsage(parsed);
2384
+ if (chunkUsage) {
2385
+ this._lastUsage = chunkUsage;
2386
+ }
2274
2387
  yield* this.extractChunks(parsed);
2275
- } catch {
2388
+ } catch (error) {
2389
+ if (error instanceof ContentFilterError) throw error;
2276
2390
  }
2277
2391
  }
2278
2392
  }
@@ -2303,15 +2417,21 @@ var GeminiAdapter = class {
2303
2417
  /**
2304
2418
  * Build and execute a streaming request to the Gemini API.
2305
2419
  * Returns the raw `ReadableStream` for the response body together with
2306
- * a promise that resolves to token usage extracted from the final chunk.
2420
+ * the raw Response object.
2421
+ *
2422
+ * Note: The Gemini API key is passed as a URL query parameter (`key=`).
2423
+ * This is inherent to the Gemini REST SSE endpoint design; the key is
2424
+ * transmitted over HTTPS so it remains encrypted in transit. (H3)
2307
2425
  */
2308
2426
  async streamRequest(params) {
2427
+ const contentsArray = params.contents;
2428
+ const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
2309
2429
  const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
2310
2430
  const body = {
2311
2431
  systemInstruction: {
2312
2432
  parts: [{ text: params.systemPrompt }]
2313
2433
  },
2314
- contents: params.contents,
2434
+ contents: fullContents,
2315
2435
  safetySettings: DEFAULT_SAFETY_SETTINGS,
2316
2436
  generationConfig: {
2317
2437
  temperature: 0.7,
@@ -2379,7 +2499,7 @@ var GeminiAdapter = class {
2379
2499
  return { stream: response.body, response };
2380
2500
  }
2381
2501
  // -----------------------------------------------------------------------
2382
- // Internal helpers
2502
+ // Public helpers (LLMProviderAdapter interface)
2383
2503
  // -----------------------------------------------------------------------
2384
2504
  /**
2385
2505
  * Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
@@ -2552,7 +2672,8 @@ var LLMOrchestrator = class {
2552
2672
  updateConfig(config) {
2553
2673
  this._config = config;
2554
2674
  this._adapter = this.createAdapter(config);
2555
- this.log(`Config updated: provider=${config.provider}`);
2675
+ const label = "provider" in config ? config.provider : "custom adapter";
2676
+ this.log(`Config updated: ${label}`);
2556
2677
  }
2557
2678
  /** Get the current provider adapter. */
2558
2679
  get adapter() {
@@ -2563,139 +2684,42 @@ var LLMOrchestrator = class {
2563
2684
  // -----------------------------------------------------------------------
2564
2685
  /**
2565
2686
  * Execute a streaming LLM request and collect the results.
2687
+ *
2688
+ * This method is fully adapter-agnostic: it delegates streaming,
2689
+ * response parsing, content-filter detection, and usage extraction
2690
+ * entirely to the active `LLMProviderAdapter`. No provider-specific
2691
+ * SSE parsing lives in the orchestrator.
2566
2692
  */
2567
2693
  async executeStream(params, _isRetry) {
2568
- const geminiAdapter = this._adapter;
2569
- const historyContents = geminiAdapter.formatConversation(params.history);
2570
- const contents = [
2571
- ...historyContents,
2572
- { role: "user", parts: [{ text: params.userMessage }] }
2573
- ];
2574
- const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
2575
- const { stream } = await geminiAdapter.streamRequest({
2694
+ const adapter = this._adapter;
2695
+ const historyContents = adapter.formatConversation(params.history);
2696
+ const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
2697
+ const { stream } = await adapter.streamRequest({
2576
2698
  systemPrompt: params.systemPrompt,
2577
- contents,
2699
+ contents: historyContents,
2700
+ userMessage: params.userMessage,
2578
2701
  tools,
2579
2702
  signal: params.signal
2580
2703
  });
2581
2704
  let fullText = "";
2582
2705
  const toolCalls = [];
2583
- let usage = emptyUsage();
2584
- let wasContentFiltered = false;
2585
- const reader = stream.getReader();
2586
- const decoder = new TextDecoder();
2587
- let buffer = "";
2588
- try {
2589
- while (true) {
2590
- const { done, value } = await reader.read();
2591
- if (done) break;
2592
- buffer += decoder.decode(value, { stream: true });
2593
- const lines = buffer.split("\n");
2594
- buffer = lines.pop() ?? "";
2595
- for (const line of lines) {
2596
- const trimmed = line.trim();
2597
- if (!trimmed.startsWith("data:")) continue;
2598
- const jsonStr = trimmed.slice(5).trim();
2599
- if (jsonStr === "" || jsonStr === "[DONE]") continue;
2600
- let parsed;
2601
- try {
2602
- parsed = JSON.parse(jsonStr);
2603
- } catch {
2604
- continue;
2605
- }
2606
- if (geminiAdapter.isContentFiltered(parsed)) {
2607
- wasContentFiltered = true;
2608
- break;
2609
- }
2610
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2611
- if (chunkUsage) {
2612
- usage = chunkUsage;
2613
- }
2614
- const candidates = parsed.candidates;
2615
- if (!candidates || candidates.length === 0) continue;
2616
- for (const candidate of candidates) {
2617
- const content = candidate.content;
2618
- if (!content?.parts) continue;
2619
- const finishReason = candidate.finishReason;
2620
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2621
- for (const part of content.parts) {
2622
- if (typeof part.text === "string") {
2623
- fullText += part.text;
2624
- const chunk = { text: part.text, done: isDone };
2625
- this.callbacks.onChunk?.(chunk);
2626
- }
2627
- if (part.functionCall) {
2628
- const fc = part.functionCall;
2629
- const toolCall = {
2630
- id: fc.name,
2631
- name: fc.name,
2632
- arguments: fc.args ?? {}
2633
- };
2634
- toolCalls.push(toolCall);
2635
- this.callbacks.onToolCall?.(toolCall);
2636
- }
2637
- }
2638
- }
2639
- }
2640
- if (wasContentFiltered) break;
2641
- }
2642
- if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
2643
- const jsonStr = buffer.trim().slice(5).trim();
2644
- if (jsonStr !== "" && jsonStr !== "[DONE]") {
2645
- try {
2646
- const parsed = JSON.parse(jsonStr);
2647
- if (geminiAdapter.isContentFiltered(parsed)) {
2648
- wasContentFiltered = true;
2649
- } else {
2650
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2651
- if (chunkUsage) usage = chunkUsage;
2652
- const candidates = parsed.candidates;
2653
- if (candidates) {
2654
- for (const candidate of candidates) {
2655
- const content = candidate.content;
2656
- if (!content?.parts) continue;
2657
- const finishReason = candidate.finishReason;
2658
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2659
- for (const part of content.parts) {
2660
- if (typeof part.text === "string") {
2661
- fullText += part.text;
2662
- const chunk = {
2663
- text: part.text,
2664
- done: isDone
2665
- };
2666
- this.callbacks.onChunk?.(chunk);
2667
- }
2668
- if (part.functionCall) {
2669
- const fc = part.functionCall;
2670
- const toolCall = {
2671
- id: fc.name,
2672
- name: fc.name,
2673
- arguments: fc.args ?? {}
2674
- };
2675
- toolCalls.push(toolCall);
2676
- this.callbacks.onToolCall?.(toolCall);
2677
- }
2678
- }
2679
- }
2680
- }
2681
- }
2682
- } catch {
2683
- }
2706
+ for await (const item of adapter.parseResponse(stream)) {
2707
+ if ("name" in item && "arguments" in item) {
2708
+ const toolCall = item;
2709
+ toolCalls.push(toolCall);
2710
+ this.callbacks.onToolCall?.(toolCall);
2711
+ } else {
2712
+ const chunk = item;
2713
+ if (chunk.text) {
2714
+ fullText += chunk.text;
2684
2715
  }
2716
+ this.callbacks.onChunk?.(chunk);
2685
2717
  }
2686
- } finally {
2687
- reader.releaseLock();
2688
- }
2689
- if (wasContentFiltered) {
2690
- throw new ContentFilterError({
2691
- code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2692
- message: "Response was blocked by Gemini content safety filter.",
2693
- provider: "gemini",
2694
- suggestion: "Rephrase your question or adjust safety settings."
2695
- });
2696
2718
  }
2697
- if (fullText.length > 0) {
2698
- this.callbacks.onChunk?.({ text: "", done: true });
2719
+ this.callbacks.onChunk?.({ text: "", done: true });
2720
+ let usage = emptyUsage2();
2721
+ if ("lastUsage" in adapter) {
2722
+ usage = adapter.lastUsage;
2699
2723
  }
2700
2724
  if (usage.total > 0) {
2701
2725
  this.callbacks.onTokenUsage?.(usage);
@@ -2707,25 +2731,30 @@ var LLMOrchestrator = class {
2707
2731
  }
2708
2732
  /**
2709
2733
  * Create the appropriate adapter for the given config.
2710
- * Currently only Gemini is implemented; other providers will be added
2711
- * as the SDK evolves.
2734
+ *
2735
+ * Built-in providers:
2736
+ * - `'gemini'` — uses the bundled `GeminiAdapter`.
2737
+ *
2738
+ * Custom adapters:
2739
+ * - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
2740
+ * Example: `llm: { adapter: new OpenAIAdapter({ ... }) }`
2712
2741
  */
2713
2742
  createAdapter(config) {
2743
+ if ("adapter" in config) {
2744
+ return config.adapter;
2745
+ }
2714
2746
  switch (config.provider) {
2715
2747
  case "gemini":
2716
2748
  return new GeminiAdapter(config);
2717
- case "openai":
2718
- return new OpenAIAdapter(config);
2719
2749
  default:
2720
2750
  throw new Error(
2721
- `LLM provider "${config.provider}" is not yet supported. Currently only "gemini" and "openai" are implemented.`
2751
+ `LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
2722
2752
  );
2723
2753
  }
2724
2754
  }
2725
2755
  /** Convenience accessor for the current provider name. */
2726
2756
  get providerName() {
2727
- if (this._config.provider === "gemini") return "gemini";
2728
- if (this._config.provider === "openai") return "openai";
2757
+ if ("provider" in this._config) return this._config.provider;
2729
2758
  return void 0;
2730
2759
  }
2731
2760
  /** Log a debug message if debug mode is enabled. */
@@ -2878,7 +2907,7 @@ var ToolExecutor = class {
2878
2907
  break;
2879
2908
  }
2880
2909
  }
2881
- if (rounds >= this.maxRounds && allToolCalls.length > 0) {
2910
+ if (rounds >= this.maxRounds) {
2882
2911
  this.log(
2883
2912
  `Max rounds (${this.maxRounds}) reached. Returning current text.`
2884
2913
  );
@@ -2981,6 +3010,19 @@ var ToolExecutor = class {
2981
3010
  return s.value;
2982
3011
  }
2983
3012
  const tc = toolCalls[i];
3013
+ if (!tc) {
3014
+ const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
3015
+ return {
3016
+ toolCallId: `unknown-${i}`,
3017
+ record: {
3018
+ name: "unknown",
3019
+ args: {},
3020
+ result: void 0,
3021
+ durationMs: 0,
3022
+ error: errorMsg2
3023
+ }
3024
+ };
3025
+ }
2984
3026
  const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
2985
3027
  return {
2986
3028
  toolCallId: tc.id,
@@ -4173,64 +4215,62 @@ var DeepgramSTT = class {
4173
4215
  }
4174
4216
  };
4175
4217
 
4176
- // src/voice/elevenlabs-tts.ts
4177
- var LOG_PREFIX7 = "[GuideKit:TTS]";
4178
- var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
4179
- var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
4180
- var DEFAULT_STABILITY = 0.5;
4181
- var DEFAULT_SIMILARITY_BOOST = 0.75;
4182
- function base64ToArrayBuffer(base64) {
4183
- const binaryString = atob(base64);
4184
- const length = binaryString.length;
4185
- const bytes = new Uint8Array(length);
4186
- for (let i = 0; i < length; i++) {
4187
- bytes[i] = binaryString.charCodeAt(i);
4218
+ // src/voice/elevenlabs-stt.ts
4219
+ var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
4220
+ var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
4221
+ var DEFAULT_LANGUAGE2 = "en";
4222
+ var INACTIVITY_TIMEOUT_S = 30;
4223
+ var SAMPLE_RATE = 16e3;
4224
+ function float32ToInt162(float32) {
4225
+ const int16 = new Int16Array(float32.length);
4226
+ for (let i = 0; i < float32.length; i++) {
4227
+ const s = Math.max(-1, Math.min(1, float32[i]));
4228
+ int16[i] = s < 0 ? s * 32768 : s * 32767;
4188
4229
  }
4189
- return bytes.buffer;
4230
+ return int16;
4190
4231
  }
4191
- var ElevenLabsTTS = class {
4192
- // ---- Configuration ------------------------------------------------------
4232
+ function int16ToBase64(int16) {
4233
+ const bytes = new Uint8Array(int16.buffer);
4234
+ const CHUNK_SIZE = 8192;
4235
+ let binary = "";
4236
+ for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
4237
+ const chunk = bytes.subarray(i, i + CHUNK_SIZE);
4238
+ binary += String.fromCharCode(...chunk);
4239
+ }
4240
+ return btoa(binary);
4241
+ }
4242
+ var ElevenLabsSTT = class {
4243
+ // ---- Configuration -------------------------------------------------------
4193
4244
  apiKey;
4194
- voiceId;
4195
- modelId;
4245
+ language;
4196
4246
  debugEnabled;
4197
- // ---- Internal state -----------------------------------------------------
4247
+ // ---- Internal state ------------------------------------------------------
4198
4248
  wsManager = null;
4199
4249
  _connected = false;
4200
4250
  _suspended = false;
4201
- /**
4202
- * Whether the BOS (beginning-of-stream) handshake has been sent for the
4203
- * current WebSocket session. ElevenLabs requires the first message to
4204
- * contain voice settings and the API key before any text chunks.
4205
- */
4206
- bosSent = false;
4207
- /** Registered audio-event callbacks. */
4208
- audioCallbacks = /* @__PURE__ */ new Set();
4209
- // -----------------------------------------------------------------------
4251
+ /** Registered transcript callbacks. */
4252
+ transcriptCallbacks = /* @__PURE__ */ new Set();
4253
+ // -------------------------------------------------------------------------
4210
4254
  // Constructor
4211
- // -----------------------------------------------------------------------
4255
+ // -------------------------------------------------------------------------
4212
4256
  constructor(options) {
4213
4257
  this.apiKey = options.apiKey;
4214
- this.voiceId = options.voiceId ?? DEFAULT_VOICE_ID;
4215
- this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
4258
+ this.language = options.language ?? DEFAULT_LANGUAGE2;
4216
4259
  this.debugEnabled = options.debug ?? false;
4217
- this.log("ElevenLabsTTS created", {
4218
- voiceId: this.voiceId,
4219
- modelId: this.modelId
4220
- });
4260
+ this.log("ElevenLabsSTT created", { language: this.language });
4221
4261
  }
4222
- // -----------------------------------------------------------------------
4262
+ // -------------------------------------------------------------------------
4223
4263
  // Public API
4224
- // -----------------------------------------------------------------------
4264
+ // -------------------------------------------------------------------------
4225
4265
  /** Whether the WebSocket is currently connected and ready. */
4226
4266
  get isConnected() {
4227
4267
  return this._connected;
4228
4268
  }
4229
4269
  /**
4230
- * Open a WebSocket connection to the ElevenLabs streaming TTS endpoint.
4270
+ * Open a WebSocket connection to ElevenLabs' real-time STT endpoint.
4231
4271
  *
4232
- * Resolves once the connection is established and the BOS handshake has
4233
- * been sent. Rejects if the connection cannot be established.
4272
+ * Resolves once the connection is established and the socket is ready to
4273
+ * receive audio frames. Rejects if the connection cannot be established.
4234
4274
  */
4235
4275
  async connect() {
4236
4276
  if (this._connected) {
@@ -4242,17 +4282,16 @@ var ElevenLabsTTS = class {
4242
4282
  return;
4243
4283
  }
4244
4284
  const url = this.buildUrl();
4245
- this.log("Connecting to", url);
4285
+ this.log("Connecting to", url.replace(this.apiKey, "***"));
4246
4286
  this.wsManager = new WebSocketManager({
4247
4287
  url,
4248
4288
  protocols: [],
4249
4289
  debug: this.debugEnabled,
4250
- label: "ElevenLabs-TTS"
4290
+ label: "ElevenLabs-STT"
4251
4291
  });
4252
4292
  this.wsManager.onOpen(() => {
4253
4293
  this._connected = true;
4254
- this.sendBOS();
4255
- this.log("Connected and BOS sent");
4294
+ this.log("Connected");
4256
4295
  });
4257
4296
  this.wsManager.onMessage((event) => {
4258
4297
  this.handleMessage(event);
@@ -4267,67 +4306,54 @@ var ElevenLabsTTS = class {
4267
4306
  return this.wsManager.connect();
4268
4307
  }
4269
4308
  /**
4270
- * Send text to be synthesised into speech.
4271
- *
4272
- * May be called multiple times to stream text incrementally. Each call
4273
- * sends a text chunk with `try_trigger_generation: true` so ElevenLabs
4274
- * can begin synthesising as soon as it has enough context.
4275
- *
4276
- * Call {@link flush} when the complete utterance has been sent.
4277
- */
4278
- speak(text) {
4279
- if (!this._connected || !this.wsManager || this._suspended) {
4280
- this.log("Cannot speak \u2014 not connected or suspended");
4281
- return;
4282
- }
4283
- if (!text) {
4284
- return;
4285
- }
4286
- const message = JSON.stringify({
4287
- text,
4288
- try_trigger_generation: true
4289
- });
4290
- this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4291
- this.wsManager.send(message);
4292
- }
4293
- /**
4294
- * Signal the end of text input for the current utterance.
4309
+ * Send audio data to ElevenLabs for transcription.
4295
4310
  *
4296
- * Sends the EOS (end-of-stream) marker to ElevenLabs. The server will
4297
- * flush any remaining audio and send a final chunk with `isFinal: true`.
4311
+ * Accepts either `Float32Array` (Web Audio API output) or `Int16Array`
4312
+ * (already encoded as linear16). Float32 data is automatically converted
4313
+ * to Int16 before encoding. Audio is sent as a base64-encoded JSON message.
4298
4314
  */
4299
- flush() {
4315
+ sendAudio(audioData) {
4300
4316
  if (!this._connected || !this.wsManager || this._suspended) {
4301
- this.log("Cannot flush \u2014 not connected or suspended");
4302
4317
  return;
4303
4318
  }
4304
- const message = JSON.stringify({ text: "" });
4305
- this.log("Sending EOS (flush)");
4306
- this.wsManager.send(message);
4319
+ const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
4320
+ const base64 = int16ToBase64(int16);
4321
+ this.wsManager.send(
4322
+ JSON.stringify({
4323
+ type: "input_audio_chunk",
4324
+ audio: base64,
4325
+ sample_rate: SAMPLE_RATE
4326
+ })
4327
+ );
4307
4328
  }
4308
4329
  /**
4309
- * Register a callback to receive audio output events.
4330
+ * Register a callback to receive transcript events.
4310
4331
  *
4311
4332
  * @returns An unsubscribe function. Calling it more than once is safe.
4312
4333
  */
4313
- onAudio(callback) {
4314
- this.audioCallbacks.add(callback);
4334
+ onTranscript(callback) {
4335
+ this.transcriptCallbacks.add(callback);
4315
4336
  let removed = false;
4316
4337
  return () => {
4317
4338
  if (removed) return;
4318
4339
  removed = true;
4319
- this.audioCallbacks.delete(callback);
4340
+ this.transcriptCallbacks.delete(callback);
4320
4341
  };
4321
4342
  }
4322
- /** Gracefully close the connection by sending EOS then closing. */
4343
+ /**
4344
+ * Gracefully close the connection.
4345
+ *
4346
+ * Sends a `commit_audio` message so ElevenLabs can finalise any pending
4347
+ * transcription before the socket is torn down.
4348
+ */
4323
4349
  close() {
4324
4350
  if (!this._connected || !this.wsManager) {
4325
4351
  this.log("Not connected \u2014 nothing to close");
4326
4352
  return;
4327
4353
  }
4328
- this.log("Closing connection");
4354
+ this.log("Sending commit_audio and closing");
4329
4355
  try {
4330
- this.wsManager.send(JSON.stringify({ text: "" }));
4356
+ this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
4331
4357
  } catch {
4332
4358
  }
4333
4359
  this.wsManager.close();
@@ -4341,14 +4367,13 @@ var ElevenLabsTTS = class {
4341
4367
  this.wsManager = null;
4342
4368
  }
4343
4369
  this.cleanup();
4344
- this.audioCallbacks.clear();
4370
+ this.transcriptCallbacks.clear();
4345
4371
  }
4346
4372
  /**
4347
4373
  * Suspend the adapter (e.g. when the device goes offline).
4348
4374
  *
4349
- * Marks the adapter as suspended so that calls to `speak()` and `flush()`
4350
- * are silently dropped. The WebSocket itself is left open; ElevenLabs
4351
- * will close it after an inactivity timeout if the network went away.
4375
+ * Marks the adapter as suspended so that incoming `sendAudio` calls are
4376
+ * silently dropped. The WebSocket itself is left open.
4352
4377
  */
4353
4378
  suspend() {
4354
4379
  if (this._suspended) return;
@@ -4356,54 +4381,22 @@ var ElevenLabsTTS = class {
4356
4381
  this.log("Suspended");
4357
4382
  }
4358
4383
  /**
4359
- * Resume after a prior `suspend()`. If the underlying connection is
4360
- * still alive, the adapter returns to normal operation. If the connection
4361
- * was lost while suspended, callers should `close()` / `destroy()` and
4362
- * create a new instance.
4384
+ * Resume after a prior `suspend()`.
4363
4385
  */
4364
4386
  resume() {
4365
4387
  if (!this._suspended) return;
4366
4388
  this._suspended = false;
4367
4389
  this.log("Resumed");
4368
4390
  }
4369
- // -----------------------------------------------------------------------
4370
- // BOS handshake
4371
- // -----------------------------------------------------------------------
4372
- /**
4373
- * Send the BOS (beginning-of-stream) message.
4374
- *
4375
- * This must be the very first message on a new WebSocket session. It
4376
- * carries the API key and voice settings.
4377
- */
4378
- sendBOS() {
4379
- if (!this.wsManager || this.bosSent) {
4380
- return;
4381
- }
4382
- const bos = JSON.stringify({
4383
- text: " ",
4384
- voice_settings: {
4385
- stability: DEFAULT_STABILITY,
4386
- similarity_boost: DEFAULT_SIMILARITY_BOOST
4387
- },
4388
- xi_api_key: this.apiKey
4389
- });
4390
- this.wsManager.send(bos);
4391
- this.bosSent = true;
4392
- this.log("BOS handshake sent");
4393
- }
4394
- // -----------------------------------------------------------------------
4391
+ // -------------------------------------------------------------------------
4395
4392
  // Message handling
4396
- // -----------------------------------------------------------------------
4393
+ // -------------------------------------------------------------------------
4397
4394
  /**
4398
- * Parse incoming ElevenLabs JSON messages and emit audio events.
4399
- *
4400
- * ElevenLabs sends messages with the following shape:
4401
- * ```json
4402
- * { "audio": "base64encoded...", "isFinal": false }
4403
- * ```
4395
+ * Parse incoming ElevenLabs JSON messages and emit transcript events.
4404
4396
  *
4405
- * When `isFinal` is `true`, the server has finished synthesising the
4406
- * current utterance (i.e. after EOS was sent).
4397
+ * ElevenLabs sends two transcript message types:
4398
+ * - `partial_transcript`: interim result, `isFinal = false`
4399
+ * - `committed_transcript`: final result, `isFinal = true`
4407
4400
  */
4408
4401
  handleMessage(event) {
4409
4402
  if (typeof event.data !== "string") {
@@ -4416,47 +4409,1026 @@ var ElevenLabsTTS = class {
4416
4409
  this.log("Failed to parse message", event.data);
4417
4410
  return;
4418
4411
  }
4419
- if (parsed["error"] !== void 0) {
4420
- this.log("ElevenLabs error:", parsed["error"]);
4421
- return;
4412
+ const type = parsed["type"];
4413
+ if (type === "committed_transcript" || type === "partial_transcript") {
4414
+ this.handleTranscriptMessage(parsed, type === "committed_transcript");
4415
+ } else {
4416
+ this.log("Received message", type, parsed);
4417
+ }
4418
+ }
4419
+ /**
4420
+ * Extract transcript data from a transcript message and notify subscribers.
4421
+ */
4422
+ handleTranscriptMessage(parsed, isFinal) {
4423
+ const result = parsed["result"];
4424
+ const text = result?.text ?? "";
4425
+ const confidence = result?.confidence ?? 0;
4426
+ if (text.trim() === "") {
4427
+ return;
4428
+ }
4429
+ const transcriptEvent = {
4430
+ text,
4431
+ isFinal,
4432
+ confidence,
4433
+ timestamp: Date.now()
4434
+ };
4435
+ this.log(
4436
+ isFinal ? "Final transcript:" : "Interim transcript:",
4437
+ text,
4438
+ `(${(confidence * 100).toFixed(1)}%)`
4439
+ );
4440
+ this.emitTranscript(transcriptEvent);
4441
+ }
4442
+ // -------------------------------------------------------------------------
4443
+ // Subscriber notification
4444
+ // -------------------------------------------------------------------------
4445
+ /**
4446
+ * Emit a transcript event to all registered callbacks.
4447
+ *
4448
+ * Errors thrown by individual callbacks are caught and logged so one
4449
+ * misbehaving subscriber does not prevent others from receiving the event.
4450
+ */
4451
+ emitTranscript(event) {
4452
+ for (const cb of this.transcriptCallbacks) {
4453
+ try {
4454
+ cb(event);
4455
+ } catch (err) {
4456
+ console.error(LOG_PREFIX7, "Transcript callback threw:", err);
4457
+ }
4458
+ }
4459
+ }
4460
+ // -------------------------------------------------------------------------
4461
+ // URL building
4462
+ // -------------------------------------------------------------------------
4463
+ /** Build the ElevenLabs streaming STT endpoint URL with auth query params. */
4464
+ buildUrl() {
4465
+ const params = new URLSearchParams({
4466
+ xi_api_key: this.apiKey,
4467
+ language: this.language,
4468
+ inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
4469
+ });
4470
+ return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
4471
+ }
4472
+ // -------------------------------------------------------------------------
4473
+ // Cleanup
4474
+ // -------------------------------------------------------------------------
4475
+ /** Reset internal state after disconnection. */
4476
+ cleanup() {
4477
+ this._connected = false;
4478
+ }
4479
+ // -------------------------------------------------------------------------
4480
+ // Logging
4481
+ // -------------------------------------------------------------------------
4482
+ /** Conditional debug logging. */
4483
+ log(...args) {
4484
+ if (this.debugEnabled) {
4485
+ console.debug(LOG_PREFIX7, ...args);
4486
+ }
4487
+ }
4488
+ };
4489
+
4490
+ // src/voice/elevenlabs-tts.ts
4491
// Tag prepended to every console message written by the ElevenLabs TTS adapter.
var LOG_PREFIX8 = "[GuideKit:TTS]";
// Voice used when the caller does not pass `voiceId`.
var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
// Model used when the caller does not pass `modelId`.
var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
// `voice_settings` values sent in the BOS handshake.
var DEFAULT_STABILITY = 0.5;
var DEFAULT_SIMILARITY_BOOST = 0.75;
4496
/**
 * Decode a base64 string into an ArrayBuffer of raw bytes.
 *
 * @param {string} base64 - Base64-encoded payload.
 * @returns {ArrayBuffer} Buffer holding one byte per decoded character.
 * @throws Whatever `atob` throws for input that is not valid base64.
 */
function base64ToArrayBuffer(base64) {
  const decoded = atob(base64);
  // `atob` yields one character (code 0-255) per byte, so mapping each
  // character to its char code reproduces the byte sequence.
  const bytes = Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
  return bytes.buffer;
}
4505
/**
 * Streaming text-to-speech adapter for the ElevenLabs `stream-input`
 * WebSocket endpoint.
 *
 * Text is pushed with `speak()` / `flush()`; decoded audio chunks are
 * delivered to callbacks registered through `onAudio()`.
 */
var ElevenLabsTTS = class {
  // ---- Configuration ------------------------------------------------------
  apiKey;
  voiceId;
  modelId;
  debugEnabled;
  // ---- Internal state -----------------------------------------------------
  wsManager = null;
  _connected = false;
  _suspended = false;
  /**
   * Whether the BOS (beginning-of-stream) handshake has been sent for the
   * current WebSocket session. ElevenLabs requires the first message to
   * contain voice settings and the API key before any text chunks.
   */
  bosSent = false;
  /** Registered audio-event callbacks. */
  audioCallbacks = /* @__PURE__ */ new Set();
  // -----------------------------------------------------------------------
  // Constructor
  // -----------------------------------------------------------------------
  /**
   * @param options - `apiKey` is used as given; `voiceId`, `modelId` and
   *   `debug` fall back to module defaults when null/undefined.
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.voiceId = options.voiceId ?? DEFAULT_VOICE_ID;
    this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
    this.debugEnabled = options.debug ?? false;
    this.log("ElevenLabsTTS created", {
      voiceId: this.voiceId,
      modelId: this.modelId
    });
  }
  // -----------------------------------------------------------------------
  // Public API
  // -----------------------------------------------------------------------
  /** Whether the WebSocket is currently connected and ready. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Open a WebSocket connection to the ElevenLabs streaming TTS endpoint.
   *
   * The BOS handshake is sent from the socket's open handler. Returns
   * whatever `wsManager.connect()` returns. When already connected, or when
   * no global `WebSocket` exists (e.g. SSR), it logs and resolves without
   * connecting.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof WebSocket === "undefined") {
      this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
      return;
    }
    const url = this.buildUrl();
    this.log("Connecting to", url);
    this.wsManager = new WebSocketManager({
      url,
      protocols: [],
      debug: this.debugEnabled,
      label: "ElevenLabs-TTS"
    });
    this.wsManager.onOpen(() => {
      this._connected = true;
      this.sendBOS();
      this.log("Connected and BOS sent");
    });
    this.wsManager.onMessage((event) => {
      this.handleMessage(event);
    });
    this.wsManager.onClose((code, reason) => {
      this.log("Connection closed", { code, reason });
      this.cleanup();
    });
    this.wsManager.onError((event) => {
      this.log("WebSocket error", event);
    });
    return this.wsManager.connect();
  }
  /**
   * Send text to be synthesised into speech.
   *
   * May be called multiple times to stream text incrementally. Each call
   * sends a text chunk with `try_trigger_generation: true`. Dropped (with a
   * debug log) when not connected or suspended; empty text is ignored.
   *
   * Call {@link flush} when the complete utterance has been sent.
   */
  speak(text) {
    if (!this._connected || !this.wsManager || this._suspended) {
      this.log("Cannot speak \u2014 not connected or suspended");
      return;
    }
    if (!text) {
      return;
    }
    const message = JSON.stringify({
      text,
      try_trigger_generation: true
    });
    // Only the first 80 characters are logged to keep debug output short.
    this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
    this.wsManager.send(message);
  }
  /**
   * Signal the end of text input for the current utterance by sending the
   * EOS (end-of-stream) marker, an empty `text` message.
   */
  flush() {
    if (!this._connected || !this.wsManager || this._suspended) {
      this.log("Cannot flush \u2014 not connected or suspended");
      return;
    }
    const message = JSON.stringify({ text: "" });
    this.log("Sending EOS (flush)");
    this.wsManager.send(message);
  }
  /**
   * Register a callback to receive audio output events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onAudio(callback) {
    this.audioCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.audioCallbacks.delete(callback);
    };
  }
  /** Gracefully close the connection by sending EOS then closing. */
  close() {
    if (!this._connected || !this.wsManager) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Closing connection");
    try {
      this.wsManager.send(JSON.stringify({ text: "" }));
    } catch (err) {
      // Best effort: the socket is being closed regardless, but record why
      // the EOS marker could not be delivered.
      this.log("Failed to send EOS before closing", err);
    }
    this.wsManager.close();
    this.cleanup();
  }
  /** Force-destroy the connection without a graceful handshake. */
  destroy() {
    this.log("Destroying");
    if (this.wsManager) {
      this.wsManager.destroy();
      this.wsManager = null;
    }
    this.cleanup();
    this.audioCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Marks the adapter as suspended so that calls to `speak()` and `flush()`
   * are dropped. The WebSocket itself is left untouched.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`. Only the suspended flag is cleared;
   * no reconnection is attempted here.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this.log("Resumed");
  }
  // -----------------------------------------------------------------------
  // BOS handshake
  // -----------------------------------------------------------------------
  /**
   * Send the BOS (beginning-of-stream) message carrying the API key and
   * voice settings. Sent at most once per session (`bosSent` is reset by
   * `cleanup()`).
   */
  sendBOS() {
    if (!this.wsManager || this.bosSent) {
      return;
    }
    const bos = JSON.stringify({
      text: " ",
      voice_settings: {
        stability: DEFAULT_STABILITY,
        similarity_boost: DEFAULT_SIMILARITY_BOOST
      },
      xi_api_key: this.apiKey
    });
    this.wsManager.send(bos);
    this.bosSent = true;
    this.log("BOS handshake sent");
  }
  // -----------------------------------------------------------------------
  // Message handling
  // -----------------------------------------------------------------------
  /**
   * Parse incoming ElevenLabs JSON messages and emit audio events.
   *
   * Expected shape:
   * ```json
   * { "audio": "base64encoded...", "isFinal": false }
   * ```
   *
   * Non-string frames, unparsable JSON, non-object payloads, error messages
   * and messages without a string `audio` field are logged and ignored. An
   * empty `audio` with `isFinal: true` is emitted as a zero-length final
   * chunk.
   */
  handleMessage(event) {
    if (typeof event.data !== "string") {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      this.log("Failed to parse message", event.data);
      return;
    }
    // JSON.parse also succeeds for `null` and primitives; indexing `null`
    // would throw out of the socket's message callback.
    if (parsed === null || typeof parsed !== "object") {
      this.log("Ignoring non-object message", parsed);
      return;
    }
    if (parsed["error"] !== void 0) {
      this.log("ElevenLabs error:", parsed["error"]);
      return;
    }
    const audioBase64 = parsed["audio"];
    if (audioBase64 === void 0 || audioBase64 === null) {
      this.log("Non-audio message received", parsed);
      return;
    }
    // `atob` coerces its argument to a string, so a value such as `true`
    // would otherwise decode into bogus audio bytes.
    if (typeof audioBase64 !== "string") {
      this.log("Ignoring message with non-string audio field", parsed);
      return;
    }
    const isFinal = parsed["isFinal"] === true;
    if (audioBase64.length === 0) {
      if (isFinal) {
        this.emitAudio({
          audio: new ArrayBuffer(0),
          isFinal: true,
          timestamp: Date.now()
        });
      }
      return;
    }
    let audioBuffer;
    try {
      audioBuffer = base64ToArrayBuffer(audioBase64);
    } catch (err) {
      this.log("Failed to decode base64 audio", err);
      return;
    }
    const audioEvent = {
      audio: audioBuffer,
      isFinal,
      timestamp: Date.now()
    };
    this.log(
      isFinal ? "Final audio chunk:" : "Audio chunk:",
      `${audioBuffer.byteLength} bytes`
    );
    this.emitAudio(audioEvent);
  }
  // -----------------------------------------------------------------------
  // Subscriber notification
  // -----------------------------------------------------------------------
  /**
   * Emit an audio event to all registered callbacks.
   *
   * Errors thrown by individual callbacks are caught and logged so one
   * misbehaving subscriber does not prevent others from receiving the event.
   */
  emitAudio(event) {
    for (const cb of this.audioCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX8, "Audio callback threw:", err);
      }
    }
  }
  // -----------------------------------------------------------------------
  // URL building
  // -----------------------------------------------------------------------
  /** Build the ElevenLabs streaming TTS endpoint URL. */
  buildUrl() {
    const params = new URLSearchParams({
      model_id: this.modelId
    });
    return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
  }
  // -----------------------------------------------------------------------
  // Cleanup
  // -----------------------------------------------------------------------
  /** Reset per-session state after disconnection. */
  cleanup() {
    this._connected = false;
    this.bosSent = false;
  }
  // -----------------------------------------------------------------------
  // Logging
  // -----------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX8, ...args);
    }
  }
};
4817
+
4818
+ // src/voice/web-speech-stt.ts
4819
// Tag prepended to every console message written by the Web Speech STT adapter.
var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
// Recognition language used when the caller does not pass `language`.
var DEFAULT_LANGUAGE3 = "en-US";
4821
/**
 * Speech-to-text adapter built on the browser's Web Speech API
 * (`SpeechRecognition` / `webkitSpeechRecognition`).
 *
 * Transcripts are delivered to callbacks registered with `onTranscript()`.
 * In continuous mode the adapter restarts recognition when a session ends
 * unexpectedly.
 */
var WebSpeechSTT = class {
  // ---- Configuration -------------------------------------------------------
  language;
  continuous;
  interimResultsEnabled;
  debugEnabled;
  // ---- Internal state ------------------------------------------------------
  recognition = null;
  _connected = false;
  _suspended = false;
  /**
   * Whether we intentionally stopped recognition. Used to distinguish
   * between intentional stop and unexpected end (for auto-restart in
   * continuous mode).
   */
  _intentionalStop = false;
  /** Registered transcript callbacks. */
  transcriptCallbacks = /* @__PURE__ */ new Set();
  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------
  /**
   * @param options - All fields optional: `language` (default
   *   `DEFAULT_LANGUAGE3`), `continuous` (default true), `interimResults`
   *   (default true), `debug` (default false).
   */
  constructor(options = {}) {
    this.language = options.language ?? DEFAULT_LANGUAGE3;
    this.continuous = options.continuous ?? true;
    this.interimResultsEnabled = options.interimResults ?? true;
    this.debugEnabled = options.debug ?? false;
    this.log("WebSpeechSTT created", {
      language: this.language,
      continuous: this.continuous,
      interimResults: this.interimResultsEnabled
    });
  }
  // -------------------------------------------------------------------------
  // Static methods
  // -------------------------------------------------------------------------
  /**
   * Check whether the Web Speech API SpeechRecognition is supported in the
   * current environment. Safe to call in SSR (returns false).
   */
  static isSupported() {
    if (typeof window === "undefined") return false;
    return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
  }
  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------
  /** Whether recognition is currently active and connected. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Start speech recognition.
   *
   * Creates a SpeechRecognition instance and begins listening. Resolves
   * once the `start` event fires; rejects when an `error` event fires first,
   * when `start()` throws, or when the API is unsupported. Resolves without
   * doing anything when already connected or when `window` is undefined.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof window === "undefined") {
      this.log("SSR environment detected \u2014 cannot connect");
      return;
    }
    const SpeechRecognitionClass = this.resolveSpeechRecognition();
    if (!SpeechRecognitionClass) {
      throw new Error(
        "Web Speech API (SpeechRecognition) is not supported in this browser."
      );
    }
    const recognition = new SpeechRecognitionClass();
    this.recognition = recognition;
    recognition.lang = this.language;
    recognition.continuous = this.continuous;
    recognition.interimResults = this.interimResultsEnabled;
    recognition.maxAlternatives = 1;
    // A previous instance (e.g. one left behind by close()) keeps its
    // handlers, and its late events must not alter the state that now
    // belongs to this newer instance.
    const isCurrent = () => this.recognition === recognition;
    recognition.onstart = () => {
      if (!isCurrent()) return;
      this._connected = true;
      this._intentionalStop = false;
      this.log("Recognition started");
    };
    recognition.onresult = (event) => {
      this.handleResult(event);
    };
    recognition.onerror = (event) => {
      if (!isCurrent()) {
        this.log("Ignoring error from stale recognition instance:", event.error);
        return;
      }
      this.handleError(event);
    };
    recognition.onend = () => {
      if (!isCurrent()) {
        this.log("Ignoring end event from stale recognition instance");
        return;
      }
      this.log("Recognition ended");
      const wasConnected = this._connected;
      this._connected = false;
      if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
        this.log("Auto-restarting continuous recognition");
        try {
          recognition.start();
        } catch (err) {
          this.log("Failed to auto-restart recognition", err);
        }
      }
    };
    return new Promise((resolve, reject) => {
      const onStart = () => {
        cleanup();
        resolve();
      };
      const onError = (event) => {
        cleanup();
        reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
      };
      // Whichever event wins removes both one-shot listeners.
      const cleanup = () => {
        recognition.removeEventListener("start", onStart);
        recognition.removeEventListener("error", onError);
      };
      recognition.addEventListener("start", onStart, { once: true });
      recognition.addEventListener("error", onError, { once: true });
      try {
        recognition.start();
      } catch (err) {
        cleanup();
        reject(err);
      }
    });
  }
  /**
   * Send audio data. No-op for Web Speech API since it captures audio
   * directly from the microphone via the browser's internal pipeline.
   *
   * Provided for interface compatibility with WebSocket-based STT adapters
   * (DeepgramSTT, ElevenLabsSTT).
   */
  sendAudio(_audioData) {
  }
  /**
   * Register a callback to receive transcript events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onTranscript(callback) {
    this.transcriptCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.transcriptCallbacks.delete(callback);
    };
  }
  /**
   * Gracefully stop recognition.
   *
   * Calls `stop()` on the SpeechRecognition instance (its handlers stay
   * attached) and marks the adapter as disconnected.
   */
  close() {
    if (!this.recognition) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Closing recognition");
    this._intentionalStop = true;
    try {
      this.recognition.stop();
    } catch (err) {
      // Best effort: the adapter is marked disconnected regardless.
      this.log("stop() threw while closing", err);
    }
    this.cleanup();
  }
  /** Force-destroy the recognition without waiting for pending results. */
  destroy() {
    this.log("Destroying");
    this._intentionalStop = true;
    if (this.recognition) {
      try {
        this.recognition.abort();
      } catch (err) {
        this.log("abort() threw while destroying", err);
      }
      this.recognition.onresult = null;
      this.recognition.onerror = null;
      this.recognition.onend = null;
      this.recognition.onstart = null;
      this.recognition = null;
    }
    this.cleanup();
    this.transcriptCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Stops recognition and marks the adapter as suspended so that auto-restart
   * does not trigger.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this._intentionalStop = true;
    if (this.recognition && this._connected) {
      try {
        this.recognition.stop();
      } catch (err) {
        this.log("stop() threw while suspending", err);
      }
    }
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`. Restarts recognition when an instance
   * exists and is not currently connected.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this._intentionalStop = false;
    this.log("Resumed");
    if (this.recognition && !this._connected) {
      try {
        this.recognition.start();
      } catch {
        this.log("Failed to restart recognition after resume");
      }
    }
  }
  // -------------------------------------------------------------------------
  // Result handling
  // -------------------------------------------------------------------------
  /**
   * Handle SpeechRecognition result events.
   *
   * Only results from `event.resultIndex` onward are processed, so earlier
   * results in the accumulated list are not re-emitted. The first
   * alternative of each result is used; blank transcripts are skipped, and
   * a confidence that is not greater than 0 is reported as 0.85.
   */
  handleResult(event) {
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i];
      if (!result) continue;
      const alternative = result[0];
      if (!alternative) continue;
      const transcript = alternative.transcript;
      if (!transcript || transcript.trim() === "") continue;
      const isFinal = result.isFinal;
      const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
      const transcriptEvent = {
        text: transcript,
        isFinal,
        confidence,
        timestamp: Date.now()
      };
      this.log(
        isFinal ? "Final transcript:" : "Interim transcript:",
        transcript,
        `(${(confidence * 100).toFixed(1)}%)`
      );
      this.emitTranscript(transcriptEvent);
    }
  }
  // -------------------------------------------------------------------------
  // Error handling
  // -------------------------------------------------------------------------
  /**
   * Handle SpeechRecognition errors.
   *
   * `no-speech`, `aborted` and `network` are only logged, leaving any
   * restart to the `onend` handler. `not-allowed`, `service-not-allowed`
   * and `language-not-supported` set `_intentionalStop` so that handler
   * does not restart. Other error types are only logged.
   */
  handleError(event) {
    const errorType = event.error;
    this.log("Recognition error:", errorType, event.message);
    if (errorType === "no-speech" || errorType === "aborted") {
      this.log("Non-fatal error \u2014 will recover");
      return;
    }
    if (errorType === "network") {
      this.log("Network error \u2014 recognition may auto-restart");
      return;
    }
    if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
      this._intentionalStop = true;
      this.log("Fatal recognition error \u2014 stopping");
    }
  }
  // -------------------------------------------------------------------------
  // Subscriber notification
  // -------------------------------------------------------------------------
  /**
   * Emit a transcript event to all registered callbacks.
   *
   * Errors thrown by individual callbacks are caught and logged so one
   * misbehaving subscriber does not prevent others from receiving the event.
   */
  emitTranscript(event) {
    for (const cb of this.transcriptCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX9, "Transcript callback threw:", err);
      }
    }
  }
  // -------------------------------------------------------------------------
  // SpeechRecognition resolution
  // -------------------------------------------------------------------------
  /**
   * Resolve the SpeechRecognition constructor, with the webkit-prefixed
   * fallback. Returns null if not available.
   */
  resolveSpeechRecognition() {
    if (typeof window === "undefined") return null;
    const win = window;
    if (typeof win["SpeechRecognition"] !== "undefined") {
      return win["SpeechRecognition"];
    }
    if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
      return globalThis.webkitSpeechRecognition;
    }
    return null;
  }
  // -------------------------------------------------------------------------
  // Cleanup
  // -------------------------------------------------------------------------
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
  }
  // -------------------------------------------------------------------------
  // Logging
  // -------------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX9, ...args);
    }
  }
};
5155
+
5156
+ // src/voice/web-speech-tts.ts
5157
// Tag prepended to every console message written by the Web Speech TTS adapter.
var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
// Utterance rate and pitch used when the caller does not override them.
var DEFAULT_RATE = 1;
var DEFAULT_PITCH = 1;
// Utterance language used when the caller does not pass `language`.
var DEFAULT_LANGUAGE4 = "en-US";
5161
+ var WebSpeechTTS = class {
5162
+ // ---- Configuration -------------------------------------------------------
5163
+ voiceName;
5164
+ rate;
5165
+ pitch;
5166
+ language;
5167
+ debugEnabled;
5168
+ // ---- Internal state ------------------------------------------------------
5169
+ _connected = false;
5170
+ _suspended = false;
5171
+ /** Cached voice object resolved from voiceName. */
5172
+ _resolvedVoice = null;
5173
+ /** Whether voices have been loaded (they load async in some browsers). */
5174
+ _voicesLoaded = false;
5175
+ /** Registered audio-event callbacks. */
5176
+ audioCallbacks = /* @__PURE__ */ new Set();
5177
+ // -------------------------------------------------------------------------
5178
+ // Constructor
5179
+ // -------------------------------------------------------------------------
5180
+ constructor(options = {}) {
5181
+ this.voiceName = options.voice ?? null;
5182
+ this.rate = options.rate ?? DEFAULT_RATE;
5183
+ this.pitch = options.pitch ?? DEFAULT_PITCH;
5184
+ this.language = options.language ?? DEFAULT_LANGUAGE4;
5185
+ this.debugEnabled = options.debug ?? false;
5186
+ this.log("WebSpeechTTS created", {
5187
+ voice: this.voiceName,
5188
+ rate: this.rate,
5189
+ pitch: this.pitch,
5190
+ language: this.language
5191
+ });
5192
+ }
5193
+ // -------------------------------------------------------------------------
5194
+ // Static methods
5195
+ // -------------------------------------------------------------------------
5196
+ /**
5197
+ * Check whether the Web Speech API SpeechSynthesis is supported in the
5198
+ * current environment. Safe to call in SSR (returns false).
5199
+ */
5200
+ static isSupported() {
5201
+ if (typeof window === "undefined") return false;
5202
+ return typeof window.speechSynthesis !== "undefined";
5203
+ }
5204
+ // -------------------------------------------------------------------------
5205
+ // Public API
5206
+ // -------------------------------------------------------------------------
5207
+ /** Whether the adapter is connected (ready for speech). */
5208
+ get isConnected() {
5209
+ return this._connected;
5210
+ }
5211
+ /**
5212
+ * Initialize the adapter.
5213
+ *
5214
+ * Loads available voices and resolves the requested voice name. Voice
5215
+ * loading is async in some browsers (notably Chrome) so we wait for
5216
+ * the `voiceschanged` event if needed.
5217
+ */
5218
+ async connect() {
5219
+ if (this._connected) {
5220
+ this.log("Already connected \u2014 skipping");
5221
+ return;
5222
+ }
5223
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5224
+ this.log("SpeechSynthesis not available \u2014 cannot connect");
5225
+ return;
5226
+ }
5227
+ await this.loadVoices();
5228
+ if (this.voiceName) {
5229
+ this._resolvedVoice = this.findVoice(this.voiceName);
5230
+ if (this._resolvedVoice) {
5231
+ this.log("Resolved voice:", this._resolvedVoice.name);
5232
+ } else {
5233
+ this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
5234
+ }
5235
+ }
5236
+ this._connected = true;
5237
+ this.log("Connected");
5238
+ }
5239
+ /**
5240
+ * Speak the given text using the browser's speech synthesis engine.
5241
+ *
5242
+ * Returns nothing. Completion, cancellation, and synthesis errors are each
5243
+ * reported to `onAudio` subscribers as an event with `isFinal: true`.
5244
+ *
5245
+ * Also emits audio events to registered callbacks for VoicePipeline
5246
+ * compatibility.
5247
+ */
5248
+ speak(text) {
5249
+ if (!this._connected || this._suspended) {
5250
+ this.log("Cannot speak \u2014 not connected or suspended");
5251
+ return;
5252
+ }
5253
+ if (!text || !text.trim()) {
5254
+ return;
5255
+ }
5256
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5257
+ return;
5258
+ }
5259
+ const synth = window.speechSynthesis;
5260
+ const utterance = new SpeechSynthesisUtterance(text);
5261
+ utterance.lang = this.language;
5262
+ utterance.rate = this.rate;
5263
+ utterance.pitch = this.pitch;
5264
+ if (this._resolvedVoice) {
5265
+ utterance.voice = this._resolvedVoice;
5266
+ }
5267
+ utterance.onstart = () => {
5268
+ this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
5269
+ this.emitAudio({
5270
+ audio: new ArrayBuffer(0),
5271
+ isFinal: false,
5272
+ timestamp: Date.now()
5273
+ });
5274
+ };
5275
+ utterance.onend = () => {
5276
+ this.log("Utterance ended");
5277
+ this.emitAudio({
5278
+ audio: new ArrayBuffer(0),
5279
+ isFinal: true,
5280
+ timestamp: Date.now()
5281
+ });
5282
+ };
5283
+ utterance.onerror = (event) => {
5284
+ if (event.error === "canceled") {
5285
+ this.log("Utterance cancelled");
5286
+ this.emitAudio({
5287
+ audio: new ArrayBuffer(0),
5288
+ isFinal: true,
5289
+ timestamp: Date.now()
5290
+ });
5291
+ return;
5292
+ }
5293
+ this.log("Utterance error:", event.error);
5294
+ this.emitAudio({
5295
+ audio: new ArrayBuffer(0),
5296
+ isFinal: true,
5297
+ timestamp: Date.now()
5298
+ });
5299
+ };
5300
+ this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
5301
+ synth.speak(utterance);
5302
+ }
5303
+ /**
5304
+ * Flush / finalize the current utterance.
5305
+ *
5306
+ * No-op for Web Speech API since each speak() call is a complete
5307
+ * utterance. Provided for interface compatibility with ElevenLabsTTS.
5308
+ */
5309
+ flush() {
5310
+ }
5311
+ /**
5312
+ * Register a callback to receive audio output events.
5313
+ *
5314
+ * For Web Speech API, these events have empty audio buffers and are
5315
+ * used to signal utterance start/end for VoicePipeline state management.
5316
+ *
5317
+ * @returns An unsubscribe function. Calling it more than once is safe.
5318
+ */
5319
+ onAudio(callback) {
5320
+ this.audioCallbacks.add(callback);
5321
+ let removed = false;
5322
+ return () => {
5323
+ if (removed) return;
5324
+ removed = true;
5325
+ this.audioCallbacks.delete(callback);
5326
+ };
5327
+ }
5328
+ /** Stop current speech synthesis and cancel any queued utterances. */
5329
+ stop() {
5330
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5331
+ return;
5332
+ }
5333
+ this.log("Stopping speech synthesis");
5334
+ window.speechSynthesis.cancel();
5335
+ }
5336
+ /** Gracefully close the adapter. */
5337
+ close() {
5338
+ this.log("Closing");
5339
+ this.stop();
5340
+ this.cleanup();
5341
+ }
5342
+ /** Force-destroy the adapter. */
5343
+ destroy() {
5344
+ this.log("Destroying");
5345
+ this.stop();
5346
+ this.cleanup();
5347
+ this.audioCallbacks.clear();
5348
+ }
5349
+ /**
5350
+ * Suspend the adapter (e.g. when the device goes offline).
5351
+ *
5352
+ * Pauses any active speech synthesis and marks the adapter as suspended.
5353
+ */
5354
+ suspend() {
5355
+ if (this._suspended) return;
5356
+ this._suspended = true;
5357
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
5358
+ window.speechSynthesis.pause();
5359
+ }
5360
+ this.log("Suspended");
5361
+ }
5362
+ /**
5363
+ * Resume after a prior `suspend()`.
5364
+ */
5365
+ resume() {
5366
+ if (!this._suspended) return;
5367
+ this._suspended = false;
5368
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
5369
+ window.speechSynthesis.resume();
4422
5370
  }
4423
- if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4424
- this.log("Non-audio message received", parsed);
5371
+ this.log("Resumed");
5372
+ }
5373
+ // -------------------------------------------------------------------------
5374
+ // Voice loading
5375
+ // -------------------------------------------------------------------------
5376
+ /**
5377
+ * Load available voices from the browser.
5378
+ *
5379
+ * In Chrome and some other browsers, voices load asynchronously after
5380
+ * the page loads. We wait for the `voiceschanged` event with a timeout.
5381
+ */
5382
+ async loadVoices() {
5383
+ if (this._voicesLoaded) return;
5384
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
5385
+ const synth = window.speechSynthesis;
5386
+ let voices = synth.getVoices();
5387
+ if (voices.length > 0) {
5388
+ this._voicesLoaded = true;
5389
+ this.log("Voices loaded:", voices.length, "available");
4425
5390
  return;
4426
5391
  }
4427
- const audioBase64 = parsed["audio"];
4428
- const isFinal = parsed["isFinal"] === true;
4429
- if (!audioBase64 || audioBase64.length === 0) {
4430
- if (isFinal) {
4431
- this.emitAudio({
4432
- audio: new ArrayBuffer(0),
4433
- isFinal: true,
4434
- timestamp: Date.now()
4435
- });
4436
- }
4437
- return;
5392
+ await new Promise((resolve) => {
5393
+ const onVoicesChanged = () => {
5394
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
5395
+ clearTimeout(timeout);
5396
+ voices = synth.getVoices();
5397
+ this._voicesLoaded = true;
5398
+ this.log("Voices loaded (async):", voices.length, "available");
5399
+ resolve();
5400
+ };
5401
+ const timeout = setTimeout(() => {
5402
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
5403
+ this._voicesLoaded = true;
5404
+ this.log("Voices loading timed out \u2014 proceeding with defaults");
5405
+ resolve();
5406
+ }, 2e3);
5407
+ synth.addEventListener("voiceschanged", onVoicesChanged);
5408
+ });
5409
+ }
5410
+ /**
5411
+ * Find a voice by name, case-insensitively: exact match first, then partial match, then (for hyphenated or short names) a language-tag prefix match.
5412
+ */
5413
+ findVoice(name) {
5414
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5415
+ return null;
4438
5416
  }
4439
- let audioBuffer;
4440
- try {
4441
- audioBuffer = base64ToArrayBuffer(audioBase64);
4442
- } catch (err) {
4443
- this.log("Failed to decode base64 audio", err);
4444
- return;
5417
+ const voices = window.speechSynthesis.getVoices();
5418
+ const lowerName = name.toLowerCase();
5419
+ const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
5420
+ if (exact) return exact;
5421
+ const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
5422
+ if (partial) return partial;
5423
+ if (lowerName.includes("-") || lowerName.length <= 5) {
5424
+ const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
5425
+ if (langMatch) return langMatch;
4445
5426
  }
4446
- const audioEvent = {
4447
- audio: audioBuffer,
4448
- isFinal,
4449
- timestamp: Date.now()
4450
- };
4451
- this.log(
4452
- isFinal ? "Final audio chunk:" : "Audio chunk:",
4453
- `${audioBuffer.byteLength} bytes`
4454
- );
4455
- this.emitAudio(audioEvent);
5427
+ return null;
4456
5428
  }
4457
- // -----------------------------------------------------------------------
5429
+ // -------------------------------------------------------------------------
4458
5430
  // Subscriber notification
4459
- // -----------------------------------------------------------------------
5431
+ // -------------------------------------------------------------------------
4460
5432
  /**
4461
5433
  * Emit an audio event to all registered callbacks.
4462
5434
  *
@@ -4468,41 +5440,30 @@ var ElevenLabsTTS = class {
4468
5440
  try {
4469
5441
  cb(event);
4470
5442
  } catch (err) {
4471
- console.error(LOG_PREFIX7, "Audio callback threw:", err);
5443
+ console.error(LOG_PREFIX10, "Audio callback threw:", err);
4472
5444
  }
4473
5445
  }
4474
5446
  }
4475
- // -----------------------------------------------------------------------
4476
- // URL building
4477
- // -----------------------------------------------------------------------
4478
- /** Build the ElevenLabs streaming TTS endpoint URL. */
4479
- buildUrl() {
4480
- const params = new URLSearchParams({
4481
- model_id: this.modelId
4482
- });
4483
- return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4484
- }
4485
- // -----------------------------------------------------------------------
5447
+ // -------------------------------------------------------------------------
4486
5448
  // Cleanup
4487
- // -----------------------------------------------------------------------
4488
- /** Reset internal state after disconnection. */
5449
+ // -------------------------------------------------------------------------
5450
+ /** Reset internal state. */
4489
5451
  cleanup() {
4490
5452
  this._connected = false;
4491
- this.bosSent = false;
4492
5453
  }
4493
- // -----------------------------------------------------------------------
5454
+ // -------------------------------------------------------------------------
4494
5455
  // Logging
4495
- // -----------------------------------------------------------------------
5456
+ // -------------------------------------------------------------------------
4496
5457
  /** Conditional debug logging. */
4497
5458
  log(...args) {
4498
5459
  if (this.debugEnabled) {
4499
- console.debug(LOG_PREFIX7, ...args);
5460
+ console.debug(LOG_PREFIX10, ...args);
4500
5461
  }
4501
5462
  }
4502
5463
  };
4503
5464
 
4504
5465
  // src/voice/index.ts
4505
- var LOG_PREFIX8 = "[GuideKit:Voice]";
5466
+ var LOG_PREFIX11 = "[GuideKit:Voice]";
4506
5467
  var JITTER_BUFFER_MS = 150;
4507
5468
  var ECHO_WINDOW_MS = 3e3;
4508
5469
  var ECHO_OVERLAP_THRESHOLD = 0.6;
@@ -4610,17 +5571,42 @@ var VoicePipeline = class {
4610
5571
  cause: err instanceof Error ? err : void 0
4611
5572
  });
4612
5573
  }
4613
- this._stt = new DeepgramSTT({
4614
- apiKey: this._sttConfig.apiKey,
4615
- model: this._sttConfig.model,
4616
- debug: this._debug
4617
- });
4618
- this._tts = new ElevenLabsTTS({
4619
- apiKey: this._ttsConfig.apiKey,
4620
- voiceId: this._ttsConfig.voiceId,
4621
- modelId: this._ttsConfig.modelId,
4622
- debug: this._debug
4623
- });
5574
+ if (this._sttConfig.provider === "deepgram") {
5575
+ this._stt = new DeepgramSTT({
5576
+ apiKey: this._sttConfig.apiKey,
5577
+ model: this._sttConfig.model,
5578
+ debug: this._debug
5579
+ });
5580
+ } else if (this._sttConfig.provider === "elevenlabs") {
5581
+ this._stt = new ElevenLabsSTT({
5582
+ apiKey: this._sttConfig.apiKey,
5583
+ language: this._sttConfig.language,
5584
+ debug: this._debug
5585
+ });
5586
+ } else {
5587
+ this._stt = new WebSpeechSTT({
5588
+ language: this._sttConfig.language,
5589
+ continuous: this._sttConfig.continuous,
5590
+ interimResults: this._sttConfig.interimResults,
5591
+ debug: this._debug
5592
+ });
5593
+ }
5594
+ if (this._ttsConfig.provider === "elevenlabs") {
5595
+ this._tts = new ElevenLabsTTS({
5596
+ apiKey: this._ttsConfig.apiKey,
5597
+ voiceId: this._ttsConfig.voiceId,
5598
+ modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
5599
+ debug: this._debug
5600
+ });
5601
+ } else {
5602
+ this._tts = new WebSpeechTTS({
5603
+ voice: this._ttsConfig.voice,
5604
+ rate: this._ttsConfig.rate,
5605
+ pitch: this._ttsConfig.pitch,
5606
+ language: this._ttsConfig.language,
5607
+ debug: this._debug
5608
+ });
5609
+ }
4624
5610
  this._log("Initialization complete");
4625
5611
  }
4626
5612
  // ────────────────────────────────────────────────────────────────────
@@ -4760,10 +5746,11 @@ var VoicePipeline = class {
4760
5746
  // ────────────────────────────────────────────────────────────────────
4761
5747
  // speak()
4762
5748
  // ────────────────────────────────────────────────────────────────────
4763
- /** Speak text via ElevenLabs TTS. */
5749
+ /** Speak text via TTS (ElevenLabs or Web Speech API). */
4764
5750
  async speak(text) {
4765
5751
  if (this._destroyed || !text.trim()) return;
4766
- if (!this._tts || !this._audioContext) {
5752
+ const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
5753
+ if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
4767
5754
  this._log("TTS or AudioContext not available \u2014 cannot speak");
4768
5755
  this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
4769
5756
  this._setState("idle");
@@ -4807,11 +5794,24 @@ var VoicePipeline = class {
4807
5794
  }
4808
5795
  resolve();
4809
5796
  };
4810
- this._unsubTTSAudio = this._tts.onAudio((event) => {
4811
- this._handleTTSAudio(event, done);
4812
- });
4813
- this._tts.speak(text);
4814
- this._tts.flush();
5797
+ if (isWebSpeechTTS) {
5798
+ this._unsubTTSAudio = this._tts.onAudio(
5799
+ (event) => {
5800
+ if (event.isFinal) {
5801
+ done();
5802
+ }
5803
+ }
5804
+ );
5805
+ this._tts.speak(text);
5806
+ } else {
5807
+ this._unsubTTSAudio = this._tts.onAudio(
5808
+ (event) => {
5809
+ this._handleTTSAudio(event, done);
5810
+ }
5811
+ );
5812
+ this._tts.speak(text);
5813
+ this._tts.flush();
5814
+ }
4815
5815
  });
4816
5816
  }
4817
5817
  // ────────────────────────────────────────────────────────────────────
@@ -4840,7 +5840,9 @@ var VoicePipeline = class {
4840
5840
  this._pendingLLMAbort.abort();
4841
5841
  this._pendingLLMAbort = null;
4842
5842
  }
4843
- if (this._tts?.isConnected) {
5843
+ if (this._tts instanceof WebSpeechTTS) {
5844
+ this._tts.stop();
5845
+ } else if (this._tts?.isConnected) {
4844
5846
  this._tts.close();
4845
5847
  }
4846
5848
  }
@@ -4929,7 +5931,7 @@ var VoicePipeline = class {
4929
5931
  try {
4930
5932
  cb(next, prev);
4931
5933
  } catch (err) {
4932
- console.error(LOG_PREFIX8, "State change callback threw:", err);
5934
+ console.error(LOG_PREFIX11, "State change callback threw:", err);
4933
5935
  }
4934
5936
  }
4935
5937
  }
@@ -5060,7 +6062,7 @@ var VoicePipeline = class {
5060
6062
  try {
5061
6063
  cb(text, isFinal);
5062
6064
  } catch (err) {
5063
- console.error(LOG_PREFIX8, "Transcript callback threw:", err);
6065
+ console.error(LOG_PREFIX11, "Transcript callback threw:", err);
5064
6066
  }
5065
6067
  }
5066
6068
  if (isFinal && this._state === "listening") {
@@ -5163,8 +6165,14 @@ var VoicePipeline = class {
5163
6165
  * sequential playback via AudioBufferSourceNode.
5164
6166
  */
5165
6167
  _decodeAndSchedule(audioData, onDone) {
6168
+ let onDoneCalled = false;
6169
+ const safeOnDone = onDone ? () => {
6170
+ if (onDoneCalled) return;
6171
+ onDoneCalled = true;
6172
+ onDone();
6173
+ } : void 0;
5166
6174
  if (!this._audioContext || this._state !== "speaking") {
5167
- onDone?.();
6175
+ safeOnDone?.();
5168
6176
  return;
5169
6177
  }
5170
6178
  const ctx = this._audioContext;
@@ -5173,7 +6181,7 @@ var VoicePipeline = class {
5173
6181
  copy,
5174
6182
  (decodedBuffer) => {
5175
6183
  if (this._state !== "speaking" || !this._audioContext) {
5176
- onDone?.();
6184
+ safeOnDone?.();
5177
6185
  return;
5178
6186
  }
5179
6187
  const source = ctx.createBufferSource();
@@ -5186,8 +6194,8 @@ var VoicePipeline = class {
5186
6194
  if (this._lastScheduledSource === source) {
5187
6195
  this._lastScheduledSource = null;
5188
6196
  }
5189
- if (onDone) {
5190
- onDone();
6197
+ if (safeOnDone) {
6198
+ safeOnDone();
5191
6199
  }
5192
6200
  };
5193
6201
  const now = ctx.currentTime;
@@ -5203,7 +6211,7 @@ var VoicePipeline = class {
5203
6211
  },
5204
6212
  (err) => {
5205
6213
  this._log("Failed to decode audio chunk:", err);
5206
- onDone?.();
6214
+ safeOnDone?.();
5207
6215
  }
5208
6216
  );
5209
6217
  }
@@ -5262,13 +6270,13 @@ var VoicePipeline = class {
5262
6270
  // ════════════════════════════════════════════════════════════════════
5263
6271
  _log(...args) {
5264
6272
  if (this._debug) {
5265
- console.debug(LOG_PREFIX8, ...args);
6273
+ console.debug(LOG_PREFIX11, ...args);
5266
6274
  }
5267
6275
  }
5268
6276
  };
5269
6277
 
5270
6278
  // src/visual/index.ts
5271
- var LOG_PREFIX9 = "[GuideKit:Visual]";
6279
+ var LOG_PREFIX12 = "[GuideKit:Visual]";
5272
6280
  var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
5273
6281
  var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
5274
6282
  var DEFAULT_ANIMATION_DURATION = 300;
@@ -6185,16 +7193,16 @@ var VisualGuidance = class {
6185
7193
  if (!this.debug) return;
6186
7194
  if (typeof console !== "undefined") {
6187
7195
  if (data) {
6188
- console.log(`${LOG_PREFIX9} ${message}`, data);
7196
+ console.log(`${LOG_PREFIX12} ${message}`, data);
6189
7197
  } else {
6190
- console.log(`${LOG_PREFIX9} ${message}`);
7198
+ console.log(`${LOG_PREFIX12} ${message}`);
6191
7199
  }
6192
7200
  }
6193
7201
  }
6194
7202
  };
6195
7203
 
6196
7204
  // src/awareness/index.ts
6197
- var LOG_PREFIX10 = "[GuideKit:Awareness]";
7205
+ var LOG_PREFIX13 = "[GuideKit:Awareness]";
6198
7206
  var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
6199
7207
  var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
6200
7208
  var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
@@ -6556,13 +7564,13 @@ var AwarenessSystem = class {
6556
7564
  /** Conditional debug logging. */
6557
7565
  log(...args) {
6558
7566
  if (this.debugEnabled) {
6559
- console.debug(LOG_PREFIX10, ...args);
7567
+ console.debug(LOG_PREFIX13, ...args);
6560
7568
  }
6561
7569
  }
6562
7570
  };
6563
7571
 
6564
7572
  // src/awareness/proactive.ts
6565
- var LOG_PREFIX11 = "[GuideKit:Proactive]";
7573
+ var LOG_PREFIX14 = "[GuideKit:Proactive]";
6566
7574
  var STORAGE_KEY = "guidekit:visited";
6567
7575
  var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
6568
7576
  var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
@@ -6600,7 +7608,7 @@ var ProactiveTriggerEngine = class {
6600
7608
  set quietMode(value) {
6601
7609
  this._quietMode = value;
6602
7610
  if (this.debug) {
6603
- console.debug(LOG_PREFIX11, `Quiet mode ${value ? "enabled" : "disabled"}`);
7611
+ console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
6604
7612
  }
6605
7613
  }
6606
7614
  // ---- Lifecycle -----------------------------------------------------------
@@ -6630,7 +7638,7 @@ var ProactiveTriggerEngine = class {
6630
7638
  })
6631
7639
  );
6632
7640
  if (this.debug) {
6633
- console.debug(LOG_PREFIX11, "Started \u2014 subscribed to awareness & dom events");
7641
+ console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
6634
7642
  }
6635
7643
  }
6636
7644
  /** Unsubscribe all bus listeners and clear internal state. */
@@ -6645,7 +7653,7 @@ var ProactiveTriggerEngine = class {
6645
7653
  this.formTimers.clear();
6646
7654
  this.started = false;
6647
7655
  if (this.debug) {
6648
- console.debug(LOG_PREFIX11, "Stopped \u2014 all listeners removed");
7656
+ console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
6649
7657
  }
6650
7658
  }
6651
7659
  /** Alias for {@link stop}. */
@@ -6680,7 +7688,7 @@ var ProactiveTriggerEngine = class {
6680
7688
  }, FORM_ABANDON_MS);
6681
7689
  this.formTimers.set(formSelector, timer);
6682
7690
  if (this.debug) {
6683
- console.debug(LOG_PREFIX11, `Form interaction started: ${formSelector}`);
7691
+ console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
6684
7692
  }
6685
7693
  }
6686
7694
  /** Reset all cooldowns and internal tracking state (useful for testing). */
@@ -6694,7 +7702,7 @@ var ProactiveTriggerEngine = class {
6694
7702
  }
6695
7703
  this.formTimers.clear();
6696
7704
  if (this.debug) {
6697
- console.debug(LOG_PREFIX11, "All cooldowns and state reset");
7705
+ console.debug(LOG_PREFIX14, "All cooldowns and state reset");
6698
7706
  }
6699
7707
  }
6700
7708
  // ---- Internal handlers ---------------------------------------------------
@@ -6711,22 +7719,23 @@ var ProactiveTriggerEngine = class {
6711
7719
  message: "First-time visitor detected. Show a visual greeting (no audio)."
6712
7720
  }, "greeting");
6713
7721
  if (this.debug) {
6714
- console.debug(LOG_PREFIX11, "First visit \u2014 greeting triggered");
7722
+ console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
6715
7723
  }
6716
7724
  return;
6717
7725
  }
6718
7726
  const visitedAt = parseInt(visited, 10);
6719
- if (!Number.isNaN(visitedAt)) {
6720
- const elapsed = Date.now() - visitedAt;
6721
- if (elapsed <= SEVEN_DAYS_MS && this.debug) {
6722
- console.debug(LOG_PREFIX11, "Return visitor within 7 days \u2014 silent");
6723
- } else if (this.debug) {
6724
- console.debug(LOG_PREFIX11, "Return visitor after 7 days");
6725
- }
7727
+ if (Number.isNaN(visitedAt)) {
7728
+ return;
7729
+ }
7730
+ const elapsed = Date.now() - visitedAt;
7731
+ if (elapsed <= SEVEN_DAYS_MS && this.debug) {
7732
+ console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
7733
+ } else if (this.debug) {
7734
+ console.debug(LOG_PREFIX14, "Return visitor after 7 days");
6726
7735
  }
6727
7736
  } catch {
6728
7737
  if (this.debug) {
6729
- console.warn(LOG_PREFIX11, "localStorage unavailable \u2014 skipping greeting check");
7738
+ console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
6730
7739
  }
6731
7740
  }
6732
7741
  }
@@ -6744,7 +7753,7 @@ var ProactiveTriggerEngine = class {
6744
7753
  const count = this.dwellCounts.get(sectionId) ?? 0;
6745
7754
  if (count >= DWELL_COOLDOWNS.length + 1) {
6746
7755
  if (this.debug) {
6747
- console.debug(LOG_PREFIX11, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
7756
+ console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
6748
7757
  }
6749
7758
  return;
6750
7759
  }
@@ -6754,7 +7763,7 @@ var ProactiveTriggerEngine = class {
6754
7763
  const lastFired = this.cooldowns.get(key) ?? 0;
6755
7764
  if (Date.now() - lastFired < cooldownMs) {
6756
7765
  if (this.debug) {
6757
- console.debug(LOG_PREFIX11, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
7766
+ console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
6758
7767
  }
6759
7768
  return;
6760
7769
  }
@@ -6770,7 +7779,7 @@ var ProactiveTriggerEngine = class {
6770
7779
  const sectionKey = selector;
6771
7780
  if (this.frustrationFired.has(sectionKey)) {
6772
7781
  if (this.debug) {
6773
- console.debug(LOG_PREFIX11, `Frustration already fired for "${selector}" \u2014 suppressed`);
7782
+ console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
6774
7783
  }
6775
7784
  return;
6776
7785
  }
@@ -6786,7 +7795,7 @@ var ProactiveTriggerEngine = class {
6786
7795
  const key = "navigation-commentary";
6787
7796
  if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
6788
7797
  if (this.debug) {
6789
- console.debug(LOG_PREFIX11, "Navigation cooldown active \u2014 suppressed");
7798
+ console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
6790
7799
  }
6791
7800
  return;
6792
7801
  }
@@ -6809,7 +7818,7 @@ var ProactiveTriggerEngine = class {
6809
7818
  fireTrigger(partial, cooldownKey) {
6810
7819
  if (this._quietMode) {
6811
7820
  if (this.debug) {
6812
- console.debug(LOG_PREFIX11, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
7821
+ console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
6813
7822
  }
6814
7823
  return;
6815
7824
  }
@@ -6819,13 +7828,13 @@ var ProactiveTriggerEngine = class {
6819
7828
  };
6820
7829
  this.cooldowns.set(cooldownKey, trigger.timestamp);
6821
7830
  if (this.debug) {
6822
- console.debug(LOG_PREFIX11, "Trigger fired:", trigger.type, trigger);
7831
+ console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
6823
7832
  }
6824
7833
  if (this.onTrigger) {
6825
7834
  try {
6826
7835
  this.onTrigger(trigger);
6827
7836
  } catch (err) {
6828
- console.error(LOG_PREFIX11, "onTrigger callback error:", err);
7837
+ console.error(LOG_PREFIX14, "onTrigger callback error:", err);
6829
7838
  }
6830
7839
  }
6831
7840
  }
@@ -6838,7 +7847,7 @@ var ProactiveTriggerEngine = class {
6838
7847
  };
6839
7848
 
6840
7849
  // src/llm/rate-limiter.ts
6841
- var LOG_PREFIX12 = "[GuideKit:RateLimiter]";
7850
+ var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
6842
7851
  var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
6843
7852
  var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
6844
7853
  var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
@@ -6933,7 +7942,19 @@ var RateLimiter = class {
6933
7942
  get sttMinutesUsed() {
6934
7943
  let totalMs = this.sttMs;
6935
7944
  if (this.sttStartedAt !== null) {
6936
- totalMs += Date.now() - this.sttStartedAt;
7945
+ const activeMs = Date.now() - this.sttStartedAt;
7946
+ const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
7947
+ const maxActiveMs = maxSessionMs * 2;
7948
+ if (activeMs > maxActiveMs) {
7949
+ console.warn(
7950
+ `${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
7951
+ );
7952
+ this.sttMs += maxActiveMs;
7953
+ this.sttStartedAt = null;
7954
+ totalMs = this.sttMs;
7955
+ } else {
7956
+ totalMs += activeMs;
7957
+ }
6937
7958
  }
6938
7959
  return totalMs / 6e4;
6939
7960
  }
@@ -7005,7 +8026,7 @@ var RateLimiter = class {
7005
8026
  }
7006
8027
  log(...args) {
7007
8028
  if (this.debug) {
7008
- console.debug(LOG_PREFIX12, ...args);
8029
+ console.debug(LOG_PREFIX15, ...args);
7009
8030
  }
7010
8031
  }
7011
8032
  };
@@ -7238,7 +8259,7 @@ var BUILTIN_LOCALES = {
7238
8259
  pt
7239
8260
  };
7240
8261
  var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
7241
- var LOG_PREFIX13 = "[GuideKit:I18n]";
8262
+ var LOG_PREFIX16 = "[GuideKit:I18n]";
7242
8263
  function isSupportedLocale(code) {
7243
8264
  return SUPPORTED_LOCALE_CODES.has(code);
7244
8265
  }
@@ -7276,7 +8297,7 @@ var I18n = class {
7276
8297
  this.strings = strings;
7277
8298
  this.resolvedLocale = resolvedLocale;
7278
8299
  if (this.debug) {
7279
- console.debug(`${LOG_PREFIX13} Initialized with locale "${this.resolvedLocale}"`);
8300
+ console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
7280
8301
  }
7281
8302
  }
7282
8303
  // -------------------------------------------------------------------------
@@ -7287,9 +8308,9 @@ var I18n = class {
7287
8308
  const value = this.strings[key];
7288
8309
  if (value === void 0) {
7289
8310
  if (this.debug) {
7290
- console.warn(`${LOG_PREFIX13} Missing translation key "${key}"`);
8311
+ console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
7291
8312
  }
7292
- return en[key] ?? key;
8313
+ return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
7293
8314
  }
7294
8315
  return value;
7295
8316
  }
@@ -7303,7 +8324,7 @@ var I18n = class {
7303
8324
  this.strings = strings;
7304
8325
  this.resolvedLocale = resolvedLocale;
7305
8326
  if (this.debug) {
7306
- console.debug(`${LOG_PREFIX13} Locale changed to "${this.resolvedLocale}"`);
8327
+ console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
7307
8328
  }
7308
8329
  }
7309
8330
  /** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
@@ -7323,7 +8344,7 @@ var I18n = class {
7323
8344
  if (locale === "auto") {
7324
8345
  const detected = detectLocaleFromDocument();
7325
8346
  if (this.debug) {
7326
- console.debug(`${LOG_PREFIX13} Auto-detected locale "${detected}"`);
8347
+ console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
7327
8348
  }
7328
8349
  return {
7329
8350
  strings: BUILTIN_LOCALES[detected],
@@ -7338,7 +8359,7 @@ var I18n = class {
7338
8359
  }
7339
8360
  if (this.debug) {
7340
8361
  console.warn(
7341
- `${LOG_PREFIX13} Unknown locale "${String(locale)}", falling back to "en"`
8362
+ `${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
7342
8363
  );
7343
8364
  }
7344
8365
  return {
@@ -7349,7 +8370,7 @@ var I18n = class {
7349
8370
  };
7350
8371
 
7351
8372
  // src/auth/token-manager.ts
7352
- var LOG_PREFIX14 = "[GuideKit:Auth]";
8373
+ var LOG_PREFIX17 = "[GuideKit:Auth]";
7353
8374
  var REFRESH_THRESHOLD = 0.8;
7354
8375
  var MAX_RETRY_ATTEMPTS = 3;
7355
8376
  var RETRY_BASE_MS = 1e3;
@@ -7628,7 +8649,7 @@ var TokenManager = class {
7628
8649
  }
7629
8650
  log(message) {
7630
8651
  if (this.debug) {
7631
- console.debug(`${LOG_PREFIX14} ${message}`);
8652
+ console.debug(`${LOG_PREFIX17} ${message}`);
7632
8653
  }
7633
8654
  }
7634
8655
  };
@@ -7760,6 +8781,11 @@ var GuideKitCore = class {
7760
8781
  debug: this._debug
7761
8782
  });
7762
8783
  await this.tokenManager.start();
8784
+ if (!this._options.llm) {
8785
+ console.warn(
8786
+ "[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
8787
+ );
8788
+ }
7763
8789
  this.resourceManager.register({
7764
8790
  name: "token-manager",
7765
8791
  cleanup: () => this.tokenManager?.destroy()
@@ -7882,21 +8908,50 @@ var GuideKitCore = class {
7882
8908
  }
7883
8909
  });
7884
8910
  this.registerBuiltinTools();
7885
- if (this._options.stt && this._options.tts) {
7886
- const sttConfig = this._options.stt;
7887
- const ttsConfig = this._options.tts;
7888
- if (sttConfig.provider === "deepgram" && ttsConfig.provider === "elevenlabs") {
8911
+ {
8912
+ const sttConfig = this._options.stt ?? { provider: "web-speech" };
8913
+ const ttsConfig = this._options.tts ?? { provider: "web-speech" };
8914
+ let voiceSttConfig;
8915
+ let voiceTtsConfig;
8916
+ if (sttConfig.provider === "deepgram") {
8917
+ voiceSttConfig = {
8918
+ provider: "deepgram",
8919
+ apiKey: sttConfig.apiKey,
8920
+ model: sttConfig.model
8921
+ };
8922
+ } else if (sttConfig.provider === "elevenlabs") {
8923
+ voiceSttConfig = {
8924
+ provider: "elevenlabs",
8925
+ apiKey: sttConfig.apiKey,
8926
+ language: sttConfig.language
8927
+ };
8928
+ } else {
8929
+ voiceSttConfig = {
8930
+ provider: "web-speech",
8931
+ language: sttConfig.language,
8932
+ continuous: sttConfig.continuous,
8933
+ interimResults: sttConfig.interimResults
8934
+ };
8935
+ }
8936
+ if (ttsConfig.provider === "elevenlabs") {
8937
+ voiceTtsConfig = {
8938
+ provider: "elevenlabs",
8939
+ apiKey: ttsConfig.apiKey,
8940
+ voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
8941
+ };
8942
+ } else {
8943
+ voiceTtsConfig = {
8944
+ provider: "web-speech",
8945
+ voice: ttsConfig.voice,
8946
+ rate: ttsConfig.rate,
8947
+ pitch: ttsConfig.pitch,
8948
+ language: ttsConfig.language
8949
+ };
8950
+ }
8951
+ try {
7889
8952
  this.voicePipeline = new VoicePipeline({
7890
- sttConfig: {
7891
- provider: "deepgram",
7892
- apiKey: sttConfig.apiKey,
7893
- model: "model" in sttConfig ? sttConfig.model : void 0
7894
- },
7895
- ttsConfig: {
7896
- provider: "elevenlabs",
7897
- apiKey: ttsConfig.apiKey,
7898
- voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
7899
- },
8953
+ sttConfig: voiceSttConfig,
8954
+ ttsConfig: voiceTtsConfig,
7900
8955
  debug: this._debug
7901
8956
  });
7902
8957
  this.voicePipeline.onStateChange((state, previous) => {
@@ -7929,6 +8984,11 @@ var GuideKitCore = class {
7929
8984
  name: "voice-pipeline",
7930
8985
  cleanup: () => this.voicePipeline?.destroy()
7931
8986
  });
8987
+ } catch (_err) {
8988
+ this.voicePipeline = null;
8989
+ if (this._debug) {
8990
+ console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
8991
+ }
7932
8992
  }
7933
8993
  }
7934
8994
  const session = this.contextManager.restoreSession();
@@ -8053,7 +9113,7 @@ var GuideKitCore = class {
8053
9113
  return responseText;
8054
9114
  } catch (error) {
8055
9115
  const err = error instanceof GuideKitError ? error : new GuideKitError({
8056
- code: "UNKNOWN",
9116
+ code: ErrorCodes.UNKNOWN,
8057
9117
  message: error instanceof Error ? error.message : "Unknown error",
8058
9118
  recoverable: false,
8059
9119
  suggestion: "Check the console for details."
@@ -8309,172 +9369,11 @@ var GuideKitCore = class {
8309
9369
  };
8310
9370
  }
8311
9371
  /**
8312
- * Register all built-in tool handlers with the ToolExecutor.
8313
- * Called once during init() after VisualGuidance and all subsystems are ready.
9372
+ * Unified built-in tool specifications — single source of truth for both
9373
+ * tool definitions (sent to LLM) and handler registration.
8314
9374
  */
8315
- registerBuiltinTools() {
8316
- if (!this.toolExecutor) return;
8317
- this.toolExecutor.registerTool({
8318
- name: "highlight",
8319
- execute: async (args) => {
8320
- const sectionId = args.sectionId;
8321
- const selector = args.selector;
8322
- const tooltip = args.tooltip;
8323
- const position = args.position;
8324
- const result = this.highlight({ sectionId, selector, tooltip, position });
8325
- return { success: result };
8326
- }
8327
- });
8328
- this.toolExecutor.registerTool({
8329
- name: "dismissHighlight",
8330
- execute: async () => {
8331
- this.dismissHighlight();
8332
- return { success: true };
8333
- }
8334
- });
8335
- this.toolExecutor.registerTool({
8336
- name: "scrollToSection",
8337
- execute: async (args) => {
8338
- const sectionId = args.sectionId;
8339
- const offset = args.offset;
8340
- this.scrollToSection(sectionId, offset);
8341
- return { success: true };
8342
- }
8343
- });
8344
- this.toolExecutor.registerTool({
8345
- name: "navigate",
8346
- execute: async (args) => {
8347
- const href = args.href;
8348
- const result = await this.navigate(href);
8349
- return { success: result, navigatedTo: result ? href : null };
8350
- }
8351
- });
8352
- this.toolExecutor.registerTool({
8353
- name: "startTour",
8354
- execute: async (args) => {
8355
- const sectionIds = args.sectionIds;
8356
- const mode = args.mode ?? "manual";
8357
- this.startTour(sectionIds, mode);
8358
- return { success: true, steps: sectionIds.length };
8359
- }
8360
- });
8361
- this.toolExecutor.registerTool({
8362
- name: "readPageContent",
8363
- execute: async (args) => {
8364
- const sectionId = args.sectionId;
8365
- const query = args.query;
8366
- const model = this._currentPageModel;
8367
- if (!model) return { error: "No page model available" };
8368
- if (sectionId) {
8369
- const section = model.sections.find((s) => s.id === sectionId);
8370
- if (section) {
8371
- const contentMapResult = await this.contextManager.getContent(sectionId);
8372
- return {
8373
- sectionId: section.id,
8374
- label: section.label,
8375
- summary: section.summary,
8376
- contentMap: contentMapResult
8377
- };
8378
- }
8379
- return { error: `Section "${sectionId}" not found` };
8380
- }
8381
- if (query) {
8382
- const queryLower = query.toLowerCase();
8383
- const matches = model.sections.filter(
8384
- (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
8385
- );
8386
- return {
8387
- query,
8388
- results: matches.slice(0, 5).map((s) => ({
8389
- sectionId: s.id,
8390
- label: s.label,
8391
- snippet: s.summary?.slice(0, 200)
8392
- }))
8393
- };
8394
- }
8395
- return { error: "Provide either sectionId or query" };
8396
- }
8397
- });
8398
- this.toolExecutor.registerTool({
8399
- name: "getVisibleSections",
8400
- execute: async () => {
8401
- const model = this._currentPageModel;
8402
- if (!model) return { sections: [] };
8403
- return {
8404
- sections: model.sections.slice(0, 10).map((s) => ({
8405
- id: s.id,
8406
- label: s.label,
8407
- selector: s.selector,
8408
- score: s.score
8409
- }))
8410
- };
8411
- }
8412
- });
8413
- this.toolExecutor.registerTool({
8414
- name: "clickElement",
8415
- execute: async (args) => {
8416
- if (typeof document === "undefined") return { success: false, error: "Not in browser" };
8417
- const selector = args.selector;
8418
- const el = document.querySelector(selector);
8419
- if (!el) return { success: false, error: `Element not found: ${selector}` };
8420
- if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
8421
- const clickableRules = this._options.options?.clickableSelectors;
8422
- const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
8423
- try {
8424
- return el.matches(pattern);
8425
- } catch {
8426
- return selector === pattern;
8427
- }
8428
- }) ?? false;
8429
- if (!isInDevAllowList) {
8430
- const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
8431
- try {
8432
- return el.matches(pattern);
8433
- } catch {
8434
- return false;
8435
- }
8436
- });
8437
- if (defaultDenied) {
8438
- return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
8439
- }
8440
- }
8441
- if (clickableRules?.deny?.length) {
8442
- const denied = clickableRules.deny.some((pattern) => {
8443
- try {
8444
- return el.matches(pattern);
8445
- } catch {
8446
- return selector === pattern;
8447
- }
8448
- });
8449
- if (denied) {
8450
- return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
8451
- }
8452
- }
8453
- if (clickableRules?.allow?.length && !isInDevAllowList) {
8454
- return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
8455
- }
8456
- el.click();
8457
- return { success: true };
8458
- }
8459
- });
8460
- this.toolExecutor.registerTool({
8461
- name: "executeCustomAction",
8462
- execute: async (args) => {
8463
- const actionId = args.actionId;
8464
- const params = args.params ?? {};
8465
- const action = this.customActions.get(actionId);
8466
- if (!action) return { error: `Unknown action: ${actionId}` };
8467
- try {
8468
- const result = await action.handler(params);
8469
- return { success: true, result };
8470
- } catch (err) {
8471
- return { success: false, error: err instanceof Error ? err.message : String(err) };
8472
- }
8473
- }
8474
- });
8475
- }
8476
- getToolDefinitions() {
8477
- const builtinTools = [
9375
+ getBuiltinToolSpecs() {
9376
+ return [
8478
9377
  {
8479
9378
  name: "highlight",
8480
9379
  description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
@@ -8484,13 +9383,27 @@ var GuideKitCore = class {
8484
9383
  tooltip: { type: "string", description: "Text to show in tooltip" },
8485
9384
  position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
8486
9385
  },
8487
- schemaVersion: 1
9386
+ required: [],
9387
+ schemaVersion: 1,
9388
+ execute: async (args) => {
9389
+ const sectionId = args.sectionId;
9390
+ const selector = args.selector;
9391
+ const tooltip = args.tooltip;
9392
+ const position = args.position;
9393
+ const result = this.highlight({ sectionId, selector, tooltip, position });
9394
+ return { success: result };
9395
+ }
8488
9396
  },
8489
9397
  {
8490
9398
  name: "dismissHighlight",
8491
9399
  description: "Remove the current spotlight overlay.",
8492
9400
  parameters: {},
8493
- schemaVersion: 1
9401
+ required: [],
9402
+ schemaVersion: 1,
9403
+ execute: async () => {
9404
+ this.dismissHighlight();
9405
+ return { success: true };
9406
+ }
8494
9407
  },
8495
9408
  {
8496
9409
  name: "scrollToSection",
@@ -8499,7 +9412,14 @@ var GuideKitCore = class {
8499
9412
  sectionId: { type: "string", description: "ID of the section to scroll to" },
8500
9413
  offset: { type: "number", description: "Pixel offset for sticky headers" }
8501
9414
  },
8502
- schemaVersion: 1
9415
+ required: ["sectionId"],
9416
+ schemaVersion: 1,
9417
+ execute: async (args) => {
9418
+ const sectionId = args.sectionId;
9419
+ const offset = args.offset;
9420
+ this.scrollToSection(sectionId, offset);
9421
+ return { success: true };
9422
+ }
8503
9423
  },
8504
9424
  {
8505
9425
  name: "navigate",
@@ -8507,7 +9427,13 @@ var GuideKitCore = class {
8507
9427
  parameters: {
8508
9428
  href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
8509
9429
  },
8510
- schemaVersion: 1
9430
+ required: ["href"],
9431
+ schemaVersion: 1,
9432
+ execute: async (args) => {
9433
+ const href = args.href;
9434
+ const result = await this.navigate(href);
9435
+ return { success: result, navigatedTo: result ? href : null };
9436
+ }
8511
9437
  },
8512
9438
  {
8513
9439
  name: "startTour",
@@ -8516,7 +9442,14 @@ var GuideKitCore = class {
8516
9442
  sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
8517
9443
  mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
8518
9444
  },
8519
- schemaVersion: 1
9445
+ required: ["sectionIds"],
9446
+ schemaVersion: 1,
9447
+ execute: async (args) => {
9448
+ const sectionIds = args.sectionIds;
9449
+ const mode = args.mode ?? "manual";
9450
+ this.startTour(sectionIds, mode);
9451
+ return { success: true, steps: sectionIds.length };
9452
+ }
8520
9453
  },
8521
9454
  {
8522
9455
  name: "readPageContent",
@@ -8525,13 +9458,61 @@ var GuideKitCore = class {
8525
9458
  sectionId: { type: "string", description: "Section ID to read" },
8526
9459
  query: { type: "string", description: "Keyword to search for across sections" }
8527
9460
  },
8528
- schemaVersion: 1
9461
+ required: [],
9462
+ schemaVersion: 1,
9463
+ execute: async (args) => {
9464
+ const sectionId = args.sectionId;
9465
+ const query = args.query;
9466
+ const model = this._currentPageModel;
9467
+ if (!model) return { error: "No page model available" };
9468
+ if (sectionId) {
9469
+ const section = model.sections.find((s) => s.id === sectionId);
9470
+ if (section) {
9471
+ const contentMapResult = await this.contextManager.getContent(sectionId);
9472
+ return {
9473
+ sectionId: section.id,
9474
+ label: section.label,
9475
+ summary: section.summary,
9476
+ contentMap: contentMapResult
9477
+ };
9478
+ }
9479
+ return { error: `Section "${sectionId}" not found` };
9480
+ }
9481
+ if (query) {
9482
+ const queryLower = query.toLowerCase();
9483
+ const matches = model.sections.filter(
9484
+ (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
9485
+ );
9486
+ return {
9487
+ query,
9488
+ results: matches.slice(0, 5).map((s) => ({
9489
+ sectionId: s.id,
9490
+ label: s.label,
9491
+ snippet: s.summary?.slice(0, 200)
9492
+ }))
9493
+ };
9494
+ }
9495
+ return { error: "Provide either sectionId or query" };
9496
+ }
8529
9497
  },
8530
9498
  {
8531
9499
  name: "getVisibleSections",
8532
9500
  description: "Get the list of sections currently visible in the user viewport.",
8533
9501
  parameters: {},
8534
- schemaVersion: 1
9502
+ required: [],
9503
+ schemaVersion: 1,
9504
+ execute: async () => {
9505
+ const model = this._currentPageModel;
9506
+ if (!model) return { sections: [] };
9507
+ return {
9508
+ sections: model.sections.slice(0, 10).map((s) => ({
9509
+ id: s.id,
9510
+ label: s.label,
9511
+ selector: s.selector,
9512
+ score: s.score
9513
+ }))
9514
+ };
9515
+ }
8535
9516
  },
8536
9517
  {
8537
9518
  name: "clickElement",
@@ -8539,7 +9520,52 @@ var GuideKitCore = class {
8539
9520
  parameters: {
8540
9521
  selector: { type: "string", description: "CSS selector of the element to click" }
8541
9522
  },
8542
- schemaVersion: 1
9523
+ required: ["selector"],
9524
+ schemaVersion: 1,
9525
+ execute: async (args) => {
9526
+ if (typeof document === "undefined") return { success: false, error: "Not in browser" };
9527
+ const selector = args.selector;
9528
+ const el = document.querySelector(selector);
9529
+ if (!el) return { success: false, error: `Element not found: ${selector}` };
9530
+ if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
9531
+ const clickableRules = this._options.options?.clickableSelectors;
9532
+ const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
9533
+ try {
9534
+ return el.matches(pattern);
9535
+ } catch {
9536
+ return selector === pattern;
9537
+ }
9538
+ }) ?? false;
9539
+ if (!isInDevAllowList) {
9540
+ const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
9541
+ try {
9542
+ return el.matches(pattern);
9543
+ } catch {
9544
+ return false;
9545
+ }
9546
+ });
9547
+ if (defaultDenied) {
9548
+ return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
9549
+ }
9550
+ }
9551
+ if (clickableRules?.deny?.length) {
9552
+ const denied = clickableRules.deny.some((pattern) => {
9553
+ try {
9554
+ return el.matches(pattern);
9555
+ } catch {
9556
+ return selector === pattern;
9557
+ }
9558
+ });
9559
+ if (denied) {
9560
+ return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
9561
+ }
9562
+ }
9563
+ if (clickableRules?.allow?.length && !isInDevAllowList) {
9564
+ return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
9565
+ }
9566
+ el.click();
9567
+ return { success: true };
9568
+ }
8543
9569
  },
8544
9570
  {
8545
9571
  name: "executeCustomAction",
@@ -8548,9 +9574,37 @@ var GuideKitCore = class {
8548
9574
  actionId: { type: "string", description: "ID of the custom action" },
8549
9575
  params: { type: "object", description: "Parameters for the action" }
8550
9576
  },
8551
- schemaVersion: 1
9577
+ required: ["actionId"],
9578
+ schemaVersion: 1,
9579
+ execute: async (args) => {
9580
+ const actionId = args.actionId;
9581
+ const params = args.params ?? {};
9582
+ const action = this.customActions.get(actionId);
9583
+ if (!action) return { error: `Unknown action: ${actionId}` };
9584
+ try {
9585
+ const result = await action.handler(params);
9586
+ return { success: true, result };
9587
+ } catch (err) {
9588
+ return { success: false, error: err instanceof Error ? err.message : String(err) };
9589
+ }
9590
+ }
8552
9591
  }
8553
9592
  ];
9593
+ }
9594
+ /**
9595
+ * Register all built-in tool handlers with the ToolExecutor.
9596
+ * Called once during init() after VisualGuidance and all subsystems are ready.
9597
+ */
9598
+ registerBuiltinTools() {
9599
+ if (!this.toolExecutor) return;
9600
+ for (const spec of this.getBuiltinToolSpecs()) {
9601
+ this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
9602
+ }
9603
+ }
9604
+ getToolDefinitions() {
9605
+ const builtinTools = this.getBuiltinToolSpecs().map(
9606
+ ({ execute: _execute, ...def }) => def
9607
+ );
8554
9608
  for (const [actionId, action] of this.customActions) {
8555
9609
  builtinTools.push({
8556
9610
  name: `action_${actionId}`,
@@ -8563,6 +9617,6 @@ var GuideKitCore = class {
8563
9617
  }
8564
9618
  };
8565
9619
 
8566
- export { AuthenticationError, AwarenessSystem, BrowserSupportError, ConfigurationError, ConnectionManager, ContentFilterError, ContextManager, DOMScanner, ErrorCodes, EventBus, GeminiAdapter, GuideKitCore, GuideKitError, I18n, InitializationError, LLMOrchestrator, NavigationController, NetworkError, OpenAIAdapter, PermissionError, ProactiveTriggerEngine, RateLimitError, RateLimiter, ResourceExhaustedError, ResourceManager, SingletonGuard, TimeoutError, TokenManager, ToolExecutor, VisualGuidance, createEventBus, isGuideKitError };
9620
+ export { AuthenticationError, AwarenessSystem, BrowserSupportError, ConfigurationError, ConnectionManager, ContentFilterError, ContextManager, DOMScanner, ErrorCodes, EventBus, GeminiAdapter, GuideKitCore, GuideKitError, I18n, InitializationError, LLMOrchestrator, NavigationController, NetworkError, OpenAIAdapter, PermissionError, ProactiveTriggerEngine, RateLimitError, RateLimiter, ResourceExhaustedError, ResourceManager, SingletonGuard, TimeoutError, TokenManager, ToolExecutor, VisualGuidance, VoicePipeline, WebSpeechSTT, WebSpeechTTS, createEventBus, isGuideKitError };
8567
9621
  //# sourceMappingURL=index.js.map
8568
9622
  //# sourceMappingURL=index.js.map