@guidekit/core 0.1.0-beta.1 → 0.1.0-beta.2

This diff shows the changes between publicly available versions of this package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1093,7 +1093,7 @@ var DOMScanner = class {
1093
1093
  if (el.closest("[data-guidekit-ignore]")) return;
1094
1094
  const style = window.getComputedStyle(el);
1095
1095
  const position = style.position;
1096
- const zIndex = parseInt(style.zIndex, 10);
1096
+ const zIndex = parseInt(style.zIndex, 10) || 0;
1097
1097
  if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
1098
1098
  const visible = isElementVisible(el);
1099
1099
  if (!visible) return;
@@ -1750,7 +1750,9 @@ var ErrorCodes = {
1750
1750
  // Content
1751
1751
  CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
1752
1752
  // Privacy
1753
- PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
1753
+ PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
1754
+ // General
1755
+ UNKNOWN: "UNKNOWN"
1754
1756
  };
1755
1757
  var GuideKitError = class extends Error {
1756
1758
  code;
@@ -1843,13 +1845,27 @@ function isGuideKitError(error) {
1843
1845
  var DEFAULT_OPENAI_MODEL = "gpt-4o";
1844
1846
  var DEFAULT_TIMEOUT_MS = 15e3;
1845
1847
  var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
1848
+ function emptyUsage() {
1849
+ return { prompt: 0, completion: 0, total: 0 };
1850
+ }
1846
1851
  var OpenAIAdapter = class {
1847
1852
  apiKey;
1848
1853
  model;
1854
+ /** Tracks whether the last extractChunks call emitted a done chunk. */
1855
+ lastExtractEmittedDone = false;
1856
+ /**
1857
+ * Token usage extracted from the most recent `parseResponse` call.
1858
+ * Updated as each SSE chunk is parsed.
1859
+ */
1860
+ _lastUsage = emptyUsage();
1849
1861
  constructor(config) {
1850
1862
  this.apiKey = config.apiKey;
1851
1863
  this.model = config.model ?? DEFAULT_OPENAI_MODEL;
1852
1864
  }
1865
+ /** Token usage from the most recent parseResponse call. */
1866
+ get lastUsage() {
1867
+ return this._lastUsage;
1868
+ }
1853
1869
  // -----------------------------------------------------------------------
1854
1870
  // LLMProviderAdapter implementation
1855
1871
  // -----------------------------------------------------------------------
@@ -1864,7 +1880,11 @@ var OpenAIAdapter = class {
1864
1880
  function: {
1865
1881
  name: tool.name,
1866
1882
  description: tool.description,
1867
- parameters: tool.parameters
1883
+ parameters: {
1884
+ type: "object",
1885
+ properties: { ...tool.parameters },
1886
+ required: tool.required ?? []
1887
+ }
1868
1888
  }
1869
1889
  }));
1870
1890
  }
@@ -1886,11 +1906,17 @@ var OpenAIAdapter = class {
1886
1906
  * prefixed by `data: `. The final line is `data: [DONE]`.
1887
1907
  * Text content arrives in `choices[0].delta.content` and tool calls
1888
1908
  * arrive in `choices[0].delta.tool_calls`.
1909
+ *
1910
+ * This method also:
1911
+ * - Detects content filtering and throws `ContentFilterError`.
1912
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
1889
1913
  */
1890
1914
  async *parseResponse(stream) {
1891
1915
  const reader = stream.getReader();
1892
1916
  const decoder = new TextDecoder();
1893
1917
  let buffer = "";
1918
+ let doneEmitted = false;
1919
+ this._lastUsage = emptyUsage();
1894
1920
  const pendingToolCalls = /* @__PURE__ */ new Map();
1895
1921
  try {
1896
1922
  while (true) {
@@ -1906,7 +1932,10 @@ var OpenAIAdapter = class {
1906
1932
  if (jsonStr === "" || jsonStr === "[DONE]") {
1907
1933
  if (jsonStr === "[DONE]") {
1908
1934
  yield* this.flushPendingToolCalls(pendingToolCalls);
1909
- yield { text: "", done: true };
1935
+ if (!doneEmitted) {
1936
+ doneEmitted = true;
1937
+ yield { text: "", done: true };
1938
+ }
1910
1939
  }
1911
1940
  continue;
1912
1941
  }
@@ -1916,19 +1945,53 @@ var OpenAIAdapter = class {
1916
1945
  } catch {
1917
1946
  continue;
1918
1947
  }
1919
- yield* this.extractChunks(parsed, pendingToolCalls);
1948
+ if (this.isContentFiltered(parsed)) {
1949
+ throw new ContentFilterError({
1950
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1951
+ message: "Response was blocked by provider content safety filter.",
1952
+ provider: "openai",
1953
+ suggestion: "Rephrase your question or adjust safety settings."
1954
+ });
1955
+ }
1956
+ const chunkUsage = this.extractUsage(parsed);
1957
+ if (chunkUsage) {
1958
+ this._lastUsage = chunkUsage;
1959
+ }
1960
+ yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
1961
+ if (!doneEmitted && this.lastExtractEmittedDone) {
1962
+ doneEmitted = true;
1963
+ }
1920
1964
  }
1921
1965
  }
1922
1966
  if (buffer.trim().startsWith("data:")) {
1923
1967
  const jsonStr = buffer.trim().slice(5).trim();
1924
1968
  if (jsonStr === "[DONE]") {
1925
1969
  yield* this.flushPendingToolCalls(pendingToolCalls);
1926
- yield { text: "", done: true };
1970
+ if (!doneEmitted) {
1971
+ doneEmitted = true;
1972
+ yield { text: "", done: true };
1973
+ }
1927
1974
  } else if (jsonStr !== "") {
1928
1975
  try {
1929
1976
  const parsed = JSON.parse(jsonStr);
1930
- yield* this.extractChunks(parsed, pendingToolCalls);
1931
- } catch {
1977
+ if (this.isContentFiltered(parsed)) {
1978
+ throw new ContentFilterError({
1979
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1980
+ message: "Response was blocked by provider content safety filter.",
1981
+ provider: "openai",
1982
+ suggestion: "Rephrase your question or adjust safety settings."
1983
+ });
1984
+ }
1985
+ const chunkUsage = this.extractUsage(parsed);
1986
+ if (chunkUsage) {
1987
+ this._lastUsage = chunkUsage;
1988
+ }
1989
+ yield* this.extractChunks(parsed, pendingToolCalls, doneEmitted);
1990
+ if (!doneEmitted && this.lastExtractEmittedDone) {
1991
+ doneEmitted = true;
1992
+ }
1993
+ } catch (error) {
1994
+ if (error instanceof ContentFilterError) throw error;
1932
1995
  }
1933
1996
  }
1934
1997
  }
@@ -1957,10 +2020,14 @@ var OpenAIAdapter = class {
1957
2020
  * the raw Response object.
1958
2021
  */
1959
2022
  async streamRequest(params) {
2023
+ const contentsArray = params.contents;
1960
2024
  const messages = [
1961
2025
  { role: "system", content: params.systemPrompt },
1962
- ...params.contents
2026
+ ...contentsArray
1963
2027
  ];
2028
+ if (params.userMessage) {
2029
+ messages.push({ role: "user", content: params.userMessage });
2030
+ }
1964
2031
  const body = {
1965
2032
  model: this.model,
1966
2033
  messages,
@@ -2043,7 +2110,8 @@ var OpenAIAdapter = class {
2043
2110
  * yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
2044
2111
  * or when flushed.
2045
2112
  */
2046
- *extractChunks(parsed, pendingToolCalls) {
2113
+ *extractChunks(parsed, pendingToolCalls, doneEmitted) {
2114
+ this.lastExtractEmittedDone = false;
2047
2115
  const choices = parsed.choices;
2048
2116
  if (!choices || choices.length === 0) return;
2049
2117
  for (const choice of choices) {
@@ -2077,7 +2145,8 @@ var OpenAIAdapter = class {
2077
2145
  if (finishReason === "tool_calls") {
2078
2146
  yield* this.flushPendingToolCalls(pendingToolCalls);
2079
2147
  }
2080
- if (finishReason === "stop") {
2148
+ if (finishReason === "stop" && !doneEmitted && !this.lastExtractEmittedDone) {
2149
+ this.lastExtractEmittedDone = true;
2081
2150
  yield { text: "", done: true };
2082
2151
  }
2083
2152
  }
@@ -2093,7 +2162,8 @@ var OpenAIAdapter = class {
2093
2162
  let args = {};
2094
2163
  try {
2095
2164
  args = JSON.parse(tc.argumentsJson);
2096
- } catch {
2165
+ } catch (_e) {
2166
+ console.warn("[GuideKit:LLM] Failed to parse tool call arguments:", tc.argumentsJson);
2097
2167
  }
2098
2168
  yield {
2099
2169
  id: tc.id,
@@ -2196,16 +2266,26 @@ var DEFAULT_SAFETY_SETTINGS = [
2196
2266
  { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_ONLY_HIGH" },
2197
2267
  { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_ONLY_HIGH" }
2198
2268
  ];
2199
- function emptyUsage() {
2269
+ function emptyUsage2() {
2200
2270
  return { prompt: 0, completion: 0, total: 0 };
2201
2271
  }
2202
2272
  var GeminiAdapter = class {
2203
2273
  apiKey;
2204
2274
  model;
2275
+ /**
2276
+ * Token usage extracted from the most recent `parseResponse` call.
2277
+ * Updated as each SSE chunk is parsed; the final value reflects the
2278
+ * cumulative usage metadata sent by Gemini (typically in the last chunk).
2279
+ */
2280
+ _lastUsage = emptyUsage2();
2205
2281
  constructor(config) {
2206
2282
  this.apiKey = config.apiKey;
2207
2283
  this.model = config.model ?? DEFAULT_GEMINI_MODEL;
2208
2284
  }
2285
+ /** Token usage from the most recent parseResponse call. */
2286
+ get lastUsage() {
2287
+ return this._lastUsage;
2288
+ }
2209
2289
  // -----------------------------------------------------------------------
2210
2290
  // LLMProviderAdapter implementation
2211
2291
  // -----------------------------------------------------------------------
@@ -2220,7 +2300,11 @@ var GeminiAdapter = class {
2220
2300
  functionDeclarations: tools.map((tool) => ({
2221
2301
  name: tool.name,
2222
2302
  description: tool.description,
2223
- parameters: tool.parameters
2303
+ parameters: {
2304
+ type: "object",
2305
+ properties: { ...tool.parameters },
2306
+ required: tool.required ?? []
2307
+ }
2224
2308
  }))
2225
2309
  }
2226
2310
  ];
@@ -2242,11 +2326,16 @@ var GeminiAdapter = class {
2242
2326
  * The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
2243
2327
  * as a JSON object prefixed by `data: `. We parse line-by-line, extract
2244
2328
  * text parts and function call parts, and yield the appropriate types.
2329
+ *
2330
+ * This method also:
2331
+ * - Detects content filtering and throws `ContentFilterError`.
2332
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
2245
2333
  */
2246
2334
  async *parseResponse(stream) {
2247
2335
  const reader = stream.getReader();
2248
2336
  const decoder = new TextDecoder();
2249
2337
  let buffer = "";
2338
+ this._lastUsage = emptyUsage2();
2250
2339
  try {
2251
2340
  while (true) {
2252
2341
  const { done, value } = await reader.read();
@@ -2265,6 +2354,18 @@ var GeminiAdapter = class {
2265
2354
  } catch {
2266
2355
  continue;
2267
2356
  }
2357
+ if (this.isContentFiltered(parsed)) {
2358
+ throw new ContentFilterError({
2359
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2360
+ message: "Response was blocked by provider content safety filter.",
2361
+ provider: "gemini",
2362
+ suggestion: "Rephrase your question or adjust safety settings."
2363
+ });
2364
+ }
2365
+ const chunkUsage = this.extractUsage(parsed);
2366
+ if (chunkUsage) {
2367
+ this._lastUsage = chunkUsage;
2368
+ }
2268
2369
  yield* this.extractChunks(parsed);
2269
2370
  }
2270
2371
  }
@@ -2273,8 +2374,21 @@ var GeminiAdapter = class {
2273
2374
  if (jsonStr !== "" && jsonStr !== "[DONE]") {
2274
2375
  try {
2275
2376
  const parsed = JSON.parse(jsonStr);
2377
+ if (this.isContentFiltered(parsed)) {
2378
+ throw new ContentFilterError({
2379
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2380
+ message: "Response was blocked by provider content safety filter.",
2381
+ provider: "gemini",
2382
+ suggestion: "Rephrase your question or adjust safety settings."
2383
+ });
2384
+ }
2385
+ const chunkUsage = this.extractUsage(parsed);
2386
+ if (chunkUsage) {
2387
+ this._lastUsage = chunkUsage;
2388
+ }
2276
2389
  yield* this.extractChunks(parsed);
2277
- } catch {
2390
+ } catch (error) {
2391
+ if (error instanceof ContentFilterError) throw error;
2278
2392
  }
2279
2393
  }
2280
2394
  }
@@ -2305,15 +2419,21 @@ var GeminiAdapter = class {
2305
2419
  /**
2306
2420
  * Build and execute a streaming request to the Gemini API.
2307
2421
  * Returns the raw `ReadableStream` for the response body together with
2308
- * a promise that resolves to token usage extracted from the final chunk.
2422
+ * the raw Response object.
2423
+ *
2424
+ * Note: The Gemini API key is passed as a URL query parameter (`key=`).
2425
+ * This is inherent to the Gemini REST SSE endpoint design; the key is
2426
+ * transmitted over HTTPS so it remains encrypted in transit. (H3)
2309
2427
  */
2310
2428
  async streamRequest(params) {
2429
+ const contentsArray = params.contents;
2430
+ const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
2311
2431
  const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
2312
2432
  const body = {
2313
2433
  systemInstruction: {
2314
2434
  parts: [{ text: params.systemPrompt }]
2315
2435
  },
2316
- contents: params.contents,
2436
+ contents: fullContents,
2317
2437
  safetySettings: DEFAULT_SAFETY_SETTINGS,
2318
2438
  generationConfig: {
2319
2439
  temperature: 0.7,
@@ -2381,7 +2501,7 @@ var GeminiAdapter = class {
2381
2501
  return { stream: response.body, response };
2382
2502
  }
2383
2503
  // -----------------------------------------------------------------------
2384
- // Internal helpers
2504
+ // Public helpers (LLMProviderAdapter interface)
2385
2505
  // -----------------------------------------------------------------------
2386
2506
  /**
2387
2507
  * Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
@@ -2554,7 +2674,8 @@ var LLMOrchestrator = class {
2554
2674
  updateConfig(config) {
2555
2675
  this._config = config;
2556
2676
  this._adapter = this.createAdapter(config);
2557
- this.log(`Config updated: provider=${config.provider}`);
2677
+ const label = "provider" in config ? config.provider : "custom adapter";
2678
+ this.log(`Config updated: ${label}`);
2558
2679
  }
2559
2680
  /** Get the current provider adapter. */
2560
2681
  get adapter() {
@@ -2565,139 +2686,42 @@ var LLMOrchestrator = class {
2565
2686
  // -----------------------------------------------------------------------
2566
2687
  /**
2567
2688
  * Execute a streaming LLM request and collect the results.
2689
+ *
2690
+ * This method is fully adapter-agnostic: it delegates streaming,
2691
+ * response parsing, content-filter detection, and usage extraction
2692
+ * entirely to the active `LLMProviderAdapter`. No provider-specific
2693
+ * SSE parsing lives in the orchestrator.
2568
2694
  */
2569
2695
  async executeStream(params, _isRetry) {
2570
- const geminiAdapter = this._adapter;
2571
- const historyContents = geminiAdapter.formatConversation(params.history);
2572
- const contents = [
2573
- ...historyContents,
2574
- { role: "user", parts: [{ text: params.userMessage }] }
2575
- ];
2576
- const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
2577
- const { stream } = await geminiAdapter.streamRequest({
2696
+ const adapter = this._adapter;
2697
+ const historyContents = adapter.formatConversation(params.history);
2698
+ const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
2699
+ const { stream } = await adapter.streamRequest({
2578
2700
  systemPrompt: params.systemPrompt,
2579
- contents,
2701
+ contents: historyContents,
2702
+ userMessage: params.userMessage,
2580
2703
  tools,
2581
2704
  signal: params.signal
2582
2705
  });
2583
2706
  let fullText = "";
2584
2707
  const toolCalls = [];
2585
- let usage = emptyUsage();
2586
- let wasContentFiltered = false;
2587
- const reader = stream.getReader();
2588
- const decoder = new TextDecoder();
2589
- let buffer = "";
2590
- try {
2591
- while (true) {
2592
- const { done, value } = await reader.read();
2593
- if (done) break;
2594
- buffer += decoder.decode(value, { stream: true });
2595
- const lines = buffer.split("\n");
2596
- buffer = lines.pop() ?? "";
2597
- for (const line of lines) {
2598
- const trimmed = line.trim();
2599
- if (!trimmed.startsWith("data:")) continue;
2600
- const jsonStr = trimmed.slice(5).trim();
2601
- if (jsonStr === "" || jsonStr === "[DONE]") continue;
2602
- let parsed;
2603
- try {
2604
- parsed = JSON.parse(jsonStr);
2605
- } catch {
2606
- continue;
2607
- }
2608
- if (geminiAdapter.isContentFiltered(parsed)) {
2609
- wasContentFiltered = true;
2610
- break;
2611
- }
2612
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2613
- if (chunkUsage) {
2614
- usage = chunkUsage;
2615
- }
2616
- const candidates = parsed.candidates;
2617
- if (!candidates || candidates.length === 0) continue;
2618
- for (const candidate of candidates) {
2619
- const content = candidate.content;
2620
- if (!content?.parts) continue;
2621
- const finishReason = candidate.finishReason;
2622
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2623
- for (const part of content.parts) {
2624
- if (typeof part.text === "string") {
2625
- fullText += part.text;
2626
- const chunk = { text: part.text, done: isDone };
2627
- this.callbacks.onChunk?.(chunk);
2628
- }
2629
- if (part.functionCall) {
2630
- const fc = part.functionCall;
2631
- const toolCall = {
2632
- id: fc.name,
2633
- name: fc.name,
2634
- arguments: fc.args ?? {}
2635
- };
2636
- toolCalls.push(toolCall);
2637
- this.callbacks.onToolCall?.(toolCall);
2638
- }
2639
- }
2640
- }
2641
- }
2642
- if (wasContentFiltered) break;
2643
- }
2644
- if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
2645
- const jsonStr = buffer.trim().slice(5).trim();
2646
- if (jsonStr !== "" && jsonStr !== "[DONE]") {
2647
- try {
2648
- const parsed = JSON.parse(jsonStr);
2649
- if (geminiAdapter.isContentFiltered(parsed)) {
2650
- wasContentFiltered = true;
2651
- } else {
2652
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2653
- if (chunkUsage) usage = chunkUsage;
2654
- const candidates = parsed.candidates;
2655
- if (candidates) {
2656
- for (const candidate of candidates) {
2657
- const content = candidate.content;
2658
- if (!content?.parts) continue;
2659
- const finishReason = candidate.finishReason;
2660
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2661
- for (const part of content.parts) {
2662
- if (typeof part.text === "string") {
2663
- fullText += part.text;
2664
- const chunk = {
2665
- text: part.text,
2666
- done: isDone
2667
- };
2668
- this.callbacks.onChunk?.(chunk);
2669
- }
2670
- if (part.functionCall) {
2671
- const fc = part.functionCall;
2672
- const toolCall = {
2673
- id: fc.name,
2674
- name: fc.name,
2675
- arguments: fc.args ?? {}
2676
- };
2677
- toolCalls.push(toolCall);
2678
- this.callbacks.onToolCall?.(toolCall);
2679
- }
2680
- }
2681
- }
2682
- }
2683
- }
2684
- } catch {
2685
- }
2708
+ for await (const item of adapter.parseResponse(stream)) {
2709
+ if ("name" in item && "arguments" in item) {
2710
+ const toolCall = item;
2711
+ toolCalls.push(toolCall);
2712
+ this.callbacks.onToolCall?.(toolCall);
2713
+ } else {
2714
+ const chunk = item;
2715
+ if (chunk.text) {
2716
+ fullText += chunk.text;
2686
2717
  }
2718
+ this.callbacks.onChunk?.(chunk);
2687
2719
  }
2688
- } finally {
2689
- reader.releaseLock();
2690
- }
2691
- if (wasContentFiltered) {
2692
- throw new ContentFilterError({
2693
- code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2694
- message: "Response was blocked by Gemini content safety filter.",
2695
- provider: "gemini",
2696
- suggestion: "Rephrase your question or adjust safety settings."
2697
- });
2698
2720
  }
2699
- if (fullText.length > 0) {
2700
- this.callbacks.onChunk?.({ text: "", done: true });
2721
+ this.callbacks.onChunk?.({ text: "", done: true });
2722
+ let usage = emptyUsage2();
2723
+ if ("lastUsage" in adapter) {
2724
+ usage = adapter.lastUsage;
2701
2725
  }
2702
2726
  if (usage.total > 0) {
2703
2727
  this.callbacks.onTokenUsage?.(usage);
@@ -2709,25 +2733,30 @@ var LLMOrchestrator = class {
2709
2733
  }
2710
2734
  /**
2711
2735
  * Create the appropriate adapter for the given config.
2712
- * Currently only Gemini is implemented; other providers will be added
2713
- * as the SDK evolves.
2736
+ *
2737
+ * Built-in providers:
2738
+ * - `'gemini'` — uses the bundled `GeminiAdapter`.
2739
+ *
2740
+ * Custom adapters:
2741
+ * - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
2742
+ * Example: `llm: { adapter: new OpenAIAdapter({ ... }) }`
2714
2743
  */
2715
2744
  createAdapter(config) {
2745
+ if ("adapter" in config) {
2746
+ return config.adapter;
2747
+ }
2716
2748
  switch (config.provider) {
2717
2749
  case "gemini":
2718
2750
  return new GeminiAdapter(config);
2719
- case "openai":
2720
- return new OpenAIAdapter(config);
2721
2751
  default:
2722
2752
  throw new Error(
2723
- `LLM provider "${config.provider}" is not yet supported. Currently only "gemini" and "openai" are implemented.`
2753
+ `LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
2724
2754
  );
2725
2755
  }
2726
2756
  }
2727
2757
  /** Convenience accessor for the current provider name. */
2728
2758
  get providerName() {
2729
- if (this._config.provider === "gemini") return "gemini";
2730
- if (this._config.provider === "openai") return "openai";
2759
+ if ("provider" in this._config) return this._config.provider;
2731
2760
  return void 0;
2732
2761
  }
2733
2762
  /** Log a debug message if debug mode is enabled. */
@@ -2880,7 +2909,7 @@ var ToolExecutor = class {
2880
2909
  break;
2881
2910
  }
2882
2911
  }
2883
- if (rounds >= this.maxRounds && allToolCalls.length > 0) {
2912
+ if (rounds >= this.maxRounds) {
2884
2913
  this.log(
2885
2914
  `Max rounds (${this.maxRounds}) reached. Returning current text.`
2886
2915
  );
@@ -2983,6 +3012,19 @@ var ToolExecutor = class {
2983
3012
  return s.value;
2984
3013
  }
2985
3014
  const tc = toolCalls[i];
3015
+ if (!tc) {
3016
+ const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
3017
+ return {
3018
+ toolCallId: `unknown-${i}`,
3019
+ record: {
3020
+ name: "unknown",
3021
+ args: {},
3022
+ result: void 0,
3023
+ durationMs: 0,
3024
+ error: errorMsg2
3025
+ }
3026
+ };
3027
+ }
2986
3028
  const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
2987
3029
  return {
2988
3030
  toolCallId: tc.id,
@@ -4175,64 +4217,62 @@ var DeepgramSTT = class {
4175
4217
  }
4176
4218
  };
4177
4219
 
4178
- // src/voice/elevenlabs-tts.ts
4179
- var LOG_PREFIX7 = "[GuideKit:TTS]";
4180
- var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
4181
- var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
4182
- var DEFAULT_STABILITY = 0.5;
4183
- var DEFAULT_SIMILARITY_BOOST = 0.75;
4184
- function base64ToArrayBuffer(base64) {
4185
- const binaryString = atob(base64);
4186
- const length = binaryString.length;
4187
- const bytes = new Uint8Array(length);
4188
- for (let i = 0; i < length; i++) {
4189
- bytes[i] = binaryString.charCodeAt(i);
4220
+ // src/voice/elevenlabs-stt.ts
4221
+ var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
4222
+ var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
4223
+ var DEFAULT_LANGUAGE2 = "en";
4224
+ var INACTIVITY_TIMEOUT_S = 30;
4225
+ var SAMPLE_RATE = 16e3;
4226
+ function float32ToInt162(float32) {
4227
+ const int16 = new Int16Array(float32.length);
4228
+ for (let i = 0; i < float32.length; i++) {
4229
+ const s = Math.max(-1, Math.min(1, float32[i]));
4230
+ int16[i] = s < 0 ? s * 32768 : s * 32767;
4190
4231
  }
4191
- return bytes.buffer;
4232
+ return int16;
4192
4233
  }
4193
- var ElevenLabsTTS = class {
4194
- // ---- Configuration ------------------------------------------------------
4234
+ function int16ToBase64(int16) {
4235
+ const bytes = new Uint8Array(int16.buffer);
4236
+ const CHUNK_SIZE = 8192;
4237
+ let binary = "";
4238
+ for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
4239
+ const chunk = bytes.subarray(i, i + CHUNK_SIZE);
4240
+ binary += String.fromCharCode(...chunk);
4241
+ }
4242
+ return btoa(binary);
4243
+ }
4244
+ var ElevenLabsSTT = class {
4245
+ // ---- Configuration -------------------------------------------------------
4195
4246
  apiKey;
4196
- voiceId;
4197
- modelId;
4247
+ language;
4198
4248
  debugEnabled;
4199
- // ---- Internal state -----------------------------------------------------
4249
+ // ---- Internal state ------------------------------------------------------
4200
4250
  wsManager = null;
4201
4251
  _connected = false;
4202
4252
  _suspended = false;
4203
- /**
4204
- * Whether the BOS (beginning-of-stream) handshake has been sent for the
4205
- * current WebSocket session. ElevenLabs requires the first message to
4206
- * contain voice settings and the API key before any text chunks.
4207
- */
4208
- bosSent = false;
4209
- /** Registered audio-event callbacks. */
4210
- audioCallbacks = /* @__PURE__ */ new Set();
4211
- // -----------------------------------------------------------------------
4253
+ /** Registered transcript callbacks. */
4254
+ transcriptCallbacks = /* @__PURE__ */ new Set();
4255
+ // -------------------------------------------------------------------------
4212
4256
  // Constructor
4213
- // -----------------------------------------------------------------------
4257
+ // -------------------------------------------------------------------------
4214
4258
  constructor(options) {
4215
4259
  this.apiKey = options.apiKey;
4216
- this.voiceId = options.voiceId ?? DEFAULT_VOICE_ID;
4217
- this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
4260
+ this.language = options.language ?? DEFAULT_LANGUAGE2;
4218
4261
  this.debugEnabled = options.debug ?? false;
4219
- this.log("ElevenLabsTTS created", {
4220
- voiceId: this.voiceId,
4221
- modelId: this.modelId
4222
- });
4262
+ this.log("ElevenLabsSTT created", { language: this.language });
4223
4263
  }
4224
- // -----------------------------------------------------------------------
4264
+ // -------------------------------------------------------------------------
4225
4265
  // Public API
4226
- // -----------------------------------------------------------------------
4266
+ // -------------------------------------------------------------------------
4227
4267
  /** Whether the WebSocket is currently connected and ready. */
4228
4268
  get isConnected() {
4229
4269
  return this._connected;
4230
4270
  }
4231
4271
  /**
4232
- * Open a WebSocket connection to the ElevenLabs streaming TTS endpoint.
4272
+ * Open a WebSocket connection to ElevenLabs' real-time STT endpoint.
4233
4273
  *
4234
- * Resolves once the connection is established and the BOS handshake has
4235
- * been sent. Rejects if the connection cannot be established.
4274
+ * Resolves once the connection is established and the socket is ready to
4275
+ * receive audio frames. Rejects if the connection cannot be established.
4236
4276
  */
4237
4277
  async connect() {
4238
4278
  if (this._connected) {
@@ -4244,17 +4284,16 @@ var ElevenLabsTTS = class {
4244
4284
  return;
4245
4285
  }
4246
4286
  const url = this.buildUrl();
4247
- this.log("Connecting to", url);
4287
+ this.log("Connecting to", url.replace(this.apiKey, "***"));
4248
4288
  this.wsManager = new WebSocketManager({
4249
4289
  url,
4250
4290
  protocols: [],
4251
4291
  debug: this.debugEnabled,
4252
- label: "ElevenLabs-TTS"
4292
+ label: "ElevenLabs-STT"
4253
4293
  });
4254
4294
  this.wsManager.onOpen(() => {
4255
4295
  this._connected = true;
4256
- this.sendBOS();
4257
- this.log("Connected and BOS sent");
4296
+ this.log("Connected");
4258
4297
  });
4259
4298
  this.wsManager.onMessage((event) => {
4260
4299
  this.handleMessage(event);
@@ -4269,67 +4308,54 @@ var ElevenLabsTTS = class {
4269
4308
  return this.wsManager.connect();
4270
4309
  }
4271
4310
  /**
4272
- * Send text to be synthesised into speech.
4273
- *
4274
- * May be called multiple times to stream text incrementally. Each call
4275
- * sends a text chunk with `try_trigger_generation: true` so ElevenLabs
4276
- * can begin synthesising as soon as it has enough context.
4277
- *
4278
- * Call {@link flush} when the complete utterance has been sent.
4279
- */
4280
- speak(text) {
4281
- if (!this._connected || !this.wsManager || this._suspended) {
4282
- this.log("Cannot speak \u2014 not connected or suspended");
4283
- return;
4284
- }
4285
- if (!text) {
4286
- return;
4287
- }
4288
- const message = JSON.stringify({
4289
- text,
4290
- try_trigger_generation: true
4291
- });
4292
- this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4293
- this.wsManager.send(message);
4294
- }
4295
- /**
4296
- * Signal the end of text input for the current utterance.
4311
+ * Send audio data to ElevenLabs for transcription.
4297
4312
  *
4298
- * Sends the EOS (end-of-stream) marker to ElevenLabs. The server will
4299
- * flush any remaining audio and send a final chunk with `isFinal: true`.
4313
+ * Accepts either `Float32Array` (Web Audio API output) or `Int16Array`
4314
+ * (already encoded as linear16). Float32 data is automatically converted
4315
+ * to Int16 before encoding. Audio is sent as a base64-encoded JSON message.
4300
4316
  */
4301
- flush() {
4317
+ sendAudio(audioData) {
4302
4318
  if (!this._connected || !this.wsManager || this._suspended) {
4303
- this.log("Cannot flush \u2014 not connected or suspended");
4304
4319
  return;
4305
4320
  }
4306
- const message = JSON.stringify({ text: "" });
4307
- this.log("Sending EOS (flush)");
4308
- this.wsManager.send(message);
4321
+ const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
4322
+ const base64 = int16ToBase64(int16);
4323
+ this.wsManager.send(
4324
+ JSON.stringify({
4325
+ type: "input_audio_chunk",
4326
+ audio: base64,
4327
+ sample_rate: SAMPLE_RATE
4328
+ })
4329
+ );
4309
4330
  }
4310
4331
  /**
4311
- * Register a callback to receive audio output events.
4332
+ * Register a callback to receive transcript events.
4312
4333
  *
4313
4334
  * @returns An unsubscribe function. Calling it more than once is safe.
4314
4335
  */
4315
- onAudio(callback) {
4316
- this.audioCallbacks.add(callback);
4336
+ onTranscript(callback) {
4337
+ this.transcriptCallbacks.add(callback);
4317
4338
  let removed = false;
4318
4339
  return () => {
4319
4340
  if (removed) return;
4320
4341
  removed = true;
4321
- this.audioCallbacks.delete(callback);
4342
+ this.transcriptCallbacks.delete(callback);
4322
4343
  };
4323
4344
  }
4324
- /** Gracefully close the connection by sending EOS then closing. */
4345
+ /**
4346
+ * Gracefully close the connection.
4347
+ *
4348
+ * Sends a `commit_audio` message so ElevenLabs can finalise any pending
4349
+ * transcription before the socket is torn down.
4350
+ */
4325
4351
  close() {
4326
4352
  if (!this._connected || !this.wsManager) {
4327
4353
  this.log("Not connected \u2014 nothing to close");
4328
4354
  return;
4329
4355
  }
4330
- this.log("Closing connection");
4356
+ this.log("Sending commit_audio and closing");
4331
4357
  try {
4332
- this.wsManager.send(JSON.stringify({ text: "" }));
4358
+ this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
4333
4359
  } catch {
4334
4360
  }
4335
4361
  this.wsManager.close();
@@ -4343,14 +4369,13 @@ var ElevenLabsTTS = class {
4343
4369
  this.wsManager = null;
4344
4370
  }
4345
4371
  this.cleanup();
4346
- this.audioCallbacks.clear();
4372
+ this.transcriptCallbacks.clear();
4347
4373
  }
4348
4374
  /**
4349
4375
  * Suspend the adapter (e.g. when the device goes offline).
4350
4376
  *
4351
- * Marks the adapter as suspended so that calls to `speak()` and `flush()`
4352
- * are silently dropped. The WebSocket itself is left open; ElevenLabs
4353
- * will close it after an inactivity timeout if the network went away.
4377
+ * Marks the adapter as suspended so that incoming `sendAudio` calls are
4378
+ * silently dropped. The WebSocket itself is left open.
4354
4379
  */
4355
4380
  suspend() {
4356
4381
  if (this._suspended) return;
@@ -4358,54 +4383,22 @@ var ElevenLabsTTS = class {
4358
4383
  this.log("Suspended");
4359
4384
  }
4360
4385
  /**
4361
- * Resume after a prior `suspend()`. If the underlying connection is
4362
- * still alive, the adapter returns to normal operation. If the connection
4363
- * was lost while suspended, callers should `close()` / `destroy()` and
4364
- * create a new instance.
4386
+ * Resume after a prior `suspend()`.
4365
4387
  */
4366
4388
  resume() {
4367
4389
  if (!this._suspended) return;
4368
4390
  this._suspended = false;
4369
4391
  this.log("Resumed");
4370
4392
  }
4371
- // -----------------------------------------------------------------------
4372
- // BOS handshake
4373
- // -----------------------------------------------------------------------
4374
- /**
4375
- * Send the BOS (beginning-of-stream) message.
4376
- *
4377
- * This must be the very first message on a new WebSocket session. It
4378
- * carries the API key and voice settings.
4379
- */
4380
- sendBOS() {
4381
- if (!this.wsManager || this.bosSent) {
4382
- return;
4383
- }
4384
- const bos = JSON.stringify({
4385
- text: " ",
4386
- voice_settings: {
4387
- stability: DEFAULT_STABILITY,
4388
- similarity_boost: DEFAULT_SIMILARITY_BOOST
4389
- },
4390
- xi_api_key: this.apiKey
4391
- });
4392
- this.wsManager.send(bos);
4393
- this.bosSent = true;
4394
- this.log("BOS handshake sent");
4395
- }
4396
- // -----------------------------------------------------------------------
4393
+ // -------------------------------------------------------------------------
4397
4394
  // Message handling
4398
- // -----------------------------------------------------------------------
4395
+ // -------------------------------------------------------------------------
4399
4396
  /**
4400
- * Parse incoming ElevenLabs JSON messages and emit audio events.
4401
- *
4402
- * ElevenLabs sends messages with the following shape:
4403
- * ```json
4404
- * { "audio": "base64encoded...", "isFinal": false }
4405
- * ```
4397
+ * Parse incoming ElevenLabs JSON messages and emit transcript events.
4406
4398
  *
4407
- * When `isFinal` is `true`, the server has finished synthesising the
4408
- * current utterance (i.e. after EOS was sent).
4399
+ * ElevenLabs sends two transcript message types:
4400
+ * - `partial_transcript`: interim result, `isFinal = false`
4401
+ * - `committed_transcript`: final result, `isFinal = true`
4409
4402
  */
4410
4403
  handleMessage(event) {
4411
4404
  if (typeof event.data !== "string") {
@@ -4418,47 +4411,1026 @@ var ElevenLabsTTS = class {
4418
4411
  this.log("Failed to parse message", event.data);
4419
4412
  return;
4420
4413
  }
4421
- if (parsed["error"] !== void 0) {
4422
- this.log("ElevenLabs error:", parsed["error"]);
4423
- return;
4414
+ const type = parsed["type"];
4415
+ if (type === "committed_transcript" || type === "partial_transcript") {
4416
+ this.handleTranscriptMessage(parsed, type === "committed_transcript");
4417
+ } else {
4418
+ this.log("Received message", type, parsed);
4419
+ }
4420
+ }
4421
+ /**
4422
+ * Extract transcript data from a transcript message and notify subscribers.
4423
+ */
4424
+ handleTranscriptMessage(parsed, isFinal) {
4425
+ const result = parsed["result"];
4426
+ const text = result?.text ?? "";
4427
+ const confidence = result?.confidence ?? 0;
4428
+ if (text.trim() === "") {
4429
+ return;
4430
+ }
4431
+ const transcriptEvent = {
4432
+ text,
4433
+ isFinal,
4434
+ confidence,
4435
+ timestamp: Date.now()
4436
+ };
4437
+ this.log(
4438
+ isFinal ? "Final transcript:" : "Interim transcript:",
4439
+ text,
4440
+ `(${(confidence * 100).toFixed(1)}%)`
4441
+ );
4442
+ this.emitTranscript(transcriptEvent);
4443
+ }
4444
+ // -------------------------------------------------------------------------
4445
+ // Subscriber notification
4446
+ // -------------------------------------------------------------------------
4447
+ /**
4448
+ * Emit a transcript event to all registered callbacks.
4449
+ *
4450
+ * Errors thrown by individual callbacks are caught and logged so one
4451
+ * misbehaving subscriber does not prevent others from receiving the event.
4452
+ */
4453
+ emitTranscript(event) {
4454
+ for (const cb of this.transcriptCallbacks) {
4455
+ try {
4456
+ cb(event);
4457
+ } catch (err) {
4458
+ console.error(LOG_PREFIX7, "Transcript callback threw:", err);
4459
+ }
4460
+ }
4461
+ }
4462
+ // -------------------------------------------------------------------------
4463
+ // URL building
4464
+ // -------------------------------------------------------------------------
4465
+ /** Build the ElevenLabs streaming STT endpoint URL with auth query params. */
4466
+ buildUrl() {
4467
+ const params = new URLSearchParams({
4468
+ xi_api_key: this.apiKey,
4469
+ language: this.language,
4470
+ inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
4471
+ });
4472
+ return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
4473
+ }
4474
+ // -------------------------------------------------------------------------
4475
+ // Cleanup
4476
+ // -------------------------------------------------------------------------
4477
+ /** Reset internal state after disconnection. */
4478
+ cleanup() {
4479
+ this._connected = false;
4480
+ }
4481
+ // -------------------------------------------------------------------------
4482
+ // Logging
4483
+ // -------------------------------------------------------------------------
4484
+ /** Conditional debug logging. */
4485
+ log(...args) {
4486
+ if (this.debugEnabled) {
4487
+ console.debug(LOG_PREFIX7, ...args);
4488
+ }
4489
+ }
4490
+ };
4491
+
4492
+ // src/voice/elevenlabs-tts.ts
4493
+ var LOG_PREFIX8 = "[GuideKit:TTS]";
4494
+ var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
4495
+ var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
4496
+ var DEFAULT_STABILITY = 0.5;
4497
+ var DEFAULT_SIMILARITY_BOOST = 0.75;
4498
+ function base64ToArrayBuffer(base64) {
4499
+ const binaryString = atob(base64);
4500
+ const length = binaryString.length;
4501
+ const bytes = new Uint8Array(length);
4502
+ for (let i = 0; i < length; i++) {
4503
+ bytes[i] = binaryString.charCodeAt(i);
4504
+ }
4505
+ return bytes.buffer;
4506
+ }
4507
+ var ElevenLabsTTS = class {
4508
+ // ---- Configuration ------------------------------------------------------
4509
+ apiKey;
4510
+ voiceId;
4511
+ modelId;
4512
+ debugEnabled;
4513
+ // ---- Internal state -----------------------------------------------------
4514
+ wsManager = null;
4515
+ _connected = false;
4516
+ _suspended = false;
4517
+ /**
4518
+ * Whether the BOS (beginning-of-stream) handshake has been sent for the
4519
+ * current WebSocket session. ElevenLabs requires the first message to
4520
+ * contain voice settings and the API key before any text chunks.
4521
+ */
4522
+ bosSent = false;
4523
+ /** Registered audio-event callbacks. */
4524
+ audioCallbacks = /* @__PURE__ */ new Set();
4525
+ // -----------------------------------------------------------------------
4526
+ // Constructor
4527
+ // -----------------------------------------------------------------------
4528
+ constructor(options) {
4529
+ this.apiKey = options.apiKey;
4530
+ this.voiceId = options.voiceId ?? DEFAULT_VOICE_ID;
4531
+ this.modelId = options.modelId ?? DEFAULT_MODEL_ID;
4532
+ this.debugEnabled = options.debug ?? false;
4533
+ this.log("ElevenLabsTTS created", {
4534
+ voiceId: this.voiceId,
4535
+ modelId: this.modelId
4536
+ });
4537
+ }
4538
+ // -----------------------------------------------------------------------
4539
+ // Public API
4540
+ // -----------------------------------------------------------------------
4541
+ /** Whether the WebSocket is currently connected and ready. */
4542
+ get isConnected() {
4543
+ return this._connected;
4544
+ }
4545
+ /**
4546
+ * Open a WebSocket connection to the ElevenLabs streaming TTS endpoint.
4547
+ *
4548
+ * Resolves once the connection is established and the BOS handshake has
4549
+ * been sent. Rejects if the connection cannot be established.
4550
+ */
4551
+ async connect() {
4552
+ if (this._connected) {
4553
+ this.log("Already connected \u2014 skipping");
4554
+ return;
4555
+ }
4556
+ if (typeof WebSocket === "undefined") {
4557
+ this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
4558
+ return;
4559
+ }
4560
+ const url = this.buildUrl();
4561
+ this.log("Connecting to", url);
4562
+ this.wsManager = new WebSocketManager({
4563
+ url,
4564
+ protocols: [],
4565
+ debug: this.debugEnabled,
4566
+ label: "ElevenLabs-TTS"
4567
+ });
4568
+ this.wsManager.onOpen(() => {
4569
+ this._connected = true;
4570
+ this.sendBOS();
4571
+ this.log("Connected and BOS sent");
4572
+ });
4573
+ this.wsManager.onMessage((event) => {
4574
+ this.handleMessage(event);
4575
+ });
4576
+ this.wsManager.onClose((code, reason) => {
4577
+ this.log("Connection closed", { code, reason });
4578
+ this.cleanup();
4579
+ });
4580
+ this.wsManager.onError((event) => {
4581
+ this.log("WebSocket error", event);
4582
+ });
4583
+ return this.wsManager.connect();
4584
+ }
4585
+ /**
4586
+ * Send text to be synthesised into speech.
4587
+ *
4588
+ * May be called multiple times to stream text incrementally. Each call
4589
+ * sends a text chunk with `try_trigger_generation: true` so ElevenLabs
4590
+ * can begin synthesising as soon as it has enough context.
4591
+ *
4592
+ * Call {@link flush} when the complete utterance has been sent.
4593
+ */
4594
+ speak(text) {
4595
+ if (!this._connected || !this.wsManager || this._suspended) {
4596
+ this.log("Cannot speak \u2014 not connected or suspended");
4597
+ return;
4598
+ }
4599
+ if (!text) {
4600
+ return;
4601
+ }
4602
+ const message = JSON.stringify({
4603
+ text,
4604
+ try_trigger_generation: true
4605
+ });
4606
+ this.log("Sending text chunk:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4607
+ this.wsManager.send(message);
4608
+ }
4609
+ /**
4610
+ * Signal the end of text input for the current utterance.
4611
+ *
4612
+ * Sends the EOS (end-of-stream) marker to ElevenLabs. The server will
4613
+ * flush any remaining audio and send a final chunk with `isFinal: true`.
4614
+ */
4615
+ flush() {
4616
+ if (!this._connected || !this.wsManager || this._suspended) {
4617
+ this.log("Cannot flush \u2014 not connected or suspended");
4618
+ return;
4619
+ }
4620
+ const message = JSON.stringify({ text: "" });
4621
+ this.log("Sending EOS (flush)");
4622
+ this.wsManager.send(message);
4623
+ }
4624
+ /**
4625
+ * Register a callback to receive audio output events.
4626
+ *
4627
+ * @returns An unsubscribe function. Calling it more than once is safe.
4628
+ */
4629
+ onAudio(callback) {
4630
+ this.audioCallbacks.add(callback);
4631
+ let removed = false;
4632
+ return () => {
4633
+ if (removed) return;
4634
+ removed = true;
4635
+ this.audioCallbacks.delete(callback);
4636
+ };
4637
+ }
4638
+ /** Gracefully close the connection by sending EOS then closing. */
4639
+ close() {
4640
+ if (!this._connected || !this.wsManager) {
4641
+ this.log("Not connected \u2014 nothing to close");
4642
+ return;
4643
+ }
4644
+ this.log("Closing connection");
4645
+ try {
4646
+ this.wsManager.send(JSON.stringify({ text: "" }));
4647
+ } catch {
4648
+ }
4649
+ this.wsManager.close();
4650
+ this.cleanup();
4651
+ }
4652
+ /** Force-destroy the connection without a graceful handshake. */
4653
+ destroy() {
4654
+ this.log("Destroying");
4655
+ if (this.wsManager) {
4656
+ this.wsManager.destroy();
4657
+ this.wsManager = null;
4658
+ }
4659
+ this.cleanup();
4660
+ this.audioCallbacks.clear();
4661
+ }
4662
+ /**
4663
+ * Suspend the adapter (e.g. when the device goes offline).
4664
+ *
4665
+ * Marks the adapter as suspended so that calls to `speak()` and `flush()`
4666
+ * are silently dropped. The WebSocket itself is left open; ElevenLabs
4667
+ * will close it after an inactivity timeout if the network went away.
4668
+ */
4669
+ suspend() {
4670
+ if (this._suspended) return;
4671
+ this._suspended = true;
4672
+ this.log("Suspended");
4673
+ }
4674
+ /**
4675
+ * Resume after a prior `suspend()`. If the underlying connection is
4676
+ * still alive, the adapter returns to normal operation. If the connection
4677
+ * was lost while suspended, callers should `close()` / `destroy()` and
4678
+ * create a new instance.
4679
+ */
4680
+ resume() {
4681
+ if (!this._suspended) return;
4682
+ this._suspended = false;
4683
+ this.log("Resumed");
4684
+ }
4685
+ // -----------------------------------------------------------------------
4686
+ // BOS handshake
4687
+ // -----------------------------------------------------------------------
4688
+ /**
4689
+ * Send the BOS (beginning-of-stream) message.
4690
+ *
4691
+ * This must be the very first message on a new WebSocket session. It
4692
+ * carries the API key and voice settings.
4693
+ */
4694
+ sendBOS() {
4695
+ if (!this.wsManager || this.bosSent) {
4696
+ return;
4697
+ }
4698
+ const bos = JSON.stringify({
4699
+ text: " ",
4700
+ voice_settings: {
4701
+ stability: DEFAULT_STABILITY,
4702
+ similarity_boost: DEFAULT_SIMILARITY_BOOST
4703
+ },
4704
+ xi_api_key: this.apiKey
4705
+ });
4706
+ this.wsManager.send(bos);
4707
+ this.bosSent = true;
4708
+ this.log("BOS handshake sent");
4709
+ }
4710
+ // -----------------------------------------------------------------------
4711
+ // Message handling
4712
+ // -----------------------------------------------------------------------
4713
+ /**
4714
+ * Parse incoming ElevenLabs JSON messages and emit audio events.
4715
+ *
4716
+ * ElevenLabs sends messages with the following shape:
4717
+ * ```json
4718
+ * { "audio": "base64encoded...", "isFinal": false }
4719
+ * ```
4720
+ *
4721
+ * When `isFinal` is `true`, the server has finished synthesising the
4722
+ * current utterance (i.e. after EOS was sent).
4723
+ */
4724
+ handleMessage(event) {
4725
+ if (typeof event.data !== "string") {
4726
+ return;
4727
+ }
4728
+ let parsed;
4729
+ try {
4730
+ parsed = JSON.parse(event.data);
4731
+ } catch {
4732
+ this.log("Failed to parse message", event.data);
4733
+ return;
4734
+ }
4735
+ if (parsed["error"] !== void 0) {
4736
+ this.log("ElevenLabs error:", parsed["error"]);
4737
+ return;
4738
+ }
4739
+ if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4740
+ this.log("Non-audio message received", parsed);
4741
+ return;
4742
+ }
4743
+ const audioBase64 = parsed["audio"];
4744
+ const isFinal = parsed["isFinal"] === true;
4745
+ if (!audioBase64 || audioBase64.length === 0) {
4746
+ if (isFinal) {
4747
+ this.emitAudio({
4748
+ audio: new ArrayBuffer(0),
4749
+ isFinal: true,
4750
+ timestamp: Date.now()
4751
+ });
4752
+ }
4753
+ return;
4754
+ }
4755
+ let audioBuffer;
4756
+ try {
4757
+ audioBuffer = base64ToArrayBuffer(audioBase64);
4758
+ } catch (err) {
4759
+ this.log("Failed to decode base64 audio", err);
4760
+ return;
4761
+ }
4762
+ const audioEvent = {
4763
+ audio: audioBuffer,
4764
+ isFinal,
4765
+ timestamp: Date.now()
4766
+ };
4767
+ this.log(
4768
+ isFinal ? "Final audio chunk:" : "Audio chunk:",
4769
+ `${audioBuffer.byteLength} bytes`
4770
+ );
4771
+ this.emitAudio(audioEvent);
4772
+ }
4773
+ // -----------------------------------------------------------------------
4774
+ // Subscriber notification
4775
+ // -----------------------------------------------------------------------
4776
+ /**
4777
+ * Emit an audio event to all registered callbacks.
4778
+ *
4779
+ * Errors thrown by individual callbacks are caught and logged so one
4780
+ * misbehaving subscriber does not prevent others from receiving the event.
4781
+ */
4782
+ emitAudio(event) {
4783
+ for (const cb of this.audioCallbacks) {
4784
+ try {
4785
+ cb(event);
4786
+ } catch (err) {
4787
+ console.error(LOG_PREFIX8, "Audio callback threw:", err);
4788
+ }
4789
+ }
4790
+ }
4791
+ // -----------------------------------------------------------------------
4792
+ // URL building
4793
+ // -----------------------------------------------------------------------
4794
+ /** Build the ElevenLabs streaming TTS endpoint URL. */
4795
+ buildUrl() {
4796
+ const params = new URLSearchParams({
4797
+ model_id: this.modelId
4798
+ });
4799
+ return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4800
+ }
4801
+ // -----------------------------------------------------------------------
4802
+ // Cleanup
4803
+ // -----------------------------------------------------------------------
4804
+ /** Reset internal state after disconnection. */
4805
+ cleanup() {
4806
+ this._connected = false;
4807
+ this.bosSent = false;
4808
+ }
4809
+ // -----------------------------------------------------------------------
4810
+ // Logging
4811
+ // -----------------------------------------------------------------------
4812
+ /** Conditional debug logging. */
4813
+ log(...args) {
4814
+ if (this.debugEnabled) {
4815
+ console.debug(LOG_PREFIX8, ...args);
4816
+ }
4817
+ }
4818
+ };
4819
+
4820
+ // src/voice/web-speech-stt.ts
4821
+ var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
4822
+ var DEFAULT_LANGUAGE3 = "en-US";
4823
+ var WebSpeechSTT = class {
4824
+ // ---- Configuration -------------------------------------------------------
4825
+ language;
4826
+ continuous;
4827
+ interimResultsEnabled;
4828
+ debugEnabled;
4829
+ // ---- Internal state ------------------------------------------------------
4830
+ recognition = null;
4831
+ _connected = false;
4832
+ _suspended = false;
4833
+ /**
4834
+ * Whether we intentionally stopped recognition. Used to distinguish
4835
+ * between intentional stop and unexpected end (for auto-restart in
4836
+ * continuous mode).
4837
+ */
4838
+ _intentionalStop = false;
4839
+ /** Registered transcript callbacks. */
4840
+ transcriptCallbacks = /* @__PURE__ */ new Set();
4841
+ // -------------------------------------------------------------------------
4842
+ // Constructor
4843
+ // -------------------------------------------------------------------------
4844
+ constructor(options = {}) {
4845
+ this.language = options.language ?? DEFAULT_LANGUAGE3;
4846
+ this.continuous = options.continuous ?? true;
4847
+ this.interimResultsEnabled = options.interimResults ?? true;
4848
+ this.debugEnabled = options.debug ?? false;
4849
+ this.log("WebSpeechSTT created", {
4850
+ language: this.language,
4851
+ continuous: this.continuous,
4852
+ interimResults: this.interimResultsEnabled
4853
+ });
4854
+ }
4855
+ // -------------------------------------------------------------------------
4856
+ // Static methods
4857
+ // -------------------------------------------------------------------------
4858
+ /**
4859
+ * Check whether the Web Speech API SpeechRecognition is supported in the
4860
+ * current environment. Safe to call in SSR (returns false).
4861
+ */
4862
+ static isSupported() {
4863
+ if (typeof window === "undefined") return false;
4864
+ return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
4865
+ }
4866
+ // -------------------------------------------------------------------------
4867
+ // Public API
4868
+ // -------------------------------------------------------------------------
4869
+ /** Whether recognition is currently active and connected. */
4870
+ get isConnected() {
4871
+ return this._connected;
4872
+ }
4873
+ /**
4874
+ * Start speech recognition.
4875
+ *
4876
+ * Creates the SpeechRecognition instance and begins listening. Resolves
4877
+ * once the recognition session has started. Rejects if the API is not
4878
+ * supported or the browser denies permission.
4879
+ */
4880
+ async connect() {
4881
+ if (this._connected) {
4882
+ this.log("Already connected \u2014 skipping");
4883
+ return;
4884
+ }
4885
+ if (typeof window === "undefined") {
4886
+ this.log("SSR environment detected \u2014 cannot connect");
4887
+ return;
4888
+ }
4889
+ const SpeechRecognitionClass = this.resolveSpeechRecognition();
4890
+ if (!SpeechRecognitionClass) {
4891
+ throw new Error(
4892
+ "Web Speech API (SpeechRecognition) is not supported in this browser."
4893
+ );
4894
+ }
4895
+ this.recognition = new SpeechRecognitionClass();
4896
+ this.recognition.lang = this.language;
4897
+ this.recognition.continuous = this.continuous;
4898
+ this.recognition.interimResults = this.interimResultsEnabled;
4899
+ this.recognition.maxAlternatives = 1;
4900
+ this.recognition.onstart = () => {
4901
+ this._connected = true;
4902
+ this._intentionalStop = false;
4903
+ this.log("Recognition started");
4904
+ };
4905
+ this.recognition.onresult = (event) => {
4906
+ this.handleResult(event);
4907
+ };
4908
+ this.recognition.onerror = (event) => {
4909
+ this.handleError(event);
4910
+ };
4911
+ this.recognition.onend = () => {
4912
+ this.log("Recognition ended");
4913
+ const wasConnected = this._connected;
4914
+ this._connected = false;
4915
+ if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
4916
+ this.log("Auto-restarting continuous recognition");
4917
+ try {
4918
+ this.recognition?.start();
4919
+ } catch {
4920
+ this.log("Failed to auto-restart recognition");
4921
+ }
4922
+ }
4923
+ };
4924
+ return new Promise((resolve, reject) => {
4925
+ const onStart = () => {
4926
+ cleanup();
4927
+ resolve();
4928
+ };
4929
+ const onError = (event) => {
4930
+ cleanup();
4931
+ reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
4932
+ };
4933
+ const cleanup = () => {
4934
+ if (this.recognition) {
4935
+ this.recognition.removeEventListener("start", onStart);
4936
+ this.recognition.removeEventListener("error", onError);
4937
+ }
4938
+ };
4939
+ this.recognition.addEventListener("start", onStart, { once: true });
4940
+ this.recognition.addEventListener("error", onError, { once: true });
4941
+ try {
4942
+ this.recognition.start();
4943
+ } catch (err) {
4944
+ cleanup();
4945
+ reject(err);
4946
+ }
4947
+ });
4948
+ }
4949
+ /**
4950
+ * Send audio data. No-op for Web Speech API since it captures audio
4951
+ * directly from the microphone via the browser's internal pipeline.
4952
+ *
4953
+ * Provided for interface compatibility with WebSocket-based STT adapters
4954
+ * (DeepgramSTT, ElevenLabsSTT).
4955
+ */
4956
+ sendAudio(_audioData) {
4957
+ }
4958
+ /**
4959
+ * Register a callback to receive transcript events.
4960
+ *
4961
+ * @returns An unsubscribe function. Calling it more than once is safe.
4962
+ */
4963
+ onTranscript(callback) {
4964
+ this.transcriptCallbacks.add(callback);
4965
+ let removed = false;
4966
+ return () => {
4967
+ if (removed) return;
4968
+ removed = true;
4969
+ this.transcriptCallbacks.delete(callback);
4970
+ };
4971
+ }
4972
+ /**
4973
+ * Gracefully stop recognition.
4974
+ *
4975
+ * Calls `stop()` on the SpeechRecognition instance which allows it to
4976
+ * deliver any pending final results before ending.
4977
+ */
4978
+ close() {
4979
+ if (!this.recognition) {
4980
+ this.log("Not connected \u2014 nothing to close");
4981
+ return;
4982
+ }
4983
+ this.log("Closing recognition");
4984
+ this._intentionalStop = true;
4985
+ try {
4986
+ this.recognition.stop();
4987
+ } catch {
4988
+ }
4989
+ this.cleanup();
4990
+ }
4991
+ /** Force-destroy the recognition without waiting for pending results. */
4992
+ destroy() {
4993
+ this.log("Destroying");
4994
+ this._intentionalStop = true;
4995
+ if (this.recognition) {
4996
+ try {
4997
+ this.recognition.abort();
4998
+ } catch {
4999
+ }
5000
+ this.recognition.onresult = null;
5001
+ this.recognition.onerror = null;
5002
+ this.recognition.onend = null;
5003
+ this.recognition.onstart = null;
5004
+ this.recognition = null;
5005
+ }
5006
+ this.cleanup();
5007
+ this.transcriptCallbacks.clear();
5008
+ }
5009
+ /**
5010
+ * Suspend the adapter (e.g. when the device goes offline).
5011
+ *
5012
+ * Stops recognition and marks the adapter as suspended so that auto-restart
5013
+ * does not trigger.
5014
+ */
5015
+ suspend() {
5016
+ if (this._suspended) return;
5017
+ this._suspended = true;
5018
+ this._intentionalStop = true;
5019
+ if (this.recognition && this._connected) {
5020
+ try {
5021
+ this.recognition.stop();
5022
+ } catch {
5023
+ }
5024
+ }
5025
+ this.log("Suspended");
5026
+ }
5027
+ /**
5028
+ * Resume after a prior `suspend()`. Restarts recognition if it was
5029
+ * running before suspension.
5030
+ */
5031
+ resume() {
5032
+ if (!this._suspended) return;
5033
+ this._suspended = false;
5034
+ this._intentionalStop = false;
5035
+ this.log("Resumed");
5036
+ if (this.recognition && !this._connected) {
5037
+ try {
5038
+ this.recognition.start();
5039
+ } catch {
5040
+ this.log("Failed to restart recognition after resume");
5041
+ }
5042
+ }
5043
+ }
5044
+ // -------------------------------------------------------------------------
5045
+ // Result handling
5046
+ // -------------------------------------------------------------------------
5047
+ /**
5048
+ * Handle SpeechRecognition result events.
5049
+ *
5050
+ * The `results` property is a SpeechRecognitionResultList containing all
5051
+ * results accumulated during this recognition session. We only process
5052
+ * results from `resultIndex` onward to avoid re-emitting old results.
5053
+ */
5054
+ handleResult(event) {
5055
+ for (let i = event.resultIndex; i < event.results.length; i++) {
5056
+ const result = event.results[i];
5057
+ if (!result) continue;
5058
+ const alternative = result[0];
5059
+ if (!alternative) continue;
5060
+ const transcript = alternative.transcript;
5061
+ if (!transcript || transcript.trim() === "") continue;
5062
+ const isFinal = result.isFinal;
5063
+ const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
5064
+ const transcriptEvent = {
5065
+ text: transcript,
5066
+ isFinal,
5067
+ confidence,
5068
+ timestamp: Date.now()
5069
+ };
5070
+ this.log(
5071
+ isFinal ? "Final transcript:" : "Interim transcript:",
5072
+ transcript,
5073
+ `(${(confidence * 100).toFixed(1)}%)`
5074
+ );
5075
+ this.emitTranscript(transcriptEvent);
5076
+ }
5077
+ }
5078
+ // -------------------------------------------------------------------------
5079
+ // Error handling
5080
+ // -------------------------------------------------------------------------
5081
+ /**
5082
+ * Handle SpeechRecognition errors.
5083
+ *
5084
+ * Some errors are recoverable (e.g. `no-speech`) and some are fatal
5085
+ * (e.g. `not-allowed`). For recoverable errors in continuous mode,
5086
+ * recognition will auto-restart via the `onend` handler.
5087
+ */
5088
+ handleError(event) {
5089
+ const errorType = event.error;
5090
+ this.log("Recognition error:", errorType, event.message);
5091
+ if (errorType === "no-speech" || errorType === "aborted") {
5092
+ this.log("Non-fatal error \u2014 will recover");
5093
+ return;
5094
+ }
5095
+ if (errorType === "network") {
5096
+ this.log("Network error \u2014 recognition may auto-restart");
5097
+ return;
5098
+ }
5099
+ if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
5100
+ this._intentionalStop = true;
5101
+ this.log("Fatal recognition error \u2014 stopping");
5102
+ }
5103
+ }
5104
+ // -------------------------------------------------------------------------
5105
+ // Subscriber notification
5106
+ // -------------------------------------------------------------------------
5107
+ /**
5108
+ * Emit a transcript event to all registered callbacks.
5109
+ *
5110
+ * Errors thrown by individual callbacks are caught and logged so one
5111
+ * misbehaving subscriber does not prevent others from receiving the event.
5112
+ */
5113
+ emitTranscript(event) {
5114
+ for (const cb of this.transcriptCallbacks) {
5115
+ try {
5116
+ cb(event);
5117
+ } catch (err) {
5118
+ console.error(LOG_PREFIX9, "Transcript callback threw:", err);
5119
+ }
5120
+ }
5121
+ }
5122
+ // -------------------------------------------------------------------------
5123
+ // SpeechRecognition resolution
5124
+ // -------------------------------------------------------------------------
5125
+ /**
5126
+ * Resolve the SpeechRecognition constructor, with the webkit-prefixed
5127
+ * fallback. Returns null if not available.
5128
+ */
5129
+ resolveSpeechRecognition() {
5130
+ if (typeof window === "undefined") return null;
5131
+ const win = window;
5132
+ if (typeof win["SpeechRecognition"] !== "undefined") {
5133
+ return win["SpeechRecognition"];
5134
+ }
5135
+ if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
5136
+ return globalThis.webkitSpeechRecognition;
5137
+ }
5138
+ return null;
5139
+ }
5140
+ // -------------------------------------------------------------------------
5141
+ // Cleanup
5142
+ // -------------------------------------------------------------------------
5143
+ /** Reset internal state after disconnection. */
5144
+ cleanup() {
5145
+ this._connected = false;
5146
+ }
5147
+ // -------------------------------------------------------------------------
5148
+ // Logging
5149
+ // -------------------------------------------------------------------------
5150
+ /** Conditional debug logging. */
5151
+ log(...args) {
5152
+ if (this.debugEnabled) {
5153
+ console.debug(LOG_PREFIX9, ...args);
5154
+ }
5155
+ }
5156
+ };
5157
+
5158
+ // src/voice/web-speech-tts.ts
5159
+ var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
5160
+ var DEFAULT_RATE = 1;
5161
+ var DEFAULT_PITCH = 1;
5162
+ var DEFAULT_LANGUAGE4 = "en-US";
5163
+ var WebSpeechTTS = class {
5164
+ // ---- Configuration -------------------------------------------------------
5165
+ voiceName;
5166
+ rate;
5167
+ pitch;
5168
+ language;
5169
+ debugEnabled;
5170
+ // ---- Internal state ------------------------------------------------------
5171
+ _connected = false;
5172
+ _suspended = false;
5173
+ /** Cached voice object resolved from voiceName. */
5174
+ _resolvedVoice = null;
5175
+ /** Whether voices have been loaded (they load async in some browsers). */
5176
+ _voicesLoaded = false;
5177
+ /** Registered audio-event callbacks. */
5178
+ audioCallbacks = /* @__PURE__ */ new Set();
5179
+ // -------------------------------------------------------------------------
5180
+ // Constructor
5181
+ // -------------------------------------------------------------------------
5182
+ constructor(options = {}) {
5183
+ this.voiceName = options.voice ?? null;
5184
+ this.rate = options.rate ?? DEFAULT_RATE;
5185
+ this.pitch = options.pitch ?? DEFAULT_PITCH;
5186
+ this.language = options.language ?? DEFAULT_LANGUAGE4;
5187
+ this.debugEnabled = options.debug ?? false;
5188
+ this.log("WebSpeechTTS created", {
5189
+ voice: this.voiceName,
5190
+ rate: this.rate,
5191
+ pitch: this.pitch,
5192
+ language: this.language
5193
+ });
5194
+ }
5195
+ // -------------------------------------------------------------------------
5196
+ // Static methods
5197
+ // -------------------------------------------------------------------------
5198
+ /**
5199
+ * Check whether the Web Speech API SpeechSynthesis is supported in the
5200
+ * current environment. Safe to call in SSR (returns false).
5201
+ */
5202
+ static isSupported() {
5203
+ if (typeof window === "undefined") return false;
5204
+ return typeof window.speechSynthesis !== "undefined";
5205
+ }
5206
+ // -------------------------------------------------------------------------
5207
+ // Public API
5208
+ // -------------------------------------------------------------------------
5209
+ /** Whether the adapter is connected (ready for speech). */
5210
+ get isConnected() {
5211
+ return this._connected;
5212
+ }
5213
+ /**
5214
+ * Initialize the adapter.
5215
+ *
5216
+ * Loads available voices and resolves the requested voice name. Voice
5217
+ * loading is async in some browsers (notably Chrome) so we wait for
5218
+ * the `voiceschanged` event if needed.
5219
+ */
5220
+ async connect() {
5221
+ if (this._connected) {
5222
+ this.log("Already connected \u2014 skipping");
5223
+ return;
5224
+ }
5225
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5226
+ this.log("SpeechSynthesis not available \u2014 cannot connect");
5227
+ return;
5228
+ }
5229
+ await this.loadVoices();
5230
+ if (this.voiceName) {
5231
+ this._resolvedVoice = this.findVoice(this.voiceName);
5232
+ if (this._resolvedVoice) {
5233
+ this.log("Resolved voice:", this._resolvedVoice.name);
5234
+ } else {
5235
+ this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
5236
+ }
5237
+ }
5238
+ this._connected = true;
5239
+ this.log("Connected");
5240
+ }
5241
+ /**
5242
+ * Speak the given text using the browser's speech synthesis engine.
5243
+ *
5244
+ * Returns nothing. A start event is emitted when the utterance begins, and
5245
+ * completion, cancellation and synthesis errors each emit a final event.
5246
+ *
5247
+ * Events carry an empty audio buffer and exist for VoicePipeline
5248
+ * compatibility; no-op when not connected, suspended, or given blank text.
5249
+ */
5250
+ speak(text) {
5251
+ if (!this._connected || this._suspended) {
5252
+ this.log("Cannot speak \u2014 not connected or suspended");
5253
+ return;
5254
+ }
5255
+ if (!text || !text.trim()) {
5256
+ return;
5257
+ }
5258
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5259
+ return;
5260
+ }
5261
+ const synth = window.speechSynthesis;
5262
+ const utterance = new SpeechSynthesisUtterance(text);
5263
+ utterance.lang = this.language;
5264
+ utterance.rate = this.rate;
5265
+ utterance.pitch = this.pitch;
5266
+ if (this._resolvedVoice) {
5267
+ utterance.voice = this._resolvedVoice;
5268
+ }
5269
+ utterance.onstart = () => {
5270
+ this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
5271
+ this.emitAudio({
5272
+ audio: new ArrayBuffer(0),
5273
+ isFinal: false,
5274
+ timestamp: Date.now()
5275
+ });
5276
+ };
5277
+ utterance.onend = () => {
5278
+ this.log("Utterance ended");
5279
+ this.emitAudio({
5280
+ audio: new ArrayBuffer(0),
5281
+ isFinal: true,
5282
+ timestamp: Date.now()
5283
+ });
5284
+ };
5285
+ utterance.onerror = (event) => {
5286
+ if (event.error === "canceled") {
5287
+ this.log("Utterance cancelled");
5288
+ this.emitAudio({
5289
+ audio: new ArrayBuffer(0),
5290
+ isFinal: true,
5291
+ timestamp: Date.now()
5292
+ });
5293
+ return;
5294
+ }
5295
+ this.log("Utterance error:", event.error);
5296
+ this.emitAudio({
5297
+ audio: new ArrayBuffer(0),
5298
+ isFinal: true,
5299
+ timestamp: Date.now()
5300
+ });
5301
+ };
5302
+ this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
5303
+ synth.speak(utterance);
5304
+ }
5305
+ /**
5306
+ * Flush / finalize the current utterance.
5307
+ *
5308
+ * No-op for Web Speech API since each speak() call is a complete
5309
+ * utterance. Provided for interface compatibility with ElevenLabsTTS.
5310
+ */
5311
+ flush() {
5312
+ }
5313
+ /**
5314
+ * Register a callback to receive audio output events.
5315
+ *
5316
+ * For Web Speech API, these events have empty audio buffers and are
5317
+ * used to signal utterance start/end for VoicePipeline state management.
5318
+ *
5319
+ * @returns An unsubscribe function. Calling it more than once is safe.
5320
+ */
5321
+ onAudio(callback) {
5322
+ this.audioCallbacks.add(callback);
5323
+ let removed = false;
5324
+ return () => {
5325
+ if (removed) return;
5326
+ removed = true;
5327
+ this.audioCallbacks.delete(callback);
5328
+ };
5329
+ }
5330
+ /** Stop current speech synthesis and cancel any queued utterances. */
5331
+ stop() {
5332
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5333
+ return;
5334
+ }
5335
+ this.log("Stopping speech synthesis");
5336
+ window.speechSynthesis.cancel();
5337
+ }
5338
+ /** Gracefully close the adapter. */
5339
+ close() {
5340
+ this.log("Closing");
5341
+ this.stop();
5342
+ this.cleanup();
5343
+ }
5344
+ /** Force-destroy the adapter. */
5345
+ destroy() {
5346
+ this.log("Destroying");
5347
+ this.stop();
5348
+ this.cleanup();
5349
+ this.audioCallbacks.clear();
5350
+ }
5351
+ /**
5352
+ * Suspend the adapter (e.g. when the device goes offline).
5353
+ *
5354
+ * Pauses any active speech synthesis and marks the adapter as suspended.
5355
+ */
5356
+ suspend() {
5357
+ if (this._suspended) return;
5358
+ this._suspended = true;
5359
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
5360
+ window.speechSynthesis.pause();
5361
+ }
5362
+ this.log("Suspended");
5363
+ }
5364
+ /**
5365
+ * Resume after a prior `suspend()`.
5366
+ */
5367
+ resume() {
5368
+ if (!this._suspended) return;
5369
+ this._suspended = false;
5370
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
5371
+ window.speechSynthesis.resume();
4424
5372
  }
4425
- if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4426
- this.log("Non-audio message received", parsed);
5373
+ this.log("Resumed");
5374
+ }
5375
+ // -------------------------------------------------------------------------
5376
+ // Voice loading
5377
+ // -------------------------------------------------------------------------
5378
+ /**
5379
+ * Load available voices from the browser.
5380
+ *
5381
+ * In Chrome and some other browsers, voices load asynchronously after
5382
+ * the page loads. We wait for the `voiceschanged` event with a timeout.
5383
+ */
5384
+ async loadVoices() {
5385
+ if (this._voicesLoaded) return;
5386
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
5387
+ const synth = window.speechSynthesis;
5388
+ let voices = synth.getVoices();
5389
+ if (voices.length > 0) {
5390
+ this._voicesLoaded = true;
5391
+ this.log("Voices loaded:", voices.length, "available");
4427
5392
  return;
4428
5393
  }
4429
- const audioBase64 = parsed["audio"];
4430
- const isFinal = parsed["isFinal"] === true;
4431
- if (!audioBase64 || audioBase64.length === 0) {
4432
- if (isFinal) {
4433
- this.emitAudio({
4434
- audio: new ArrayBuffer(0),
4435
- isFinal: true,
4436
- timestamp: Date.now()
4437
- });
4438
- }
4439
- return;
5394
+ await new Promise((resolve) => {
5395
+ const onVoicesChanged = () => {
5396
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
5397
+ clearTimeout(timeout);
5398
+ voices = synth.getVoices();
5399
+ this._voicesLoaded = true;
5400
+ this.log("Voices loaded (async):", voices.length, "available");
5401
+ resolve();
5402
+ };
5403
+ const timeout = setTimeout(() => {
5404
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
5405
+ this._voicesLoaded = true;
5406
+ this.log("Voices loading timed out \u2014 proceeding with defaults");
5407
+ resolve();
5408
+ }, 2e3);
5409
+ synth.addEventListener("voiceschanged", onVoicesChanged);
5410
+ });
5411
+ }
5412
+ /**
5413
+ * Find a voice by case-insensitive name: exact match, then partial match, then language-prefix match (hyphenated or short input only).
5414
+ */
5415
+ findVoice(name) {
5416
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5417
+ return null;
4440
5418
  }
4441
- let audioBuffer;
4442
- try {
4443
- audioBuffer = base64ToArrayBuffer(audioBase64);
4444
- } catch (err) {
4445
- this.log("Failed to decode base64 audio", err);
4446
- return;
5419
+ const voices = window.speechSynthesis.getVoices();
5420
+ const lowerName = name.toLowerCase();
5421
+ const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
5422
+ if (exact) return exact;
5423
+ const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
5424
+ if (partial) return partial;
5425
+ if (lowerName.includes("-") || lowerName.length <= 5) {
5426
+ const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
5427
+ if (langMatch) return langMatch;
4447
5428
  }
4448
- const audioEvent = {
4449
- audio: audioBuffer,
4450
- isFinal,
4451
- timestamp: Date.now()
4452
- };
4453
- this.log(
4454
- isFinal ? "Final audio chunk:" : "Audio chunk:",
4455
- `${audioBuffer.byteLength} bytes`
4456
- );
4457
- this.emitAudio(audioEvent);
5429
+ return null;
4458
5430
  }
4459
- // -----------------------------------------------------------------------
5431
+ // -------------------------------------------------------------------------
4460
5432
  // Subscriber notification
4461
- // -----------------------------------------------------------------------
5433
+ // -------------------------------------------------------------------------
4462
5434
  /**
4463
5435
  * Emit an audio event to all registered callbacks.
4464
5436
  *
@@ -4470,41 +5442,30 @@ var ElevenLabsTTS = class {
4470
5442
  try {
4471
5443
  cb(event);
4472
5444
  } catch (err) {
4473
- console.error(LOG_PREFIX7, "Audio callback threw:", err);
5445
+ console.error(LOG_PREFIX10, "Audio callback threw:", err);
4474
5446
  }
4475
5447
  }
4476
5448
  }
4477
- // -----------------------------------------------------------------------
4478
- // URL building
4479
- // -----------------------------------------------------------------------
4480
- /** Build the ElevenLabs streaming TTS endpoint URL. */
4481
- buildUrl() {
4482
- const params = new URLSearchParams({
4483
- model_id: this.modelId
4484
- });
4485
- return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4486
- }
4487
- // -----------------------------------------------------------------------
5449
+ // -------------------------------------------------------------------------
4488
5450
  // Cleanup
4489
- // -----------------------------------------------------------------------
4490
- /** Reset internal state after disconnection. */
5451
+ // -------------------------------------------------------------------------
5452
+ /** Reset internal state. */
4491
5453
  cleanup() {
4492
5454
  this._connected = false;
4493
- this.bosSent = false;
4494
5455
  }
4495
- // -----------------------------------------------------------------------
5456
+ // -------------------------------------------------------------------------
4496
5457
  // Logging
4497
- // -----------------------------------------------------------------------
5458
+ // -------------------------------------------------------------------------
4498
5459
  /** Conditional debug logging. */
4499
5460
  log(...args) {
4500
5461
  if (this.debugEnabled) {
4501
- console.debug(LOG_PREFIX7, ...args);
5462
+ console.debug(LOG_PREFIX10, ...args);
4502
5463
  }
4503
5464
  }
4504
5465
  };
4505
5466
 
4506
5467
  // src/voice/index.ts
4507
- var LOG_PREFIX8 = "[GuideKit:Voice]";
5468
+ var LOG_PREFIX11 = "[GuideKit:Voice]";
4508
5469
  var JITTER_BUFFER_MS = 150;
4509
5470
  var ECHO_WINDOW_MS = 3e3;
4510
5471
  var ECHO_OVERLAP_THRESHOLD = 0.6;
@@ -4612,17 +5573,42 @@ var VoicePipeline = class {
4612
5573
  cause: err instanceof Error ? err : void 0
4613
5574
  });
4614
5575
  }
4615
- this._stt = new DeepgramSTT({
4616
- apiKey: this._sttConfig.apiKey,
4617
- model: this._sttConfig.model,
4618
- debug: this._debug
4619
- });
4620
- this._tts = new ElevenLabsTTS({
4621
- apiKey: this._ttsConfig.apiKey,
4622
- voiceId: this._ttsConfig.voiceId,
4623
- modelId: this._ttsConfig.modelId,
4624
- debug: this._debug
4625
- });
5576
+ if (this._sttConfig.provider === "deepgram") {
5577
+ this._stt = new DeepgramSTT({
5578
+ apiKey: this._sttConfig.apiKey,
5579
+ model: this._sttConfig.model,
5580
+ debug: this._debug
5581
+ });
5582
+ } else if (this._sttConfig.provider === "elevenlabs") {
5583
+ this._stt = new ElevenLabsSTT({
5584
+ apiKey: this._sttConfig.apiKey,
5585
+ language: this._sttConfig.language,
5586
+ debug: this._debug
5587
+ });
5588
+ } else {
5589
+ this._stt = new WebSpeechSTT({
5590
+ language: this._sttConfig.language,
5591
+ continuous: this._sttConfig.continuous,
5592
+ interimResults: this._sttConfig.interimResults,
5593
+ debug: this._debug
5594
+ });
5595
+ }
5596
+ if (this._ttsConfig.provider === "elevenlabs") {
5597
+ this._tts = new ElevenLabsTTS({
5598
+ apiKey: this._ttsConfig.apiKey,
5599
+ voiceId: this._ttsConfig.voiceId,
5600
+ modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
5601
+ debug: this._debug
5602
+ });
5603
+ } else {
5604
+ this._tts = new WebSpeechTTS({
5605
+ voice: this._ttsConfig.voice,
5606
+ rate: this._ttsConfig.rate,
5607
+ pitch: this._ttsConfig.pitch,
5608
+ language: this._ttsConfig.language,
5609
+ debug: this._debug
5610
+ });
5611
+ }
4626
5612
  this._log("Initialization complete");
4627
5613
  }
4628
5614
  // ────────────────────────────────────────────────────────────────────
@@ -4762,10 +5748,11 @@ var VoicePipeline = class {
4762
5748
  // ────────────────────────────────────────────────────────────────────
4763
5749
  // speak()
4764
5750
  // ────────────────────────────────────────────────────────────────────
4765
- /** Speak text via ElevenLabs TTS. */
5751
+ /** Speak text via TTS (ElevenLabs or Web Speech API). */
4766
5752
  async speak(text) {
4767
5753
  if (this._destroyed || !text.trim()) return;
4768
- if (!this._tts || !this._audioContext) {
5754
+ const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
5755
+ if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
4769
5756
  this._log("TTS or AudioContext not available \u2014 cannot speak");
4770
5757
  this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
4771
5758
  this._setState("idle");
@@ -4809,11 +5796,24 @@ var VoicePipeline = class {
4809
5796
  }
4810
5797
  resolve();
4811
5798
  };
4812
- this._unsubTTSAudio = this._tts.onAudio((event) => {
4813
- this._handleTTSAudio(event, done);
4814
- });
4815
- this._tts.speak(text);
4816
- this._tts.flush();
5799
+ if (isWebSpeechTTS) {
5800
+ this._unsubTTSAudio = this._tts.onAudio(
5801
+ (event) => {
5802
+ if (event.isFinal) {
5803
+ done();
5804
+ }
5805
+ }
5806
+ );
5807
+ this._tts.speak(text);
5808
+ } else {
5809
+ this._unsubTTSAudio = this._tts.onAudio(
5810
+ (event) => {
5811
+ this._handleTTSAudio(event, done);
5812
+ }
5813
+ );
5814
+ this._tts.speak(text);
5815
+ this._tts.flush();
5816
+ }
4817
5817
  });
4818
5818
  }
4819
5819
  // ────────────────────────────────────────────────────────────────────
@@ -4842,7 +5842,9 @@ var VoicePipeline = class {
4842
5842
  this._pendingLLMAbort.abort();
4843
5843
  this._pendingLLMAbort = null;
4844
5844
  }
4845
- if (this._tts?.isConnected) {
5845
+ if (this._tts instanceof WebSpeechTTS) {
5846
+ this._tts.stop();
5847
+ } else if (this._tts?.isConnected) {
4846
5848
  this._tts.close();
4847
5849
  }
4848
5850
  }
@@ -4931,7 +5933,7 @@ var VoicePipeline = class {
4931
5933
  try {
4932
5934
  cb(next, prev);
4933
5935
  } catch (err) {
4934
- console.error(LOG_PREFIX8, "State change callback threw:", err);
5936
+ console.error(LOG_PREFIX11, "State change callback threw:", err);
4935
5937
  }
4936
5938
  }
4937
5939
  }
@@ -5062,7 +6064,7 @@ var VoicePipeline = class {
5062
6064
  try {
5063
6065
  cb(text, isFinal);
5064
6066
  } catch (err) {
5065
- console.error(LOG_PREFIX8, "Transcript callback threw:", err);
6067
+ console.error(LOG_PREFIX11, "Transcript callback threw:", err);
5066
6068
  }
5067
6069
  }
5068
6070
  if (isFinal && this._state === "listening") {
@@ -5165,8 +6167,14 @@ var VoicePipeline = class {
5165
6167
  * sequential playback via AudioBufferSourceNode.
5166
6168
  */
5167
6169
  _decodeAndSchedule(audioData, onDone) {
6170
+ let onDoneCalled = false;
6171
+ const safeOnDone = onDone ? () => {
6172
+ if (onDoneCalled) return;
6173
+ onDoneCalled = true;
6174
+ onDone();
6175
+ } : void 0;
5168
6176
  if (!this._audioContext || this._state !== "speaking") {
5169
- onDone?.();
6177
+ safeOnDone?.();
5170
6178
  return;
5171
6179
  }
5172
6180
  const ctx = this._audioContext;
@@ -5175,7 +6183,7 @@ var VoicePipeline = class {
5175
6183
  copy,
5176
6184
  (decodedBuffer) => {
5177
6185
  if (this._state !== "speaking" || !this._audioContext) {
5178
- onDone?.();
6186
+ safeOnDone?.();
5179
6187
  return;
5180
6188
  }
5181
6189
  const source = ctx.createBufferSource();
@@ -5188,8 +6196,8 @@ var VoicePipeline = class {
5188
6196
  if (this._lastScheduledSource === source) {
5189
6197
  this._lastScheduledSource = null;
5190
6198
  }
5191
- if (onDone) {
5192
- onDone();
6199
+ if (safeOnDone) {
6200
+ safeOnDone();
5193
6201
  }
5194
6202
  };
5195
6203
  const now = ctx.currentTime;
@@ -5205,7 +6213,7 @@ var VoicePipeline = class {
5205
6213
  },
5206
6214
  (err) => {
5207
6215
  this._log("Failed to decode audio chunk:", err);
5208
- onDone?.();
6216
+ safeOnDone?.();
5209
6217
  }
5210
6218
  );
5211
6219
  }
@@ -5264,13 +6272,13 @@ var VoicePipeline = class {
5264
6272
  // ════════════════════════════════════════════════════════════════════
5265
6273
  _log(...args) {
5266
6274
  if (this._debug) {
5267
- console.debug(LOG_PREFIX8, ...args);
6275
+ console.debug(LOG_PREFIX11, ...args);
5268
6276
  }
5269
6277
  }
5270
6278
  };
5271
6279
 
5272
6280
  // src/visual/index.ts
5273
- var LOG_PREFIX9 = "[GuideKit:Visual]";
6281
+ var LOG_PREFIX12 = "[GuideKit:Visual]";
5274
6282
  var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
5275
6283
  var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
5276
6284
  var DEFAULT_ANIMATION_DURATION = 300;
@@ -6187,16 +7195,16 @@ var VisualGuidance = class {
6187
7195
  if (!this.debug) return;
6188
7196
  if (typeof console !== "undefined") {
6189
7197
  if (data) {
6190
- console.log(`${LOG_PREFIX9} ${message}`, data);
7198
+ console.log(`${LOG_PREFIX12} ${message}`, data);
6191
7199
  } else {
6192
- console.log(`${LOG_PREFIX9} ${message}`);
7200
+ console.log(`${LOG_PREFIX12} ${message}`);
6193
7201
  }
6194
7202
  }
6195
7203
  }
6196
7204
  };
6197
7205
 
6198
7206
  // src/awareness/index.ts
6199
- var LOG_PREFIX10 = "[GuideKit:Awareness]";
7207
+ var LOG_PREFIX13 = "[GuideKit:Awareness]";
6200
7208
  var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
6201
7209
  var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
6202
7210
  var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
@@ -6558,13 +7566,13 @@ var AwarenessSystem = class {
6558
7566
  /** Conditional debug logging. */
6559
7567
  log(...args) {
6560
7568
  if (this.debugEnabled) {
6561
- console.debug(LOG_PREFIX10, ...args);
7569
+ console.debug(LOG_PREFIX13, ...args);
6562
7570
  }
6563
7571
  }
6564
7572
  };
6565
7573
 
6566
7574
  // src/awareness/proactive.ts
6567
- var LOG_PREFIX11 = "[GuideKit:Proactive]";
7575
+ var LOG_PREFIX14 = "[GuideKit:Proactive]";
6568
7576
  var STORAGE_KEY = "guidekit:visited";
6569
7577
  var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
6570
7578
  var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
@@ -6602,7 +7610,7 @@ var ProactiveTriggerEngine = class {
6602
7610
  set quietMode(value) {
6603
7611
  this._quietMode = value;
6604
7612
  if (this.debug) {
6605
- console.debug(LOG_PREFIX11, `Quiet mode ${value ? "enabled" : "disabled"}`);
7613
+ console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
6606
7614
  }
6607
7615
  }
6608
7616
  // ---- Lifecycle -----------------------------------------------------------
@@ -6632,7 +7640,7 @@ var ProactiveTriggerEngine = class {
6632
7640
  })
6633
7641
  );
6634
7642
  if (this.debug) {
6635
- console.debug(LOG_PREFIX11, "Started \u2014 subscribed to awareness & dom events");
7643
+ console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
6636
7644
  }
6637
7645
  }
6638
7646
  /** Unsubscribe all bus listeners and clear internal state. */
@@ -6647,7 +7655,7 @@ var ProactiveTriggerEngine = class {
6647
7655
  this.formTimers.clear();
6648
7656
  this.started = false;
6649
7657
  if (this.debug) {
6650
- console.debug(LOG_PREFIX11, "Stopped \u2014 all listeners removed");
7658
+ console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
6651
7659
  }
6652
7660
  }
6653
7661
  /** Alias for {@link stop}. */
@@ -6682,7 +7690,7 @@ var ProactiveTriggerEngine = class {
6682
7690
  }, FORM_ABANDON_MS);
6683
7691
  this.formTimers.set(formSelector, timer);
6684
7692
  if (this.debug) {
6685
- console.debug(LOG_PREFIX11, `Form interaction started: ${formSelector}`);
7693
+ console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
6686
7694
  }
6687
7695
  }
6688
7696
  /** Reset all cooldowns and internal tracking state (useful for testing). */
@@ -6696,7 +7704,7 @@ var ProactiveTriggerEngine = class {
6696
7704
  }
6697
7705
  this.formTimers.clear();
6698
7706
  if (this.debug) {
6699
- console.debug(LOG_PREFIX11, "All cooldowns and state reset");
7707
+ console.debug(LOG_PREFIX14, "All cooldowns and state reset");
6700
7708
  }
6701
7709
  }
6702
7710
  // ---- Internal handlers ---------------------------------------------------
@@ -6713,22 +7721,23 @@ var ProactiveTriggerEngine = class {
6713
7721
  message: "First-time visitor detected. Show a visual greeting (no audio)."
6714
7722
  }, "greeting");
6715
7723
  if (this.debug) {
6716
- console.debug(LOG_PREFIX11, "First visit \u2014 greeting triggered");
7724
+ console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
6717
7725
  }
6718
7726
  return;
6719
7727
  }
6720
7728
  const visitedAt = parseInt(visited, 10);
6721
- if (!Number.isNaN(visitedAt)) {
6722
- const elapsed = Date.now() - visitedAt;
6723
- if (elapsed <= SEVEN_DAYS_MS && this.debug) {
6724
- console.debug(LOG_PREFIX11, "Return visitor within 7 days \u2014 silent");
6725
- } else if (this.debug) {
6726
- console.debug(LOG_PREFIX11, "Return visitor after 7 days");
6727
- }
7729
+ if (Number.isNaN(visitedAt)) {
7730
+ return;
7731
+ }
7732
+ const elapsed = Date.now() - visitedAt;
7733
+ if (elapsed <= SEVEN_DAYS_MS && this.debug) {
7734
+ console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
7735
+ } else if (this.debug) {
7736
+ console.debug(LOG_PREFIX14, "Return visitor after 7 days");
6728
7737
  }
6729
7738
  } catch {
6730
7739
  if (this.debug) {
6731
- console.warn(LOG_PREFIX11, "localStorage unavailable \u2014 skipping greeting check");
7740
+ console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
6732
7741
  }
6733
7742
  }
6734
7743
  }
@@ -6746,7 +7755,7 @@ var ProactiveTriggerEngine = class {
6746
7755
  const count = this.dwellCounts.get(sectionId) ?? 0;
6747
7756
  if (count >= DWELL_COOLDOWNS.length + 1) {
6748
7757
  if (this.debug) {
6749
- console.debug(LOG_PREFIX11, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
7758
+ console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
6750
7759
  }
6751
7760
  return;
6752
7761
  }
@@ -6756,7 +7765,7 @@ var ProactiveTriggerEngine = class {
6756
7765
  const lastFired = this.cooldowns.get(key) ?? 0;
6757
7766
  if (Date.now() - lastFired < cooldownMs) {
6758
7767
  if (this.debug) {
6759
- console.debug(LOG_PREFIX11, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
7768
+ console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
6760
7769
  }
6761
7770
  return;
6762
7771
  }
@@ -6772,7 +7781,7 @@ var ProactiveTriggerEngine = class {
6772
7781
  const sectionKey = selector;
6773
7782
  if (this.frustrationFired.has(sectionKey)) {
6774
7783
  if (this.debug) {
6775
- console.debug(LOG_PREFIX11, `Frustration already fired for "${selector}" \u2014 suppressed`);
7784
+ console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
6776
7785
  }
6777
7786
  return;
6778
7787
  }
@@ -6788,7 +7797,7 @@ var ProactiveTriggerEngine = class {
6788
7797
  const key = "navigation-commentary";
6789
7798
  if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
6790
7799
  if (this.debug) {
6791
- console.debug(LOG_PREFIX11, "Navigation cooldown active \u2014 suppressed");
7800
+ console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
6792
7801
  }
6793
7802
  return;
6794
7803
  }
@@ -6811,7 +7820,7 @@ var ProactiveTriggerEngine = class {
6811
7820
  fireTrigger(partial, cooldownKey) {
6812
7821
  if (this._quietMode) {
6813
7822
  if (this.debug) {
6814
- console.debug(LOG_PREFIX11, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
7823
+ console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
6815
7824
  }
6816
7825
  return;
6817
7826
  }
@@ -6821,13 +7830,13 @@ var ProactiveTriggerEngine = class {
6821
7830
  };
6822
7831
  this.cooldowns.set(cooldownKey, trigger.timestamp);
6823
7832
  if (this.debug) {
6824
- console.debug(LOG_PREFIX11, "Trigger fired:", trigger.type, trigger);
7833
+ console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
6825
7834
  }
6826
7835
  if (this.onTrigger) {
6827
7836
  try {
6828
7837
  this.onTrigger(trigger);
6829
7838
  } catch (err) {
6830
- console.error(LOG_PREFIX11, "onTrigger callback error:", err);
7839
+ console.error(LOG_PREFIX14, "onTrigger callback error:", err);
6831
7840
  }
6832
7841
  }
6833
7842
  }
@@ -6840,7 +7849,7 @@ var ProactiveTriggerEngine = class {
6840
7849
  };
6841
7850
 
6842
7851
  // src/llm/rate-limiter.ts
6843
- var LOG_PREFIX12 = "[GuideKit:RateLimiter]";
7852
+ var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
6844
7853
  var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
6845
7854
  var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
6846
7855
  var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
@@ -6935,7 +7944,19 @@ var RateLimiter = class {
6935
7944
  get sttMinutesUsed() {
6936
7945
  let totalMs = this.sttMs;
6937
7946
  if (this.sttStartedAt !== null) {
6938
- totalMs += Date.now() - this.sttStartedAt;
7947
+ const activeMs = Date.now() - this.sttStartedAt;
7948
+ const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
7949
+ const maxActiveMs = maxSessionMs * 2;
7950
+ if (activeMs > maxActiveMs) {
7951
+ console.warn(
7952
+ `${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
7953
+ );
7954
+ this.sttMs += maxActiveMs;
7955
+ this.sttStartedAt = null;
7956
+ totalMs = this.sttMs;
7957
+ } else {
7958
+ totalMs += activeMs;
7959
+ }
6939
7960
  }
6940
7961
  return totalMs / 6e4;
6941
7962
  }
@@ -7007,7 +8028,7 @@ var RateLimiter = class {
7007
8028
  }
7008
8029
  log(...args) {
7009
8030
  if (this.debug) {
7010
- console.debug(LOG_PREFIX12, ...args);
8031
+ console.debug(LOG_PREFIX15, ...args);
7011
8032
  }
7012
8033
  }
7013
8034
  };
@@ -7240,7 +8261,7 @@ var BUILTIN_LOCALES = {
7240
8261
  pt
7241
8262
  };
7242
8263
  var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
7243
- var LOG_PREFIX13 = "[GuideKit:I18n]";
8264
+ var LOG_PREFIX16 = "[GuideKit:I18n]";
7244
8265
  function isSupportedLocale(code) {
7245
8266
  return SUPPORTED_LOCALE_CODES.has(code);
7246
8267
  }
@@ -7278,7 +8299,7 @@ var I18n = class {
7278
8299
  this.strings = strings;
7279
8300
  this.resolvedLocale = resolvedLocale;
7280
8301
  if (this.debug) {
7281
- console.debug(`${LOG_PREFIX13} Initialized with locale "${this.resolvedLocale}"`);
8302
+ console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
7282
8303
  }
7283
8304
  }
7284
8305
  // -------------------------------------------------------------------------
@@ -7289,9 +8310,9 @@ var I18n = class {
7289
8310
  const value = this.strings[key];
7290
8311
  if (value === void 0) {
7291
8312
  if (this.debug) {
7292
- console.warn(`${LOG_PREFIX13} Missing translation key "${key}"`);
8313
+ console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
7293
8314
  }
7294
- return en[key] ?? key;
8315
+ return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
7295
8316
  }
7296
8317
  return value;
7297
8318
  }
@@ -7305,7 +8326,7 @@ var I18n = class {
7305
8326
  this.strings = strings;
7306
8327
  this.resolvedLocale = resolvedLocale;
7307
8328
  if (this.debug) {
7308
- console.debug(`${LOG_PREFIX13} Locale changed to "${this.resolvedLocale}"`);
8329
+ console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
7309
8330
  }
7310
8331
  }
7311
8332
  /** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
@@ -7325,7 +8346,7 @@ var I18n = class {
7325
8346
  if (locale === "auto") {
7326
8347
  const detected = detectLocaleFromDocument();
7327
8348
  if (this.debug) {
7328
- console.debug(`${LOG_PREFIX13} Auto-detected locale "${detected}"`);
8349
+ console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
7329
8350
  }
7330
8351
  return {
7331
8352
  strings: BUILTIN_LOCALES[detected],
@@ -7340,7 +8361,7 @@ var I18n = class {
7340
8361
  }
7341
8362
  if (this.debug) {
7342
8363
  console.warn(
7343
- `${LOG_PREFIX13} Unknown locale "${String(locale)}", falling back to "en"`
8364
+ `${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
7344
8365
  );
7345
8366
  }
7346
8367
  return {
@@ -7351,7 +8372,7 @@ var I18n = class {
7351
8372
  };
7352
8373
 
7353
8374
  // src/auth/token-manager.ts
7354
- var LOG_PREFIX14 = "[GuideKit:Auth]";
8375
+ var LOG_PREFIX17 = "[GuideKit:Auth]";
7355
8376
  var REFRESH_THRESHOLD = 0.8;
7356
8377
  var MAX_RETRY_ATTEMPTS = 3;
7357
8378
  var RETRY_BASE_MS = 1e3;
@@ -7630,7 +8651,7 @@ var TokenManager = class {
7630
8651
  }
7631
8652
  log(message) {
7632
8653
  if (this.debug) {
7633
- console.debug(`${LOG_PREFIX14} ${message}`);
8654
+ console.debug(`${LOG_PREFIX17} ${message}`);
7634
8655
  }
7635
8656
  }
7636
8657
  };
@@ -7762,6 +8783,11 @@ var GuideKitCore = class {
7762
8783
  debug: this._debug
7763
8784
  });
7764
8785
  await this.tokenManager.start();
8786
+ if (!this._options.llm) {
8787
+ console.warn(
8788
+ "[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
8789
+ );
8790
+ }
7765
8791
  this.resourceManager.register({
7766
8792
  name: "token-manager",
7767
8793
  cleanup: () => this.tokenManager?.destroy()
@@ -7884,21 +8910,50 @@ var GuideKitCore = class {
7884
8910
  }
7885
8911
  });
7886
8912
  this.registerBuiltinTools();
7887
- if (this._options.stt && this._options.tts) {
7888
- const sttConfig = this._options.stt;
7889
- const ttsConfig = this._options.tts;
7890
- if (sttConfig.provider === "deepgram" && ttsConfig.provider === "elevenlabs") {
8913
+ {
8914
+ const sttConfig = this._options.stt ?? { provider: "web-speech" };
8915
+ const ttsConfig = this._options.tts ?? { provider: "web-speech" };
8916
+ let voiceSttConfig;
8917
+ let voiceTtsConfig;
8918
+ if (sttConfig.provider === "deepgram") {
8919
+ voiceSttConfig = {
8920
+ provider: "deepgram",
8921
+ apiKey: sttConfig.apiKey,
8922
+ model: sttConfig.model
8923
+ };
8924
+ } else if (sttConfig.provider === "elevenlabs") {
8925
+ voiceSttConfig = {
8926
+ provider: "elevenlabs",
8927
+ apiKey: sttConfig.apiKey,
8928
+ language: sttConfig.language
8929
+ };
8930
+ } else {
8931
+ voiceSttConfig = {
8932
+ provider: "web-speech",
8933
+ language: sttConfig.language,
8934
+ continuous: sttConfig.continuous,
8935
+ interimResults: sttConfig.interimResults
8936
+ };
8937
+ }
8938
+ if (ttsConfig.provider === "elevenlabs") {
8939
+ voiceTtsConfig = {
8940
+ provider: "elevenlabs",
8941
+ apiKey: ttsConfig.apiKey,
8942
+ voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
8943
+ };
8944
+ } else {
8945
+ voiceTtsConfig = {
8946
+ provider: "web-speech",
8947
+ voice: ttsConfig.voice,
8948
+ rate: ttsConfig.rate,
8949
+ pitch: ttsConfig.pitch,
8950
+ language: ttsConfig.language
8951
+ };
8952
+ }
8953
+ try {
7891
8954
  this.voicePipeline = new VoicePipeline({
7892
- sttConfig: {
7893
- provider: "deepgram",
7894
- apiKey: sttConfig.apiKey,
7895
- model: "model" in sttConfig ? sttConfig.model : void 0
7896
- },
7897
- ttsConfig: {
7898
- provider: "elevenlabs",
7899
- apiKey: ttsConfig.apiKey,
7900
- voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
7901
- },
8955
+ sttConfig: voiceSttConfig,
8956
+ ttsConfig: voiceTtsConfig,
7902
8957
  debug: this._debug
7903
8958
  });
7904
8959
  this.voicePipeline.onStateChange((state, previous) => {
@@ -7931,6 +8986,11 @@ var GuideKitCore = class {
7931
8986
  name: "voice-pipeline",
7932
8987
  cleanup: () => this.voicePipeline?.destroy()
7933
8988
  });
8989
+ } catch (_err) {
8990
+ this.voicePipeline = null;
8991
+ if (this._debug) {
8992
+ console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
8993
+ }
7934
8994
  }
7935
8995
  }
7936
8996
  const session = this.contextManager.restoreSession();
@@ -8055,7 +9115,7 @@ var GuideKitCore = class {
8055
9115
  return responseText;
8056
9116
  } catch (error) {
8057
9117
  const err = error instanceof GuideKitError ? error : new GuideKitError({
8058
- code: "UNKNOWN",
9118
+ code: ErrorCodes.UNKNOWN,
8059
9119
  message: error instanceof Error ? error.message : "Unknown error",
8060
9120
  recoverable: false,
8061
9121
  suggestion: "Check the console for details."
@@ -8311,172 +9371,11 @@ var GuideKitCore = class {
8311
9371
  };
8312
9372
  }
8313
9373
  /**
8314
- * Register all built-in tool handlers with the ToolExecutor.
8315
- * Called once during init() after VisualGuidance and all subsystems are ready.
9374
+ * Unified built-in tool specifications — single source of truth for both
9375
+ * tool definitions (sent to LLM) and handler registration.
8316
9376
  */
8317
- registerBuiltinTools() {
8318
- if (!this.toolExecutor) return;
8319
- this.toolExecutor.registerTool({
8320
- name: "highlight",
8321
- execute: async (args) => {
8322
- const sectionId = args.sectionId;
8323
- const selector = args.selector;
8324
- const tooltip = args.tooltip;
8325
- const position = args.position;
8326
- const result = this.highlight({ sectionId, selector, tooltip, position });
8327
- return { success: result };
8328
- }
8329
- });
8330
- this.toolExecutor.registerTool({
8331
- name: "dismissHighlight",
8332
- execute: async () => {
8333
- this.dismissHighlight();
8334
- return { success: true };
8335
- }
8336
- });
8337
- this.toolExecutor.registerTool({
8338
- name: "scrollToSection",
8339
- execute: async (args) => {
8340
- const sectionId = args.sectionId;
8341
- const offset = args.offset;
8342
- this.scrollToSection(sectionId, offset);
8343
- return { success: true };
8344
- }
8345
- });
8346
- this.toolExecutor.registerTool({
8347
- name: "navigate",
8348
- execute: async (args) => {
8349
- const href = args.href;
8350
- const result = await this.navigate(href);
8351
- return { success: result, navigatedTo: result ? href : null };
8352
- }
8353
- });
8354
- this.toolExecutor.registerTool({
8355
- name: "startTour",
8356
- execute: async (args) => {
8357
- const sectionIds = args.sectionIds;
8358
- const mode = args.mode ?? "manual";
8359
- this.startTour(sectionIds, mode);
8360
- return { success: true, steps: sectionIds.length };
8361
- }
8362
- });
8363
- this.toolExecutor.registerTool({
8364
- name: "readPageContent",
8365
- execute: async (args) => {
8366
- const sectionId = args.sectionId;
8367
- const query = args.query;
8368
- const model = this._currentPageModel;
8369
- if (!model) return { error: "No page model available" };
8370
- if (sectionId) {
8371
- const section = model.sections.find((s) => s.id === sectionId);
8372
- if (section) {
8373
- const contentMapResult = await this.contextManager.getContent(sectionId);
8374
- return {
8375
- sectionId: section.id,
8376
- label: section.label,
8377
- summary: section.summary,
8378
- contentMap: contentMapResult
8379
- };
8380
- }
8381
- return { error: `Section "${sectionId}" not found` };
8382
- }
8383
- if (query) {
8384
- const queryLower = query.toLowerCase();
8385
- const matches = model.sections.filter(
8386
- (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
8387
- );
8388
- return {
8389
- query,
8390
- results: matches.slice(0, 5).map((s) => ({
8391
- sectionId: s.id,
8392
- label: s.label,
8393
- snippet: s.summary?.slice(0, 200)
8394
- }))
8395
- };
8396
- }
8397
- return { error: "Provide either sectionId or query" };
8398
- }
8399
- });
8400
- this.toolExecutor.registerTool({
8401
- name: "getVisibleSections",
8402
- execute: async () => {
8403
- const model = this._currentPageModel;
8404
- if (!model) return { sections: [] };
8405
- return {
8406
- sections: model.sections.slice(0, 10).map((s) => ({
8407
- id: s.id,
8408
- label: s.label,
8409
- selector: s.selector,
8410
- score: s.score
8411
- }))
8412
- };
8413
- }
8414
- });
8415
- this.toolExecutor.registerTool({
8416
- name: "clickElement",
8417
- execute: async (args) => {
8418
- if (typeof document === "undefined") return { success: false, error: "Not in browser" };
8419
- const selector = args.selector;
8420
- const el = document.querySelector(selector);
8421
- if (!el) return { success: false, error: `Element not found: ${selector}` };
8422
- if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
8423
- const clickableRules = this._options.options?.clickableSelectors;
8424
- const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
8425
- try {
8426
- return el.matches(pattern);
8427
- } catch {
8428
- return selector === pattern;
8429
- }
8430
- }) ?? false;
8431
- if (!isInDevAllowList) {
8432
- const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
8433
- try {
8434
- return el.matches(pattern);
8435
- } catch {
8436
- return false;
8437
- }
8438
- });
8439
- if (defaultDenied) {
8440
- return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
8441
- }
8442
- }
8443
- if (clickableRules?.deny?.length) {
8444
- const denied = clickableRules.deny.some((pattern) => {
8445
- try {
8446
- return el.matches(pattern);
8447
- } catch {
8448
- return selector === pattern;
8449
- }
8450
- });
8451
- if (denied) {
8452
- return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
8453
- }
8454
- }
8455
- if (clickableRules?.allow?.length && !isInDevAllowList) {
8456
- return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
8457
- }
8458
- el.click();
8459
- return { success: true };
8460
- }
8461
- });
8462
- this.toolExecutor.registerTool({
8463
- name: "executeCustomAction",
8464
- execute: async (args) => {
8465
- const actionId = args.actionId;
8466
- const params = args.params ?? {};
8467
- const action = this.customActions.get(actionId);
8468
- if (!action) return { error: `Unknown action: ${actionId}` };
8469
- try {
8470
- const result = await action.handler(params);
8471
- return { success: true, result };
8472
- } catch (err) {
8473
- return { success: false, error: err instanceof Error ? err.message : String(err) };
8474
- }
8475
- }
8476
- });
8477
- }
8478
- getToolDefinitions() {
8479
- const builtinTools = [
9377
+ getBuiltinToolSpecs() {
9378
+ return [
8480
9379
  {
8481
9380
  name: "highlight",
8482
9381
  description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
@@ -8486,13 +9385,27 @@ var GuideKitCore = class {
8486
9385
  tooltip: { type: "string", description: "Text to show in tooltip" },
8487
9386
  position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
8488
9387
  },
8489
- schemaVersion: 1
9388
+ required: [],
9389
+ schemaVersion: 1,
9390
+ execute: async (args) => {
9391
+ const sectionId = args.sectionId;
9392
+ const selector = args.selector;
9393
+ const tooltip = args.tooltip;
9394
+ const position = args.position;
9395
+ const result = this.highlight({ sectionId, selector, tooltip, position });
9396
+ return { success: result };
9397
+ }
8490
9398
  },
8491
9399
  {
8492
9400
  name: "dismissHighlight",
8493
9401
  description: "Remove the current spotlight overlay.",
8494
9402
  parameters: {},
8495
- schemaVersion: 1
9403
+ required: [],
9404
+ schemaVersion: 1,
9405
+ execute: async () => {
9406
+ this.dismissHighlight();
9407
+ return { success: true };
9408
+ }
8496
9409
  },
8497
9410
  {
8498
9411
  name: "scrollToSection",
@@ -8501,7 +9414,14 @@ var GuideKitCore = class {
8501
9414
  sectionId: { type: "string", description: "ID of the section to scroll to" },
8502
9415
  offset: { type: "number", description: "Pixel offset for sticky headers" }
8503
9416
  },
8504
- schemaVersion: 1
9417
+ required: ["sectionId"],
9418
+ schemaVersion: 1,
9419
+ execute: async (args) => {
9420
+ const sectionId = args.sectionId;
9421
+ const offset = args.offset;
9422
+ this.scrollToSection(sectionId, offset);
9423
+ return { success: true };
9424
+ }
8505
9425
  },
8506
9426
  {
8507
9427
  name: "navigate",
@@ -8509,7 +9429,13 @@ var GuideKitCore = class {
8509
9429
  parameters: {
8510
9430
  href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
8511
9431
  },
8512
- schemaVersion: 1
9432
+ required: ["href"],
9433
+ schemaVersion: 1,
9434
+ execute: async (args) => {
9435
+ const href = args.href;
9436
+ const result = await this.navigate(href);
9437
+ return { success: result, navigatedTo: result ? href : null };
9438
+ }
8513
9439
  },
8514
9440
  {
8515
9441
  name: "startTour",
@@ -8518,7 +9444,14 @@ var GuideKitCore = class {
8518
9444
  sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
8519
9445
  mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
8520
9446
  },
8521
- schemaVersion: 1
9447
+ required: ["sectionIds"],
9448
+ schemaVersion: 1,
9449
+ execute: async (args) => {
9450
+ const sectionIds = args.sectionIds;
9451
+ const mode = args.mode ?? "manual";
9452
+ this.startTour(sectionIds, mode);
9453
+ return { success: true, steps: sectionIds.length };
9454
+ }
8522
9455
  },
8523
9456
  {
8524
9457
  name: "readPageContent",
@@ -8527,13 +9460,61 @@ var GuideKitCore = class {
8527
9460
  sectionId: { type: "string", description: "Section ID to read" },
8528
9461
  query: { type: "string", description: "Keyword to search for across sections" }
8529
9462
  },
8530
- schemaVersion: 1
9463
+ required: [],
9464
+ schemaVersion: 1,
9465
+ execute: async (args) => {
9466
+ const sectionId = args.sectionId;
9467
+ const query = args.query;
9468
+ const model = this._currentPageModel;
9469
+ if (!model) return { error: "No page model available" };
9470
+ if (sectionId) {
9471
+ const section = model.sections.find((s) => s.id === sectionId);
9472
+ if (section) {
9473
+ const contentMapResult = await this.contextManager.getContent(sectionId);
9474
+ return {
9475
+ sectionId: section.id,
9476
+ label: section.label,
9477
+ summary: section.summary,
9478
+ contentMap: contentMapResult
9479
+ };
9480
+ }
9481
+ return { error: `Section "${sectionId}" not found` };
9482
+ }
9483
+ if (query) {
9484
+ const queryLower = query.toLowerCase();
9485
+ const matches = model.sections.filter(
9486
+ (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
9487
+ );
9488
+ return {
9489
+ query,
9490
+ results: matches.slice(0, 5).map((s) => ({
9491
+ sectionId: s.id,
9492
+ label: s.label,
9493
+ snippet: s.summary?.slice(0, 200)
9494
+ }))
9495
+ };
9496
+ }
9497
+ return { error: "Provide either sectionId or query" };
9498
+ }
8531
9499
  },
8532
9500
  {
8533
9501
  name: "getVisibleSections",
8534
9502
  description: "Get the list of sections currently visible in the user viewport.",
8535
9503
  parameters: {},
8536
- schemaVersion: 1
9504
+ required: [],
9505
+ schemaVersion: 1,
9506
+ execute: async () => {
9507
+ const model = this._currentPageModel;
9508
+ if (!model) return { sections: [] };
9509
+ return {
9510
+ sections: model.sections.slice(0, 10).map((s) => ({
9511
+ id: s.id,
9512
+ label: s.label,
9513
+ selector: s.selector,
9514
+ score: s.score
9515
+ }))
9516
+ };
9517
+ }
8537
9518
  },
8538
9519
  {
8539
9520
  name: "clickElement",
@@ -8541,7 +9522,52 @@ var GuideKitCore = class {
8541
9522
  parameters: {
8542
9523
  selector: { type: "string", description: "CSS selector of the element to click" }
8543
9524
  },
8544
- schemaVersion: 1
9525
+ required: ["selector"],
9526
+ schemaVersion: 1,
9527
+ execute: async (args) => {
9528
+ if (typeof document === "undefined") return { success: false, error: "Not in browser" };
9529
+ const selector = args.selector;
9530
+ const el = document.querySelector(selector);
9531
+ if (!el) return { success: false, error: `Element not found: ${selector}` };
9532
+ if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
9533
+ const clickableRules = this._options.options?.clickableSelectors;
9534
+ const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
9535
+ try {
9536
+ return el.matches(pattern);
9537
+ } catch {
9538
+ return selector === pattern;
9539
+ }
9540
+ }) ?? false;
9541
+ if (!isInDevAllowList) {
9542
+ const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
9543
+ try {
9544
+ return el.matches(pattern);
9545
+ } catch {
9546
+ return false;
9547
+ }
9548
+ });
9549
+ if (defaultDenied) {
9550
+ return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
9551
+ }
9552
+ }
9553
+ if (clickableRules?.deny?.length) {
9554
+ const denied = clickableRules.deny.some((pattern) => {
9555
+ try {
9556
+ return el.matches(pattern);
9557
+ } catch {
9558
+ return selector === pattern;
9559
+ }
9560
+ });
9561
+ if (denied) {
9562
+ return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
9563
+ }
9564
+ }
9565
+ if (clickableRules?.allow?.length && !isInDevAllowList) {
9566
+ return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
9567
+ }
9568
+ el.click();
9569
+ return { success: true };
9570
+ }
8545
9571
  },
8546
9572
  {
8547
9573
  name: "executeCustomAction",
@@ -8550,9 +9576,37 @@ var GuideKitCore = class {
8550
9576
  actionId: { type: "string", description: "ID of the custom action" },
8551
9577
  params: { type: "object", description: "Parameters for the action" }
8552
9578
  },
8553
- schemaVersion: 1
9579
+ required: ["actionId"],
9580
+ schemaVersion: 1,
9581
+ execute: async (args) => {
9582
+ const actionId = args.actionId;
9583
+ const params = args.params ?? {};
9584
+ const action = this.customActions.get(actionId);
9585
+ if (!action) return { error: `Unknown action: ${actionId}` };
9586
+ try {
9587
+ const result = await action.handler(params);
9588
+ return { success: true, result };
9589
+ } catch (err) {
9590
+ return { success: false, error: err instanceof Error ? err.message : String(err) };
9591
+ }
9592
+ }
8554
9593
  }
8555
9594
  ];
9595
+ }
9596
+ /**
9597
+ * Register all built-in tool handlers with the ToolExecutor.
9598
+ * Called once during init() after VisualGuidance and all subsystems are ready.
9599
+ */
9600
+ registerBuiltinTools() {
9601
+ if (!this.toolExecutor) return;
9602
+ for (const spec of this.getBuiltinToolSpecs()) {
9603
+ this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
9604
+ }
9605
+ }
9606
+ getToolDefinitions() {
9607
+ const builtinTools = this.getBuiltinToolSpecs().map(
9608
+ ({ execute: _execute, ...def }) => def
9609
+ );
8556
9610
  for (const [actionId, action] of this.customActions) {
8557
9611
  builtinTools.push({
8558
9612
  name: `action_${actionId}`,
@@ -8595,6 +9649,9 @@ exports.TimeoutError = TimeoutError;
8595
9649
  exports.TokenManager = TokenManager;
8596
9650
  exports.ToolExecutor = ToolExecutor;
8597
9651
  exports.VisualGuidance = VisualGuidance;
9652
+ exports.VoicePipeline = VoicePipeline;
9653
+ exports.WebSpeechSTT = WebSpeechSTT;
9654
+ exports.WebSpeechTTS = WebSpeechTTS;
8598
9655
  exports.createEventBus = createEventBus;
8599
9656
  exports.isGuideKitError = isGuideKitError;
8600
9657
  //# sourceMappingURL=index.cjs.map