@guidekit/core 0.1.0-beta.1 → 0.1.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1091,8 +1091,8 @@ var DOMScanner = class {
1091
1091
  if (el.closest("[data-guidekit-ignore]")) return;
1092
1092
  const style = window.getComputedStyle(el);
1093
1093
  const position = style.position;
1094
- const zIndex = parseInt(style.zIndex, 10);
1095
- if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
1094
+ const zIndex = parseInt(style.zIndex, 10) || 0;
1095
+ if ((position === "fixed" || position === "absolute") && !Number.isNaN(zIndex) && zIndex >= 1e3) {
1096
1096
  const visible = isElementVisible(el);
1097
1097
  if (!visible) return;
1098
1098
  const overlayType = this.classifyOverlay(el, style);
@@ -1120,10 +1120,10 @@ var DOMScanner = class {
1120
1120
  return "dropdown";
1121
1121
  const width = parseFloat(style.width);
1122
1122
  const height = parseFloat(style.height);
1123
- if (typeof window !== "undefined" && !isNaN(width) && !isNaN(height) && width > window.innerWidth * 0.5 && height > window.innerHeight * 0.5) {
1123
+ if (typeof window !== "undefined" && !Number.isNaN(width) && !Number.isNaN(height) && width > window.innerWidth * 0.5 && height > window.innerHeight * 0.5) {
1124
1124
  return "modal";
1125
1125
  }
1126
- if (!isNaN(width) && width < 400) return "popover";
1126
+ if (!Number.isNaN(width) && width < 400) return "popover";
1127
1127
  return null;
1128
1128
  }
1129
1129
  // -------------------------------------------------------------------------
@@ -1748,7 +1748,9 @@ var ErrorCodes = {
1748
1748
  // Content
1749
1749
  CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
1750
1750
  // Privacy
1751
- PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
1751
+ PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
1752
+ // General
1753
+ UNKNOWN: "UNKNOWN"
1752
1754
  };
1753
1755
  var GuideKitError = class extends Error {
1754
1756
  code;
@@ -1837,356 +1839,9 @@ function isGuideKitError(error) {
1837
1839
  return error instanceof GuideKitError;
1838
1840
  }
1839
1841
 
1840
- // src/llm/openai-adapter.ts
1841
- var DEFAULT_OPENAI_MODEL = "gpt-4o";
1842
- var DEFAULT_TIMEOUT_MS = 15e3;
1843
- var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
1844
- var OpenAIAdapter = class {
1845
- apiKey;
1846
- model;
1847
- constructor(config) {
1848
- this.apiKey = config.apiKey;
1849
- this.model = config.model ?? DEFAULT_OPENAI_MODEL;
1850
- }
1851
- // -----------------------------------------------------------------------
1852
- // LLMProviderAdapter implementation
1853
- // -----------------------------------------------------------------------
1854
- /**
1855
- * Convert GuideKit tool definitions into OpenAI's `tools` format.
1856
- * Each tool is wrapped as `{ type: 'function', function: { name, description, parameters } }`.
1857
- */
1858
- formatTools(tools) {
1859
- if (tools.length === 0) return void 0;
1860
- return tools.map((tool) => ({
1861
- type: "function",
1862
- function: {
1863
- name: tool.name,
1864
- description: tool.description,
1865
- parameters: tool.parameters
1866
- }
1867
- }));
1868
- }
1869
- /**
1870
- * Convert an array of `ConversationTurn` objects into OpenAI's messages
1871
- * format with `role: 'user' | 'assistant'`.
1872
- */
1873
- formatConversation(history) {
1874
- return history.map((turn) => ({
1875
- role: turn.role,
1876
- content: turn.content
1877
- }));
1878
- }
1879
- /**
1880
- * Parse an OpenAI SSE streaming response into an async iterable of
1881
- * `TextChunk` and `ToolCall` objects.
1882
- *
1883
- * The OpenAI streaming endpoint sends each chunk as a JSON object
1884
- * prefixed by `data: `. The final line is `data: [DONE]`.
1885
- * Text content arrives in `choices[0].delta.content` and tool calls
1886
- * arrive in `choices[0].delta.tool_calls`.
1887
- */
1888
- async *parseResponse(stream) {
1889
- const reader = stream.getReader();
1890
- const decoder = new TextDecoder();
1891
- let buffer = "";
1892
- const pendingToolCalls = /* @__PURE__ */ new Map();
1893
- try {
1894
- while (true) {
1895
- const { done, value } = await reader.read();
1896
- if (done) break;
1897
- buffer += decoder.decode(value, { stream: true });
1898
- const lines = buffer.split("\n");
1899
- buffer = lines.pop() ?? "";
1900
- for (const line of lines) {
1901
- const trimmed = line.trim();
1902
- if (!trimmed.startsWith("data:")) continue;
1903
- const jsonStr = trimmed.slice(5).trim();
1904
- if (jsonStr === "" || jsonStr === "[DONE]") {
1905
- if (jsonStr === "[DONE]") {
1906
- yield* this.flushPendingToolCalls(pendingToolCalls);
1907
- yield { text: "", done: true };
1908
- }
1909
- continue;
1910
- }
1911
- let parsed;
1912
- try {
1913
- parsed = JSON.parse(jsonStr);
1914
- } catch {
1915
- continue;
1916
- }
1917
- yield* this.extractChunks(parsed, pendingToolCalls);
1918
- }
1919
- }
1920
- if (buffer.trim().startsWith("data:")) {
1921
- const jsonStr = buffer.trim().slice(5).trim();
1922
- if (jsonStr === "[DONE]") {
1923
- yield* this.flushPendingToolCalls(pendingToolCalls);
1924
- yield { text: "", done: true };
1925
- } else if (jsonStr !== "") {
1926
- try {
1927
- const parsed = JSON.parse(jsonStr);
1928
- yield* this.extractChunks(parsed, pendingToolCalls);
1929
- } catch {
1930
- }
1931
- }
1932
- }
1933
- yield* this.flushPendingToolCalls(pendingToolCalls);
1934
- } finally {
1935
- reader.releaseLock();
1936
- }
1937
- }
1938
- /**
1939
- * Format a tool result so it can be sent back to OpenAI as a
1940
- * `tool` role message with the `tool_call_id`.
1941
- */
1942
- formatToolResult(callId, result) {
1943
- return {
1944
- role: "tool",
1945
- tool_call_id: callId,
1946
- content: typeof result === "string" ? result : JSON.stringify(result)
1947
- };
1948
- }
1949
- // -----------------------------------------------------------------------
1950
- // Streaming request
1951
- // -----------------------------------------------------------------------
1952
- /**
1953
- * Build and execute a streaming request to the OpenAI Chat Completions API.
1954
- * Returns the raw `ReadableStream` for the response body together with
1955
- * the raw Response object.
1956
- */
1957
- async streamRequest(params) {
1958
- const messages = [
1959
- { role: "system", content: params.systemPrompt },
1960
- ...params.contents
1961
- ];
1962
- const body = {
1963
- model: this.model,
1964
- messages,
1965
- stream: true,
1966
- temperature: 0.7,
1967
- top_p: 0.95
1968
- };
1969
- if (params.tools) {
1970
- body.tools = params.tools;
1971
- }
1972
- const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
1973
- const controller = new AbortController();
1974
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
1975
- if (params.signal) {
1976
- params.signal.addEventListener(
1977
- "abort",
1978
- () => controller.abort(params.signal.reason),
1979
- { once: true }
1980
- );
1981
- }
1982
- let response;
1983
- try {
1984
- response = await fetch(OPENAI_CHAT_URL, {
1985
- method: "POST",
1986
- headers: {
1987
- "Content-Type": "application/json",
1988
- Authorization: `Bearer ${this.apiKey}`
1989
- },
1990
- body: JSON.stringify(body),
1991
- signal: controller.signal
1992
- });
1993
- } catch (error) {
1994
- clearTimeout(timeoutId);
1995
- if (error instanceof DOMException && error.name === "AbortError") {
1996
- if (params.signal?.aborted) {
1997
- throw error;
1998
- }
1999
- throw new TimeoutError({
2000
- code: ErrorCodes.TIMEOUT_LLM_RESPONSE,
2001
- message: `OpenAI request timed out after ${timeoutMs}ms`,
2002
- provider: "openai",
2003
- recoverable: true,
2004
- suggestion: "Try again or increase the timeout.",
2005
- operationName: "openai.chatCompletions",
2006
- timeoutMs
2007
- });
2008
- }
2009
- throw new NetworkError({
2010
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2011
- message: `Failed to connect to OpenAI API: ${error.message}`,
2012
- provider: "openai",
2013
- suggestion: "Check your network connection and try again.",
2014
- cause: error instanceof Error ? error : void 0
2015
- });
2016
- }
2017
- clearTimeout(timeoutId);
2018
- if (!response.ok) {
2019
- await this.handleHttpError(response);
2020
- }
2021
- if (!response.body) {
2022
- throw new NetworkError({
2023
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2024
- message: "OpenAI response body is null -- streaming unavailable.",
2025
- provider: "openai",
2026
- suggestion: "Retry the request."
2027
- });
2028
- }
2029
- return { stream: response.body, response };
2030
- }
2031
- // -----------------------------------------------------------------------
2032
- // Internal helpers
2033
- // -----------------------------------------------------------------------
2034
- /**
2035
- * Extract `TextChunk` and accumulate `ToolCall` data from a single parsed
2036
- * OpenAI SSE JSON object.
2037
- *
2038
- * OpenAI tool calls arrive incrementally: the first chunk for a tool call
2039
- * carries the `id` and `function.name`, while subsequent chunks append to
2040
- * `function.arguments`. We accumulate these in `pendingToolCalls` and only
2041
- * yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
2042
- * or when flushed.
2043
- */
2044
- *extractChunks(parsed, pendingToolCalls) {
2045
- const choices = parsed.choices;
2046
- if (!choices || choices.length === 0) return;
2047
- for (const choice of choices) {
2048
- const delta = choice.delta;
2049
- const finishReason = choice.finish_reason;
2050
- if (delta) {
2051
- if (typeof delta.content === "string" && delta.content !== "") {
2052
- yield {
2053
- text: delta.content,
2054
- done: false
2055
- };
2056
- }
2057
- const toolCallDeltas = delta.tool_calls;
2058
- if (toolCallDeltas) {
2059
- for (const tc of toolCallDeltas) {
2060
- const existing = pendingToolCalls.get(tc.index);
2061
- if (existing) {
2062
- if (tc.function?.arguments) {
2063
- existing.argumentsJson += tc.function.arguments;
2064
- }
2065
- } else {
2066
- pendingToolCalls.set(tc.index, {
2067
- id: tc.id ?? "",
2068
- name: tc.function?.name ?? "",
2069
- argumentsJson: tc.function?.arguments ?? ""
2070
- });
2071
- }
2072
- }
2073
- }
2074
- }
2075
- if (finishReason === "tool_calls") {
2076
- yield* this.flushPendingToolCalls(pendingToolCalls);
2077
- }
2078
- if (finishReason === "stop") {
2079
- yield { text: "", done: true };
2080
- }
2081
- }
2082
- }
2083
- /**
2084
- * Flush all accumulated pending tool calls as complete `ToolCall` objects.
2085
- */
2086
- *flushPendingToolCalls(pendingToolCalls) {
2087
- const sorted = [...pendingToolCalls.entries()].sort(
2088
- ([a], [b]) => a - b
2089
- );
2090
- for (const [, tc] of sorted) {
2091
- let args = {};
2092
- try {
2093
- args = JSON.parse(tc.argumentsJson);
2094
- } catch {
2095
- }
2096
- yield {
2097
- id: tc.id,
2098
- name: tc.name,
2099
- arguments: args
2100
- };
2101
- }
2102
- pendingToolCalls.clear();
2103
- }
2104
- /**
2105
- * Extract token usage from a parsed OpenAI response chunk.
2106
- * Usage data typically appears in the final chunk when `stream_options`
2107
- * includes `include_usage`, or in the non-streaming response.
2108
- * Returns `null` if no usage data is present.
2109
- */
2110
- extractUsage(parsed) {
2111
- const usage = parsed.usage;
2112
- if (!usage) return null;
2113
- return {
2114
- prompt: usage.prompt_tokens ?? 0,
2115
- completion: usage.completion_tokens ?? 0,
2116
- total: usage.total_tokens ?? 0
2117
- };
2118
- }
2119
- /**
2120
- * Check whether a parsed OpenAI chunk indicates the response was
2121
- * blocked by a content filter.
2122
- *
2123
- * OpenAI signals content filtering through:
2124
- * - `choices[].finish_reason === 'content_filter'`
2125
- * - `choices[].content_filter_results` with `filtered: true`
2126
- */
2127
- isContentFiltered(parsed) {
2128
- const choices = parsed.choices;
2129
- if (!choices || choices.length === 0) return false;
2130
- return choices.some((choice) => {
2131
- if (choice.finish_reason === "content_filter") return true;
2132
- const filterResults = choice.content_filter_results;
2133
- if (filterResults) {
2134
- return Object.values(filterResults).some((r) => r.filtered === true);
2135
- }
2136
- return false;
2137
- });
2138
- }
2139
- /**
2140
- * Translate an HTTP error response from OpenAI into the appropriate
2141
- * GuideKit error class.
2142
- */
2143
- async handleHttpError(response) {
2144
- let errorBody = "";
2145
- try {
2146
- errorBody = await response.text();
2147
- } catch {
2148
- }
2149
- const status = response.status;
2150
- if (status === 401 || status === 403) {
2151
- throw new AuthenticationError({
2152
- code: ErrorCodes.AUTH_INVALID_KEY,
2153
- message: `OpenAI API authentication failed (${status}): ${errorBody}`,
2154
- provider: "openai",
2155
- suggestion: "Verify your OpenAI API key is correct and has not expired."
2156
- });
2157
- }
2158
- if (status === 429) {
2159
- const retryAfterHeader = response.headers.get("retry-after");
2160
- const retryAfterMs = retryAfterHeader ? parseInt(retryAfterHeader, 10) * 1e3 : 6e4;
2161
- throw new RateLimitError({
2162
- code: ErrorCodes.RATE_LIMIT_PROVIDER,
2163
- message: `OpenAI API rate limit exceeded (429): ${errorBody}`,
2164
- provider: "openai",
2165
- recoverable: true,
2166
- suggestion: `Rate limited by OpenAI. Retry after ${Math.ceil(retryAfterMs / 1e3)}s.`,
2167
- retryAfterMs
2168
- });
2169
- }
2170
- if (status >= 500) {
2171
- throw new NetworkError({
2172
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2173
- message: `OpenAI API server error (${status}): ${errorBody}`,
2174
- provider: "openai",
2175
- suggestion: "The OpenAI API is experiencing issues. Please try again later."
2176
- });
2177
- }
2178
- throw new NetworkError({
2179
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2180
- message: `OpenAI API request failed (${status}): ${errorBody}`,
2181
- provider: "openai",
2182
- suggestion: "Check the request parameters and try again."
2183
- });
2184
- }
2185
- };
2186
-
2187
1842
  // src/llm/index.ts
2188
1843
  var DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
2189
- var DEFAULT_TIMEOUT_MS2 = 15e3;
1844
+ var DEFAULT_TIMEOUT_MS = 15e3;
2190
1845
  var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
2191
1846
  var DEFAULT_SAFETY_SETTINGS = [
2192
1847
  { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_ONLY_HIGH" },
@@ -2200,10 +1855,20 @@ function emptyUsage() {
2200
1855
  var GeminiAdapter = class {
2201
1856
  apiKey;
2202
1857
  model;
1858
+ /**
1859
+ * Token usage extracted from the most recent `parseResponse` call.
1860
+ * Updated as each SSE chunk is parsed; the final value reflects the
1861
+ * cumulative usage metadata sent by Gemini (typically in the last chunk).
1862
+ */
1863
+ _lastUsage = emptyUsage();
2203
1864
  constructor(config) {
2204
1865
  this.apiKey = config.apiKey;
2205
1866
  this.model = config.model ?? DEFAULT_GEMINI_MODEL;
2206
1867
  }
1868
+ /** Token usage from the most recent parseResponse call. */
1869
+ get lastUsage() {
1870
+ return this._lastUsage;
1871
+ }
2207
1872
  // -----------------------------------------------------------------------
2208
1873
  // LLMProviderAdapter implementation
2209
1874
  // -----------------------------------------------------------------------
@@ -2218,7 +1883,11 @@ var GeminiAdapter = class {
2218
1883
  functionDeclarations: tools.map((tool) => ({
2219
1884
  name: tool.name,
2220
1885
  description: tool.description,
2221
- parameters: tool.parameters
1886
+ parameters: {
1887
+ type: "object",
1888
+ properties: { ...tool.parameters },
1889
+ required: tool.required ?? []
1890
+ }
2222
1891
  }))
2223
1892
  }
2224
1893
  ];
@@ -2240,11 +1909,16 @@ var GeminiAdapter = class {
2240
1909
  * The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
2241
1910
  * as a JSON object prefixed by `data: `. We parse line-by-line, extract
2242
1911
  * text parts and function call parts, and yield the appropriate types.
1912
+ *
1913
+ * This method also:
1914
+ * - Detects content filtering and throws `ContentFilterError`.
1915
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
2243
1916
  */
2244
1917
  async *parseResponse(stream) {
2245
1918
  const reader = stream.getReader();
2246
1919
  const decoder = new TextDecoder();
2247
1920
  let buffer = "";
1921
+ this._lastUsage = emptyUsage();
2248
1922
  try {
2249
1923
  while (true) {
2250
1924
  const { done, value } = await reader.read();
@@ -2263,6 +1937,18 @@ var GeminiAdapter = class {
2263
1937
  } catch {
2264
1938
  continue;
2265
1939
  }
1940
+ if (this.isContentFiltered(parsed)) {
1941
+ throw new ContentFilterError({
1942
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1943
+ message: "Response was blocked by provider content safety filter.",
1944
+ provider: "gemini",
1945
+ suggestion: "Rephrase your question or adjust safety settings."
1946
+ });
1947
+ }
1948
+ const chunkUsage = this.extractUsage(parsed);
1949
+ if (chunkUsage) {
1950
+ this._lastUsage = chunkUsage;
1951
+ }
2266
1952
  yield* this.extractChunks(parsed);
2267
1953
  }
2268
1954
  }
@@ -2271,8 +1957,21 @@ var GeminiAdapter = class {
2271
1957
  if (jsonStr !== "" && jsonStr !== "[DONE]") {
2272
1958
  try {
2273
1959
  const parsed = JSON.parse(jsonStr);
1960
+ if (this.isContentFiltered(parsed)) {
1961
+ throw new ContentFilterError({
1962
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1963
+ message: "Response was blocked by provider content safety filter.",
1964
+ provider: "gemini",
1965
+ suggestion: "Rephrase your question or adjust safety settings."
1966
+ });
1967
+ }
1968
+ const chunkUsage = this.extractUsage(parsed);
1969
+ if (chunkUsage) {
1970
+ this._lastUsage = chunkUsage;
1971
+ }
2274
1972
  yield* this.extractChunks(parsed);
2275
- } catch {
1973
+ } catch (error) {
1974
+ if (error instanceof ContentFilterError) throw error;
2276
1975
  }
2277
1976
  }
2278
1977
  }
@@ -2303,15 +2002,21 @@ var GeminiAdapter = class {
2303
2002
  /**
2304
2003
  * Build and execute a streaming request to the Gemini API.
2305
2004
  * Returns the raw `ReadableStream` for the response body together with
2306
- * a promise that resolves to token usage extracted from the final chunk.
2005
+ * the raw Response object.
2006
+ *
2007
+ * Note: The Gemini API key is passed as a URL query parameter (`key=`).
2008
+ * This is inherent to the Gemini REST SSE endpoint design; the key is
2009
+ * transmitted over HTTPS so it remains encrypted in transit. (H3)
2307
2010
  */
2308
2011
  async streamRequest(params) {
2012
+ const contentsArray = params.contents;
2013
+ const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
2309
2014
  const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
2310
2015
  const body = {
2311
2016
  systemInstruction: {
2312
2017
  parts: [{ text: params.systemPrompt }]
2313
2018
  },
2314
- contents: params.contents,
2019
+ contents: fullContents,
2315
2020
  safetySettings: DEFAULT_SAFETY_SETTINGS,
2316
2021
  generationConfig: {
2317
2022
  temperature: 0.7,
@@ -2322,7 +2027,7 @@ var GeminiAdapter = class {
2322
2027
  if (params.tools) {
2323
2028
  body.tools = params.tools;
2324
2029
  }
2325
- const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS2;
2030
+ const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
2326
2031
  const controller = new AbortController();
2327
2032
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
2328
2033
  if (params.signal) {
@@ -2379,7 +2084,7 @@ var GeminiAdapter = class {
2379
2084
  return { stream: response.body, response };
2380
2085
  }
2381
2086
  // -----------------------------------------------------------------------
2382
- // Internal helpers
2087
+ // Public helpers (LLMProviderAdapter interface)
2383
2088
  // -----------------------------------------------------------------------
2384
2089
  /**
2385
2090
  * Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
@@ -2552,7 +2257,8 @@ var LLMOrchestrator = class {
2552
2257
  updateConfig(config) {
2553
2258
  this._config = config;
2554
2259
  this._adapter = this.createAdapter(config);
2555
- this.log(`Config updated: provider=${config.provider}`);
2260
+ const label = "provider" in config ? config.provider : "custom adapter";
2261
+ this.log(`Config updated: ${label}`);
2556
2262
  }
2557
2263
  /** Get the current provider adapter. */
2558
2264
  get adapter() {
@@ -2563,139 +2269,42 @@ var LLMOrchestrator = class {
2563
2269
  // -----------------------------------------------------------------------
2564
2270
  /**
2565
2271
  * Execute a streaming LLM request and collect the results.
2272
+ *
2273
+ * This method is fully adapter-agnostic: it delegates streaming,
2274
+ * response parsing, content-filter detection, and usage extraction
2275
+ * entirely to the active `LLMProviderAdapter`. No provider-specific
2276
+ * SSE parsing lives in the orchestrator.
2566
2277
  */
2567
2278
  async executeStream(params, _isRetry) {
2568
- const geminiAdapter = this._adapter;
2569
- const historyContents = geminiAdapter.formatConversation(params.history);
2570
- const contents = [
2571
- ...historyContents,
2572
- { role: "user", parts: [{ text: params.userMessage }] }
2573
- ];
2574
- const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
2575
- const { stream } = await geminiAdapter.streamRequest({
2279
+ const adapter = this._adapter;
2280
+ const historyContents = adapter.formatConversation(params.history);
2281
+ const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
2282
+ const { stream } = await adapter.streamRequest({
2576
2283
  systemPrompt: params.systemPrompt,
2577
- contents,
2284
+ contents: historyContents,
2285
+ userMessage: params.userMessage,
2578
2286
  tools,
2579
2287
  signal: params.signal
2580
2288
  });
2581
2289
  let fullText = "";
2582
2290
  const toolCalls = [];
2291
+ for await (const item of adapter.parseResponse(stream)) {
2292
+ if ("name" in item && "arguments" in item) {
2293
+ const toolCall = item;
2294
+ toolCalls.push(toolCall);
2295
+ this.callbacks.onToolCall?.(toolCall);
2296
+ } else {
2297
+ const chunk = item;
2298
+ if (chunk.text) {
2299
+ fullText += chunk.text;
2300
+ }
2301
+ this.callbacks.onChunk?.(chunk);
2302
+ }
2303
+ }
2304
+ this.callbacks.onChunk?.({ text: "", done: true });
2583
2305
  let usage = emptyUsage();
2584
- let wasContentFiltered = false;
2585
- const reader = stream.getReader();
2586
- const decoder = new TextDecoder();
2587
- let buffer = "";
2588
- try {
2589
- while (true) {
2590
- const { done, value } = await reader.read();
2591
- if (done) break;
2592
- buffer += decoder.decode(value, { stream: true });
2593
- const lines = buffer.split("\n");
2594
- buffer = lines.pop() ?? "";
2595
- for (const line of lines) {
2596
- const trimmed = line.trim();
2597
- if (!trimmed.startsWith("data:")) continue;
2598
- const jsonStr = trimmed.slice(5).trim();
2599
- if (jsonStr === "" || jsonStr === "[DONE]") continue;
2600
- let parsed;
2601
- try {
2602
- parsed = JSON.parse(jsonStr);
2603
- } catch {
2604
- continue;
2605
- }
2606
- if (geminiAdapter.isContentFiltered(parsed)) {
2607
- wasContentFiltered = true;
2608
- break;
2609
- }
2610
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2611
- if (chunkUsage) {
2612
- usage = chunkUsage;
2613
- }
2614
- const candidates = parsed.candidates;
2615
- if (!candidates || candidates.length === 0) continue;
2616
- for (const candidate of candidates) {
2617
- const content = candidate.content;
2618
- if (!content?.parts) continue;
2619
- const finishReason = candidate.finishReason;
2620
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2621
- for (const part of content.parts) {
2622
- if (typeof part.text === "string") {
2623
- fullText += part.text;
2624
- const chunk = { text: part.text, done: isDone };
2625
- this.callbacks.onChunk?.(chunk);
2626
- }
2627
- if (part.functionCall) {
2628
- const fc = part.functionCall;
2629
- const toolCall = {
2630
- id: fc.name,
2631
- name: fc.name,
2632
- arguments: fc.args ?? {}
2633
- };
2634
- toolCalls.push(toolCall);
2635
- this.callbacks.onToolCall?.(toolCall);
2636
- }
2637
- }
2638
- }
2639
- }
2640
- if (wasContentFiltered) break;
2641
- }
2642
- if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
2643
- const jsonStr = buffer.trim().slice(5).trim();
2644
- if (jsonStr !== "" && jsonStr !== "[DONE]") {
2645
- try {
2646
- const parsed = JSON.parse(jsonStr);
2647
- if (geminiAdapter.isContentFiltered(parsed)) {
2648
- wasContentFiltered = true;
2649
- } else {
2650
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2651
- if (chunkUsage) usage = chunkUsage;
2652
- const candidates = parsed.candidates;
2653
- if (candidates) {
2654
- for (const candidate of candidates) {
2655
- const content = candidate.content;
2656
- if (!content?.parts) continue;
2657
- const finishReason = candidate.finishReason;
2658
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2659
- for (const part of content.parts) {
2660
- if (typeof part.text === "string") {
2661
- fullText += part.text;
2662
- const chunk = {
2663
- text: part.text,
2664
- done: isDone
2665
- };
2666
- this.callbacks.onChunk?.(chunk);
2667
- }
2668
- if (part.functionCall) {
2669
- const fc = part.functionCall;
2670
- const toolCall = {
2671
- id: fc.name,
2672
- name: fc.name,
2673
- arguments: fc.args ?? {}
2674
- };
2675
- toolCalls.push(toolCall);
2676
- this.callbacks.onToolCall?.(toolCall);
2677
- }
2678
- }
2679
- }
2680
- }
2681
- }
2682
- } catch {
2683
- }
2684
- }
2685
- }
2686
- } finally {
2687
- reader.releaseLock();
2688
- }
2689
- if (wasContentFiltered) {
2690
- throw new ContentFilterError({
2691
- code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2692
- message: "Response was blocked by Gemini content safety filter.",
2693
- provider: "gemini",
2694
- suggestion: "Rephrase your question or adjust safety settings."
2695
- });
2696
- }
2697
- if (fullText.length > 0) {
2698
- this.callbacks.onChunk?.({ text: "", done: true });
2306
+ if ("lastUsage" in adapter) {
2307
+ usage = adapter.lastUsage;
2699
2308
  }
2700
2309
  if (usage.total > 0) {
2701
2310
  this.callbacks.onTokenUsage?.(usage);
@@ -2707,25 +2316,30 @@ var LLMOrchestrator = class {
2707
2316
  }
2708
2317
  /**
2709
2318
  * Create the appropriate adapter for the given config.
2710
- * Currently only Gemini is implemented; other providers will be added
2711
- * as the SDK evolves.
2319
+ *
2320
+ * Built-in providers:
2321
+ * - `'gemini'` — uses the bundled `GeminiAdapter`.
2322
+ *
2323
+ * Custom adapters:
2324
+ * - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
2325
+ * Example: `llm: { adapter: myCustomAdapter }`
2712
2326
  */
2713
2327
  createAdapter(config) {
2328
+ if ("adapter" in config) {
2329
+ return config.adapter;
2330
+ }
2714
2331
  switch (config.provider) {
2715
2332
  case "gemini":
2716
2333
  return new GeminiAdapter(config);
2717
- case "openai":
2718
- return new OpenAIAdapter(config);
2719
2334
  default:
2720
2335
  throw new Error(
2721
- `LLM provider "${config.provider}" is not yet supported. Currently only "gemini" and "openai" are implemented.`
2336
+ `LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
2722
2337
  );
2723
2338
  }
2724
2339
  }
2725
2340
  /** Convenience accessor for the current provider name. */
2726
2341
  get providerName() {
2727
- if (this._config.provider === "gemini") return "gemini";
2728
- if (this._config.provider === "openai") return "openai";
2342
+ if ("provider" in this._config) return this._config.provider;
2729
2343
  return void 0;
2730
2344
  }
2731
2345
  /** Log a debug message if debug mode is enabled. */
@@ -2878,7 +2492,7 @@ var ToolExecutor = class {
2878
2492
  break;
2879
2493
  }
2880
2494
  }
2881
- if (rounds >= this.maxRounds && allToolCalls.length > 0) {
2495
+ if (rounds >= this.maxRounds) {
2882
2496
  this.log(
2883
2497
  `Max rounds (${this.maxRounds}) reached. Returning current text.`
2884
2498
  );
@@ -2981,6 +2595,19 @@ var ToolExecutor = class {
2981
2595
  return s.value;
2982
2596
  }
2983
2597
  const tc = toolCalls[i];
2598
+ if (!tc) {
2599
+ const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
2600
+ return {
2601
+ toolCallId: `unknown-${i}`,
2602
+ record: {
2603
+ name: "unknown",
2604
+ args: {},
2605
+ result: void 0,
2606
+ durationMs: 0,
2607
+ error: errorMsg2
2608
+ }
2609
+ };
2610
+ }
2984
2611
  const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
2985
2612
  return {
2986
2613
  toolCallId: tc.id,
@@ -4173,8 +3800,280 @@ var DeepgramSTT = class {
4173
3800
  }
4174
3801
  };
4175
3802
 
3803
+ // src/voice/elevenlabs-stt.ts
3804
+ var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
3805
+ var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
3806
+ var DEFAULT_LANGUAGE2 = "en";
3807
+ var INACTIVITY_TIMEOUT_S = 30;
3808
+ var SAMPLE_RATE = 16e3;
3809
/**
 * Convert floating-point PCM samples to 16-bit signed integer PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 32768 and
 * non-negative values by 32767 so both ends of the Int16 range are reachable.
 *
 * @param {Float32Array} float32 - Source samples.
 * @returns {Int16Array} A new array with the same number of samples.
 */
function float32ToInt162(float32) {
  return Int16Array.from(float32, (sample) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    return clamped < 0 ? clamped * 32768 : clamped * 32767;
  });
}
3817
/**
 * Base64-encode the raw bytes of an Int16Array.
 *
 * Only the bytes that belong to the given view are encoded. The view's
 * `byteOffset` and `byteLength` are honoured, so a `subarray()` of a larger
 * buffer does not leak the rest of the backing buffer into the output.
 *
 * @param {Int16Array} int16 - Samples to encode (platform byte order).
 * @returns {string} Base64 representation of the view's bytes.
 */
function int16ToBase64(int16) {
  const bytes = new Uint8Array(int16.buffer, int16.byteOffset, int16.byteLength);
  // Spread in bounded chunks: passing a very large array as call arguments
  // can exceed the engine's argument-count limit.
  const CHUNK_SIZE = 8192;
  let binary = "";
  for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(i, i + CHUNK_SIZE));
  }
  return btoa(binary);
}
3827
/**
 * Streaming speech-to-text adapter that sends base64 audio chunks over a
 * WebSocket and emits transcript events to registered callbacks.
 *
 * NOTE(review): the message field names used here (`type`, `result.text`,
 * `audio`, `xi_api_key`) are this adapter's assumed wire format; verify them
 * against the current ElevenLabs realtime STT reference.
 */
var ElevenLabsSTT = class {
  // ---- Configuration -------------------------------------------------------
  /** API key; placed in the `xi_api_key` query parameter by `buildUrl()`. */
  apiKey;
  /** Language code sent as the `language` query parameter. */
  language;
  /** When true, `log()` forwards its arguments to `console.debug`. */
  debugEnabled;
  // ---- Internal state ------------------------------------------------------
  /** Current socket manager, or null before `connect()` / after `destroy()`. */
  wsManager = null;
  _connected = false;
  _suspended = false;
  /** Registered transcript callbacks. */
  transcriptCallbacks = /* @__PURE__ */ new Set();
  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------
  /**
   * @param {{ apiKey: string, language?: string, debug?: boolean }} options
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.language = options.language ?? DEFAULT_LANGUAGE2;
    this.debugEnabled = options.debug ?? false;
    this.log("ElevenLabsSTT created", { language: this.language });
  }
  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------
  /** Whether the WebSocket is currently connected and ready. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Open a WebSocket connection to the streaming STT endpoint.
   *
   * Resolves immediately without connecting when already connected or when
   * the `WebSocket` global is unavailable (e.g. SSR). Otherwise returns the
   * promise produced by the socket manager's `connect()`.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof WebSocket === "undefined") {
      this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
      return;
    }
    const url = this.buildUrl();
    this.log("Connecting to", this.redactApiKey(url));
    const manager = new WebSocketManager({
      url,
      protocols: [],
      debug: this.debugEnabled,
      label: "ElevenLabs-STT"
    });
    this.wsManager = manager;
    // State-changing handlers only act while `manager` is still the current
    // one. Otherwise a late open/close event from a socket that was replaced
    // by a newer connect() would corrupt the new connection's state.
    manager.onOpen(() => {
      if (this.wsManager !== manager) return;
      this._connected = true;
      this.log("Connected");
    });
    manager.onMessage((event) => {
      this.handleMessage(event);
    });
    manager.onClose((code, reason) => {
      this.log("Connection closed", { code, reason });
      if (this.wsManager !== manager) return;
      this.cleanup();
    });
    manager.onError((event) => {
      this.log("WebSocket error", event);
    });
    return manager.connect();
  }
  /**
   * Send audio data for transcription.
   *
   * Accepts either `Float32Array` or `Int16Array`. Float32 data is converted
   * to Int16 first. The samples are sent as a base64 string inside a JSON
   * message. Calls are silently dropped while disconnected or suspended.
   */
  sendAudio(audioData) {
    if (!this._connected || !this.wsManager || this._suspended) {
      return;
    }
    const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
    const base64 = int16ToBase64(int16);
    this.wsManager.send(
      JSON.stringify({
        type: "input_audio_chunk",
        audio: base64,
        sample_rate: SAMPLE_RATE
      })
    );
  }
  /**
   * Register a callback to receive transcript events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onTranscript(callback) {
    this.transcriptCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.transcriptCallbacks.delete(callback);
    };
  }
  /**
   * Gracefully close the connection.
   *
   * Sends a `commit_audio` message (best effort) before closing the socket
   * and resetting the connected flag.
   */
  close() {
    if (!this._connected || !this.wsManager) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Sending commit_audio and closing");
    try {
      this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
    } catch (err) {
      // Best effort only: the socket is being closed regardless.
      this.log("Failed to send commit_audio before closing", err);
    }
    this.wsManager.close();
    this.cleanup();
  }
  /** Force-destroy the connection without a graceful handshake. */
  destroy() {
    this.log("Destroying");
    if (this.wsManager) {
      this.wsManager.destroy();
      this.wsManager = null;
    }
    this.cleanup();
    this.transcriptCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Marks the adapter as suspended so that incoming `sendAudio` calls are
   * silently dropped. The WebSocket itself is left open.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this.log("Resumed");
  }
  // -------------------------------------------------------------------------
  // Message handling
  // -------------------------------------------------------------------------
  /**
   * Parse an incoming JSON message and emit a transcript event when it is a
   * transcript.
   *
   * Two message types are treated as transcripts:
   * - `partial_transcript`: interim result, `isFinal = false`
   * - `committed_transcript`: final result, `isFinal = true`
   *
   * Non-string frames, unparseable JSON and non-object payloads are ignored.
   */
  handleMessage(event) {
    if (typeof event.data !== "string") {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      this.log("Failed to parse message", event.data);
      return;
    }
    // Valid JSON is not necessarily an object ("null", "5", ...). Indexing
    // `null` would throw inside the socket's message handler.
    if (parsed === null || typeof parsed !== "object") {
      this.log("Ignoring non-object message", parsed);
      return;
    }
    const type = parsed["type"];
    if (type === "committed_transcript" || type === "partial_transcript") {
      this.handleTranscriptMessage(parsed, type === "committed_transcript");
    } else {
      this.log("Received message", type, parsed);
    }
  }
  /**
   * Extract transcript data from a transcript message and notify subscribers.
   *
   * Messages whose text is missing, not a string, or blank are dropped. A
   * missing or non-numeric confidence is reported as 0.
   */
  handleTranscriptMessage(parsed, isFinal) {
    const result = parsed["result"];
    const rawText = result?.text;
    const text = typeof rawText === "string" ? rawText : "";
    const rawConfidence = result?.confidence;
    const confidence = typeof rawConfidence === "number" && Number.isFinite(rawConfidence) ? rawConfidence : 0;
    if (text.trim() === "") {
      return;
    }
    const transcriptEvent = {
      text,
      isFinal,
      confidence,
      timestamp: Date.now()
    };
    this.log(
      isFinal ? "Final transcript:" : "Interim transcript:",
      text,
      `(${(confidence * 100).toFixed(1)}%)`
    );
    this.emitTranscript(transcriptEvent);
  }
  // -------------------------------------------------------------------------
  // Subscriber notification
  // -------------------------------------------------------------------------
  /**
   * Emit a transcript event to all registered callbacks.
   *
   * Errors thrown by individual callbacks are caught and logged so one
   * misbehaving subscriber does not prevent others from receiving the event.
   */
  emitTranscript(event) {
    for (const cb of this.transcriptCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX7, "Transcript callback threw:", err);
      }
    }
  }
  // -------------------------------------------------------------------------
  // URL building
  // -------------------------------------------------------------------------
  /** Build the streaming STT endpoint URL with auth query params. */
  buildUrl() {
    const params = new URLSearchParams({
      xi_api_key: this.apiKey,
      language: this.language,
      inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
    });
    return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
  }
  /**
   * Return `url` with the value of the `xi_api_key` query parameter masked.
   *
   * Masks by parameter name rather than by searching for the raw key: the
   * URL holds the key in URL-encoded form, so a raw-string replacement would
   * leave keys containing reserved characters visible in debug logs.
   */
  redactApiKey(url) {
    return url.replace(/([?&]xi_api_key=)[^&#]*/, "$1***");
  }
  // -------------------------------------------------------------------------
  // Cleanup
  // -------------------------------------------------------------------------
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
  }
  // -------------------------------------------------------------------------
  // Logging
  // -------------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX7, ...args);
    }
  }
};
4074
+
4176
4075
  // src/voice/elevenlabs-tts.ts
4177
- var LOG_PREFIX7 = "[GuideKit:TTS]";
4076
+ var LOG_PREFIX8 = "[GuideKit:TTS]";
4178
4077
  var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
4179
4078
  var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
4180
4079
  var DEFAULT_STABILITY = 0.5;
@@ -4416,47 +4315,705 @@ var ElevenLabsTTS = class {
4416
4315
  this.log("Failed to parse message", event.data);
4417
4316
  return;
4418
4317
  }
4419
- if (parsed["error"] !== void 0) {
4420
- this.log("ElevenLabs error:", parsed["error"]);
4421
- return;
4318
+ if (parsed["error"] !== void 0) {
4319
+ this.log("ElevenLabs error:", parsed["error"]);
4320
+ return;
4321
+ }
4322
+ if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4323
+ this.log("Non-audio message received", parsed);
4324
+ return;
4325
+ }
4326
+ const audioBase64 = parsed["audio"];
4327
+ const isFinal = parsed["isFinal"] === true;
4328
+ if (!audioBase64 || audioBase64.length === 0) {
4329
+ if (isFinal) {
4330
+ this.emitAudio({
4331
+ audio: new ArrayBuffer(0),
4332
+ isFinal: true,
4333
+ timestamp: Date.now()
4334
+ });
4335
+ }
4336
+ return;
4337
+ }
4338
+ let audioBuffer;
4339
+ try {
4340
+ audioBuffer = base64ToArrayBuffer(audioBase64);
4341
+ } catch (err) {
4342
+ this.log("Failed to decode base64 audio", err);
4343
+ return;
4344
+ }
4345
+ const audioEvent = {
4346
+ audio: audioBuffer,
4347
+ isFinal,
4348
+ timestamp: Date.now()
4349
+ };
4350
+ this.log(
4351
+ isFinal ? "Final audio chunk:" : "Audio chunk:",
4352
+ `${audioBuffer.byteLength} bytes`
4353
+ );
4354
+ this.emitAudio(audioEvent);
4355
+ }
4356
+ // -----------------------------------------------------------------------
4357
+ // Subscriber notification
4358
+ // -----------------------------------------------------------------------
4359
+ /**
4360
+ * Emit an audio event to all registered callbacks.
4361
+ *
4362
+ * Errors thrown by individual callbacks are caught and logged so one
4363
+ * misbehaving subscriber does not prevent others from receiving the event.
4364
+ */
4365
+ emitAudio(event) {
4366
+ for (const cb of this.audioCallbacks) {
4367
+ try {
4368
+ cb(event);
4369
+ } catch (err) {
4370
+ console.error(LOG_PREFIX8, "Audio callback threw:", err);
4371
+ }
4372
+ }
4373
+ }
4374
+ // -----------------------------------------------------------------------
4375
+ // URL building
4376
+ // -----------------------------------------------------------------------
4377
+ /** Build the ElevenLabs streaming TTS endpoint URL. */
4378
+ buildUrl() {
4379
+ const params = new URLSearchParams({
4380
+ model_id: this.modelId
4381
+ });
4382
+ return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4383
+ }
4384
+ // -----------------------------------------------------------------------
4385
+ // Cleanup
4386
+ // -----------------------------------------------------------------------
4387
+ /** Reset internal state after disconnection. */
4388
+ cleanup() {
4389
+ this._connected = false;
4390
+ this.bosSent = false;
4391
+ }
4392
+ // -----------------------------------------------------------------------
4393
+ // Logging
4394
+ // -----------------------------------------------------------------------
4395
+ /** Conditional debug logging. */
4396
+ log(...args) {
4397
+ if (this.debugEnabled) {
4398
+ console.debug(LOG_PREFIX8, ...args);
4399
+ }
4400
+ }
4401
+ };
4402
+
4403
+ // src/voice/web-speech-stt.ts
4404
+ var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
4405
+ var DEFAULT_LANGUAGE3 = "en-US";
4406
/**
 * Speech-to-text adapter backed by the browser's Web Speech API
 * (`SpeechRecognition`, with the `webkitSpeechRecognition` fallback).
 *
 * Exposes the same surface as the WebSocket-based STT adapters; `sendAudio`
 * is a no-op.
 */
var WebSpeechSTT = class {
  // ---- Configuration -------------------------------------------------------
  language;
  continuous;
  interimResultsEnabled;
  debugEnabled;
  // ---- Internal state ------------------------------------------------------
  /** Current SpeechRecognition instance, or null before connect / after destroy. */
  recognition = null;
  _connected = false;
  _suspended = false;
  /**
   * Whether we intentionally stopped recognition. Used to distinguish
   * between intentional stop and unexpected end (for auto-restart in
   * continuous mode).
   */
  _intentionalStop = false;
  /** Registered transcript callbacks. */
  transcriptCallbacks = /* @__PURE__ */ new Set();
  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------
  /**
   * @param {{ language?: string, continuous?: boolean, interimResults?: boolean, debug?: boolean }} [options]
   */
  constructor(options = {}) {
    this.language = options.language ?? DEFAULT_LANGUAGE3;
    this.continuous = options.continuous ?? true;
    this.interimResultsEnabled = options.interimResults ?? true;
    this.debugEnabled = options.debug ?? false;
    this.log("WebSpeechSTT created", {
      language: this.language,
      continuous: this.continuous,
      interimResults: this.interimResultsEnabled
    });
  }
  // -------------------------------------------------------------------------
  // Static methods
  // -------------------------------------------------------------------------
  /**
   * Check whether the Web Speech API SpeechRecognition is supported in the
   * current environment. Safe to call in SSR (returns false).
   */
  static isSupported() {
    if (typeof window === "undefined") return false;
    return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
  }
  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------
  /** Whether recognition is currently active and connected. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Start speech recognition.
   *
   * Creates a fresh SpeechRecognition instance and begins listening.
   * Resolves once the `start` event fires. Rejects if the API is not
   * supported, if `start()` throws, or if an `error` event fires first.
   * Resolves without doing anything when already connected or when there is
   * no `window` (SSR).
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof window === "undefined") {
      this.log("SSR environment detected \u2014 cannot connect");
      return;
    }
    const SpeechRecognitionClass = this.resolveSpeechRecognition();
    if (!SpeechRecognitionClass) {
      throw new Error(
        "Web Speech API (SpeechRecognition) is not supported in this browser."
      );
    }
    const recognition = new SpeechRecognitionClass();
    this.recognition = recognition;
    recognition.lang = this.language;
    recognition.continuous = this.continuous;
    recognition.interimResults = this.interimResultsEnabled;
    recognition.maxAlternatives = 1;
    // A previous instance (e.g. one stopped by close()) may still deliver
    // events after a newer connect(). Its lifecycle events must not touch
    // the state that now belongs to the new instance, so every
    // state-changing handler first checks that it is still the current one.
    const isCurrent = () => this.recognition === recognition;
    recognition.onstart = () => {
      if (!isCurrent()) return;
      this._connected = true;
      this._intentionalStop = false;
      this.log("Recognition started");
    };
    // Results are forwarded even from a superseded instance so that pending
    // final results of a graceful close() are not lost.
    recognition.onresult = (event) => {
      this.handleResult(event);
    };
    recognition.onerror = (event) => {
      if (!isCurrent()) {
        this.log("Ignoring error from superseded recognition instance:", event.error);
        return;
      }
      this.handleError(event);
    };
    recognition.onend = () => {
      if (!isCurrent()) {
        this.log("Ignoring end event from superseded recognition instance");
        return;
      }
      this.log("Recognition ended");
      const wasConnected = this._connected;
      this._connected = false;
      if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
        this.log("Auto-restarting continuous recognition");
        try {
          recognition.start();
        } catch (err) {
          this.log("Failed to auto-restart recognition", err);
        }
      }
    };
    return new Promise((resolve, reject) => {
      // Listeners are removed from the captured instance, so cleanup works
      // even if `this.recognition` was replaced or nulled in the meantime.
      const removeListeners = () => {
        recognition.removeEventListener("start", onStart);
        recognition.removeEventListener("error", onError);
      };
      const onStart = () => {
        removeListeners();
        resolve();
      };
      const onError = (event) => {
        removeListeners();
        reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
      };
      recognition.addEventListener("start", onStart, { once: true });
      recognition.addEventListener("error", onError, { once: true });
      try {
        recognition.start();
      } catch (err) {
        removeListeners();
        reject(err);
      }
    });
  }
  /**
   * Send audio data. No-op for Web Speech API since it captures audio
   * directly from the microphone via the browser's internal pipeline.
   *
   * Provided for interface compatibility with WebSocket-based STT adapters
   * (DeepgramSTT, ElevenLabsSTT).
   */
  sendAudio(_audioData) {
  }
  /**
   * Register a callback to receive transcript events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onTranscript(callback) {
    this.transcriptCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.transcriptCallbacks.delete(callback);
    };
  }
  /**
   * Gracefully stop recognition.
   *
   * Calls `stop()` on the SpeechRecognition instance which allows it to
   * deliver any pending final results before ending.
   */
  close() {
    if (!this.recognition) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Closing recognition");
    this._intentionalStop = true;
    try {
      this.recognition.stop();
    } catch (err) {
      // Best effort: the adapter is marked disconnected regardless.
      this.log("stop() threw while closing", err);
    }
    this.cleanup();
  }
  /** Force-destroy the recognition without waiting for pending results. */
  destroy() {
    this.log("Destroying");
    this._intentionalStop = true;
    if (this.recognition) {
      try {
        this.recognition.abort();
      } catch (err) {
        this.log("abort() threw while destroying", err);
      }
      this.recognition.onresult = null;
      this.recognition.onerror = null;
      this.recognition.onend = null;
      this.recognition.onstart = null;
      this.recognition = null;
    }
    this.cleanup();
    this.transcriptCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Stops recognition and marks the adapter as suspended so that auto-restart
   * does not trigger.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this._intentionalStop = true;
    if (this.recognition && this._connected) {
      try {
        this.recognition.stop();
      } catch (err) {
        this.log("stop() threw while suspending", err);
      }
    }
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`. Restarts recognition when an instance
   * exists and is not currently connected.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this._intentionalStop = false;
    this.log("Resumed");
    if (this.recognition && !this._connected) {
      try {
        this.recognition.start();
      } catch {
        this.log("Failed to restart recognition after resume");
      }
    }
  }
  // -------------------------------------------------------------------------
  // Result handling
  // -------------------------------------------------------------------------
  /**
   * Handle SpeechRecognition result events.
   *
   * Only results from `event.resultIndex` onward are processed, and only the
   * first alternative of each. Blank transcripts are skipped. A confidence
   * that is not greater than 0 is reported as 0.85.
   */
  handleResult(event) {
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i];
      if (!result) continue;
      const alternative = result[0];
      if (!alternative) continue;
      const transcript = alternative.transcript;
      if (!transcript || transcript.trim() === "") continue;
      const isFinal = result.isFinal;
      const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
      const transcriptEvent = {
        text: transcript,
        isFinal,
        confidence,
        timestamp: Date.now()
      };
      this.log(
        isFinal ? "Final transcript:" : "Interim transcript:",
        transcript,
        `(${(confidence * 100).toFixed(1)}%)`
      );
      this.emitTranscript(transcriptEvent);
    }
  }
  // -------------------------------------------------------------------------
  // Error handling
  // -------------------------------------------------------------------------
  /**
   * Handle SpeechRecognition errors.
   *
   * `no-speech`, `aborted` and `network` are only logged. `not-allowed`,
   * `service-not-allowed` and `language-not-supported` additionally set the
   * intentional-stop flag, which prevents the `onend` auto-restart.
   */
  handleError(event) {
    const errorType = event.error;
    this.log("Recognition error:", errorType, event.message);
    if (errorType === "no-speech" || errorType === "aborted") {
      this.log("Non-fatal error \u2014 will recover");
      return;
    }
    if (errorType === "network") {
      this.log("Network error \u2014 recognition may auto-restart");
      return;
    }
    if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
      this._intentionalStop = true;
      this.log("Fatal recognition error \u2014 stopping");
    }
  }
  // -------------------------------------------------------------------------
  // Subscriber notification
  // -------------------------------------------------------------------------
  /**
   * Emit a transcript event to all registered callbacks.
   *
   * Errors thrown by individual callbacks are caught and logged so one
   * misbehaving subscriber does not prevent others from receiving the event.
   */
  emitTranscript(event) {
    for (const cb of this.transcriptCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX9, "Transcript callback threw:", err);
      }
    }
  }
  // -------------------------------------------------------------------------
  // SpeechRecognition resolution
  // -------------------------------------------------------------------------
  /**
   * Resolve the SpeechRecognition constructor, with the webkit-prefixed
   * fallback. Returns null if not available.
   */
  resolveSpeechRecognition() {
    if (typeof window === "undefined") return null;
    const win = window;
    if (typeof win["SpeechRecognition"] !== "undefined") {
      return win["SpeechRecognition"];
    }
    if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
      return globalThis.webkitSpeechRecognition;
    }
    return null;
  }
  // -------------------------------------------------------------------------
  // Cleanup
  // -------------------------------------------------------------------------
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
  }
  // -------------------------------------------------------------------------
  // Logging
  // -------------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX9, ...args);
    }
  }
};
4740
+
4741
+ // src/voice/web-speech-tts.ts
4742
+ var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
4743
+ var DEFAULT_RATE = 1;
4744
+ var DEFAULT_PITCH = 1;
4745
+ var DEFAULT_LANGUAGE4 = "en-US";
4746
+ var WebSpeechTTS = class {
4747
+ // ---- Configuration -------------------------------------------------------
4748
+ voiceName;
4749
+ rate;
4750
+ pitch;
4751
+ language;
4752
+ debugEnabled;
4753
+ // ---- Internal state ------------------------------------------------------
4754
+ _connected = false;
4755
+ _suspended = false;
4756
+ /** Cached voice object resolved from voiceName. */
4757
+ _resolvedVoice = null;
4758
+ /** Whether voices have been loaded (they load async in some browsers). */
4759
+ _voicesLoaded = false;
4760
+ /** Registered audio-event callbacks. */
4761
+ audioCallbacks = /* @__PURE__ */ new Set();
4762
+ // -------------------------------------------------------------------------
4763
+ // Constructor
4764
+ // -------------------------------------------------------------------------
4765
+ constructor(options = {}) {
4766
+ this.voiceName = options.voice ?? null;
4767
+ this.rate = options.rate ?? DEFAULT_RATE;
4768
+ this.pitch = options.pitch ?? DEFAULT_PITCH;
4769
+ this.language = options.language ?? DEFAULT_LANGUAGE4;
4770
+ this.debugEnabled = options.debug ?? false;
4771
+ this.log("WebSpeechTTS created", {
4772
+ voice: this.voiceName,
4773
+ rate: this.rate,
4774
+ pitch: this.pitch,
4775
+ language: this.language
4776
+ });
4777
+ }
4778
+ // -------------------------------------------------------------------------
4779
+ // Static methods
4780
+ // -------------------------------------------------------------------------
4781
+ /**
4782
+ * Check whether the Web Speech API SpeechSynthesis is supported in the
4783
+ * current environment. Safe to call in SSR (returns false).
4784
+ */
4785
+ static isSupported() {
4786
+ if (typeof window === "undefined") return false;
4787
+ return typeof window.speechSynthesis !== "undefined";
4788
+ }
4789
+ // -------------------------------------------------------------------------
4790
+ // Public API
4791
+ // -------------------------------------------------------------------------
4792
+ /** Whether the adapter is connected (ready for speech). */
4793
+ get isConnected() {
4794
+ return this._connected;
4795
+ }
4796
+ /**
4797
+ * Initialize the adapter.
4798
+ *
4799
+ * Loads available voices and resolves the requested voice name. Voice
4800
+ * loading is async in some browsers (notably Chrome) so we wait for
4801
+ * the `voiceschanged` event if needed.
4802
+ */
4803
+ async connect() {
4804
+ if (this._connected) {
4805
+ this.log("Already connected \u2014 skipping");
4806
+ return;
4807
+ }
4808
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
4809
+ this.log("SpeechSynthesis not available \u2014 cannot connect");
4810
+ return;
4811
+ }
4812
+ await this.loadVoices();
4813
+ if (this.voiceName) {
4814
+ this._resolvedVoice = this.findVoice(this.voiceName);
4815
+ if (this._resolvedVoice) {
4816
+ this.log("Resolved voice:", this._resolvedVoice.name);
4817
+ } else {
4818
+ this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
4819
+ }
4820
+ }
4821
+ this._connected = true;
4822
+ this.log("Connected");
4823
+ }
4824
+ /**
4825
+ * Speak the given text using the browser's speech synthesis engine.
4826
+ *
4827
+ * Returns a Promise that resolves when the utterance completes or is
4828
+ * cancelled. Rejects if an error occurs during synthesis.
4829
+ *
4830
+ * Also emits audio events to registered callbacks for VoicePipeline
4831
+ * compatibility.
4832
+ */
4833
+ speak(text) {
4834
+ if (!this._connected || this._suspended) {
4835
+ this.log("Cannot speak \u2014 not connected or suspended");
4836
+ return;
4837
+ }
4838
+ if (!text || !text.trim()) {
4839
+ return;
4840
+ }
4841
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
4842
+ return;
4843
+ }
4844
+ const synth = window.speechSynthesis;
4845
+ const utterance = new SpeechSynthesisUtterance(text);
4846
+ utterance.lang = this.language;
4847
+ utterance.rate = this.rate;
4848
+ utterance.pitch = this.pitch;
4849
+ if (this._resolvedVoice) {
4850
+ utterance.voice = this._resolvedVoice;
4851
+ }
4852
+ utterance.onstart = () => {
4853
+ this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4854
+ this.emitAudio({
4855
+ audio: new ArrayBuffer(0),
4856
+ isFinal: false,
4857
+ timestamp: Date.now()
4858
+ });
4859
+ };
4860
+ utterance.onend = () => {
4861
+ this.log("Utterance ended");
4862
+ this.emitAudio({
4863
+ audio: new ArrayBuffer(0),
4864
+ isFinal: true,
4865
+ timestamp: Date.now()
4866
+ });
4867
+ };
4868
+ utterance.onerror = (event) => {
4869
+ if (event.error === "canceled") {
4870
+ this.log("Utterance cancelled");
4871
+ this.emitAudio({
4872
+ audio: new ArrayBuffer(0),
4873
+ isFinal: true,
4874
+ timestamp: Date.now()
4875
+ });
4876
+ return;
4877
+ }
4878
+ this.log("Utterance error:", event.error);
4879
+ this.emitAudio({
4880
+ audio: new ArrayBuffer(0),
4881
+ isFinal: true,
4882
+ timestamp: Date.now()
4883
+ });
4884
+ };
4885
+ this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4886
+ synth.speak(utterance);
4887
+ }
4888
+ /**
4889
+ * Flush / finalize the current utterance.
4890
+ *
4891
+ * No-op for Web Speech API since each speak() call is a complete
4892
+ * utterance. Provided for interface compatibility with ElevenLabsTTS.
4893
+ */
4894
+ flush() {
4895
+ }
4896
+ /**
4897
+ * Register a callback to receive audio output events.
4898
+ *
4899
+ * For Web Speech API, these events have empty audio buffers and are
4900
+ * used to signal utterance start/end for VoicePipeline state management.
4901
+ *
4902
+ * @returns An unsubscribe function. Calling it more than once is safe.
4903
+ */
4904
+ onAudio(callback) {
4905
+ this.audioCallbacks.add(callback);
4906
+ let removed = false;
4907
+ return () => {
4908
+ if (removed) return;
4909
+ removed = true;
4910
+ this.audioCallbacks.delete(callback);
4911
+ };
4912
+ }
4913
+ /** Stop current speech synthesis and cancel any queued utterances. */
4914
+ stop() {
4915
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
4916
+ return;
4917
+ }
4918
+ this.log("Stopping speech synthesis");
4919
+ window.speechSynthesis.cancel();
4920
+ }
4921
+ /** Gracefully close the adapter. */
4922
+ close() {
4923
+ this.log("Closing");
4924
+ this.stop();
4925
+ this.cleanup();
4926
+ }
4927
+ /** Force-destroy the adapter. */
4928
+ destroy() {
4929
+ this.log("Destroying");
4930
+ this.stop();
4931
+ this.cleanup();
4932
+ this.audioCallbacks.clear();
4933
+ }
4934
+ /**
4935
+ * Suspend the adapter (e.g. when the device goes offline).
4936
+ *
4937
+ * Pauses any active speech synthesis and marks the adapter as suspended.
4938
+ */
4939
+ suspend() {
4940
+ if (this._suspended) return;
4941
+ this._suspended = true;
4942
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
4943
+ window.speechSynthesis.pause();
4944
+ }
4945
+ this.log("Suspended");
4946
+ }
4947
+ /**
4948
+ * Resume after a prior `suspend()`.
4949
+ */
4950
+ resume() {
4951
+ if (!this._suspended) return;
4952
+ this._suspended = false;
4953
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
4954
+ window.speechSynthesis.resume();
4422
4955
  }
4423
- if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4424
- this.log("Non-audio message received", parsed);
4956
+ this.log("Resumed");
4957
+ }
4958
+ // -------------------------------------------------------------------------
4959
+ // Voice loading
4960
+ // -------------------------------------------------------------------------
4961
+ /**
4962
+ * Load available voices from the browser.
4963
+ *
4964
+ * In Chrome and some other browsers, voices load asynchronously after
4965
+ * the page loads. We wait for the `voiceschanged` event with a timeout.
4966
+ */
4967
+ async loadVoices() {
4968
+ if (this._voicesLoaded) return;
4969
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
4970
+ const synth = window.speechSynthesis;
4971
+ let voices = synth.getVoices();
4972
+ if (voices.length > 0) {
4973
+ this._voicesLoaded = true;
4974
+ this.log("Voices loaded:", voices.length, "available");
4425
4975
  return;
4426
4976
  }
4427
- const audioBase64 = parsed["audio"];
4428
- const isFinal = parsed["isFinal"] === true;
4429
- if (!audioBase64 || audioBase64.length === 0) {
4430
- if (isFinal) {
4431
- this.emitAudio({
4432
- audio: new ArrayBuffer(0),
4433
- isFinal: true,
4434
- timestamp: Date.now()
4435
- });
4436
- }
4437
- return;
4977
+ await new Promise((resolve) => {
4978
+ const onVoicesChanged = () => {
4979
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
4980
+ clearTimeout(timeout);
4981
+ voices = synth.getVoices();
4982
+ this._voicesLoaded = true;
4983
+ this.log("Voices loaded (async):", voices.length, "available");
4984
+ resolve();
4985
+ };
4986
+ const timeout = setTimeout(() => {
4987
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
4988
+ this._voicesLoaded = true;
4989
+ this.log("Voices loading timed out \u2014 proceeding with defaults");
4990
+ resolve();
4991
+ }, 2e3);
4992
+ synth.addEventListener("voiceschanged", onVoicesChanged);
4993
+ });
4994
+ }
4995
+ /**
4996
+ * Find a voice by name (case-insensitive partial match).
4997
+ */
4998
+ findVoice(name) {
4999
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5000
+ return null;
4438
5001
  }
4439
- let audioBuffer;
4440
- try {
4441
- audioBuffer = base64ToArrayBuffer(audioBase64);
4442
- } catch (err) {
4443
- this.log("Failed to decode base64 audio", err);
4444
- return;
5002
+ const voices = window.speechSynthesis.getVoices();
5003
+ const lowerName = name.toLowerCase();
5004
+ const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
5005
+ if (exact) return exact;
5006
+ const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
5007
+ if (partial) return partial;
5008
+ if (lowerName.includes("-") || lowerName.length <= 5) {
5009
+ const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
5010
+ if (langMatch) return langMatch;
4445
5011
  }
4446
- const audioEvent = {
4447
- audio: audioBuffer,
4448
- isFinal,
4449
- timestamp: Date.now()
4450
- };
4451
- this.log(
4452
- isFinal ? "Final audio chunk:" : "Audio chunk:",
4453
- `${audioBuffer.byteLength} bytes`
4454
- );
4455
- this.emitAudio(audioEvent);
5012
+ return null;
4456
5013
  }
4457
- // -----------------------------------------------------------------------
5014
+ // -------------------------------------------------------------------------
4458
5015
  // Subscriber notification
4459
- // -----------------------------------------------------------------------
5016
+ // -------------------------------------------------------------------------
4460
5017
  /**
4461
5018
  * Emit an audio event to all registered callbacks.
4462
5019
  *
@@ -4468,41 +5025,30 @@ var ElevenLabsTTS = class {
4468
5025
  try {
4469
5026
  cb(event);
4470
5027
  } catch (err) {
4471
- console.error(LOG_PREFIX7, "Audio callback threw:", err);
5028
+ console.error(LOG_PREFIX10, "Audio callback threw:", err);
4472
5029
  }
4473
5030
  }
4474
5031
  }
4475
- // -----------------------------------------------------------------------
4476
- // URL building
4477
- // -----------------------------------------------------------------------
4478
- /** Build the ElevenLabs streaming TTS endpoint URL. */
4479
- buildUrl() {
4480
- const params = new URLSearchParams({
4481
- model_id: this.modelId
4482
- });
4483
- return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4484
- }
4485
- // -----------------------------------------------------------------------
5032
+ // -------------------------------------------------------------------------
4486
5033
  // Cleanup
4487
- // -----------------------------------------------------------------------
4488
- /** Reset internal state after disconnection. */
5034
+ // -------------------------------------------------------------------------
5035
+ /** Reset internal state. */
4489
5036
  cleanup() {
4490
5037
  this._connected = false;
4491
- this.bosSent = false;
4492
5038
  }
4493
- // -----------------------------------------------------------------------
5039
+ // -------------------------------------------------------------------------
4494
5040
  // Logging
4495
- // -----------------------------------------------------------------------
5041
+ // -------------------------------------------------------------------------
4496
5042
  /** Conditional debug logging. */
4497
5043
  log(...args) {
4498
5044
  if (this.debugEnabled) {
4499
- console.debug(LOG_PREFIX7, ...args);
5045
+ console.debug(LOG_PREFIX10, ...args);
4500
5046
  }
4501
5047
  }
4502
5048
  };
4503
5049
 
4504
5050
  // src/voice/index.ts
4505
- var LOG_PREFIX8 = "[GuideKit:Voice]";
5051
+ var LOG_PREFIX11 = "[GuideKit:Voice]";
4506
5052
  var JITTER_BUFFER_MS = 150;
4507
5053
  var ECHO_WINDOW_MS = 3e3;
4508
5054
  var ECHO_OVERLAP_THRESHOLD = 0.6;
@@ -4610,17 +5156,42 @@ var VoicePipeline = class {
4610
5156
  cause: err instanceof Error ? err : void 0
4611
5157
  });
4612
5158
  }
4613
- this._stt = new DeepgramSTT({
4614
- apiKey: this._sttConfig.apiKey,
4615
- model: this._sttConfig.model,
4616
- debug: this._debug
4617
- });
4618
- this._tts = new ElevenLabsTTS({
4619
- apiKey: this._ttsConfig.apiKey,
4620
- voiceId: this._ttsConfig.voiceId,
4621
- modelId: this._ttsConfig.modelId,
4622
- debug: this._debug
4623
- });
5159
+ if (this._sttConfig.provider === "deepgram") {
5160
+ this._stt = new DeepgramSTT({
5161
+ apiKey: this._sttConfig.apiKey,
5162
+ model: this._sttConfig.model,
5163
+ debug: this._debug
5164
+ });
5165
+ } else if (this._sttConfig.provider === "elevenlabs") {
5166
+ this._stt = new ElevenLabsSTT({
5167
+ apiKey: this._sttConfig.apiKey,
5168
+ language: this._sttConfig.language,
5169
+ debug: this._debug
5170
+ });
5171
+ } else {
5172
+ this._stt = new WebSpeechSTT({
5173
+ language: this._sttConfig.language,
5174
+ continuous: this._sttConfig.continuous,
5175
+ interimResults: this._sttConfig.interimResults,
5176
+ debug: this._debug
5177
+ });
5178
+ }
5179
+ if (this._ttsConfig.provider === "elevenlabs") {
5180
+ this._tts = new ElevenLabsTTS({
5181
+ apiKey: this._ttsConfig.apiKey,
5182
+ voiceId: this._ttsConfig.voiceId,
5183
+ modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
5184
+ debug: this._debug
5185
+ });
5186
+ } else {
5187
+ this._tts = new WebSpeechTTS({
5188
+ voice: this._ttsConfig.voice,
5189
+ rate: this._ttsConfig.rate,
5190
+ pitch: this._ttsConfig.pitch,
5191
+ language: this._ttsConfig.language,
5192
+ debug: this._debug
5193
+ });
5194
+ }
4624
5195
  this._log("Initialization complete");
4625
5196
  }
4626
5197
  // ────────────────────────────────────────────────────────────────────
@@ -4760,10 +5331,11 @@ var VoicePipeline = class {
4760
5331
  // ────────────────────────────────────────────────────────────────────
4761
5332
  // speak()
4762
5333
  // ────────────────────────────────────────────────────────────────────
4763
- /** Speak text via ElevenLabs TTS. */
5334
+ /** Speak text via TTS (ElevenLabs or Web Speech API). */
4764
5335
  async speak(text) {
4765
5336
  if (this._destroyed || !text.trim()) return;
4766
- if (!this._tts || !this._audioContext) {
5337
+ const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
5338
+ if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
4767
5339
  this._log("TTS or AudioContext not available \u2014 cannot speak");
4768
5340
  this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
4769
5341
  this._setState("idle");
@@ -4807,11 +5379,24 @@ var VoicePipeline = class {
4807
5379
  }
4808
5380
  resolve();
4809
5381
  };
4810
- this._unsubTTSAudio = this._tts.onAudio((event) => {
4811
- this._handleTTSAudio(event, done);
4812
- });
4813
- this._tts.speak(text);
4814
- this._tts.flush();
5382
+ if (isWebSpeechTTS) {
5383
+ this._unsubTTSAudio = this._tts.onAudio(
5384
+ (event) => {
5385
+ if (event.isFinal) {
5386
+ done();
5387
+ }
5388
+ }
5389
+ );
5390
+ this._tts.speak(text);
5391
+ } else {
5392
+ this._unsubTTSAudio = this._tts.onAudio(
5393
+ (event) => {
5394
+ this._handleTTSAudio(event, done);
5395
+ }
5396
+ );
5397
+ this._tts.speak(text);
5398
+ this._tts.flush();
5399
+ }
4815
5400
  });
4816
5401
  }
4817
5402
  // ────────────────────────────────────────────────────────────────────
@@ -4840,7 +5425,9 @@ var VoicePipeline = class {
4840
5425
  this._pendingLLMAbort.abort();
4841
5426
  this._pendingLLMAbort = null;
4842
5427
  }
4843
- if (this._tts?.isConnected) {
5428
+ if (this._tts instanceof WebSpeechTTS) {
5429
+ this._tts.stop();
5430
+ } else if (this._tts?.isConnected) {
4844
5431
  this._tts.close();
4845
5432
  }
4846
5433
  }
@@ -4929,7 +5516,7 @@ var VoicePipeline = class {
4929
5516
  try {
4930
5517
  cb(next, prev);
4931
5518
  } catch (err) {
4932
- console.error(LOG_PREFIX8, "State change callback threw:", err);
5519
+ console.error(LOG_PREFIX11, "State change callback threw:", err);
4933
5520
  }
4934
5521
  }
4935
5522
  }
@@ -5060,7 +5647,7 @@ var VoicePipeline = class {
5060
5647
  try {
5061
5648
  cb(text, isFinal);
5062
5649
  } catch (err) {
5063
- console.error(LOG_PREFIX8, "Transcript callback threw:", err);
5650
+ console.error(LOG_PREFIX11, "Transcript callback threw:", err);
5064
5651
  }
5065
5652
  }
5066
5653
  if (isFinal && this._state === "listening") {
@@ -5163,8 +5750,14 @@ var VoicePipeline = class {
5163
5750
  * sequential playback via AudioBufferSourceNode.
5164
5751
  */
5165
5752
  _decodeAndSchedule(audioData, onDone) {
5753
+ let onDoneCalled = false;
5754
+ const safeOnDone = onDone ? () => {
5755
+ if (onDoneCalled) return;
5756
+ onDoneCalled = true;
5757
+ onDone();
5758
+ } : void 0;
5166
5759
  if (!this._audioContext || this._state !== "speaking") {
5167
- onDone?.();
5760
+ safeOnDone?.();
5168
5761
  return;
5169
5762
  }
5170
5763
  const ctx = this._audioContext;
@@ -5173,7 +5766,7 @@ var VoicePipeline = class {
5173
5766
  copy,
5174
5767
  (decodedBuffer) => {
5175
5768
  if (this._state !== "speaking" || !this._audioContext) {
5176
- onDone?.();
5769
+ safeOnDone?.();
5177
5770
  return;
5178
5771
  }
5179
5772
  const source = ctx.createBufferSource();
@@ -5186,8 +5779,8 @@ var VoicePipeline = class {
5186
5779
  if (this._lastScheduledSource === source) {
5187
5780
  this._lastScheduledSource = null;
5188
5781
  }
5189
- if (onDone) {
5190
- onDone();
5782
+ if (safeOnDone) {
5783
+ safeOnDone();
5191
5784
  }
5192
5785
  };
5193
5786
  const now = ctx.currentTime;
@@ -5203,7 +5796,7 @@ var VoicePipeline = class {
5203
5796
  },
5204
5797
  (err) => {
5205
5798
  this._log("Failed to decode audio chunk:", err);
5206
- onDone?.();
5799
+ safeOnDone?.();
5207
5800
  }
5208
5801
  );
5209
5802
  }
@@ -5262,13 +5855,13 @@ var VoicePipeline = class {
5262
5855
  // ════════════════════════════════════════════════════════════════════
5263
5856
  _log(...args) {
5264
5857
  if (this._debug) {
5265
- console.debug(LOG_PREFIX8, ...args);
5858
+ console.debug(LOG_PREFIX11, ...args);
5266
5859
  }
5267
5860
  }
5268
5861
  };
5269
5862
 
5270
5863
  // src/visual/index.ts
5271
- var LOG_PREFIX9 = "[GuideKit:Visual]";
5864
+ var LOG_PREFIX12 = "[GuideKit:Visual]";
5272
5865
  var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
5273
5866
  var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
5274
5867
  var DEFAULT_ANIMATION_DURATION = 300;
@@ -6185,16 +6778,16 @@ var VisualGuidance = class {
6185
6778
  if (!this.debug) return;
6186
6779
  if (typeof console !== "undefined") {
6187
6780
  if (data) {
6188
- console.log(`${LOG_PREFIX9} ${message}`, data);
6781
+ console.log(`${LOG_PREFIX12} ${message}`, data);
6189
6782
  } else {
6190
- console.log(`${LOG_PREFIX9} ${message}`);
6783
+ console.log(`${LOG_PREFIX12} ${message}`);
6191
6784
  }
6192
6785
  }
6193
6786
  }
6194
6787
  };
6195
6788
 
6196
6789
  // src/awareness/index.ts
6197
- var LOG_PREFIX10 = "[GuideKit:Awareness]";
6790
+ var LOG_PREFIX13 = "[GuideKit:Awareness]";
6198
6791
  var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
6199
6792
  var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
6200
6793
  var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
@@ -6556,13 +7149,13 @@ var AwarenessSystem = class {
6556
7149
  /** Conditional debug logging. */
6557
7150
  log(...args) {
6558
7151
  if (this.debugEnabled) {
6559
- console.debug(LOG_PREFIX10, ...args);
7152
+ console.debug(LOG_PREFIX13, ...args);
6560
7153
  }
6561
7154
  }
6562
7155
  };
6563
7156
 
6564
7157
  // src/awareness/proactive.ts
6565
- var LOG_PREFIX11 = "[GuideKit:Proactive]";
7158
+ var LOG_PREFIX14 = "[GuideKit:Proactive]";
6566
7159
  var STORAGE_KEY = "guidekit:visited";
6567
7160
  var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
6568
7161
  var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
@@ -6600,7 +7193,7 @@ var ProactiveTriggerEngine = class {
6600
7193
  set quietMode(value) {
6601
7194
  this._quietMode = value;
6602
7195
  if (this.debug) {
6603
- console.debug(LOG_PREFIX11, `Quiet mode ${value ? "enabled" : "disabled"}`);
7196
+ console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
6604
7197
  }
6605
7198
  }
6606
7199
  // ---- Lifecycle -----------------------------------------------------------
@@ -6630,7 +7223,7 @@ var ProactiveTriggerEngine = class {
6630
7223
  })
6631
7224
  );
6632
7225
  if (this.debug) {
6633
- console.debug(LOG_PREFIX11, "Started \u2014 subscribed to awareness & dom events");
7226
+ console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
6634
7227
  }
6635
7228
  }
6636
7229
  /** Unsubscribe all bus listeners and clear internal state. */
@@ -6645,7 +7238,7 @@ var ProactiveTriggerEngine = class {
6645
7238
  this.formTimers.clear();
6646
7239
  this.started = false;
6647
7240
  if (this.debug) {
6648
- console.debug(LOG_PREFIX11, "Stopped \u2014 all listeners removed");
7241
+ console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
6649
7242
  }
6650
7243
  }
6651
7244
  /** Alias for {@link stop}. */
@@ -6680,7 +7273,7 @@ var ProactiveTriggerEngine = class {
6680
7273
  }, FORM_ABANDON_MS);
6681
7274
  this.formTimers.set(formSelector, timer);
6682
7275
  if (this.debug) {
6683
- console.debug(LOG_PREFIX11, `Form interaction started: ${formSelector}`);
7276
+ console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
6684
7277
  }
6685
7278
  }
6686
7279
  /** Reset all cooldowns and internal tracking state (useful for testing). */
@@ -6694,7 +7287,7 @@ var ProactiveTriggerEngine = class {
6694
7287
  }
6695
7288
  this.formTimers.clear();
6696
7289
  if (this.debug) {
6697
- console.debug(LOG_PREFIX11, "All cooldowns and state reset");
7290
+ console.debug(LOG_PREFIX14, "All cooldowns and state reset");
6698
7291
  }
6699
7292
  }
6700
7293
  // ---- Internal handlers ---------------------------------------------------
@@ -6711,22 +7304,23 @@ var ProactiveTriggerEngine = class {
6711
7304
  message: "First-time visitor detected. Show a visual greeting (no audio)."
6712
7305
  }, "greeting");
6713
7306
  if (this.debug) {
6714
- console.debug(LOG_PREFIX11, "First visit \u2014 greeting triggered");
7307
+ console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
6715
7308
  }
6716
7309
  return;
6717
7310
  }
6718
7311
  const visitedAt = parseInt(visited, 10);
6719
- if (!Number.isNaN(visitedAt)) {
6720
- const elapsed = Date.now() - visitedAt;
6721
- if (elapsed <= SEVEN_DAYS_MS && this.debug) {
6722
- console.debug(LOG_PREFIX11, "Return visitor within 7 days \u2014 silent");
6723
- } else if (this.debug) {
6724
- console.debug(LOG_PREFIX11, "Return visitor after 7 days");
6725
- }
7312
+ if (Number.isNaN(visitedAt)) {
7313
+ return;
7314
+ }
7315
+ const elapsed = Date.now() - visitedAt;
7316
+ if (elapsed <= SEVEN_DAYS_MS && this.debug) {
7317
+ console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
7318
+ } else if (this.debug) {
7319
+ console.debug(LOG_PREFIX14, "Return visitor after 7 days");
6726
7320
  }
6727
7321
  } catch {
6728
7322
  if (this.debug) {
6729
- console.warn(LOG_PREFIX11, "localStorage unavailable \u2014 skipping greeting check");
7323
+ console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
6730
7324
  }
6731
7325
  }
6732
7326
  }
@@ -6744,7 +7338,7 @@ var ProactiveTriggerEngine = class {
6744
7338
  const count = this.dwellCounts.get(sectionId) ?? 0;
6745
7339
  if (count >= DWELL_COOLDOWNS.length + 1) {
6746
7340
  if (this.debug) {
6747
- console.debug(LOG_PREFIX11, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
7341
+ console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
6748
7342
  }
6749
7343
  return;
6750
7344
  }
@@ -6754,7 +7348,7 @@ var ProactiveTriggerEngine = class {
6754
7348
  const lastFired = this.cooldowns.get(key) ?? 0;
6755
7349
  if (Date.now() - lastFired < cooldownMs) {
6756
7350
  if (this.debug) {
6757
- console.debug(LOG_PREFIX11, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
7351
+ console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
6758
7352
  }
6759
7353
  return;
6760
7354
  }
@@ -6770,7 +7364,7 @@ var ProactiveTriggerEngine = class {
6770
7364
  const sectionKey = selector;
6771
7365
  if (this.frustrationFired.has(sectionKey)) {
6772
7366
  if (this.debug) {
6773
- console.debug(LOG_PREFIX11, `Frustration already fired for "${selector}" \u2014 suppressed`);
7367
+ console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
6774
7368
  }
6775
7369
  return;
6776
7370
  }
@@ -6786,7 +7380,7 @@ var ProactiveTriggerEngine = class {
6786
7380
  const key = "navigation-commentary";
6787
7381
  if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
6788
7382
  if (this.debug) {
6789
- console.debug(LOG_PREFIX11, "Navigation cooldown active \u2014 suppressed");
7383
+ console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
6790
7384
  }
6791
7385
  return;
6792
7386
  }
@@ -6809,7 +7403,7 @@ var ProactiveTriggerEngine = class {
6809
7403
  fireTrigger(partial, cooldownKey) {
6810
7404
  if (this._quietMode) {
6811
7405
  if (this.debug) {
6812
- console.debug(LOG_PREFIX11, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
7406
+ console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
6813
7407
  }
6814
7408
  return;
6815
7409
  }
@@ -6819,13 +7413,13 @@ var ProactiveTriggerEngine = class {
6819
7413
  };
6820
7414
  this.cooldowns.set(cooldownKey, trigger.timestamp);
6821
7415
  if (this.debug) {
6822
- console.debug(LOG_PREFIX11, "Trigger fired:", trigger.type, trigger);
7416
+ console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
6823
7417
  }
6824
7418
  if (this.onTrigger) {
6825
7419
  try {
6826
7420
  this.onTrigger(trigger);
6827
7421
  } catch (err) {
6828
- console.error(LOG_PREFIX11, "onTrigger callback error:", err);
7422
+ console.error(LOG_PREFIX14, "onTrigger callback error:", err);
6829
7423
  }
6830
7424
  }
6831
7425
  }
@@ -6838,7 +7432,7 @@ var ProactiveTriggerEngine = class {
6838
7432
  };
6839
7433
 
6840
7434
  // src/llm/rate-limiter.ts
6841
- var LOG_PREFIX12 = "[GuideKit:RateLimiter]";
7435
+ var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
6842
7436
  var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
6843
7437
  var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
6844
7438
  var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
@@ -6933,7 +7527,19 @@ var RateLimiter = class {
6933
7527
  get sttMinutesUsed() {
6934
7528
  let totalMs = this.sttMs;
6935
7529
  if (this.sttStartedAt !== null) {
6936
- totalMs += Date.now() - this.sttStartedAt;
7530
+ const activeMs = Date.now() - this.sttStartedAt;
7531
+ const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
7532
+ const maxActiveMs = maxSessionMs * 2;
7533
+ if (activeMs > maxActiveMs) {
7534
+ console.warn(
7535
+ `${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
7536
+ );
7537
+ this.sttMs += maxActiveMs;
7538
+ this.sttStartedAt = null;
7539
+ totalMs = this.sttMs;
7540
+ } else {
7541
+ totalMs += activeMs;
7542
+ }
6937
7543
  }
6938
7544
  return totalMs / 6e4;
6939
7545
  }
@@ -7005,7 +7611,7 @@ var RateLimiter = class {
7005
7611
  }
7006
7612
  log(...args) {
7007
7613
  if (this.debug) {
7008
- console.debug(LOG_PREFIX12, ...args);
7614
+ console.debug(LOG_PREFIX15, ...args);
7009
7615
  }
7010
7616
  }
7011
7617
  };
@@ -7238,7 +7844,7 @@ var BUILTIN_LOCALES = {
7238
7844
  pt
7239
7845
  };
7240
7846
  var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
7241
- var LOG_PREFIX13 = "[GuideKit:I18n]";
7847
+ var LOG_PREFIX16 = "[GuideKit:I18n]";
7242
7848
  function isSupportedLocale(code) {
7243
7849
  return SUPPORTED_LOCALE_CODES.has(code);
7244
7850
  }
@@ -7276,7 +7882,7 @@ var I18n = class {
7276
7882
  this.strings = strings;
7277
7883
  this.resolvedLocale = resolvedLocale;
7278
7884
  if (this.debug) {
7279
- console.debug(`${LOG_PREFIX13} Initialized with locale "${this.resolvedLocale}"`);
7885
+ console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
7280
7886
  }
7281
7887
  }
7282
7888
  // -------------------------------------------------------------------------
@@ -7287,9 +7893,9 @@ var I18n = class {
7287
7893
  const value = this.strings[key];
7288
7894
  if (value === void 0) {
7289
7895
  if (this.debug) {
7290
- console.warn(`${LOG_PREFIX13} Missing translation key "${key}"`);
7896
+ console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
7291
7897
  }
7292
- return en[key] ?? key;
7898
+ return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
7293
7899
  }
7294
7900
  return value;
7295
7901
  }
@@ -7303,7 +7909,7 @@ var I18n = class {
7303
7909
  this.strings = strings;
7304
7910
  this.resolvedLocale = resolvedLocale;
7305
7911
  if (this.debug) {
7306
- console.debug(`${LOG_PREFIX13} Locale changed to "${this.resolvedLocale}"`);
7912
+ console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
7307
7913
  }
7308
7914
  }
7309
7915
  /** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
@@ -7323,7 +7929,7 @@ var I18n = class {
7323
7929
  if (locale === "auto") {
7324
7930
  const detected = detectLocaleFromDocument();
7325
7931
  if (this.debug) {
7326
- console.debug(`${LOG_PREFIX13} Auto-detected locale "${detected}"`);
7932
+ console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
7327
7933
  }
7328
7934
  return {
7329
7935
  strings: BUILTIN_LOCALES[detected],
@@ -7338,7 +7944,7 @@ var I18n = class {
7338
7944
  }
7339
7945
  if (this.debug) {
7340
7946
  console.warn(
7341
- `${LOG_PREFIX13} Unknown locale "${String(locale)}", falling back to "en"`
7947
+ `${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
7342
7948
  );
7343
7949
  }
7344
7950
  return {
@@ -7349,7 +7955,7 @@ var I18n = class {
7349
7955
  };
7350
7956
 
7351
7957
  // src/auth/token-manager.ts
7352
- var LOG_PREFIX14 = "[GuideKit:Auth]";
7958
+ var LOG_PREFIX17 = "[GuideKit:Auth]";
7353
7959
  var REFRESH_THRESHOLD = 0.8;
7354
7960
  var MAX_RETRY_ATTEMPTS = 3;
7355
7961
  var RETRY_BASE_MS = 1e3;
@@ -7628,7 +8234,7 @@ var TokenManager = class {
7628
8234
  }
7629
8235
  log(message) {
7630
8236
  if (this.debug) {
7631
- console.debug(`${LOG_PREFIX14} ${message}`);
8237
+ console.debug(`${LOG_PREFIX17} ${message}`);
7632
8238
  }
7633
8239
  }
7634
8240
  };
@@ -7760,6 +8366,11 @@ var GuideKitCore = class {
7760
8366
  debug: this._debug
7761
8367
  });
7762
8368
  await this.tokenManager.start();
8369
+ if (!this._options.llm) {
8370
+ console.warn(
8371
+ "[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
8372
+ );
8373
+ }
7763
8374
  this.resourceManager.register({
7764
8375
  name: "token-manager",
7765
8376
  cleanup: () => this.tokenManager?.destroy()
@@ -7882,21 +8493,50 @@ var GuideKitCore = class {
7882
8493
  }
7883
8494
  });
7884
8495
  this.registerBuiltinTools();
7885
- if (this._options.stt && this._options.tts) {
7886
- const sttConfig = this._options.stt;
7887
- const ttsConfig = this._options.tts;
7888
- if (sttConfig.provider === "deepgram" && ttsConfig.provider === "elevenlabs") {
8496
+ {
8497
+ const sttConfig = this._options.stt ?? { provider: "web-speech" };
8498
+ const ttsConfig = this._options.tts ?? { provider: "web-speech" };
8499
+ let voiceSttConfig;
8500
+ let voiceTtsConfig;
8501
+ if (sttConfig.provider === "deepgram") {
8502
+ voiceSttConfig = {
8503
+ provider: "deepgram",
8504
+ apiKey: sttConfig.apiKey,
8505
+ model: sttConfig.model
8506
+ };
8507
+ } else if (sttConfig.provider === "elevenlabs") {
8508
+ voiceSttConfig = {
8509
+ provider: "elevenlabs",
8510
+ apiKey: sttConfig.apiKey,
8511
+ language: sttConfig.language
8512
+ };
8513
+ } else {
8514
+ voiceSttConfig = {
8515
+ provider: "web-speech",
8516
+ language: sttConfig.language,
8517
+ continuous: sttConfig.continuous,
8518
+ interimResults: sttConfig.interimResults
8519
+ };
8520
+ }
8521
+ if (ttsConfig.provider === "elevenlabs") {
8522
+ voiceTtsConfig = {
8523
+ provider: "elevenlabs",
8524
+ apiKey: ttsConfig.apiKey,
8525
+ voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
8526
+ };
8527
+ } else {
8528
+ voiceTtsConfig = {
8529
+ provider: "web-speech",
8530
+ voice: ttsConfig.voice,
8531
+ rate: ttsConfig.rate,
8532
+ pitch: ttsConfig.pitch,
8533
+ language: ttsConfig.language
8534
+ };
8535
+ }
8536
+ try {
7889
8537
  this.voicePipeline = new VoicePipeline({
7890
- sttConfig: {
7891
- provider: "deepgram",
7892
- apiKey: sttConfig.apiKey,
7893
- model: "model" in sttConfig ? sttConfig.model : void 0
7894
- },
7895
- ttsConfig: {
7896
- provider: "elevenlabs",
7897
- apiKey: ttsConfig.apiKey,
7898
- voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
7899
- },
8538
+ sttConfig: voiceSttConfig,
8539
+ ttsConfig: voiceTtsConfig,
7900
8540
  debug: this._debug
7901
8541
  });
7902
8542
  this.voicePipeline.onStateChange((state, previous) => {
@@ -7929,6 +8569,11 @@ var GuideKitCore = class {
7929
8569
  name: "voice-pipeline",
7930
8570
  cleanup: () => this.voicePipeline?.destroy()
7931
8571
  });
8572
+ } catch (_err) {
8573
+ this.voicePipeline = null;
8574
+ if (this._debug) {
8575
+ console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
8576
+ }
7932
8577
  }
7933
8578
  }
7934
8579
  const session = this.contextManager.restoreSession();
@@ -8053,7 +8698,7 @@ var GuideKitCore = class {
8053
8698
  return responseText;
8054
8699
  } catch (error) {
8055
8700
  const err = error instanceof GuideKitError ? error : new GuideKitError({
8056
- code: "UNKNOWN",
8701
+ code: ErrorCodes.UNKNOWN,
8057
8702
  message: error instanceof Error ? error.message : "Unknown error",
8058
8703
  recoverable: false,
8059
8704
  suggestion: "Check the console for details."
@@ -8309,172 +8954,11 @@ var GuideKitCore = class {
8309
8954
  };
8310
8955
  }
8311
8956
  /**
8312
- * Register all built-in tool handlers with the ToolExecutor.
8313
- * Called once during init() after VisualGuidance and all subsystems are ready.
8957
+ * Unified built-in tool specifications — single source of truth for both
8958
+ * tool definitions (sent to LLM) and handler registration.
8314
8959
  */
8315
- registerBuiltinTools() {
8316
- if (!this.toolExecutor) return;
8317
- this.toolExecutor.registerTool({
8318
- name: "highlight",
8319
- execute: async (args) => {
8320
- const sectionId = args.sectionId;
8321
- const selector = args.selector;
8322
- const tooltip = args.tooltip;
8323
- const position = args.position;
8324
- const result = this.highlight({ sectionId, selector, tooltip, position });
8325
- return { success: result };
8326
- }
8327
- });
8328
- this.toolExecutor.registerTool({
8329
- name: "dismissHighlight",
8330
- execute: async () => {
8331
- this.dismissHighlight();
8332
- return { success: true };
8333
- }
8334
- });
8335
- this.toolExecutor.registerTool({
8336
- name: "scrollToSection",
8337
- execute: async (args) => {
8338
- const sectionId = args.sectionId;
8339
- const offset = args.offset;
8340
- this.scrollToSection(sectionId, offset);
8341
- return { success: true };
8342
- }
8343
- });
8344
- this.toolExecutor.registerTool({
8345
- name: "navigate",
8346
- execute: async (args) => {
8347
- const href = args.href;
8348
- const result = await this.navigate(href);
8349
- return { success: result, navigatedTo: result ? href : null };
8350
- }
8351
- });
8352
- this.toolExecutor.registerTool({
8353
- name: "startTour",
8354
- execute: async (args) => {
8355
- const sectionIds = args.sectionIds;
8356
- const mode = args.mode ?? "manual";
8357
- this.startTour(sectionIds, mode);
8358
- return { success: true, steps: sectionIds.length };
8359
- }
8360
- });
8361
- this.toolExecutor.registerTool({
8362
- name: "readPageContent",
8363
- execute: async (args) => {
8364
- const sectionId = args.sectionId;
8365
- const query = args.query;
8366
- const model = this._currentPageModel;
8367
- if (!model) return { error: "No page model available" };
8368
- if (sectionId) {
8369
- const section = model.sections.find((s) => s.id === sectionId);
8370
- if (section) {
8371
- const contentMapResult = await this.contextManager.getContent(sectionId);
8372
- return {
8373
- sectionId: section.id,
8374
- label: section.label,
8375
- summary: section.summary,
8376
- contentMap: contentMapResult
8377
- };
8378
- }
8379
- return { error: `Section "${sectionId}" not found` };
8380
- }
8381
- if (query) {
8382
- const queryLower = query.toLowerCase();
8383
- const matches = model.sections.filter(
8384
- (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
8385
- );
8386
- return {
8387
- query,
8388
- results: matches.slice(0, 5).map((s) => ({
8389
- sectionId: s.id,
8390
- label: s.label,
8391
- snippet: s.summary?.slice(0, 200)
8392
- }))
8393
- };
8394
- }
8395
- return { error: "Provide either sectionId or query" };
8396
- }
8397
- });
8398
- this.toolExecutor.registerTool({
8399
- name: "getVisibleSections",
8400
- execute: async () => {
8401
- const model = this._currentPageModel;
8402
- if (!model) return { sections: [] };
8403
- return {
8404
- sections: model.sections.slice(0, 10).map((s) => ({
8405
- id: s.id,
8406
- label: s.label,
8407
- selector: s.selector,
8408
- score: s.score
8409
- }))
8410
- };
8411
- }
8412
- });
8413
- this.toolExecutor.registerTool({
8414
- name: "clickElement",
8415
- execute: async (args) => {
8416
- if (typeof document === "undefined") return { success: false, error: "Not in browser" };
8417
- const selector = args.selector;
8418
- const el = document.querySelector(selector);
8419
- if (!el) return { success: false, error: `Element not found: ${selector}` };
8420
- if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
8421
- const clickableRules = this._options.options?.clickableSelectors;
8422
- const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
8423
- try {
8424
- return el.matches(pattern);
8425
- } catch {
8426
- return selector === pattern;
8427
- }
8428
- }) ?? false;
8429
- if (!isInDevAllowList) {
8430
- const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
8431
- try {
8432
- return el.matches(pattern);
8433
- } catch {
8434
- return false;
8435
- }
8436
- });
8437
- if (defaultDenied) {
8438
- return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
8439
- }
8440
- }
8441
- if (clickableRules?.deny?.length) {
8442
- const denied = clickableRules.deny.some((pattern) => {
8443
- try {
8444
- return el.matches(pattern);
8445
- } catch {
8446
- return selector === pattern;
8447
- }
8448
- });
8449
- if (denied) {
8450
- return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
8451
- }
8452
- }
8453
- if (clickableRules?.allow?.length && !isInDevAllowList) {
8454
- return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
8455
- }
8456
- el.click();
8457
- return { success: true };
8458
- }
8459
- });
8460
- this.toolExecutor.registerTool({
8461
- name: "executeCustomAction",
8462
- execute: async (args) => {
8463
- const actionId = args.actionId;
8464
- const params = args.params ?? {};
8465
- const action = this.customActions.get(actionId);
8466
- if (!action) return { error: `Unknown action: ${actionId}` };
8467
- try {
8468
- const result = await action.handler(params);
8469
- return { success: true, result };
8470
- } catch (err) {
8471
- return { success: false, error: err instanceof Error ? err.message : String(err) };
8472
- }
8473
- }
8474
- });
8475
- }
8476
- getToolDefinitions() {
8477
- const builtinTools = [
8960
+ getBuiltinToolSpecs() {
8961
+ return [
8478
8962
  {
8479
8963
  name: "highlight",
8480
8964
  description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
@@ -8484,13 +8968,27 @@ var GuideKitCore = class {
8484
8968
  tooltip: { type: "string", description: "Text to show in tooltip" },
8485
8969
  position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
8486
8970
  },
8487
- schemaVersion: 1
8971
+ required: [],
8972
+ schemaVersion: 1,
8973
+ execute: async (args) => {
8974
+ const sectionId = args.sectionId;
8975
+ const selector = args.selector;
8976
+ const tooltip = args.tooltip;
8977
+ const position = args.position;
8978
+ const result = this.highlight({ sectionId, selector, tooltip, position });
8979
+ return { success: result };
8980
+ }
8488
8981
  },
8489
8982
  {
8490
8983
  name: "dismissHighlight",
8491
8984
  description: "Remove the current spotlight overlay.",
8492
8985
  parameters: {},
8493
- schemaVersion: 1
8986
+ required: [],
8987
+ schemaVersion: 1,
8988
+ execute: async () => {
8989
+ this.dismissHighlight();
8990
+ return { success: true };
8991
+ }
8494
8992
  },
8495
8993
  {
8496
8994
  name: "scrollToSection",
@@ -8499,7 +8997,14 @@ var GuideKitCore = class {
8499
8997
  sectionId: { type: "string", description: "ID of the section to scroll to" },
8500
8998
  offset: { type: "number", description: "Pixel offset for sticky headers" }
8501
8999
  },
8502
- schemaVersion: 1
9000
+ required: ["sectionId"],
9001
+ schemaVersion: 1,
9002
+ execute: async (args) => {
9003
+ const sectionId = args.sectionId;
9004
+ const offset = args.offset;
9005
+ this.scrollToSection(sectionId, offset);
9006
+ return { success: true };
9007
+ }
8503
9008
  },
8504
9009
  {
8505
9010
  name: "navigate",
@@ -8507,7 +9012,13 @@ var GuideKitCore = class {
8507
9012
  parameters: {
8508
9013
  href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
8509
9014
  },
8510
- schemaVersion: 1
9015
+ required: ["href"],
9016
+ schemaVersion: 1,
9017
+ execute: async (args) => {
9018
+ const href = args.href;
9019
+ const result = await this.navigate(href);
9020
+ return { success: result, navigatedTo: result ? href : null };
9021
+ }
8511
9022
  },
8512
9023
  {
8513
9024
  name: "startTour",
@@ -8516,7 +9027,14 @@ var GuideKitCore = class {
8516
9027
  sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
8517
9028
  mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
8518
9029
  },
8519
- schemaVersion: 1
9030
+ required: ["sectionIds"],
9031
+ schemaVersion: 1,
9032
+ execute: async (args) => {
9033
+ const sectionIds = args.sectionIds;
9034
+ const mode = args.mode ?? "manual";
9035
+ this.startTour(sectionIds, mode);
9036
+ return { success: true, steps: sectionIds.length };
9037
+ }
8520
9038
  },
8521
9039
  {
8522
9040
  name: "readPageContent",
@@ -8525,13 +9043,61 @@ var GuideKitCore = class {
8525
9043
  sectionId: { type: "string", description: "Section ID to read" },
8526
9044
  query: { type: "string", description: "Keyword to search for across sections" }
8527
9045
  },
8528
- schemaVersion: 1
9046
+ required: [],
9047
+ schemaVersion: 1,
9048
+ execute: async (args) => {
9049
+ const sectionId = args.sectionId;
9050
+ const query = args.query;
9051
+ const model = this._currentPageModel;
9052
+ if (!model) return { error: "No page model available" };
9053
+ if (sectionId) {
9054
+ const section = model.sections.find((s) => s.id === sectionId);
9055
+ if (section) {
9056
+ const contentMapResult = await this.contextManager.getContent(sectionId);
9057
+ return {
9058
+ sectionId: section.id,
9059
+ label: section.label,
9060
+ summary: section.summary,
9061
+ contentMap: contentMapResult
9062
+ };
9063
+ }
9064
+ return { error: `Section "${sectionId}" not found` };
9065
+ }
9066
+ if (query) {
9067
+ const queryLower = query.toLowerCase();
9068
+ const matches = model.sections.filter(
9069
+ (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
9070
+ );
9071
+ return {
9072
+ query,
9073
+ results: matches.slice(0, 5).map((s) => ({
9074
+ sectionId: s.id,
9075
+ label: s.label,
9076
+ snippet: s.summary?.slice(0, 200)
9077
+ }))
9078
+ };
9079
+ }
9080
+ return { error: "Provide either sectionId or query" };
9081
+ }
8529
9082
  },
8530
9083
  {
8531
9084
  name: "getVisibleSections",
8532
9085
  description: "Get the list of sections currently visible in the user viewport.",
8533
9086
  parameters: {},
8534
- schemaVersion: 1
9087
+ required: [],
9088
+ schemaVersion: 1,
9089
+ execute: async () => {
9090
+ const model = this._currentPageModel;
9091
+ if (!model) return { sections: [] };
9092
+ return {
9093
+ sections: model.sections.slice(0, 10).map((s) => ({
9094
+ id: s.id,
9095
+ label: s.label,
9096
+ selector: s.selector,
9097
+ score: s.score
9098
+ }))
9099
+ };
9100
+ }
8535
9101
  },
8536
9102
  {
8537
9103
  name: "clickElement",
@@ -8539,7 +9105,52 @@ var GuideKitCore = class {
8539
9105
  parameters: {
8540
9106
  selector: { type: "string", description: "CSS selector of the element to click" }
8541
9107
  },
8542
- schemaVersion: 1
9108
+ required: ["selector"],
9109
+ schemaVersion: 1,
9110
+ execute: async (args) => {
9111
+ if (typeof document === "undefined") return { success: false, error: "Not in browser" };
9112
+ const selector = args.selector;
9113
+ const el = document.querySelector(selector);
9114
+ if (!el) return { success: false, error: `Element not found: ${selector}` };
9115
+ if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
9116
+ const clickableRules = this._options.options?.clickableSelectors;
9117
+ const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
9118
+ try {
9119
+ return el.matches(pattern);
9120
+ } catch {
9121
+ return selector === pattern;
9122
+ }
9123
+ }) ?? false;
9124
+ if (!isInDevAllowList) {
9125
+ const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
9126
+ try {
9127
+ return el.matches(pattern);
9128
+ } catch {
9129
+ return false;
9130
+ }
9131
+ });
9132
+ if (defaultDenied) {
9133
+ return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
9134
+ }
9135
+ }
9136
+ if (clickableRules?.deny?.length) {
9137
+ const denied = clickableRules.deny.some((pattern) => {
9138
+ try {
9139
+ return el.matches(pattern);
9140
+ } catch {
9141
+ return selector === pattern;
9142
+ }
9143
+ });
9144
+ if (denied) {
9145
+ return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
9146
+ }
9147
+ }
9148
+ if (clickableRules?.allow?.length && !isInDevAllowList) {
9149
+ return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
9150
+ }
9151
+ el.click();
9152
+ return { success: true };
9153
+ }
8543
9154
  },
8544
9155
  {
8545
9156
  name: "executeCustomAction",
@@ -8548,9 +9159,37 @@ var GuideKitCore = class {
8548
9159
  actionId: { type: "string", description: "ID of the custom action" },
8549
9160
  params: { type: "object", description: "Parameters for the action" }
8550
9161
  },
8551
- schemaVersion: 1
9162
+ required: ["actionId"],
9163
+ schemaVersion: 1,
9164
+ execute: async (args) => {
9165
+ const actionId = args.actionId;
9166
+ const params = args.params ?? {};
9167
+ const action = this.customActions.get(actionId);
9168
+ if (!action) return { error: `Unknown action: ${actionId}` };
9169
+ try {
9170
+ const result = await action.handler(params);
9171
+ return { success: true, result };
9172
+ } catch (err) {
9173
+ return { success: false, error: err instanceof Error ? err.message : String(err) };
9174
+ }
9175
+ }
8552
9176
  }
8553
9177
  ];
9178
+ }
9179
+ /**
9180
+ * Register all built-in tool handlers with the ToolExecutor.
9181
+ * Called once during init() after VisualGuidance and all subsystems are ready.
9182
+ */
9183
+ registerBuiltinTools() {
9184
+ if (!this.toolExecutor) return;
9185
+ for (const spec of this.getBuiltinToolSpecs()) {
9186
+ this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
9187
+ }
9188
+ }
9189
+ getToolDefinitions() {
9190
+ const builtinTools = this.getBuiltinToolSpecs().map(
9191
+ ({ execute: _execute, ...def }) => def
9192
+ );
8554
9193
  for (const [actionId, action] of this.customActions) {
8555
9194
  builtinTools.push({
8556
9195
  name: `action_${actionId}`,
@@ -8563,6 +9202,6 @@ var GuideKitCore = class {
8563
9202
  }
8564
9203
  };
8565
9204
 
8566
- export { AuthenticationError, AwarenessSystem, BrowserSupportError, ConfigurationError, ConnectionManager, ContentFilterError, ContextManager, DOMScanner, ErrorCodes, EventBus, GeminiAdapter, GuideKitCore, GuideKitError, I18n, InitializationError, LLMOrchestrator, NavigationController, NetworkError, OpenAIAdapter, PermissionError, ProactiveTriggerEngine, RateLimitError, RateLimiter, ResourceExhaustedError, ResourceManager, SingletonGuard, TimeoutError, TokenManager, ToolExecutor, VisualGuidance, createEventBus, isGuideKitError };
9205
+ export { AuthenticationError, AwarenessSystem, BrowserSupportError, ConfigurationError, ConnectionManager, ContentFilterError, ContextManager, DOMScanner, ErrorCodes, EventBus, GeminiAdapter, GuideKitCore, GuideKitError, I18n, InitializationError, LLMOrchestrator, NavigationController, NetworkError, PermissionError, ProactiveTriggerEngine, RateLimitError, RateLimiter, ResourceExhaustedError, ResourceManager, SingletonGuard, TimeoutError, TokenManager, ToolExecutor, VisualGuidance, VoicePipeline, WebSpeechSTT, WebSpeechTTS, createEventBus, isGuideKitError };
8567
9206
  //# sourceMappingURL=index.js.map
8568
9207
  //# sourceMappingURL=index.js.map