@guidekit/core 0.1.0-beta.1 → 0.1.0-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1093,8 +1093,8 @@ var DOMScanner = class {
1093
1093
  if (el.closest("[data-guidekit-ignore]")) return;
1094
1094
  const style = window.getComputedStyle(el);
1095
1095
  const position = style.position;
1096
- const zIndex = parseInt(style.zIndex, 10);
1097
- if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
1096
+ const zIndex = parseInt(style.zIndex, 10) || 0;
1097
+ if ((position === "fixed" || position === "absolute") && !Number.isNaN(zIndex) && zIndex >= 1e3) {
1098
1098
  const visible = isElementVisible(el);
1099
1099
  if (!visible) return;
1100
1100
  const overlayType = this.classifyOverlay(el, style);
@@ -1122,10 +1122,10 @@ var DOMScanner = class {
1122
1122
  return "dropdown";
1123
1123
  const width = parseFloat(style.width);
1124
1124
  const height = parseFloat(style.height);
1125
- if (typeof window !== "undefined" && !isNaN(width) && !isNaN(height) && width > window.innerWidth * 0.5 && height > window.innerHeight * 0.5) {
1125
+ if (typeof window !== "undefined" && !Number.isNaN(width) && !Number.isNaN(height) && width > window.innerWidth * 0.5 && height > window.innerHeight * 0.5) {
1126
1126
  return "modal";
1127
1127
  }
1128
- if (!isNaN(width) && width < 400) return "popover";
1128
+ if (!Number.isNaN(width) && width < 400) return "popover";
1129
1129
  return null;
1130
1130
  }
1131
1131
  // -------------------------------------------------------------------------
@@ -1750,7 +1750,9 @@ var ErrorCodes = {
1750
1750
  // Content
1751
1751
  CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
1752
1752
  // Privacy
1753
- PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
1753
+ PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
1754
+ // General
1755
+ UNKNOWN: "UNKNOWN"
1754
1756
  };
1755
1757
  var GuideKitError = class extends Error {
1756
1758
  code;
@@ -1839,356 +1841,9 @@ function isGuideKitError(error) {
1839
1841
  return error instanceof GuideKitError;
1840
1842
  }
1841
1843
 
1842
- // src/llm/openai-adapter.ts
1843
- var DEFAULT_OPENAI_MODEL = "gpt-4o";
1844
- var DEFAULT_TIMEOUT_MS = 15e3;
1845
- var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
1846
- var OpenAIAdapter = class {
1847
- apiKey;
1848
- model;
1849
- constructor(config) {
1850
- this.apiKey = config.apiKey;
1851
- this.model = config.model ?? DEFAULT_OPENAI_MODEL;
1852
- }
1853
- // -----------------------------------------------------------------------
1854
- // LLMProviderAdapter implementation
1855
- // -----------------------------------------------------------------------
1856
- /**
1857
- * Convert GuideKit tool definitions into OpenAI's `tools` format.
1858
- * Each tool is wrapped as `{ type: 'function', function: { name, description, parameters } }`.
1859
- */
1860
- formatTools(tools) {
1861
- if (tools.length === 0) return void 0;
1862
- return tools.map((tool) => ({
1863
- type: "function",
1864
- function: {
1865
- name: tool.name,
1866
- description: tool.description,
1867
- parameters: tool.parameters
1868
- }
1869
- }));
1870
- }
1871
- /**
1872
- * Convert an array of `ConversationTurn` objects into OpenAI's messages
1873
- * format with `role: 'user' | 'assistant'`.
1874
- */
1875
- formatConversation(history) {
1876
- return history.map((turn) => ({
1877
- role: turn.role,
1878
- content: turn.content
1879
- }));
1880
- }
1881
- /**
1882
- * Parse an OpenAI SSE streaming response into an async iterable of
1883
- * `TextChunk` and `ToolCall` objects.
1884
- *
1885
- * The OpenAI streaming endpoint sends each chunk as a JSON object
1886
- * prefixed by `data: `. The final line is `data: [DONE]`.
1887
- * Text content arrives in `choices[0].delta.content` and tool calls
1888
- * arrive in `choices[0].delta.tool_calls`.
1889
- */
1890
- async *parseResponse(stream) {
1891
- const reader = stream.getReader();
1892
- const decoder = new TextDecoder();
1893
- let buffer = "";
1894
- const pendingToolCalls = /* @__PURE__ */ new Map();
1895
- try {
1896
- while (true) {
1897
- const { done, value } = await reader.read();
1898
- if (done) break;
1899
- buffer += decoder.decode(value, { stream: true });
1900
- const lines = buffer.split("\n");
1901
- buffer = lines.pop() ?? "";
1902
- for (const line of lines) {
1903
- const trimmed = line.trim();
1904
- if (!trimmed.startsWith("data:")) continue;
1905
- const jsonStr = trimmed.slice(5).trim();
1906
- if (jsonStr === "" || jsonStr === "[DONE]") {
1907
- if (jsonStr === "[DONE]") {
1908
- yield* this.flushPendingToolCalls(pendingToolCalls);
1909
- yield { text: "", done: true };
1910
- }
1911
- continue;
1912
- }
1913
- let parsed;
1914
- try {
1915
- parsed = JSON.parse(jsonStr);
1916
- } catch {
1917
- continue;
1918
- }
1919
- yield* this.extractChunks(parsed, pendingToolCalls);
1920
- }
1921
- }
1922
- if (buffer.trim().startsWith("data:")) {
1923
- const jsonStr = buffer.trim().slice(5).trim();
1924
- if (jsonStr === "[DONE]") {
1925
- yield* this.flushPendingToolCalls(pendingToolCalls);
1926
- yield { text: "", done: true };
1927
- } else if (jsonStr !== "") {
1928
- try {
1929
- const parsed = JSON.parse(jsonStr);
1930
- yield* this.extractChunks(parsed, pendingToolCalls);
1931
- } catch {
1932
- }
1933
- }
1934
- }
1935
- yield* this.flushPendingToolCalls(pendingToolCalls);
1936
- } finally {
1937
- reader.releaseLock();
1938
- }
1939
- }
1940
- /**
1941
- * Format a tool result so it can be sent back to OpenAI as a
1942
- * `tool` role message with the `tool_call_id`.
1943
- */
1944
- formatToolResult(callId, result) {
1945
- return {
1946
- role: "tool",
1947
- tool_call_id: callId,
1948
- content: typeof result === "string" ? result : JSON.stringify(result)
1949
- };
1950
- }
1951
- // -----------------------------------------------------------------------
1952
- // Streaming request
1953
- // -----------------------------------------------------------------------
1954
- /**
1955
- * Build and execute a streaming request to the OpenAI Chat Completions API.
1956
- * Returns the raw `ReadableStream` for the response body together with
1957
- * the raw Response object.
1958
- */
1959
- async streamRequest(params) {
1960
- const messages = [
1961
- { role: "system", content: params.systemPrompt },
1962
- ...params.contents
1963
- ];
1964
- const body = {
1965
- model: this.model,
1966
- messages,
1967
- stream: true,
1968
- temperature: 0.7,
1969
- top_p: 0.95
1970
- };
1971
- if (params.tools) {
1972
- body.tools = params.tools;
1973
- }
1974
- const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
1975
- const controller = new AbortController();
1976
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
1977
- if (params.signal) {
1978
- params.signal.addEventListener(
1979
- "abort",
1980
- () => controller.abort(params.signal.reason),
1981
- { once: true }
1982
- );
1983
- }
1984
- let response;
1985
- try {
1986
- response = await fetch(OPENAI_CHAT_URL, {
1987
- method: "POST",
1988
- headers: {
1989
- "Content-Type": "application/json",
1990
- Authorization: `Bearer ${this.apiKey}`
1991
- },
1992
- body: JSON.stringify(body),
1993
- signal: controller.signal
1994
- });
1995
- } catch (error) {
1996
- clearTimeout(timeoutId);
1997
- if (error instanceof DOMException && error.name === "AbortError") {
1998
- if (params.signal?.aborted) {
1999
- throw error;
2000
- }
2001
- throw new TimeoutError({
2002
- code: ErrorCodes.TIMEOUT_LLM_RESPONSE,
2003
- message: `OpenAI request timed out after ${timeoutMs}ms`,
2004
- provider: "openai",
2005
- recoverable: true,
2006
- suggestion: "Try again or increase the timeout.",
2007
- operationName: "openai.chatCompletions",
2008
- timeoutMs
2009
- });
2010
- }
2011
- throw new NetworkError({
2012
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2013
- message: `Failed to connect to OpenAI API: ${error.message}`,
2014
- provider: "openai",
2015
- suggestion: "Check your network connection and try again.",
2016
- cause: error instanceof Error ? error : void 0
2017
- });
2018
- }
2019
- clearTimeout(timeoutId);
2020
- if (!response.ok) {
2021
- await this.handleHttpError(response);
2022
- }
2023
- if (!response.body) {
2024
- throw new NetworkError({
2025
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2026
- message: "OpenAI response body is null -- streaming unavailable.",
2027
- provider: "openai",
2028
- suggestion: "Retry the request."
2029
- });
2030
- }
2031
- return { stream: response.body, response };
2032
- }
2033
- // -----------------------------------------------------------------------
2034
- // Internal helpers
2035
- // -----------------------------------------------------------------------
2036
- /**
2037
- * Extract `TextChunk` and accumulate `ToolCall` data from a single parsed
2038
- * OpenAI SSE JSON object.
2039
- *
2040
- * OpenAI tool calls arrive incrementally: the first chunk for a tool call
2041
- * carries the `id` and `function.name`, while subsequent chunks append to
2042
- * `function.arguments`. We accumulate these in `pendingToolCalls` and only
2043
- * yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
2044
- * or when flushed.
2045
- */
2046
- *extractChunks(parsed, pendingToolCalls) {
2047
- const choices = parsed.choices;
2048
- if (!choices || choices.length === 0) return;
2049
- for (const choice of choices) {
2050
- const delta = choice.delta;
2051
- const finishReason = choice.finish_reason;
2052
- if (delta) {
2053
- if (typeof delta.content === "string" && delta.content !== "") {
2054
- yield {
2055
- text: delta.content,
2056
- done: false
2057
- };
2058
- }
2059
- const toolCallDeltas = delta.tool_calls;
2060
- if (toolCallDeltas) {
2061
- for (const tc of toolCallDeltas) {
2062
- const existing = pendingToolCalls.get(tc.index);
2063
- if (existing) {
2064
- if (tc.function?.arguments) {
2065
- existing.argumentsJson += tc.function.arguments;
2066
- }
2067
- } else {
2068
- pendingToolCalls.set(tc.index, {
2069
- id: tc.id ?? "",
2070
- name: tc.function?.name ?? "",
2071
- argumentsJson: tc.function?.arguments ?? ""
2072
- });
2073
- }
2074
- }
2075
- }
2076
- }
2077
- if (finishReason === "tool_calls") {
2078
- yield* this.flushPendingToolCalls(pendingToolCalls);
2079
- }
2080
- if (finishReason === "stop") {
2081
- yield { text: "", done: true };
2082
- }
2083
- }
2084
- }
2085
- /**
2086
- * Flush all accumulated pending tool calls as complete `ToolCall` objects.
2087
- */
2088
- *flushPendingToolCalls(pendingToolCalls) {
2089
- const sorted = [...pendingToolCalls.entries()].sort(
2090
- ([a], [b]) => a - b
2091
- );
2092
- for (const [, tc] of sorted) {
2093
- let args = {};
2094
- try {
2095
- args = JSON.parse(tc.argumentsJson);
2096
- } catch {
2097
- }
2098
- yield {
2099
- id: tc.id,
2100
- name: tc.name,
2101
- arguments: args
2102
- };
2103
- }
2104
- pendingToolCalls.clear();
2105
- }
2106
- /**
2107
- * Extract token usage from a parsed OpenAI response chunk.
2108
- * Usage data typically appears in the final chunk when `stream_options`
2109
- * includes `include_usage`, or in the non-streaming response.
2110
- * Returns `null` if no usage data is present.
2111
- */
2112
- extractUsage(parsed) {
2113
- const usage = parsed.usage;
2114
- if (!usage) return null;
2115
- return {
2116
- prompt: usage.prompt_tokens ?? 0,
2117
- completion: usage.completion_tokens ?? 0,
2118
- total: usage.total_tokens ?? 0
2119
- };
2120
- }
2121
- /**
2122
- * Check whether a parsed OpenAI chunk indicates the response was
2123
- * blocked by a content filter.
2124
- *
2125
- * OpenAI signals content filtering through:
2126
- * - `choices[].finish_reason === 'content_filter'`
2127
- * - `choices[].content_filter_results` with `filtered: true`
2128
- */
2129
- isContentFiltered(parsed) {
2130
- const choices = parsed.choices;
2131
- if (!choices || choices.length === 0) return false;
2132
- return choices.some((choice) => {
2133
- if (choice.finish_reason === "content_filter") return true;
2134
- const filterResults = choice.content_filter_results;
2135
- if (filterResults) {
2136
- return Object.values(filterResults).some((r) => r.filtered === true);
2137
- }
2138
- return false;
2139
- });
2140
- }
2141
- /**
2142
- * Translate an HTTP error response from OpenAI into the appropriate
2143
- * GuideKit error class.
2144
- */
2145
- async handleHttpError(response) {
2146
- let errorBody = "";
2147
- try {
2148
- errorBody = await response.text();
2149
- } catch {
2150
- }
2151
- const status = response.status;
2152
- if (status === 401 || status === 403) {
2153
- throw new AuthenticationError({
2154
- code: ErrorCodes.AUTH_INVALID_KEY,
2155
- message: `OpenAI API authentication failed (${status}): ${errorBody}`,
2156
- provider: "openai",
2157
- suggestion: "Verify your OpenAI API key is correct and has not expired."
2158
- });
2159
- }
2160
- if (status === 429) {
2161
- const retryAfterHeader = response.headers.get("retry-after");
2162
- const retryAfterMs = retryAfterHeader ? parseInt(retryAfterHeader, 10) * 1e3 : 6e4;
2163
- throw new RateLimitError({
2164
- code: ErrorCodes.RATE_LIMIT_PROVIDER,
2165
- message: `OpenAI API rate limit exceeded (429): ${errorBody}`,
2166
- provider: "openai",
2167
- recoverable: true,
2168
- suggestion: `Rate limited by OpenAI. Retry after ${Math.ceil(retryAfterMs / 1e3)}s.`,
2169
- retryAfterMs
2170
- });
2171
- }
2172
- if (status >= 500) {
2173
- throw new NetworkError({
2174
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2175
- message: `OpenAI API server error (${status}): ${errorBody}`,
2176
- provider: "openai",
2177
- suggestion: "The OpenAI API is experiencing issues. Please try again later."
2178
- });
2179
- }
2180
- throw new NetworkError({
2181
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
2182
- message: `OpenAI API request failed (${status}): ${errorBody}`,
2183
- provider: "openai",
2184
- suggestion: "Check the request parameters and try again."
2185
- });
2186
- }
2187
- };
2188
-
2189
1844
  // src/llm/index.ts
2190
1845
  var DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
2191
- var DEFAULT_TIMEOUT_MS2 = 15e3;
1846
+ var DEFAULT_TIMEOUT_MS = 15e3;
2192
1847
  var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
2193
1848
  var DEFAULT_SAFETY_SETTINGS = [
2194
1849
  { category: "HARM_CATEGORY_HARASSMENT", threshold: "BLOCK_ONLY_HIGH" },
@@ -2202,10 +1857,20 @@ function emptyUsage() {
2202
1857
  var GeminiAdapter = class {
2203
1858
  apiKey;
2204
1859
  model;
1860
+ /**
1861
+ * Token usage extracted from the most recent `parseResponse` call.
1862
+ * Updated as each SSE chunk is parsed; the final value reflects the
1863
+ * cumulative usage metadata sent by Gemini (typically in the last chunk).
1864
+ */
1865
+ _lastUsage = emptyUsage();
2205
1866
  constructor(config) {
2206
1867
  this.apiKey = config.apiKey;
2207
1868
  this.model = config.model ?? DEFAULT_GEMINI_MODEL;
2208
1869
  }
1870
+ /** Token usage from the most recent parseResponse call. */
1871
+ get lastUsage() {
1872
+ return this._lastUsage;
1873
+ }
2209
1874
  // -----------------------------------------------------------------------
2210
1875
  // LLMProviderAdapter implementation
2211
1876
  // -----------------------------------------------------------------------
@@ -2220,7 +1885,11 @@ var GeminiAdapter = class {
2220
1885
  functionDeclarations: tools.map((tool) => ({
2221
1886
  name: tool.name,
2222
1887
  description: tool.description,
2223
- parameters: tool.parameters
1888
+ parameters: {
1889
+ type: "object",
1890
+ properties: { ...tool.parameters },
1891
+ required: tool.required ?? []
1892
+ }
2224
1893
  }))
2225
1894
  }
2226
1895
  ];
@@ -2242,11 +1911,16 @@ var GeminiAdapter = class {
2242
1911
  * The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
2243
1912
  * as a JSON object prefixed by `data: `. We parse line-by-line, extract
2244
1913
  * text parts and function call parts, and yield the appropriate types.
1914
+ *
1915
+ * This method also:
1916
+ * - Detects content filtering and throws `ContentFilterError`.
1917
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
2245
1918
  */
2246
1919
  async *parseResponse(stream) {
2247
1920
  const reader = stream.getReader();
2248
1921
  const decoder = new TextDecoder();
2249
1922
  let buffer = "";
1923
+ this._lastUsage = emptyUsage();
2250
1924
  try {
2251
1925
  while (true) {
2252
1926
  const { done, value } = await reader.read();
@@ -2265,6 +1939,18 @@ var GeminiAdapter = class {
2265
1939
  } catch {
2266
1940
  continue;
2267
1941
  }
1942
+ if (this.isContentFiltered(parsed)) {
1943
+ throw new ContentFilterError({
1944
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1945
+ message: "Response was blocked by provider content safety filter.",
1946
+ provider: "gemini",
1947
+ suggestion: "Rephrase your question or adjust safety settings."
1948
+ });
1949
+ }
1950
+ const chunkUsage = this.extractUsage(parsed);
1951
+ if (chunkUsage) {
1952
+ this._lastUsage = chunkUsage;
1953
+ }
2268
1954
  yield* this.extractChunks(parsed);
2269
1955
  }
2270
1956
  }
@@ -2273,8 +1959,21 @@ var GeminiAdapter = class {
2273
1959
  if (jsonStr !== "" && jsonStr !== "[DONE]") {
2274
1960
  try {
2275
1961
  const parsed = JSON.parse(jsonStr);
1962
+ if (this.isContentFiltered(parsed)) {
1963
+ throw new ContentFilterError({
1964
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
1965
+ message: "Response was blocked by provider content safety filter.",
1966
+ provider: "gemini",
1967
+ suggestion: "Rephrase your question or adjust safety settings."
1968
+ });
1969
+ }
1970
+ const chunkUsage = this.extractUsage(parsed);
1971
+ if (chunkUsage) {
1972
+ this._lastUsage = chunkUsage;
1973
+ }
2276
1974
  yield* this.extractChunks(parsed);
2277
- } catch {
1975
+ } catch (error) {
1976
+ if (error instanceof ContentFilterError) throw error;
2278
1977
  }
2279
1978
  }
2280
1979
  }
@@ -2305,15 +2004,21 @@ var GeminiAdapter = class {
2305
2004
  /**
2306
2005
  * Build and execute a streaming request to the Gemini API.
2307
2006
  * Returns the raw `ReadableStream` for the response body together with
2308
- * a promise that resolves to token usage extracted from the final chunk.
2007
+ * the raw Response object.
2008
+ *
2009
+ * Note: The Gemini API key is passed as a URL query parameter (`key=`).
2010
+ * This is inherent to the Gemini REST SSE endpoint design; the key is
2011
+ * transmitted over HTTPS so it remains encrypted in transit. (H3)
2309
2012
  */
2310
2013
  async streamRequest(params) {
2014
+ const contentsArray = params.contents;
2015
+ const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
2311
2016
  const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
2312
2017
  const body = {
2313
2018
  systemInstruction: {
2314
2019
  parts: [{ text: params.systemPrompt }]
2315
2020
  },
2316
- contents: params.contents,
2021
+ contents: fullContents,
2317
2022
  safetySettings: DEFAULT_SAFETY_SETTINGS,
2318
2023
  generationConfig: {
2319
2024
  temperature: 0.7,
@@ -2324,7 +2029,7 @@ var GeminiAdapter = class {
2324
2029
  if (params.tools) {
2325
2030
  body.tools = params.tools;
2326
2031
  }
2327
- const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS2;
2032
+ const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
2328
2033
  const controller = new AbortController();
2329
2034
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
2330
2035
  if (params.signal) {
@@ -2381,7 +2086,7 @@ var GeminiAdapter = class {
2381
2086
  return { stream: response.body, response };
2382
2087
  }
2383
2088
  // -----------------------------------------------------------------------
2384
- // Internal helpers
2089
+ // Public helpers (LLMProviderAdapter interface)
2385
2090
  // -----------------------------------------------------------------------
2386
2091
  /**
2387
2092
  * Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
@@ -2554,7 +2259,8 @@ var LLMOrchestrator = class {
2554
2259
  updateConfig(config) {
2555
2260
  this._config = config;
2556
2261
  this._adapter = this.createAdapter(config);
2557
- this.log(`Config updated: provider=${config.provider}`);
2262
+ const label = "provider" in config ? config.provider : "custom adapter";
2263
+ this.log(`Config updated: ${label}`);
2558
2264
  }
2559
2265
  /** Get the current provider adapter. */
2560
2266
  get adapter() {
@@ -2565,139 +2271,42 @@ var LLMOrchestrator = class {
2565
2271
  // -----------------------------------------------------------------------
2566
2272
  /**
2567
2273
  * Execute a streaming LLM request and collect the results.
2274
+ *
2275
+ * This method is fully adapter-agnostic: it delegates streaming,
2276
+ * response parsing, content-filter detection, and usage extraction
2277
+ * entirely to the active `LLMProviderAdapter`. No provider-specific
2278
+ * SSE parsing lives in the orchestrator.
2568
2279
  */
2569
2280
  async executeStream(params, _isRetry) {
2570
- const geminiAdapter = this._adapter;
2571
- const historyContents = geminiAdapter.formatConversation(params.history);
2572
- const contents = [
2573
- ...historyContents,
2574
- { role: "user", parts: [{ text: params.userMessage }] }
2575
- ];
2576
- const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
2577
- const { stream } = await geminiAdapter.streamRequest({
2281
+ const adapter = this._adapter;
2282
+ const historyContents = adapter.formatConversation(params.history);
2283
+ const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
2284
+ const { stream } = await adapter.streamRequest({
2578
2285
  systemPrompt: params.systemPrompt,
2579
- contents,
2286
+ contents: historyContents,
2287
+ userMessage: params.userMessage,
2580
2288
  tools,
2581
2289
  signal: params.signal
2582
2290
  });
2583
2291
  let fullText = "";
2584
2292
  const toolCalls = [];
2293
+ for await (const item of adapter.parseResponse(stream)) {
2294
+ if ("name" in item && "arguments" in item) {
2295
+ const toolCall = item;
2296
+ toolCalls.push(toolCall);
2297
+ this.callbacks.onToolCall?.(toolCall);
2298
+ } else {
2299
+ const chunk = item;
2300
+ if (chunk.text) {
2301
+ fullText += chunk.text;
2302
+ }
2303
+ this.callbacks.onChunk?.(chunk);
2304
+ }
2305
+ }
2306
+ this.callbacks.onChunk?.({ text: "", done: true });
2585
2307
  let usage = emptyUsage();
2586
- let wasContentFiltered = false;
2587
- const reader = stream.getReader();
2588
- const decoder = new TextDecoder();
2589
- let buffer = "";
2590
- try {
2591
- while (true) {
2592
- const { done, value } = await reader.read();
2593
- if (done) break;
2594
- buffer += decoder.decode(value, { stream: true });
2595
- const lines = buffer.split("\n");
2596
- buffer = lines.pop() ?? "";
2597
- for (const line of lines) {
2598
- const trimmed = line.trim();
2599
- if (!trimmed.startsWith("data:")) continue;
2600
- const jsonStr = trimmed.slice(5).trim();
2601
- if (jsonStr === "" || jsonStr === "[DONE]") continue;
2602
- let parsed;
2603
- try {
2604
- parsed = JSON.parse(jsonStr);
2605
- } catch {
2606
- continue;
2607
- }
2608
- if (geminiAdapter.isContentFiltered(parsed)) {
2609
- wasContentFiltered = true;
2610
- break;
2611
- }
2612
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2613
- if (chunkUsage) {
2614
- usage = chunkUsage;
2615
- }
2616
- const candidates = parsed.candidates;
2617
- if (!candidates || candidates.length === 0) continue;
2618
- for (const candidate of candidates) {
2619
- const content = candidate.content;
2620
- if (!content?.parts) continue;
2621
- const finishReason = candidate.finishReason;
2622
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2623
- for (const part of content.parts) {
2624
- if (typeof part.text === "string") {
2625
- fullText += part.text;
2626
- const chunk = { text: part.text, done: isDone };
2627
- this.callbacks.onChunk?.(chunk);
2628
- }
2629
- if (part.functionCall) {
2630
- const fc = part.functionCall;
2631
- const toolCall = {
2632
- id: fc.name,
2633
- name: fc.name,
2634
- arguments: fc.args ?? {}
2635
- };
2636
- toolCalls.push(toolCall);
2637
- this.callbacks.onToolCall?.(toolCall);
2638
- }
2639
- }
2640
- }
2641
- }
2642
- if (wasContentFiltered) break;
2643
- }
2644
- if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
2645
- const jsonStr = buffer.trim().slice(5).trim();
2646
- if (jsonStr !== "" && jsonStr !== "[DONE]") {
2647
- try {
2648
- const parsed = JSON.parse(jsonStr);
2649
- if (geminiAdapter.isContentFiltered(parsed)) {
2650
- wasContentFiltered = true;
2651
- } else {
2652
- const chunkUsage = geminiAdapter.extractUsage(parsed);
2653
- if (chunkUsage) usage = chunkUsage;
2654
- const candidates = parsed.candidates;
2655
- if (candidates) {
2656
- for (const candidate of candidates) {
2657
- const content = candidate.content;
2658
- if (!content?.parts) continue;
2659
- const finishReason = candidate.finishReason;
2660
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
2661
- for (const part of content.parts) {
2662
- if (typeof part.text === "string") {
2663
- fullText += part.text;
2664
- const chunk = {
2665
- text: part.text,
2666
- done: isDone
2667
- };
2668
- this.callbacks.onChunk?.(chunk);
2669
- }
2670
- if (part.functionCall) {
2671
- const fc = part.functionCall;
2672
- const toolCall = {
2673
- id: fc.name,
2674
- name: fc.name,
2675
- arguments: fc.args ?? {}
2676
- };
2677
- toolCalls.push(toolCall);
2678
- this.callbacks.onToolCall?.(toolCall);
2679
- }
2680
- }
2681
- }
2682
- }
2683
- }
2684
- } catch {
2685
- }
2686
- }
2687
- }
2688
- } finally {
2689
- reader.releaseLock();
2690
- }
2691
- if (wasContentFiltered) {
2692
- throw new ContentFilterError({
2693
- code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
2694
- message: "Response was blocked by Gemini content safety filter.",
2695
- provider: "gemini",
2696
- suggestion: "Rephrase your question or adjust safety settings."
2697
- });
2698
- }
2699
- if (fullText.length > 0) {
2700
- this.callbacks.onChunk?.({ text: "", done: true });
2308
+ if ("lastUsage" in adapter) {
2309
+ usage = adapter.lastUsage;
2701
2310
  }
2702
2311
  if (usage.total > 0) {
2703
2312
  this.callbacks.onTokenUsage?.(usage);
@@ -2709,25 +2318,30 @@ var LLMOrchestrator = class {
2709
2318
  }
2710
2319
  /**
2711
2320
  * Create the appropriate adapter for the given config.
2712
- * Currently only Gemini is implemented; other providers will be added
2713
- * as the SDK evolves.
2321
+ *
2322
+ * Built-in providers:
2323
+ * - `'gemini'` — uses the bundled `GeminiAdapter`.
2324
+ *
2325
+ * Custom adapters:
2326
+ * - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
2327
+ * Example: `llm: { adapter: myCustomAdapter }`
2714
2328
  */
2715
2329
  createAdapter(config) {
2330
+ if ("adapter" in config) {
2331
+ return config.adapter;
2332
+ }
2716
2333
  switch (config.provider) {
2717
2334
  case "gemini":
2718
2335
  return new GeminiAdapter(config);
2719
- case "openai":
2720
- return new OpenAIAdapter(config);
2721
2336
  default:
2722
2337
  throw new Error(
2723
- `LLM provider "${config.provider}" is not yet supported. Currently only "gemini" and "openai" are implemented.`
2338
+ `LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
2724
2339
  );
2725
2340
  }
2726
2341
  }
2727
2342
  /** Convenience accessor for the current provider name. */
2728
2343
  get providerName() {
2729
- if (this._config.provider === "gemini") return "gemini";
2730
- if (this._config.provider === "openai") return "openai";
2344
+ if ("provider" in this._config) return this._config.provider;
2731
2345
  return void 0;
2732
2346
  }
2733
2347
  /** Log a debug message if debug mode is enabled. */
@@ -2880,7 +2494,7 @@ var ToolExecutor = class {
2880
2494
  break;
2881
2495
  }
2882
2496
  }
2883
- if (rounds >= this.maxRounds && allToolCalls.length > 0) {
2497
+ if (rounds >= this.maxRounds) {
2884
2498
  this.log(
2885
2499
  `Max rounds (${this.maxRounds}) reached. Returning current text.`
2886
2500
  );
@@ -2983,6 +2597,19 @@ var ToolExecutor = class {
2983
2597
  return s.value;
2984
2598
  }
2985
2599
  const tc = toolCalls[i];
2600
+ if (!tc) {
2601
+ const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
2602
+ return {
2603
+ toolCallId: `unknown-${i}`,
2604
+ record: {
2605
+ name: "unknown",
2606
+ args: {},
2607
+ result: void 0,
2608
+ durationMs: 0,
2609
+ error: errorMsg2
2610
+ }
2611
+ };
2612
+ }
2986
2613
  const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
2987
2614
  return {
2988
2615
  toolCallId: tc.id,
@@ -4175,8 +3802,280 @@ var DeepgramSTT = class {
4175
3802
  }
4176
3803
  };
4177
3804
 
3805
+ // src/voice/elevenlabs-stt.ts
3806
+ var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
3807
+ var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
3808
+ var DEFAULT_LANGUAGE2 = "en";
3809
+ var INACTIVITY_TIMEOUT_S = 30;
3810
+ var SAMPLE_RATE = 16e3;
3811
+ function float32ToInt162(float32) {
3812
+ const int16 = new Int16Array(float32.length);
3813
+ for (let i = 0; i < float32.length; i++) {
3814
+ const s = Math.max(-1, Math.min(1, float32[i]));
3815
+ int16[i] = s < 0 ? s * 32768 : s * 32767;
3816
+ }
3817
+ return int16;
3818
+ }
3819
+ function int16ToBase64(int16) {
3820
+ const bytes = new Uint8Array(int16.buffer);
3821
+ const CHUNK_SIZE = 8192;
3822
+ let binary = "";
3823
+ for (let i = 0; i < bytes.length; i += CHUNK_SIZE) {
3824
+ const chunk = bytes.subarray(i, i + CHUNK_SIZE);
3825
+ binary += String.fromCharCode(...chunk);
3826
+ }
3827
+ return btoa(binary);
3828
+ }
3829
+ var ElevenLabsSTT = class {
3830
+ // ---- Configuration -------------------------------------------------------
3831
+ apiKey;
3832
+ language;
3833
+ debugEnabled;
3834
+ // ---- Internal state ------------------------------------------------------
3835
+ wsManager = null;
3836
+ _connected = false;
3837
+ _suspended = false;
3838
+ /** Registered transcript callbacks. */
3839
+ transcriptCallbacks = /* @__PURE__ */ new Set();
3840
+ // -------------------------------------------------------------------------
3841
+ // Constructor
3842
+ // -------------------------------------------------------------------------
3843
+ constructor(options) {
3844
+ this.apiKey = options.apiKey;
3845
+ this.language = options.language ?? DEFAULT_LANGUAGE2;
3846
+ this.debugEnabled = options.debug ?? false;
3847
+ this.log("ElevenLabsSTT created", { language: this.language });
3848
+ }
3849
+ // -------------------------------------------------------------------------
3850
+ // Public API
3851
+ // -------------------------------------------------------------------------
3852
+ /** Whether the WebSocket is currently connected and ready. */
3853
+ get isConnected() {
3854
+ return this._connected;
3855
+ }
3856
+ /**
3857
+ * Open a WebSocket connection to ElevenLabs' real-time STT endpoint.
3858
+ *
3859
+ * Resolves once the connection is established and the socket is ready to
3860
+ * receive audio frames. Rejects if the connection cannot be established.
3861
+ */
3862
+ async connect() {
3863
+ if (this._connected) {
3864
+ this.log("Already connected \u2014 skipping");
3865
+ return;
3866
+ }
3867
+ if (typeof WebSocket === "undefined") {
3868
+ this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
3869
+ return;
3870
+ }
3871
+ const url = this.buildUrl();
3872
+ this.log("Connecting to", url.replace(this.apiKey, "***"));
3873
+ this.wsManager = new WebSocketManager({
3874
+ url,
3875
+ protocols: [],
3876
+ debug: this.debugEnabled,
3877
+ label: "ElevenLabs-STT"
3878
+ });
3879
+ this.wsManager.onOpen(() => {
3880
+ this._connected = true;
3881
+ this.log("Connected");
3882
+ });
3883
+ this.wsManager.onMessage((event) => {
3884
+ this.handleMessage(event);
3885
+ });
3886
+ this.wsManager.onClose((code, reason) => {
3887
+ this.log("Connection closed", { code, reason });
3888
+ this.cleanup();
3889
+ });
3890
+ this.wsManager.onError((event) => {
3891
+ this.log("WebSocket error", event);
3892
+ });
3893
+ return this.wsManager.connect();
3894
+ }
3895
+ /**
3896
+ * Send audio data to ElevenLabs for transcription.
3897
+ *
3898
+ * Accepts either `Float32Array` (Web Audio API output) or `Int16Array`
3899
+ * (already encoded as linear16). Float32 data is automatically converted
3900
+ * to Int16 before encoding. Audio is sent as a base64-encoded JSON message.
3901
+ */
3902
+ sendAudio(audioData) {
3903
+ if (!this._connected || !this.wsManager || this._suspended) {
3904
+ return;
3905
+ }
3906
+ const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
3907
+ const base64 = int16ToBase64(int16);
3908
+ this.wsManager.send(
3909
+ JSON.stringify({
3910
+ type: "input_audio_chunk",
3911
+ audio: base64,
3912
+ sample_rate: SAMPLE_RATE
3913
+ })
3914
+ );
3915
+ }
3916
+ /**
3917
+ * Register a callback to receive transcript events.
3918
+ *
3919
+ * @returns An unsubscribe function. Calling it more than once is safe.
3920
+ */
3921
+ onTranscript(callback) {
3922
+ this.transcriptCallbacks.add(callback);
3923
+ let removed = false;
3924
+ return () => {
3925
+ if (removed) return;
3926
+ removed = true;
3927
+ this.transcriptCallbacks.delete(callback);
3928
+ };
3929
+ }
3930
+ /**
3931
+ * Gracefully close the connection.
3932
+ *
3933
+ * Sends a `commit_audio` message so ElevenLabs can finalise any pending
3934
+ * transcription before the socket is torn down.
3935
+ */
3936
+ close() {
3937
+ if (!this._connected || !this.wsManager) {
3938
+ this.log("Not connected \u2014 nothing to close");
3939
+ return;
3940
+ }
3941
+ this.log("Sending commit_audio and closing");
3942
+ try {
3943
+ this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
3944
+ } catch {
3945
+ }
3946
+ this.wsManager.close();
3947
+ this.cleanup();
3948
+ }
3949
+ /** Force-destroy the connection without a graceful handshake. */
3950
+ destroy() {
3951
+ this.log("Destroying");
3952
+ if (this.wsManager) {
3953
+ this.wsManager.destroy();
3954
+ this.wsManager = null;
3955
+ }
3956
+ this.cleanup();
3957
+ this.transcriptCallbacks.clear();
3958
+ }
3959
+ /**
3960
+ * Suspend the adapter (e.g. when the device goes offline).
3961
+ *
3962
+ * Marks the adapter as suspended so that incoming `sendAudio` calls are
3963
+ * silently dropped. The WebSocket itself is left open.
3964
+ */
3965
+ suspend() {
3966
+ if (this._suspended) return;
3967
+ this._suspended = true;
3968
+ this.log("Suspended");
3969
+ }
3970
+ /**
3971
+ * Resume after a prior `suspend()`.
3972
+ */
3973
+ resume() {
3974
+ if (!this._suspended) return;
3975
+ this._suspended = false;
3976
+ this.log("Resumed");
3977
+ }
3978
+ // -------------------------------------------------------------------------
3979
+ // Message handling
3980
+ // -------------------------------------------------------------------------
3981
+ /**
3982
+ * Parse incoming ElevenLabs JSON messages and emit transcript events.
3983
+ *
3984
+ * ElevenLabs sends two transcript message types:
3985
+ * - `partial_transcript`: interim result, `isFinal = false`
3986
+ * - `committed_transcript`: final result, `isFinal = true`
3987
+ */
3988
+ handleMessage(event) {
3989
+ if (typeof event.data !== "string") {
3990
+ return;
3991
+ }
3992
+ let parsed;
3993
+ try {
3994
+ parsed = JSON.parse(event.data);
3995
+ } catch {
3996
+ this.log("Failed to parse message", event.data);
3997
+ return;
3998
+ }
3999
+ const type = parsed["type"];
4000
+ if (type === "committed_transcript" || type === "partial_transcript") {
4001
+ this.handleTranscriptMessage(parsed, type === "committed_transcript");
4002
+ } else {
4003
+ this.log("Received message", type, parsed);
4004
+ }
4005
+ }
4006
+ /**
4007
+ * Extract transcript data from a transcript message and notify subscribers.
4008
+ */
4009
+ handleTranscriptMessage(parsed, isFinal) {
4010
+ const result = parsed["result"];
4011
+ const text = result?.text ?? "";
4012
+ const confidence = result?.confidence ?? 0;
4013
+ if (text.trim() === "") {
4014
+ return;
4015
+ }
4016
+ const transcriptEvent = {
4017
+ text,
4018
+ isFinal,
4019
+ confidence,
4020
+ timestamp: Date.now()
4021
+ };
4022
+ this.log(
4023
+ isFinal ? "Final transcript:" : "Interim transcript:",
4024
+ text,
4025
+ `(${(confidence * 100).toFixed(1)}%)`
4026
+ );
4027
+ this.emitTranscript(transcriptEvent);
4028
+ }
4029
+ // -------------------------------------------------------------------------
4030
+ // Subscriber notification
4031
+ // -------------------------------------------------------------------------
4032
+ /**
4033
+ * Emit a transcript event to all registered callbacks.
4034
+ *
4035
+ * Errors thrown by individual callbacks are caught and logged so one
4036
+ * misbehaving subscriber does not prevent others from receiving the event.
4037
+ */
4038
+ emitTranscript(event) {
4039
+ for (const cb of this.transcriptCallbacks) {
4040
+ try {
4041
+ cb(event);
4042
+ } catch (err) {
4043
+ console.error(LOG_PREFIX7, "Transcript callback threw:", err);
4044
+ }
4045
+ }
4046
+ }
4047
+ // -------------------------------------------------------------------------
4048
+ // URL building
4049
+ // -------------------------------------------------------------------------
4050
+ /** Build the ElevenLabs streaming STT endpoint URL with auth query params. */
4051
+ buildUrl() {
4052
+ const params = new URLSearchParams({
4053
+ xi_api_key: this.apiKey,
4054
+ language: this.language,
4055
+ inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
4056
+ });
4057
+ return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
4058
+ }
4059
+ // -------------------------------------------------------------------------
4060
+ // Cleanup
4061
+ // -------------------------------------------------------------------------
4062
+ /** Reset internal state after disconnection. */
4063
+ cleanup() {
4064
+ this._connected = false;
4065
+ }
4066
+ // -------------------------------------------------------------------------
4067
+ // Logging
4068
+ // -------------------------------------------------------------------------
4069
+ /** Conditional debug logging. */
4070
+ log(...args) {
4071
+ if (this.debugEnabled) {
4072
+ console.debug(LOG_PREFIX7, ...args);
4073
+ }
4074
+ }
4075
+ };
4076
+
4178
4077
  // src/voice/elevenlabs-tts.ts
4179
- var LOG_PREFIX7 = "[GuideKit:TTS]";
4078
+ var LOG_PREFIX8 = "[GuideKit:TTS]";
4180
4079
  var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
4181
4080
  var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
4182
4081
  var DEFAULT_STABILITY = 0.5;
@@ -4418,47 +4317,705 @@ var ElevenLabsTTS = class {
4418
4317
  this.log("Failed to parse message", event.data);
4419
4318
  return;
4420
4319
  }
4421
- if (parsed["error"] !== void 0) {
4422
- this.log("ElevenLabs error:", parsed["error"]);
4423
- return;
4320
+ if (parsed["error"] !== void 0) {
4321
+ this.log("ElevenLabs error:", parsed["error"]);
4322
+ return;
4323
+ }
4324
+ if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4325
+ this.log("Non-audio message received", parsed);
4326
+ return;
4327
+ }
4328
+ const audioBase64 = parsed["audio"];
4329
+ const isFinal = parsed["isFinal"] === true;
4330
+ if (!audioBase64 || audioBase64.length === 0) {
4331
+ if (isFinal) {
4332
+ this.emitAudio({
4333
+ audio: new ArrayBuffer(0),
4334
+ isFinal: true,
4335
+ timestamp: Date.now()
4336
+ });
4337
+ }
4338
+ return;
4339
+ }
4340
+ let audioBuffer;
4341
+ try {
4342
+ audioBuffer = base64ToArrayBuffer(audioBase64);
4343
+ } catch (err) {
4344
+ this.log("Failed to decode base64 audio", err);
4345
+ return;
4346
+ }
4347
+ const audioEvent = {
4348
+ audio: audioBuffer,
4349
+ isFinal,
4350
+ timestamp: Date.now()
4351
+ };
4352
+ this.log(
4353
+ isFinal ? "Final audio chunk:" : "Audio chunk:",
4354
+ `${audioBuffer.byteLength} bytes`
4355
+ );
4356
+ this.emitAudio(audioEvent);
4357
+ }
4358
+ // -----------------------------------------------------------------------
4359
+ // Subscriber notification
4360
+ // -----------------------------------------------------------------------
4361
+ /**
4362
+ * Emit an audio event to all registered callbacks.
4363
+ *
4364
+ * Errors thrown by individual callbacks are caught and logged so one
4365
+ * misbehaving subscriber does not prevent others from receiving the event.
4366
+ */
4367
+ emitAudio(event) {
4368
+ for (const cb of this.audioCallbacks) {
4369
+ try {
4370
+ cb(event);
4371
+ } catch (err) {
4372
+ console.error(LOG_PREFIX8, "Audio callback threw:", err);
4373
+ }
4374
+ }
4375
+ }
4376
+ // -----------------------------------------------------------------------
4377
+ // URL building
4378
+ // -----------------------------------------------------------------------
4379
+ /** Build the ElevenLabs streaming TTS endpoint URL. */
4380
+ buildUrl() {
4381
+ const params = new URLSearchParams({
4382
+ model_id: this.modelId
4383
+ });
4384
+ return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4385
+ }
4386
+ // -----------------------------------------------------------------------
4387
+ // Cleanup
4388
+ // -----------------------------------------------------------------------
4389
+ /** Reset internal state after disconnection. */
4390
+ cleanup() {
4391
+ this._connected = false;
4392
+ this.bosSent = false;
4393
+ }
4394
+ // -----------------------------------------------------------------------
4395
+ // Logging
4396
+ // -----------------------------------------------------------------------
4397
+ /** Conditional debug logging. */
4398
+ log(...args) {
4399
+ if (this.debugEnabled) {
4400
+ console.debug(LOG_PREFIX8, ...args);
4401
+ }
4402
+ }
4403
+ };
4404
+
4405
+ // src/voice/web-speech-stt.ts
4406
+ var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
4407
+ var DEFAULT_LANGUAGE3 = "en-US";
4408
+ var WebSpeechSTT = class {
4409
+ // ---- Configuration -------------------------------------------------------
4410
+ language;
4411
+ continuous;
4412
+ interimResultsEnabled;
4413
+ debugEnabled;
4414
+ // ---- Internal state ------------------------------------------------------
4415
+ recognition = null;
4416
+ _connected = false;
4417
+ _suspended = false;
4418
+ /**
4419
+ * Whether we intentionally stopped recognition. Used to distinguish
4420
+ * between intentional stop and unexpected end (for auto-restart in
4421
+ * continuous mode).
4422
+ */
4423
+ _intentionalStop = false;
4424
+ /** Registered transcript callbacks. */
4425
+ transcriptCallbacks = /* @__PURE__ */ new Set();
4426
+ // -------------------------------------------------------------------------
4427
+ // Constructor
4428
+ // -------------------------------------------------------------------------
4429
+ constructor(options = {}) {
4430
+ this.language = options.language ?? DEFAULT_LANGUAGE3;
4431
+ this.continuous = options.continuous ?? true;
4432
+ this.interimResultsEnabled = options.interimResults ?? true;
4433
+ this.debugEnabled = options.debug ?? false;
4434
+ this.log("WebSpeechSTT created", {
4435
+ language: this.language,
4436
+ continuous: this.continuous,
4437
+ interimResults: this.interimResultsEnabled
4438
+ });
4439
+ }
4440
+ // -------------------------------------------------------------------------
4441
+ // Static methods
4442
+ // -------------------------------------------------------------------------
4443
+ /**
4444
+ * Check whether the Web Speech API SpeechRecognition is supported in the
4445
+ * current environment. Safe to call in SSR (returns false).
4446
+ */
4447
+ static isSupported() {
4448
+ if (typeof window === "undefined") return false;
4449
+ return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
4450
+ }
4451
+ // -------------------------------------------------------------------------
4452
+ // Public API
4453
+ // -------------------------------------------------------------------------
4454
+ /** Whether recognition is currently active and connected. */
4455
+ get isConnected() {
4456
+ return this._connected;
4457
+ }
4458
+ /**
4459
+ * Start speech recognition.
4460
+ *
4461
+ * Creates the SpeechRecognition instance and begins listening. Resolves
4462
+ * once the recognition session has started. Rejects if the API is not
4463
+ * supported or the browser denies permission.
4464
+ */
4465
+ async connect() {
4466
+ if (this._connected) {
4467
+ this.log("Already connected \u2014 skipping");
4468
+ return;
4469
+ }
4470
+ if (typeof window === "undefined") {
4471
+ this.log("SSR environment detected \u2014 cannot connect");
4472
+ return;
4473
+ }
4474
+ const SpeechRecognitionClass = this.resolveSpeechRecognition();
4475
+ if (!SpeechRecognitionClass) {
4476
+ throw new Error(
4477
+ "Web Speech API (SpeechRecognition) is not supported in this browser."
4478
+ );
4479
+ }
4480
+ this.recognition = new SpeechRecognitionClass();
4481
+ this.recognition.lang = this.language;
4482
+ this.recognition.continuous = this.continuous;
4483
+ this.recognition.interimResults = this.interimResultsEnabled;
4484
+ this.recognition.maxAlternatives = 1;
4485
+ this.recognition.onstart = () => {
4486
+ this._connected = true;
4487
+ this._intentionalStop = false;
4488
+ this.log("Recognition started");
4489
+ };
4490
+ this.recognition.onresult = (event) => {
4491
+ this.handleResult(event);
4492
+ };
4493
+ this.recognition.onerror = (event) => {
4494
+ this.handleError(event);
4495
+ };
4496
+ this.recognition.onend = () => {
4497
+ this.log("Recognition ended");
4498
+ const wasConnected = this._connected;
4499
+ this._connected = false;
4500
+ if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
4501
+ this.log("Auto-restarting continuous recognition");
4502
+ try {
4503
+ this.recognition?.start();
4504
+ } catch {
4505
+ this.log("Failed to auto-restart recognition");
4506
+ }
4507
+ }
4508
+ };
4509
+ return new Promise((resolve, reject) => {
4510
+ const onStart = () => {
4511
+ cleanup();
4512
+ resolve();
4513
+ };
4514
+ const onError = (event) => {
4515
+ cleanup();
4516
+ reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
4517
+ };
4518
+ const cleanup = () => {
4519
+ if (this.recognition) {
4520
+ this.recognition.removeEventListener("start", onStart);
4521
+ this.recognition.removeEventListener("error", onError);
4522
+ }
4523
+ };
4524
+ this.recognition.addEventListener("start", onStart, { once: true });
4525
+ this.recognition.addEventListener("error", onError, { once: true });
4526
+ try {
4527
+ this.recognition.start();
4528
+ } catch (err) {
4529
+ cleanup();
4530
+ reject(err);
4531
+ }
4532
+ });
4533
+ }
4534
+ /**
4535
+ * Send audio data. No-op for Web Speech API since it captures audio
4536
+ * directly from the microphone via the browser's internal pipeline.
4537
+ *
4538
+ * Provided for interface compatibility with WebSocket-based STT adapters
4539
+ * (DeepgramSTT, ElevenLabsSTT).
4540
+ */
4541
+ sendAudio(_audioData) {
4542
+ }
4543
+ /**
4544
+ * Register a callback to receive transcript events.
4545
+ *
4546
+ * @returns An unsubscribe function. Calling it more than once is safe.
4547
+ */
4548
+ onTranscript(callback) {
4549
+ this.transcriptCallbacks.add(callback);
4550
+ let removed = false;
4551
+ return () => {
4552
+ if (removed) return;
4553
+ removed = true;
4554
+ this.transcriptCallbacks.delete(callback);
4555
+ };
4556
+ }
4557
+ /**
4558
+ * Gracefully stop recognition.
4559
+ *
4560
+ * Calls `stop()` on the SpeechRecognition instance which allows it to
4561
+ * deliver any pending final results before ending.
4562
+ */
4563
+ close() {
4564
+ if (!this.recognition) {
4565
+ this.log("Not connected \u2014 nothing to close");
4566
+ return;
4567
+ }
4568
+ this.log("Closing recognition");
4569
+ this._intentionalStop = true;
4570
+ try {
4571
+ this.recognition.stop();
4572
+ } catch {
4573
+ }
4574
+ this.cleanup();
4575
+ }
4576
+ /** Force-destroy the recognition without waiting for pending results. */
4577
+ destroy() {
4578
+ this.log("Destroying");
4579
+ this._intentionalStop = true;
4580
+ if (this.recognition) {
4581
+ try {
4582
+ this.recognition.abort();
4583
+ } catch {
4584
+ }
4585
+ this.recognition.onresult = null;
4586
+ this.recognition.onerror = null;
4587
+ this.recognition.onend = null;
4588
+ this.recognition.onstart = null;
4589
+ this.recognition = null;
4590
+ }
4591
+ this.cleanup();
4592
+ this.transcriptCallbacks.clear();
4593
+ }
4594
+ /**
4595
+ * Suspend the adapter (e.g. when the device goes offline).
4596
+ *
4597
+ * Stops recognition and marks the adapter as suspended so that auto-restart
4598
+ * does not trigger.
4599
+ */
4600
+ suspend() {
4601
+ if (this._suspended) return;
4602
+ this._suspended = true;
4603
+ this._intentionalStop = true;
4604
+ if (this.recognition && this._connected) {
4605
+ try {
4606
+ this.recognition.stop();
4607
+ } catch {
4608
+ }
4609
+ }
4610
+ this.log("Suspended");
4611
+ }
4612
+ /**
4613
+ * Resume after a prior `suspend()`. Restarts recognition if it was
4614
+ * running before suspension.
4615
+ */
4616
+ resume() {
4617
+ if (!this._suspended) return;
4618
+ this._suspended = false;
4619
+ this._intentionalStop = false;
4620
+ this.log("Resumed");
4621
+ if (this.recognition && !this._connected) {
4622
+ try {
4623
+ this.recognition.start();
4624
+ } catch {
4625
+ this.log("Failed to restart recognition after resume");
4626
+ }
4627
+ }
4628
+ }
4629
+ // -------------------------------------------------------------------------
4630
+ // Result handling
4631
+ // -------------------------------------------------------------------------
4632
+ /**
4633
+ * Handle SpeechRecognition result events.
4634
+ *
4635
+ * The `results` property is a SpeechRecognitionResultList containing all
4636
+ * results accumulated during this recognition session. We only process
4637
+ * results from `resultIndex` onward to avoid re-emitting old results.
4638
+ */
4639
+ handleResult(event) {
4640
+ for (let i = event.resultIndex; i < event.results.length; i++) {
4641
+ const result = event.results[i];
4642
+ if (!result) continue;
4643
+ const alternative = result[0];
4644
+ if (!alternative) continue;
4645
+ const transcript = alternative.transcript;
4646
+ if (!transcript || transcript.trim() === "") continue;
4647
+ const isFinal = result.isFinal;
4648
+ const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
4649
+ const transcriptEvent = {
4650
+ text: transcript,
4651
+ isFinal,
4652
+ confidence,
4653
+ timestamp: Date.now()
4654
+ };
4655
+ this.log(
4656
+ isFinal ? "Final transcript:" : "Interim transcript:",
4657
+ transcript,
4658
+ `(${(confidence * 100).toFixed(1)}%)`
4659
+ );
4660
+ this.emitTranscript(transcriptEvent);
4661
+ }
4662
+ }
4663
+ // -------------------------------------------------------------------------
4664
+ // Error handling
4665
+ // -------------------------------------------------------------------------
4666
+ /**
4667
+ * Handle SpeechRecognition errors.
4668
+ *
4669
+ * Some errors are recoverable (e.g. `no-speech`) and some are fatal
4670
+ * (e.g. `not-allowed`). For recoverable errors in continuous mode,
4671
+ * recognition will auto-restart via the `onend` handler.
4672
+ */
4673
+ handleError(event) {
4674
+ const errorType = event.error;
4675
+ this.log("Recognition error:", errorType, event.message);
4676
+ if (errorType === "no-speech" || errorType === "aborted") {
4677
+ this.log("Non-fatal error \u2014 will recover");
4678
+ return;
4679
+ }
4680
+ if (errorType === "network") {
4681
+ this.log("Network error \u2014 recognition may auto-restart");
4682
+ return;
4683
+ }
4684
+ if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
4685
+ this._intentionalStop = true;
4686
+ this.log("Fatal recognition error \u2014 stopping");
4687
+ }
4688
+ }
4689
+ // -------------------------------------------------------------------------
4690
+ // Subscriber notification
4691
+ // -------------------------------------------------------------------------
4692
+ /**
4693
+ * Emit a transcript event to all registered callbacks.
4694
+ *
4695
+ * Errors thrown by individual callbacks are caught and logged so one
4696
+ * misbehaving subscriber does not prevent others from receiving the event.
4697
+ */
4698
+ emitTranscript(event) {
4699
+ for (const cb of this.transcriptCallbacks) {
4700
+ try {
4701
+ cb(event);
4702
+ } catch (err) {
4703
+ console.error(LOG_PREFIX9, "Transcript callback threw:", err);
4704
+ }
4705
+ }
4706
+ }
4707
+ // -------------------------------------------------------------------------
4708
+ // SpeechRecognition resolution
4709
+ // -------------------------------------------------------------------------
4710
+ /**
4711
+ * Resolve the SpeechRecognition constructor, with the webkit-prefixed
4712
+ * fallback. Returns null if not available.
4713
+ */
4714
+ resolveSpeechRecognition() {
4715
+ if (typeof window === "undefined") return null;
4716
+ const win = window;
4717
+ if (typeof win["SpeechRecognition"] !== "undefined") {
4718
+ return win["SpeechRecognition"];
4719
+ }
4720
+ if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
4721
+ return globalThis.webkitSpeechRecognition;
4722
+ }
4723
+ return null;
4724
+ }
4725
+ // -------------------------------------------------------------------------
4726
+ // Cleanup
4727
+ // -------------------------------------------------------------------------
4728
+ /** Reset internal state after disconnection. */
4729
+ cleanup() {
4730
+ this._connected = false;
4731
+ }
4732
+ // -------------------------------------------------------------------------
4733
+ // Logging
4734
+ // -------------------------------------------------------------------------
4735
+ /** Conditional debug logging. */
4736
+ log(...args) {
4737
+ if (this.debugEnabled) {
4738
+ console.debug(LOG_PREFIX9, ...args);
4739
+ }
4740
+ }
4741
+ };
4742
+
4743
+ // src/voice/web-speech-tts.ts
4744
+ var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
4745
+ var DEFAULT_RATE = 1;
4746
+ var DEFAULT_PITCH = 1;
4747
+ var DEFAULT_LANGUAGE4 = "en-US";
4748
+ var WebSpeechTTS = class {
4749
+ // ---- Configuration -------------------------------------------------------
4750
+ voiceName;
4751
+ rate;
4752
+ pitch;
4753
+ language;
4754
+ debugEnabled;
4755
+ // ---- Internal state ------------------------------------------------------
4756
+ _connected = false;
4757
+ _suspended = false;
4758
+ /** Cached voice object resolved from voiceName. */
4759
+ _resolvedVoice = null;
4760
+ /** Whether voices have been loaded (they load async in some browsers). */
4761
+ _voicesLoaded = false;
4762
+ /** Registered audio-event callbacks. */
4763
+ audioCallbacks = /* @__PURE__ */ new Set();
4764
+ // -------------------------------------------------------------------------
4765
+ // Constructor
4766
+ // -------------------------------------------------------------------------
4767
+ constructor(options = {}) {
4768
+ this.voiceName = options.voice ?? null;
4769
+ this.rate = options.rate ?? DEFAULT_RATE;
4770
+ this.pitch = options.pitch ?? DEFAULT_PITCH;
4771
+ this.language = options.language ?? DEFAULT_LANGUAGE4;
4772
+ this.debugEnabled = options.debug ?? false;
4773
+ this.log("WebSpeechTTS created", {
4774
+ voice: this.voiceName,
4775
+ rate: this.rate,
4776
+ pitch: this.pitch,
4777
+ language: this.language
4778
+ });
4779
+ }
4780
+ // -------------------------------------------------------------------------
4781
+ // Static methods
4782
+ // -------------------------------------------------------------------------
4783
+ /**
4784
+ * Check whether the Web Speech API SpeechSynthesis is supported in the
4785
+ * current environment. Safe to call in SSR (returns false).
4786
+ */
4787
+ static isSupported() {
4788
+ if (typeof window === "undefined") return false;
4789
+ return typeof window.speechSynthesis !== "undefined";
4790
+ }
4791
+ // -------------------------------------------------------------------------
4792
+ // Public API
4793
+ // -------------------------------------------------------------------------
4794
+ /** Whether the adapter is connected (ready for speech). */
4795
+ get isConnected() {
4796
+ return this._connected;
4797
+ }
4798
+ /**
4799
+ * Initialize the adapter.
4800
+ *
4801
+ * Loads available voices and resolves the requested voice name. Voice
4802
+ * loading is async in some browsers (notably Chrome) so we wait for
4803
+ * the `voiceschanged` event if needed.
4804
+ */
4805
+ async connect() {
4806
+ if (this._connected) {
4807
+ this.log("Already connected \u2014 skipping");
4808
+ return;
4809
+ }
4810
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
4811
+ this.log("SpeechSynthesis not available \u2014 cannot connect");
4812
+ return;
4813
+ }
4814
+ await this.loadVoices();
4815
+ if (this.voiceName) {
4816
+ this._resolvedVoice = this.findVoice(this.voiceName);
4817
+ if (this._resolvedVoice) {
4818
+ this.log("Resolved voice:", this._resolvedVoice.name);
4819
+ } else {
4820
+ this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
4821
+ }
4822
+ }
4823
+ this._connected = true;
4824
+ this.log("Connected");
4825
+ }
4826
+ /**
4827
+ * Speak the given text using the browser's speech synthesis engine.
4828
+ *
4829
+ * Returns a Promise that resolves when the utterance completes or is
4830
+ * cancelled. Rejects if an error occurs during synthesis.
4831
+ *
4832
+ * Also emits audio events to registered callbacks for VoicePipeline
4833
+ * compatibility.
4834
+ */
4835
+ speak(text) {
4836
+ if (!this._connected || this._suspended) {
4837
+ this.log("Cannot speak \u2014 not connected or suspended");
4838
+ return;
4839
+ }
4840
+ if (!text || !text.trim()) {
4841
+ return;
4842
+ }
4843
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
4844
+ return;
4845
+ }
4846
+ const synth = window.speechSynthesis;
4847
+ const utterance = new SpeechSynthesisUtterance(text);
4848
+ utterance.lang = this.language;
4849
+ utterance.rate = this.rate;
4850
+ utterance.pitch = this.pitch;
4851
+ if (this._resolvedVoice) {
4852
+ utterance.voice = this._resolvedVoice;
4853
+ }
4854
+ utterance.onstart = () => {
4855
+ this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4856
+ this.emitAudio({
4857
+ audio: new ArrayBuffer(0),
4858
+ isFinal: false,
4859
+ timestamp: Date.now()
4860
+ });
4861
+ };
4862
+ utterance.onend = () => {
4863
+ this.log("Utterance ended");
4864
+ this.emitAudio({
4865
+ audio: new ArrayBuffer(0),
4866
+ isFinal: true,
4867
+ timestamp: Date.now()
4868
+ });
4869
+ };
4870
+ utterance.onerror = (event) => {
4871
+ if (event.error === "canceled") {
4872
+ this.log("Utterance cancelled");
4873
+ this.emitAudio({
4874
+ audio: new ArrayBuffer(0),
4875
+ isFinal: true,
4876
+ timestamp: Date.now()
4877
+ });
4878
+ return;
4879
+ }
4880
+ this.log("Utterance error:", event.error);
4881
+ this.emitAudio({
4882
+ audio: new ArrayBuffer(0),
4883
+ isFinal: true,
4884
+ timestamp: Date.now()
4885
+ });
4886
+ };
4887
+ this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
4888
+ synth.speak(utterance);
4889
+ }
4890
+ /**
4891
+ * Flush / finalize the current utterance.
4892
+ *
4893
+ * No-op for Web Speech API since each speak() call is a complete
4894
+ * utterance. Provided for interface compatibility with ElevenLabsTTS.
4895
+ */
4896
+ flush() {
4897
+ }
4898
+ /**
4899
+ * Register a callback to receive audio output events.
4900
+ *
4901
+ * For Web Speech API, these events have empty audio buffers and are
4902
+ * used to signal utterance start/end for VoicePipeline state management.
4903
+ *
4904
+ * @returns An unsubscribe function. Calling it more than once is safe.
4905
+ */
4906
+ onAudio(callback) {
4907
+ this.audioCallbacks.add(callback);
4908
+ let removed = false;
4909
+ return () => {
4910
+ if (removed) return;
4911
+ removed = true;
4912
+ this.audioCallbacks.delete(callback);
4913
+ };
4914
+ }
4915
+ /** Stop current speech synthesis and cancel any queued utterances. */
4916
+ stop() {
4917
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
4918
+ return;
4919
+ }
4920
+ this.log("Stopping speech synthesis");
4921
+ window.speechSynthesis.cancel();
4922
+ }
4923
+ /** Gracefully close the adapter. */
4924
+ close() {
4925
+ this.log("Closing");
4926
+ this.stop();
4927
+ this.cleanup();
4928
+ }
4929
+ /** Force-destroy the adapter. */
4930
+ destroy() {
4931
+ this.log("Destroying");
4932
+ this.stop();
4933
+ this.cleanup();
4934
+ this.audioCallbacks.clear();
4935
+ }
4936
+ /**
4937
+ * Suspend the adapter (e.g. when the device goes offline).
4938
+ *
4939
+ * Pauses any active speech synthesis and marks the adapter as suspended.
4940
+ */
4941
+ suspend() {
4942
+ if (this._suspended) return;
4943
+ this._suspended = true;
4944
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
4945
+ window.speechSynthesis.pause();
4946
+ }
4947
+ this.log("Suspended");
4948
+ }
4949
+ /**
4950
+ * Resume after a prior `suspend()`.
4951
+ */
4952
+ resume() {
4953
+ if (!this._suspended) return;
4954
+ this._suspended = false;
4955
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
4956
+ window.speechSynthesis.resume();
4424
4957
  }
4425
- if (parsed["audio"] === void 0 || parsed["audio"] === null) {
4426
- this.log("Non-audio message received", parsed);
4958
+ this.log("Resumed");
4959
+ }
4960
+ // -------------------------------------------------------------------------
4961
+ // Voice loading
4962
+ // -------------------------------------------------------------------------
4963
+ /**
4964
+ * Load available voices from the browser.
4965
+ *
4966
+ * In Chrome and some other browsers, voices load asynchronously after
4967
+ * the page loads. We wait for the `voiceschanged` event with a timeout.
4968
+ */
4969
+ async loadVoices() {
4970
+ if (this._voicesLoaded) return;
4971
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
4972
+ const synth = window.speechSynthesis;
4973
+ let voices = synth.getVoices();
4974
+ if (voices.length > 0) {
4975
+ this._voicesLoaded = true;
4976
+ this.log("Voices loaded:", voices.length, "available");
4427
4977
  return;
4428
4978
  }
4429
- const audioBase64 = parsed["audio"];
4430
- const isFinal = parsed["isFinal"] === true;
4431
- if (!audioBase64 || audioBase64.length === 0) {
4432
- if (isFinal) {
4433
- this.emitAudio({
4434
- audio: new ArrayBuffer(0),
4435
- isFinal: true,
4436
- timestamp: Date.now()
4437
- });
4438
- }
4439
- return;
4979
+ await new Promise((resolve) => {
4980
+ const onVoicesChanged = () => {
4981
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
4982
+ clearTimeout(timeout);
4983
+ voices = synth.getVoices();
4984
+ this._voicesLoaded = true;
4985
+ this.log("Voices loaded (async):", voices.length, "available");
4986
+ resolve();
4987
+ };
4988
+ const timeout = setTimeout(() => {
4989
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
4990
+ this._voicesLoaded = true;
4991
+ this.log("Voices loading timed out \u2014 proceeding with defaults");
4992
+ resolve();
4993
+ }, 2e3);
4994
+ synth.addEventListener("voiceschanged", onVoicesChanged);
4995
+ });
4996
+ }
4997
+ /**
4998
+ * Find a voice by name (case-insensitive partial match).
4999
+ */
5000
+ findVoice(name) {
5001
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
5002
+ return null;
4440
5003
  }
4441
- let audioBuffer;
4442
- try {
4443
- audioBuffer = base64ToArrayBuffer(audioBase64);
4444
- } catch (err) {
4445
- this.log("Failed to decode base64 audio", err);
4446
- return;
5004
+ const voices = window.speechSynthesis.getVoices();
5005
+ const lowerName = name.toLowerCase();
5006
+ const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
5007
+ if (exact) return exact;
5008
+ const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
5009
+ if (partial) return partial;
5010
+ if (lowerName.includes("-") || lowerName.length <= 5) {
5011
+ const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
5012
+ if (langMatch) return langMatch;
4447
5013
  }
4448
- const audioEvent = {
4449
- audio: audioBuffer,
4450
- isFinal,
4451
- timestamp: Date.now()
4452
- };
4453
- this.log(
4454
- isFinal ? "Final audio chunk:" : "Audio chunk:",
4455
- `${audioBuffer.byteLength} bytes`
4456
- );
4457
- this.emitAudio(audioEvent);
5014
+ return null;
4458
5015
  }
4459
- // -----------------------------------------------------------------------
5016
+ // -------------------------------------------------------------------------
4460
5017
  // Subscriber notification
4461
- // -----------------------------------------------------------------------
5018
+ // -------------------------------------------------------------------------
4462
5019
  /**
4463
5020
  * Emit an audio event to all registered callbacks.
4464
5021
  *
@@ -4470,41 +5027,30 @@ var ElevenLabsTTS = class {
4470
5027
  try {
4471
5028
  cb(event);
4472
5029
  } catch (err) {
4473
- console.error(LOG_PREFIX7, "Audio callback threw:", err);
5030
+ console.error(LOG_PREFIX10, "Audio callback threw:", err);
4474
5031
  }
4475
5032
  }
4476
5033
  }
4477
- // -----------------------------------------------------------------------
4478
- // URL building
4479
- // -----------------------------------------------------------------------
4480
- /** Build the ElevenLabs streaming TTS endpoint URL. */
4481
- buildUrl() {
4482
- const params = new URLSearchParams({
4483
- model_id: this.modelId
4484
- });
4485
- return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4486
- }
4487
- // -----------------------------------------------------------------------
5034
+ // -------------------------------------------------------------------------
4488
5035
  // Cleanup
4489
- // -----------------------------------------------------------------------
4490
- /** Reset internal state after disconnection. */
5036
+ // -------------------------------------------------------------------------
5037
+ /** Reset internal state. */
4491
5038
  cleanup() {
4492
5039
  this._connected = false;
4493
- this.bosSent = false;
4494
5040
  }
4495
- // -----------------------------------------------------------------------
5041
+ // -------------------------------------------------------------------------
4496
5042
  // Logging
4497
- // -----------------------------------------------------------------------
5043
+ // -------------------------------------------------------------------------
4498
5044
  /** Conditional debug logging. */
4499
5045
  log(...args) {
4500
5046
  if (this.debugEnabled) {
4501
- console.debug(LOG_PREFIX7, ...args);
5047
+ console.debug(LOG_PREFIX10, ...args);
4502
5048
  }
4503
5049
  }
4504
5050
  };
4505
5051
 
4506
5052
  // src/voice/index.ts
4507
- var LOG_PREFIX8 = "[GuideKit:Voice]";
5053
+ var LOG_PREFIX11 = "[GuideKit:Voice]";
4508
5054
  var JITTER_BUFFER_MS = 150;
4509
5055
  var ECHO_WINDOW_MS = 3e3;
4510
5056
  var ECHO_OVERLAP_THRESHOLD = 0.6;
@@ -4612,17 +5158,42 @@ var VoicePipeline = class {
4612
5158
  cause: err instanceof Error ? err : void 0
4613
5159
  });
4614
5160
  }
4615
- this._stt = new DeepgramSTT({
4616
- apiKey: this._sttConfig.apiKey,
4617
- model: this._sttConfig.model,
4618
- debug: this._debug
4619
- });
4620
- this._tts = new ElevenLabsTTS({
4621
- apiKey: this._ttsConfig.apiKey,
4622
- voiceId: this._ttsConfig.voiceId,
4623
- modelId: this._ttsConfig.modelId,
4624
- debug: this._debug
4625
- });
5161
+ if (this._sttConfig.provider === "deepgram") {
5162
+ this._stt = new DeepgramSTT({
5163
+ apiKey: this._sttConfig.apiKey,
5164
+ model: this._sttConfig.model,
5165
+ debug: this._debug
5166
+ });
5167
+ } else if (this._sttConfig.provider === "elevenlabs") {
5168
+ this._stt = new ElevenLabsSTT({
5169
+ apiKey: this._sttConfig.apiKey,
5170
+ language: this._sttConfig.language,
5171
+ debug: this._debug
5172
+ });
5173
+ } else {
5174
+ this._stt = new WebSpeechSTT({
5175
+ language: this._sttConfig.language,
5176
+ continuous: this._sttConfig.continuous,
5177
+ interimResults: this._sttConfig.interimResults,
5178
+ debug: this._debug
5179
+ });
5180
+ }
5181
+ if (this._ttsConfig.provider === "elevenlabs") {
5182
+ this._tts = new ElevenLabsTTS({
5183
+ apiKey: this._ttsConfig.apiKey,
5184
+ voiceId: this._ttsConfig.voiceId,
5185
+ modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
5186
+ debug: this._debug
5187
+ });
5188
+ } else {
5189
+ this._tts = new WebSpeechTTS({
5190
+ voice: this._ttsConfig.voice,
5191
+ rate: this._ttsConfig.rate,
5192
+ pitch: this._ttsConfig.pitch,
5193
+ language: this._ttsConfig.language,
5194
+ debug: this._debug
5195
+ });
5196
+ }
4626
5197
  this._log("Initialization complete");
4627
5198
  }
4628
5199
  // ────────────────────────────────────────────────────────────────────
@@ -4762,10 +5333,11 @@ var VoicePipeline = class {
4762
5333
  // ────────────────────────────────────────────────────────────────────
4763
5334
  // speak()
4764
5335
  // ────────────────────────────────────────────────────────────────────
4765
- /** Speak text via ElevenLabs TTS. */
5336
+ /** Speak text via TTS (ElevenLabs or Web Speech API). */
4766
5337
  async speak(text) {
4767
5338
  if (this._destroyed || !text.trim()) return;
4768
- if (!this._tts || !this._audioContext) {
5339
+ const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
5340
+ if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
4769
5341
  this._log("TTS or AudioContext not available \u2014 cannot speak");
4770
5342
  this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
4771
5343
  this._setState("idle");
@@ -4809,11 +5381,24 @@ var VoicePipeline = class {
4809
5381
  }
4810
5382
  resolve();
4811
5383
  };
4812
- this._unsubTTSAudio = this._tts.onAudio((event) => {
4813
- this._handleTTSAudio(event, done);
4814
- });
4815
- this._tts.speak(text);
4816
- this._tts.flush();
5384
+ if (isWebSpeechTTS) {
5385
+ this._unsubTTSAudio = this._tts.onAudio(
5386
+ (event) => {
5387
+ if (event.isFinal) {
5388
+ done();
5389
+ }
5390
+ }
5391
+ );
5392
+ this._tts.speak(text);
5393
+ } else {
5394
+ this._unsubTTSAudio = this._tts.onAudio(
5395
+ (event) => {
5396
+ this._handleTTSAudio(event, done);
5397
+ }
5398
+ );
5399
+ this._tts.speak(text);
5400
+ this._tts.flush();
5401
+ }
4817
5402
  });
4818
5403
  }
4819
5404
  // ────────────────────────────────────────────────────────────────────
@@ -4842,7 +5427,9 @@ var VoicePipeline = class {
4842
5427
  this._pendingLLMAbort.abort();
4843
5428
  this._pendingLLMAbort = null;
4844
5429
  }
4845
- if (this._tts?.isConnected) {
5430
+ if (this._tts instanceof WebSpeechTTS) {
5431
+ this._tts.stop();
5432
+ } else if (this._tts?.isConnected) {
4846
5433
  this._tts.close();
4847
5434
  }
4848
5435
  }
@@ -4931,7 +5518,7 @@ var VoicePipeline = class {
4931
5518
  try {
4932
5519
  cb(next, prev);
4933
5520
  } catch (err) {
4934
- console.error(LOG_PREFIX8, "State change callback threw:", err);
5521
+ console.error(LOG_PREFIX11, "State change callback threw:", err);
4935
5522
  }
4936
5523
  }
4937
5524
  }
@@ -5062,7 +5649,7 @@ var VoicePipeline = class {
5062
5649
  try {
5063
5650
  cb(text, isFinal);
5064
5651
  } catch (err) {
5065
- console.error(LOG_PREFIX8, "Transcript callback threw:", err);
5652
+ console.error(LOG_PREFIX11, "Transcript callback threw:", err);
5066
5653
  }
5067
5654
  }
5068
5655
  if (isFinal && this._state === "listening") {
@@ -5165,8 +5752,14 @@ var VoicePipeline = class {
5165
5752
  * sequential playback via AudioBufferSourceNode.
5166
5753
  */
5167
5754
  _decodeAndSchedule(audioData, onDone) {
5755
+ let onDoneCalled = false;
5756
+ const safeOnDone = onDone ? () => {
5757
+ if (onDoneCalled) return;
5758
+ onDoneCalled = true;
5759
+ onDone();
5760
+ } : void 0;
5168
5761
  if (!this._audioContext || this._state !== "speaking") {
5169
- onDone?.();
5762
+ safeOnDone?.();
5170
5763
  return;
5171
5764
  }
5172
5765
  const ctx = this._audioContext;
@@ -5175,7 +5768,7 @@ var VoicePipeline = class {
5175
5768
  copy,
5176
5769
  (decodedBuffer) => {
5177
5770
  if (this._state !== "speaking" || !this._audioContext) {
5178
- onDone?.();
5771
+ safeOnDone?.();
5179
5772
  return;
5180
5773
  }
5181
5774
  const source = ctx.createBufferSource();
@@ -5188,8 +5781,8 @@ var VoicePipeline = class {
5188
5781
  if (this._lastScheduledSource === source) {
5189
5782
  this._lastScheduledSource = null;
5190
5783
  }
5191
- if (onDone) {
5192
- onDone();
5784
+ if (safeOnDone) {
5785
+ safeOnDone();
5193
5786
  }
5194
5787
  };
5195
5788
  const now = ctx.currentTime;
@@ -5205,7 +5798,7 @@ var VoicePipeline = class {
5205
5798
  },
5206
5799
  (err) => {
5207
5800
  this._log("Failed to decode audio chunk:", err);
5208
- onDone?.();
5801
+ safeOnDone?.();
5209
5802
  }
5210
5803
  );
5211
5804
  }
@@ -5264,13 +5857,13 @@ var VoicePipeline = class {
5264
5857
  // ════════════════════════════════════════════════════════════════════
5265
5858
  _log(...args) {
5266
5859
  if (this._debug) {
5267
- console.debug(LOG_PREFIX8, ...args);
5860
+ console.debug(LOG_PREFIX11, ...args);
5268
5861
  }
5269
5862
  }
5270
5863
  };
5271
5864
 
5272
5865
  // src/visual/index.ts
5273
- var LOG_PREFIX9 = "[GuideKit:Visual]";
5866
+ var LOG_PREFIX12 = "[GuideKit:Visual]";
5274
5867
  var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
5275
5868
  var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
5276
5869
  var DEFAULT_ANIMATION_DURATION = 300;
@@ -6187,16 +6780,16 @@ var VisualGuidance = class {
6187
6780
  if (!this.debug) return;
6188
6781
  if (typeof console !== "undefined") {
6189
6782
  if (data) {
6190
- console.log(`${LOG_PREFIX9} ${message}`, data);
6783
+ console.log(`${LOG_PREFIX12} ${message}`, data);
6191
6784
  } else {
6192
- console.log(`${LOG_PREFIX9} ${message}`);
6785
+ console.log(`${LOG_PREFIX12} ${message}`);
6193
6786
  }
6194
6787
  }
6195
6788
  }
6196
6789
  };
6197
6790
 
6198
6791
  // src/awareness/index.ts
6199
- var LOG_PREFIX10 = "[GuideKit:Awareness]";
6792
+ var LOG_PREFIX13 = "[GuideKit:Awareness]";
6200
6793
  var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
6201
6794
  var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
6202
6795
  var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
@@ -6558,13 +7151,13 @@ var AwarenessSystem = class {
6558
7151
  /** Conditional debug logging. */
6559
7152
  log(...args) {
6560
7153
  if (this.debugEnabled) {
6561
- console.debug(LOG_PREFIX10, ...args);
7154
+ console.debug(LOG_PREFIX13, ...args);
6562
7155
  }
6563
7156
  }
6564
7157
  };
6565
7158
 
6566
7159
  // src/awareness/proactive.ts
6567
- var LOG_PREFIX11 = "[GuideKit:Proactive]";
7160
+ var LOG_PREFIX14 = "[GuideKit:Proactive]";
6568
7161
  var STORAGE_KEY = "guidekit:visited";
6569
7162
  var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
6570
7163
  var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
@@ -6602,7 +7195,7 @@ var ProactiveTriggerEngine = class {
6602
7195
  set quietMode(value) {
6603
7196
  this._quietMode = value;
6604
7197
  if (this.debug) {
6605
- console.debug(LOG_PREFIX11, `Quiet mode ${value ? "enabled" : "disabled"}`);
7198
+ console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
6606
7199
  }
6607
7200
  }
6608
7201
  // ---- Lifecycle -----------------------------------------------------------
@@ -6632,7 +7225,7 @@ var ProactiveTriggerEngine = class {
6632
7225
  })
6633
7226
  );
6634
7227
  if (this.debug) {
6635
- console.debug(LOG_PREFIX11, "Started \u2014 subscribed to awareness & dom events");
7228
+ console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
6636
7229
  }
6637
7230
  }
6638
7231
  /** Unsubscribe all bus listeners and clear internal state. */
@@ -6647,7 +7240,7 @@ var ProactiveTriggerEngine = class {
6647
7240
  this.formTimers.clear();
6648
7241
  this.started = false;
6649
7242
  if (this.debug) {
6650
- console.debug(LOG_PREFIX11, "Stopped \u2014 all listeners removed");
7243
+ console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
6651
7244
  }
6652
7245
  }
6653
7246
  /** Alias for {@link stop}. */
@@ -6682,7 +7275,7 @@ var ProactiveTriggerEngine = class {
6682
7275
  }, FORM_ABANDON_MS);
6683
7276
  this.formTimers.set(formSelector, timer);
6684
7277
  if (this.debug) {
6685
- console.debug(LOG_PREFIX11, `Form interaction started: ${formSelector}`);
7278
+ console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
6686
7279
  }
6687
7280
  }
6688
7281
  /** Reset all cooldowns and internal tracking state (useful for testing). */
@@ -6696,7 +7289,7 @@ var ProactiveTriggerEngine = class {
6696
7289
  }
6697
7290
  this.formTimers.clear();
6698
7291
  if (this.debug) {
6699
- console.debug(LOG_PREFIX11, "All cooldowns and state reset");
7292
+ console.debug(LOG_PREFIX14, "All cooldowns and state reset");
6700
7293
  }
6701
7294
  }
6702
7295
  // ---- Internal handlers ---------------------------------------------------
@@ -6713,22 +7306,23 @@ var ProactiveTriggerEngine = class {
6713
7306
  message: "First-time visitor detected. Show a visual greeting (no audio)."
6714
7307
  }, "greeting");
6715
7308
  if (this.debug) {
6716
- console.debug(LOG_PREFIX11, "First visit \u2014 greeting triggered");
7309
+ console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
6717
7310
  }
6718
7311
  return;
6719
7312
  }
6720
7313
  const visitedAt = parseInt(visited, 10);
6721
- if (!Number.isNaN(visitedAt)) {
6722
- const elapsed = Date.now() - visitedAt;
6723
- if (elapsed <= SEVEN_DAYS_MS && this.debug) {
6724
- console.debug(LOG_PREFIX11, "Return visitor within 7 days \u2014 silent");
6725
- } else if (this.debug) {
6726
- console.debug(LOG_PREFIX11, "Return visitor after 7 days");
6727
- }
7314
+ if (Number.isNaN(visitedAt)) {
7315
+ return;
7316
+ }
7317
+ const elapsed = Date.now() - visitedAt;
7318
+ if (elapsed <= SEVEN_DAYS_MS && this.debug) {
7319
+ console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
7320
+ } else if (this.debug) {
7321
+ console.debug(LOG_PREFIX14, "Return visitor after 7 days");
6728
7322
  }
6729
7323
  } catch {
6730
7324
  if (this.debug) {
6731
- console.warn(LOG_PREFIX11, "localStorage unavailable \u2014 skipping greeting check");
7325
+ console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
6732
7326
  }
6733
7327
  }
6734
7328
  }
@@ -6746,7 +7340,7 @@ var ProactiveTriggerEngine = class {
6746
7340
  const count = this.dwellCounts.get(sectionId) ?? 0;
6747
7341
  if (count >= DWELL_COOLDOWNS.length + 1) {
6748
7342
  if (this.debug) {
6749
- console.debug(LOG_PREFIX11, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
7343
+ console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
6750
7344
  }
6751
7345
  return;
6752
7346
  }
@@ -6756,7 +7350,7 @@ var ProactiveTriggerEngine = class {
6756
7350
  const lastFired = this.cooldowns.get(key) ?? 0;
6757
7351
  if (Date.now() - lastFired < cooldownMs) {
6758
7352
  if (this.debug) {
6759
- console.debug(LOG_PREFIX11, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
7353
+ console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
6760
7354
  }
6761
7355
  return;
6762
7356
  }
@@ -6772,7 +7366,7 @@ var ProactiveTriggerEngine = class {
6772
7366
  const sectionKey = selector;
6773
7367
  if (this.frustrationFired.has(sectionKey)) {
6774
7368
  if (this.debug) {
6775
- console.debug(LOG_PREFIX11, `Frustration already fired for "${selector}" \u2014 suppressed`);
7369
+ console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
6776
7370
  }
6777
7371
  return;
6778
7372
  }
@@ -6788,7 +7382,7 @@ var ProactiveTriggerEngine = class {
6788
7382
  const key = "navigation-commentary";
6789
7383
  if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
6790
7384
  if (this.debug) {
6791
- console.debug(LOG_PREFIX11, "Navigation cooldown active \u2014 suppressed");
7385
+ console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
6792
7386
  }
6793
7387
  return;
6794
7388
  }
@@ -6811,7 +7405,7 @@ var ProactiveTriggerEngine = class {
6811
7405
  fireTrigger(partial, cooldownKey) {
6812
7406
  if (this._quietMode) {
6813
7407
  if (this.debug) {
6814
- console.debug(LOG_PREFIX11, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
7408
+ console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
6815
7409
  }
6816
7410
  return;
6817
7411
  }
@@ -6821,13 +7415,13 @@ var ProactiveTriggerEngine = class {
6821
7415
  };
6822
7416
  this.cooldowns.set(cooldownKey, trigger.timestamp);
6823
7417
  if (this.debug) {
6824
- console.debug(LOG_PREFIX11, "Trigger fired:", trigger.type, trigger);
7418
+ console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
6825
7419
  }
6826
7420
  if (this.onTrigger) {
6827
7421
  try {
6828
7422
  this.onTrigger(trigger);
6829
7423
  } catch (err) {
6830
- console.error(LOG_PREFIX11, "onTrigger callback error:", err);
7424
+ console.error(LOG_PREFIX14, "onTrigger callback error:", err);
6831
7425
  }
6832
7426
  }
6833
7427
  }
@@ -6840,7 +7434,7 @@ var ProactiveTriggerEngine = class {
6840
7434
  };
6841
7435
 
6842
7436
  // src/llm/rate-limiter.ts
6843
- var LOG_PREFIX12 = "[GuideKit:RateLimiter]";
7437
+ var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
6844
7438
  var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
6845
7439
  var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
6846
7440
  var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
@@ -6935,7 +7529,19 @@ var RateLimiter = class {
6935
7529
  get sttMinutesUsed() {
6936
7530
  let totalMs = this.sttMs;
6937
7531
  if (this.sttStartedAt !== null) {
6938
- totalMs += Date.now() - this.sttStartedAt;
7532
+ const activeMs = Date.now() - this.sttStartedAt;
7533
+ const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
7534
+ const maxActiveMs = maxSessionMs * 2;
7535
+ if (activeMs > maxActiveMs) {
7536
+ console.warn(
7537
+ `${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
7538
+ );
7539
+ this.sttMs += maxActiveMs;
7540
+ this.sttStartedAt = null;
7541
+ totalMs = this.sttMs;
7542
+ } else {
7543
+ totalMs += activeMs;
7544
+ }
6939
7545
  }
6940
7546
  return totalMs / 6e4;
6941
7547
  }
@@ -7007,7 +7613,7 @@ var RateLimiter = class {
7007
7613
  }
7008
7614
  log(...args) {
7009
7615
  if (this.debug) {
7010
- console.debug(LOG_PREFIX12, ...args);
7616
+ console.debug(LOG_PREFIX15, ...args);
7011
7617
  }
7012
7618
  }
7013
7619
  };
@@ -7240,7 +7846,7 @@ var BUILTIN_LOCALES = {
7240
7846
  pt
7241
7847
  };
7242
7848
  var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
7243
- var LOG_PREFIX13 = "[GuideKit:I18n]";
7849
+ var LOG_PREFIX16 = "[GuideKit:I18n]";
7244
7850
  function isSupportedLocale(code) {
7245
7851
  return SUPPORTED_LOCALE_CODES.has(code);
7246
7852
  }
@@ -7278,7 +7884,7 @@ var I18n = class {
7278
7884
  this.strings = strings;
7279
7885
  this.resolvedLocale = resolvedLocale;
7280
7886
  if (this.debug) {
7281
- console.debug(`${LOG_PREFIX13} Initialized with locale "${this.resolvedLocale}"`);
7887
+ console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
7282
7888
  }
7283
7889
  }
7284
7890
  // -------------------------------------------------------------------------
@@ -7289,9 +7895,9 @@ var I18n = class {
7289
7895
  const value = this.strings[key];
7290
7896
  if (value === void 0) {
7291
7897
  if (this.debug) {
7292
- console.warn(`${LOG_PREFIX13} Missing translation key "${key}"`);
7898
+ console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
7293
7899
  }
7294
- return en[key] ?? key;
7900
+ return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
7295
7901
  }
7296
7902
  return value;
7297
7903
  }
@@ -7305,7 +7911,7 @@ var I18n = class {
7305
7911
  this.strings = strings;
7306
7912
  this.resolvedLocale = resolvedLocale;
7307
7913
  if (this.debug) {
7308
- console.debug(`${LOG_PREFIX13} Locale changed to "${this.resolvedLocale}"`);
7914
+ console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
7309
7915
  }
7310
7916
  }
7311
7917
  /** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
@@ -7325,7 +7931,7 @@ var I18n = class {
7325
7931
  if (locale === "auto") {
7326
7932
  const detected = detectLocaleFromDocument();
7327
7933
  if (this.debug) {
7328
- console.debug(`${LOG_PREFIX13} Auto-detected locale "${detected}"`);
7934
+ console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
7329
7935
  }
7330
7936
  return {
7331
7937
  strings: BUILTIN_LOCALES[detected],
@@ -7340,7 +7946,7 @@ var I18n = class {
7340
7946
  }
7341
7947
  if (this.debug) {
7342
7948
  console.warn(
7343
- `${LOG_PREFIX13} Unknown locale "${String(locale)}", falling back to "en"`
7949
+ `${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
7344
7950
  );
7345
7951
  }
7346
7952
  return {
@@ -7351,7 +7957,7 @@ var I18n = class {
7351
7957
  };
7352
7958
 
7353
7959
  // src/auth/token-manager.ts
7354
- var LOG_PREFIX14 = "[GuideKit:Auth]";
7960
+ var LOG_PREFIX17 = "[GuideKit:Auth]";
7355
7961
  var REFRESH_THRESHOLD = 0.8;
7356
7962
  var MAX_RETRY_ATTEMPTS = 3;
7357
7963
  var RETRY_BASE_MS = 1e3;
@@ -7630,7 +8236,7 @@ var TokenManager = class {
7630
8236
  }
7631
8237
  log(message) {
7632
8238
  if (this.debug) {
7633
- console.debug(`${LOG_PREFIX14} ${message}`);
8239
+ console.debug(`${LOG_PREFIX17} ${message}`);
7634
8240
  }
7635
8241
  }
7636
8242
  };
@@ -7762,6 +8368,11 @@ var GuideKitCore = class {
7762
8368
  debug: this._debug
7763
8369
  });
7764
8370
  await this.tokenManager.start();
8371
+ if (!this._options.llm) {
8372
+ console.warn(
8373
+ "[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
8374
+ );
8375
+ }
7765
8376
  this.resourceManager.register({
7766
8377
  name: "token-manager",
7767
8378
  cleanup: () => this.tokenManager?.destroy()
@@ -7884,21 +8495,50 @@ var GuideKitCore = class {
7884
8495
  }
7885
8496
  });
7886
8497
  this.registerBuiltinTools();
7887
- if (this._options.stt && this._options.tts) {
7888
- const sttConfig = this._options.stt;
7889
- const ttsConfig = this._options.tts;
7890
- if (sttConfig.provider === "deepgram" && ttsConfig.provider === "elevenlabs") {
8498
+ {
8499
+ const sttConfig = this._options.stt ?? { provider: "web-speech" };
8500
+ const ttsConfig = this._options.tts ?? { provider: "web-speech" };
8501
+ let voiceSttConfig;
8502
+ let voiceTtsConfig;
8503
+ if (sttConfig.provider === "deepgram") {
8504
+ voiceSttConfig = {
8505
+ provider: "deepgram",
8506
+ apiKey: sttConfig.apiKey,
8507
+ model: sttConfig.model
8508
+ };
8509
+ } else if (sttConfig.provider === "elevenlabs") {
8510
+ voiceSttConfig = {
8511
+ provider: "elevenlabs",
8512
+ apiKey: sttConfig.apiKey,
8513
+ language: sttConfig.language
8514
+ };
8515
+ } else {
8516
+ voiceSttConfig = {
8517
+ provider: "web-speech",
8518
+ language: sttConfig.language,
8519
+ continuous: sttConfig.continuous,
8520
+ interimResults: sttConfig.interimResults
8521
+ };
8522
+ }
8523
+ if (ttsConfig.provider === "elevenlabs") {
8524
+ voiceTtsConfig = {
8525
+ provider: "elevenlabs",
8526
+ apiKey: ttsConfig.apiKey,
8527
+ voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
8528
+ };
8529
+ } else {
8530
+ voiceTtsConfig = {
8531
+ provider: "web-speech",
8532
+ voice: ttsConfig.voice,
8533
+ rate: ttsConfig.rate,
8534
+ pitch: ttsConfig.pitch,
8535
+ language: ttsConfig.language
8536
+ };
8537
+ }
8538
+ try {
7891
8539
  this.voicePipeline = new VoicePipeline({
7892
- sttConfig: {
7893
- provider: "deepgram",
7894
- apiKey: sttConfig.apiKey,
7895
- model: "model" in sttConfig ? sttConfig.model : void 0
7896
- },
7897
- ttsConfig: {
7898
- provider: "elevenlabs",
7899
- apiKey: ttsConfig.apiKey,
7900
- voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
7901
- },
8540
+ sttConfig: voiceSttConfig,
8541
+ ttsConfig: voiceTtsConfig,
7902
8542
  debug: this._debug
7903
8543
  });
7904
8544
  this.voicePipeline.onStateChange((state, previous) => {
@@ -7931,6 +8571,11 @@ var GuideKitCore = class {
7931
8571
  name: "voice-pipeline",
7932
8572
  cleanup: () => this.voicePipeline?.destroy()
7933
8573
  });
8574
+ } catch (_err) {
8575
+ this.voicePipeline = null;
8576
+ if (this._debug) {
8577
+ console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
8578
+ }
7934
8579
  }
7935
8580
  }
7936
8581
  const session = this.contextManager.restoreSession();
@@ -8055,7 +8700,7 @@ var GuideKitCore = class {
8055
8700
  return responseText;
8056
8701
  } catch (error) {
8057
8702
  const err = error instanceof GuideKitError ? error : new GuideKitError({
8058
- code: "UNKNOWN",
8703
+ code: ErrorCodes.UNKNOWN,
8059
8704
  message: error instanceof Error ? error.message : "Unknown error",
8060
8705
  recoverable: false,
8061
8706
  suggestion: "Check the console for details."
@@ -8311,172 +8956,11 @@ var GuideKitCore = class {
8311
8956
  };
8312
8957
  }
8313
8958
  /**
8314
- * Register all built-in tool handlers with the ToolExecutor.
8315
- * Called once during init() after VisualGuidance and all subsystems are ready.
8959
+ * Unified built-in tool specifications — single source of truth for both
8960
+ * tool definitions (sent to LLM) and handler registration.
8316
8961
  */
8317
- registerBuiltinTools() {
8318
- if (!this.toolExecutor) return;
8319
- this.toolExecutor.registerTool({
8320
- name: "highlight",
8321
- execute: async (args) => {
8322
- const sectionId = args.sectionId;
8323
- const selector = args.selector;
8324
- const tooltip = args.tooltip;
8325
- const position = args.position;
8326
- const result = this.highlight({ sectionId, selector, tooltip, position });
8327
- return { success: result };
8328
- }
8329
- });
8330
- this.toolExecutor.registerTool({
8331
- name: "dismissHighlight",
8332
- execute: async () => {
8333
- this.dismissHighlight();
8334
- return { success: true };
8335
- }
8336
- });
8337
- this.toolExecutor.registerTool({
8338
- name: "scrollToSection",
8339
- execute: async (args) => {
8340
- const sectionId = args.sectionId;
8341
- const offset = args.offset;
8342
- this.scrollToSection(sectionId, offset);
8343
- return { success: true };
8344
- }
8345
- });
8346
- this.toolExecutor.registerTool({
8347
- name: "navigate",
8348
- execute: async (args) => {
8349
- const href = args.href;
8350
- const result = await this.navigate(href);
8351
- return { success: result, navigatedTo: result ? href : null };
8352
- }
8353
- });
8354
- this.toolExecutor.registerTool({
8355
- name: "startTour",
8356
- execute: async (args) => {
8357
- const sectionIds = args.sectionIds;
8358
- const mode = args.mode ?? "manual";
8359
- this.startTour(sectionIds, mode);
8360
- return { success: true, steps: sectionIds.length };
8361
- }
8362
- });
8363
- this.toolExecutor.registerTool({
8364
- name: "readPageContent",
8365
- execute: async (args) => {
8366
- const sectionId = args.sectionId;
8367
- const query = args.query;
8368
- const model = this._currentPageModel;
8369
- if (!model) return { error: "No page model available" };
8370
- if (sectionId) {
8371
- const section = model.sections.find((s) => s.id === sectionId);
8372
- if (section) {
8373
- const contentMapResult = await this.contextManager.getContent(sectionId);
8374
- return {
8375
- sectionId: section.id,
8376
- label: section.label,
8377
- summary: section.summary,
8378
- contentMap: contentMapResult
8379
- };
8380
- }
8381
- return { error: `Section "${sectionId}" not found` };
8382
- }
8383
- if (query) {
8384
- const queryLower = query.toLowerCase();
8385
- const matches = model.sections.filter(
8386
- (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
8387
- );
8388
- return {
8389
- query,
8390
- results: matches.slice(0, 5).map((s) => ({
8391
- sectionId: s.id,
8392
- label: s.label,
8393
- snippet: s.summary?.slice(0, 200)
8394
- }))
8395
- };
8396
- }
8397
- return { error: "Provide either sectionId or query" };
8398
- }
8399
- });
8400
- this.toolExecutor.registerTool({
8401
- name: "getVisibleSections",
8402
- execute: async () => {
8403
- const model = this._currentPageModel;
8404
- if (!model) return { sections: [] };
8405
- return {
8406
- sections: model.sections.slice(0, 10).map((s) => ({
8407
- id: s.id,
8408
- label: s.label,
8409
- selector: s.selector,
8410
- score: s.score
8411
- }))
8412
- };
8413
- }
8414
- });
8415
- this.toolExecutor.registerTool({
8416
- name: "clickElement",
8417
- execute: async (args) => {
8418
- if (typeof document === "undefined") return { success: false, error: "Not in browser" };
8419
- const selector = args.selector;
8420
- const el = document.querySelector(selector);
8421
- if (!el) return { success: false, error: `Element not found: ${selector}` };
8422
- if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
8423
- const clickableRules = this._options.options?.clickableSelectors;
8424
- const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
8425
- try {
8426
- return el.matches(pattern);
8427
- } catch {
8428
- return selector === pattern;
8429
- }
8430
- }) ?? false;
8431
- if (!isInDevAllowList) {
8432
- const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
8433
- try {
8434
- return el.matches(pattern);
8435
- } catch {
8436
- return false;
8437
- }
8438
- });
8439
- if (defaultDenied) {
8440
- return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
8441
- }
8442
- }
8443
- if (clickableRules?.deny?.length) {
8444
- const denied = clickableRules.deny.some((pattern) => {
8445
- try {
8446
- return el.matches(pattern);
8447
- } catch {
8448
- return selector === pattern;
8449
- }
8450
- });
8451
- if (denied) {
8452
- return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
8453
- }
8454
- }
8455
- if (clickableRules?.allow?.length && !isInDevAllowList) {
8456
- return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
8457
- }
8458
- el.click();
8459
- return { success: true };
8460
- }
8461
- });
8462
- this.toolExecutor.registerTool({
8463
- name: "executeCustomAction",
8464
- execute: async (args) => {
8465
- const actionId = args.actionId;
8466
- const params = args.params ?? {};
8467
- const action = this.customActions.get(actionId);
8468
- if (!action) return { error: `Unknown action: ${actionId}` };
8469
- try {
8470
- const result = await action.handler(params);
8471
- return { success: true, result };
8472
- } catch (err) {
8473
- return { success: false, error: err instanceof Error ? err.message : String(err) };
8474
- }
8475
- }
8476
- });
8477
- }
8478
- getToolDefinitions() {
8479
- const builtinTools = [
8962
+ getBuiltinToolSpecs() {
8963
+ return [
8480
8964
  {
8481
8965
  name: "highlight",
8482
8966
  description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
@@ -8486,13 +8970,27 @@ var GuideKitCore = class {
8486
8970
  tooltip: { type: "string", description: "Text to show in tooltip" },
8487
8971
  position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
8488
8972
  },
8489
- schemaVersion: 1
8973
+ required: [],
8974
+ schemaVersion: 1,
8975
+ execute: async (args) => {
8976
+ const sectionId = args.sectionId;
8977
+ const selector = args.selector;
8978
+ const tooltip = args.tooltip;
8979
+ const position = args.position;
8980
+ const result = this.highlight({ sectionId, selector, tooltip, position });
8981
+ return { success: result };
8982
+ }
8490
8983
  },
8491
8984
  {
8492
8985
  name: "dismissHighlight",
8493
8986
  description: "Remove the current spotlight overlay.",
8494
8987
  parameters: {},
8495
- schemaVersion: 1
8988
+ required: [],
8989
+ schemaVersion: 1,
8990
+ execute: async () => {
8991
+ this.dismissHighlight();
8992
+ return { success: true };
8993
+ }
8496
8994
  },
8497
8995
  {
8498
8996
  name: "scrollToSection",
@@ -8501,7 +8999,14 @@ var GuideKitCore = class {
8501
8999
  sectionId: { type: "string", description: "ID of the section to scroll to" },
8502
9000
  offset: { type: "number", description: "Pixel offset for sticky headers" }
8503
9001
  },
8504
- schemaVersion: 1
9002
+ required: ["sectionId"],
9003
+ schemaVersion: 1,
9004
+ execute: async (args) => {
9005
+ const sectionId = args.sectionId;
9006
+ const offset = args.offset;
9007
+ this.scrollToSection(sectionId, offset);
9008
+ return { success: true };
9009
+ }
8505
9010
  },
8506
9011
  {
8507
9012
  name: "navigate",
@@ -8509,7 +9014,13 @@ var GuideKitCore = class {
8509
9014
  parameters: {
8510
9015
  href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
8511
9016
  },
8512
- schemaVersion: 1
9017
+ required: ["href"],
9018
+ schemaVersion: 1,
9019
+ execute: async (args) => {
9020
+ const href = args.href;
9021
+ const result = await this.navigate(href);
9022
+ return { success: result, navigatedTo: result ? href : null };
9023
+ }
8513
9024
  },
8514
9025
  {
8515
9026
  name: "startTour",
@@ -8518,7 +9029,14 @@ var GuideKitCore = class {
8518
9029
  sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
8519
9030
  mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
8520
9031
  },
8521
- schemaVersion: 1
9032
+ required: ["sectionIds"],
9033
+ schemaVersion: 1,
9034
+ execute: async (args) => {
9035
+ const sectionIds = args.sectionIds;
9036
+ const mode = args.mode ?? "manual";
9037
+ this.startTour(sectionIds, mode);
9038
+ return { success: true, steps: sectionIds.length };
9039
+ }
8522
9040
  },
8523
9041
  {
8524
9042
  name: "readPageContent",
@@ -8527,13 +9045,61 @@ var GuideKitCore = class {
8527
9045
  sectionId: { type: "string", description: "Section ID to read" },
8528
9046
  query: { type: "string", description: "Keyword to search for across sections" }
8529
9047
  },
8530
- schemaVersion: 1
9048
+ required: [],
9049
+ schemaVersion: 1,
9050
+ execute: async (args) => {
9051
+ const sectionId = args.sectionId;
9052
+ const query = args.query;
9053
+ const model = this._currentPageModel;
9054
+ if (!model) return { error: "No page model available" };
9055
+ if (sectionId) {
9056
+ const section = model.sections.find((s) => s.id === sectionId);
9057
+ if (section) {
9058
+ const contentMapResult = await this.contextManager.getContent(sectionId);
9059
+ return {
9060
+ sectionId: section.id,
9061
+ label: section.label,
9062
+ summary: section.summary,
9063
+ contentMap: contentMapResult
9064
+ };
9065
+ }
9066
+ return { error: `Section "${sectionId}" not found` };
9067
+ }
9068
+ if (query) {
9069
+ const queryLower = query.toLowerCase();
9070
+ const matches = model.sections.filter(
9071
+ (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
9072
+ );
9073
+ return {
9074
+ query,
9075
+ results: matches.slice(0, 5).map((s) => ({
9076
+ sectionId: s.id,
9077
+ label: s.label,
9078
+ snippet: s.summary?.slice(0, 200)
9079
+ }))
9080
+ };
9081
+ }
9082
+ return { error: "Provide either sectionId or query" };
9083
+ }
8531
9084
  },
8532
9085
  {
8533
9086
  name: "getVisibleSections",
8534
9087
  description: "Get the list of sections currently visible in the user viewport.",
8535
9088
  parameters: {},
8536
- schemaVersion: 1
9089
+ required: [],
9090
+ schemaVersion: 1,
9091
+ execute: async () => {
9092
+ const model = this._currentPageModel;
9093
+ if (!model) return { sections: [] };
9094
+ return {
9095
+ sections: model.sections.slice(0, 10).map((s) => ({
9096
+ id: s.id,
9097
+ label: s.label,
9098
+ selector: s.selector,
9099
+ score: s.score
9100
+ }))
9101
+ };
9102
+ }
8537
9103
  },
8538
9104
  {
8539
9105
  name: "clickElement",
@@ -8541,7 +9107,52 @@ var GuideKitCore = class {
8541
9107
  parameters: {
8542
9108
  selector: { type: "string", description: "CSS selector of the element to click" }
8543
9109
  },
8544
- schemaVersion: 1
9110
+ required: ["selector"],
9111
+ schemaVersion: 1,
9112
+ execute: async (args) => {
9113
+ if (typeof document === "undefined") return { success: false, error: "Not in browser" };
9114
+ const selector = args.selector;
9115
+ const el = document.querySelector(selector);
9116
+ if (!el) return { success: false, error: `Element not found: ${selector}` };
9117
+ if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
9118
+ const clickableRules = this._options.options?.clickableSelectors;
9119
+ const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
9120
+ try {
9121
+ return el.matches(pattern);
9122
+ } catch {
9123
+ return selector === pattern;
9124
+ }
9125
+ }) ?? false;
9126
+ if (!isInDevAllowList) {
9127
+ const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
9128
+ try {
9129
+ return el.matches(pattern);
9130
+ } catch {
9131
+ return false;
9132
+ }
9133
+ });
9134
+ if (defaultDenied) {
9135
+ return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
9136
+ }
9137
+ }
9138
+ if (clickableRules?.deny?.length) {
9139
+ const denied = clickableRules.deny.some((pattern) => {
9140
+ try {
9141
+ return el.matches(pattern);
9142
+ } catch {
9143
+ return selector === pattern;
9144
+ }
9145
+ });
9146
+ if (denied) {
9147
+ return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
9148
+ }
9149
+ }
9150
+ if (clickableRules?.allow?.length && !isInDevAllowList) {
9151
+ return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
9152
+ }
9153
+ el.click();
9154
+ return { success: true };
9155
+ }
8545
9156
  },
8546
9157
  {
8547
9158
  name: "executeCustomAction",
@@ -8550,9 +9161,37 @@ var GuideKitCore = class {
8550
9161
  actionId: { type: "string", description: "ID of the custom action" },
8551
9162
  params: { type: "object", description: "Parameters for the action" }
8552
9163
  },
8553
- schemaVersion: 1
9164
+ required: ["actionId"],
9165
+ schemaVersion: 1,
9166
+ execute: async (args) => {
9167
+ const actionId = args.actionId;
9168
+ const params = args.params ?? {};
9169
+ const action = this.customActions.get(actionId);
9170
+ if (!action) return { error: `Unknown action: ${actionId}` };
9171
+ try {
9172
+ const result = await action.handler(params);
9173
+ return { success: true, result };
9174
+ } catch (err) {
9175
+ return { success: false, error: err instanceof Error ? err.message : String(err) };
9176
+ }
9177
+ }
8554
9178
  }
8555
9179
  ];
9180
+ }
9181
+ /**
9182
+ * Register all built-in tool handlers with the ToolExecutor.
9183
+ * Called once during init() after VisualGuidance and all subsystems are ready.
9184
+ */
9185
+ registerBuiltinTools() {
9186
+ if (!this.toolExecutor) return;
9187
+ for (const spec of this.getBuiltinToolSpecs()) {
9188
+ this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
9189
+ }
9190
+ }
9191
+ getToolDefinitions() {
9192
+ const builtinTools = this.getBuiltinToolSpecs().map(
9193
+ ({ execute: _execute, ...def }) => def
9194
+ );
8556
9195
  for (const [actionId, action] of this.customActions) {
8557
9196
  builtinTools.push({
8558
9197
  name: `action_${actionId}`,
@@ -8583,7 +9222,6 @@ exports.InitializationError = InitializationError;
8583
9222
  exports.LLMOrchestrator = LLMOrchestrator;
8584
9223
  exports.NavigationController = NavigationController;
8585
9224
  exports.NetworkError = NetworkError;
8586
- exports.OpenAIAdapter = OpenAIAdapter;
8587
9225
  exports.PermissionError = PermissionError;
8588
9226
  exports.ProactiveTriggerEngine = ProactiveTriggerEngine;
8589
9227
  exports.RateLimitError = RateLimitError;
@@ -8595,6 +9233,9 @@ exports.TimeoutError = TimeoutError;
8595
9233
  exports.TokenManager = TokenManager;
8596
9234
  exports.ToolExecutor = ToolExecutor;
8597
9235
  exports.VisualGuidance = VisualGuidance;
9236
+ exports.VoicePipeline = VoicePipeline;
9237
+ exports.WebSpeechSTT = WebSpeechSTT;
9238
+ exports.WebSpeechTTS = WebSpeechTTS;
8598
9239
  exports.createEventBus = createEventBus;
8599
9240
  exports.isGuideKitError = isGuideKitError;
8600
9241
  //# sourceMappingURL=index.cjs.map