@guidekit/vanilla 0.1.0-beta.1 → 0.1.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2170,7 +2170,7 @@ var GuideKit = (() => {
2170
2170
  "../vad/dist/index.js"() {
2171
2171
  "use strict";
2172
2172
  init_ort_node_min();
2173
- VAD_VERSION = "0.1.0";
2173
+ VAD_VERSION = "0.1.0-beta.2";
2174
2174
  LOG_PREFIX = "[GuideKit:VAD]";
2175
2175
  DEFAULT_MODEL_URL = "https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.20/dist/silero_vad_v5.onnx";
2176
2176
  CACHE_NAME = `guidekit-vad-v${VAD_VERSION}`;
@@ -2330,9 +2330,9 @@ var GuideKit = (() => {
2330
2330
  this._resetStates();
2331
2331
  this._isCalibrating = true;
2332
2332
  this._calibrationSamples = [];
2333
- this._calibrationFramesNeeded = Math.ceil(
2333
+ this._calibrationFramesNeeded = Math.max(1, Math.floor(
2334
2334
  CALIBRATION_DURATION_MS / 1e3 * this._sampleRate / FRAME_SIZE
2335
- );
2335
+ ));
2336
2336
  this._log("Calibrating noise floor for", this._calibrationFramesNeeded, "frames");
2337
2337
  this._setupAudioPipeline(stream);
2338
2338
  }
@@ -3729,7 +3729,7 @@ var GuideKit = (() => {
3729
3729
  if (el.closest("[data-guidekit-ignore]")) return;
3730
3730
  const style = window.getComputedStyle(el);
3731
3731
  const position = style.position;
3732
- const zIndex = parseInt(style.zIndex, 10);
3732
+ const zIndex = parseInt(style.zIndex, 10) || 0;
3733
3733
  if ((position === "fixed" || position === "absolute") && !isNaN(zIndex) && zIndex >= 1e3) {
3734
3734
  const visible = isElementVisible(el);
3735
3735
  if (!visible) return;
@@ -4382,7 +4382,9 @@ ${recapLines.join("\n")}`,
4382
4382
  // Content
4383
4383
  CONTENT_FILTER_TRIGGERED: "CONTENT_FILTER_TRIGGERED",
4384
4384
  // Privacy
4385
- PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED"
4385
+ PRIVACY_HOOK_CANCELLED: "PRIVACY_HOOK_CANCELLED",
4386
+ // General
4387
+ UNKNOWN: "UNKNOWN"
4386
4388
  };
4387
4389
  var GuideKitError = class extends Error {
4388
4390
  code;
@@ -4461,351 +4463,6 @@ ${recapLines.join("\n")}`,
4461
4463
  this.name = "ContentFilterError";
4462
4464
  }
4463
4465
  };
4464
- var DEFAULT_OPENAI_MODEL = "gpt-4o";
4465
- var DEFAULT_TIMEOUT_MS = 15e3;
4466
- var OPENAI_CHAT_URL = "https://api.openai.com/v1/chat/completions";
4467
- var OpenAIAdapter = class {
4468
- apiKey;
4469
- model;
4470
- constructor(config) {
4471
- this.apiKey = config.apiKey;
4472
- this.model = config.model ?? DEFAULT_OPENAI_MODEL;
4473
- }
4474
- // -----------------------------------------------------------------------
4475
- // LLMProviderAdapter implementation
4476
- // -----------------------------------------------------------------------
4477
- /**
4478
- * Convert GuideKit tool definitions into OpenAI's `tools` format.
4479
- * Each tool is wrapped as `{ type: 'function', function: { name, description, parameters } }`.
4480
- */
4481
- formatTools(tools) {
4482
- if (tools.length === 0) return void 0;
4483
- return tools.map((tool) => ({
4484
- type: "function",
4485
- function: {
4486
- name: tool.name,
4487
- description: tool.description,
4488
- parameters: tool.parameters
4489
- }
4490
- }));
4491
- }
4492
- /**
4493
- * Convert an array of `ConversationTurn` objects into OpenAI's messages
4494
- * format with `role: 'user' | 'assistant'`.
4495
- */
4496
- formatConversation(history) {
4497
- return history.map((turn) => ({
4498
- role: turn.role,
4499
- content: turn.content
4500
- }));
4501
- }
4502
- /**
4503
- * Parse an OpenAI SSE streaming response into an async iterable of
4504
- * `TextChunk` and `ToolCall` objects.
4505
- *
4506
- * The OpenAI streaming endpoint sends each chunk as a JSON object
4507
- * prefixed by `data: `. The final line is `data: [DONE]`.
4508
- * Text content arrives in `choices[0].delta.content` and tool calls
4509
- * arrive in `choices[0].delta.tool_calls`.
4510
- */
4511
- async *parseResponse(stream) {
4512
- const reader = stream.getReader();
4513
- const decoder = new TextDecoder();
4514
- let buffer = "";
4515
- const pendingToolCalls = /* @__PURE__ */ new Map();
4516
- try {
4517
- while (true) {
4518
- const { done, value } = await reader.read();
4519
- if (done) break;
4520
- buffer += decoder.decode(value, { stream: true });
4521
- const lines = buffer.split("\n");
4522
- buffer = lines.pop() ?? "";
4523
- for (const line of lines) {
4524
- const trimmed = line.trim();
4525
- if (!trimmed.startsWith("data:")) continue;
4526
- const jsonStr = trimmed.slice(5).trim();
4527
- if (jsonStr === "" || jsonStr === "[DONE]") {
4528
- if (jsonStr === "[DONE]") {
4529
- yield* this.flushPendingToolCalls(pendingToolCalls);
4530
- yield { text: "", done: true };
4531
- }
4532
- continue;
4533
- }
4534
- let parsed;
4535
- try {
4536
- parsed = JSON.parse(jsonStr);
4537
- } catch {
4538
- continue;
4539
- }
4540
- yield* this.extractChunks(parsed, pendingToolCalls);
4541
- }
4542
- }
4543
- if (buffer.trim().startsWith("data:")) {
4544
- const jsonStr = buffer.trim().slice(5).trim();
4545
- if (jsonStr === "[DONE]") {
4546
- yield* this.flushPendingToolCalls(pendingToolCalls);
4547
- yield { text: "", done: true };
4548
- } else if (jsonStr !== "") {
4549
- try {
4550
- const parsed = JSON.parse(jsonStr);
4551
- yield* this.extractChunks(parsed, pendingToolCalls);
4552
- } catch {
4553
- }
4554
- }
4555
- }
4556
- yield* this.flushPendingToolCalls(pendingToolCalls);
4557
- } finally {
4558
- reader.releaseLock();
4559
- }
4560
- }
4561
- /**
4562
- * Format a tool result so it can be sent back to OpenAI as a
4563
- * `tool` role message with the `tool_call_id`.
4564
- */
4565
- formatToolResult(callId, result) {
4566
- return {
4567
- role: "tool",
4568
- tool_call_id: callId,
4569
- content: typeof result === "string" ? result : JSON.stringify(result)
4570
- };
4571
- }
4572
- // -----------------------------------------------------------------------
4573
- // Streaming request
4574
- // -----------------------------------------------------------------------
4575
- /**
4576
- * Build and execute a streaming request to the OpenAI Chat Completions API.
4577
- * Returns the raw `ReadableStream` for the response body together with
4578
- * the raw Response object.
4579
- */
4580
- async streamRequest(params) {
4581
- const messages = [
4582
- { role: "system", content: params.systemPrompt },
4583
- ...params.contents
4584
- ];
4585
- const body = {
4586
- model: this.model,
4587
- messages,
4588
- stream: true,
4589
- temperature: 0.7,
4590
- top_p: 0.95
4591
- };
4592
- if (params.tools) {
4593
- body.tools = params.tools;
4594
- }
4595
- const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS;
4596
- const controller = new AbortController();
4597
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
4598
- if (params.signal) {
4599
- params.signal.addEventListener(
4600
- "abort",
4601
- () => controller.abort(params.signal.reason),
4602
- { once: true }
4603
- );
4604
- }
4605
- let response;
4606
- try {
4607
- response = await fetch(OPENAI_CHAT_URL, {
4608
- method: "POST",
4609
- headers: {
4610
- "Content-Type": "application/json",
4611
- Authorization: `Bearer ${this.apiKey}`
4612
- },
4613
- body: JSON.stringify(body),
4614
- signal: controller.signal
4615
- });
4616
- } catch (error) {
4617
- clearTimeout(timeoutId);
4618
- if (error instanceof DOMException && error.name === "AbortError") {
4619
- if (params.signal?.aborted) {
4620
- throw error;
4621
- }
4622
- throw new TimeoutError({
4623
- code: ErrorCodes.TIMEOUT_LLM_RESPONSE,
4624
- message: `OpenAI request timed out after ${timeoutMs}ms`,
4625
- provider: "openai",
4626
- recoverable: true,
4627
- suggestion: "Try again or increase the timeout.",
4628
- operationName: "openai.chatCompletions",
4629
- timeoutMs
4630
- });
4631
- }
4632
- throw new NetworkError({
4633
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
4634
- message: `Failed to connect to OpenAI API: ${error.message}`,
4635
- provider: "openai",
4636
- suggestion: "Check your network connection and try again.",
4637
- cause: error instanceof Error ? error : void 0
4638
- });
4639
- }
4640
- clearTimeout(timeoutId);
4641
- if (!response.ok) {
4642
- await this.handleHttpError(response);
4643
- }
4644
- if (!response.body) {
4645
- throw new NetworkError({
4646
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
4647
- message: "OpenAI response body is null -- streaming unavailable.",
4648
- provider: "openai",
4649
- suggestion: "Retry the request."
4650
- });
4651
- }
4652
- return { stream: response.body, response };
4653
- }
4654
- // -----------------------------------------------------------------------
4655
- // Internal helpers
4656
- // -----------------------------------------------------------------------
4657
- /**
4658
- * Extract `TextChunk` and accumulate `ToolCall` data from a single parsed
4659
- * OpenAI SSE JSON object.
4660
- *
4661
- * OpenAI tool calls arrive incrementally: the first chunk for a tool call
4662
- * carries the `id` and `function.name`, while subsequent chunks append to
4663
- * `function.arguments`. We accumulate these in `pendingToolCalls` and only
4664
- * yield complete `ToolCall` objects when the finish_reason is 'tool_calls'
4665
- * or when flushed.
4666
- */
4667
- *extractChunks(parsed, pendingToolCalls) {
4668
- const choices = parsed.choices;
4669
- if (!choices || choices.length === 0) return;
4670
- for (const choice of choices) {
4671
- const delta = choice.delta;
4672
- const finishReason = choice.finish_reason;
4673
- if (delta) {
4674
- if (typeof delta.content === "string" && delta.content !== "") {
4675
- yield {
4676
- text: delta.content,
4677
- done: false
4678
- };
4679
- }
4680
- const toolCallDeltas = delta.tool_calls;
4681
- if (toolCallDeltas) {
4682
- for (const tc of toolCallDeltas) {
4683
- const existing = pendingToolCalls.get(tc.index);
4684
- if (existing) {
4685
- if (tc.function?.arguments) {
4686
- existing.argumentsJson += tc.function.arguments;
4687
- }
4688
- } else {
4689
- pendingToolCalls.set(tc.index, {
4690
- id: tc.id ?? "",
4691
- name: tc.function?.name ?? "",
4692
- argumentsJson: tc.function?.arguments ?? ""
4693
- });
4694
- }
4695
- }
4696
- }
4697
- }
4698
- if (finishReason === "tool_calls") {
4699
- yield* this.flushPendingToolCalls(pendingToolCalls);
4700
- }
4701
- if (finishReason === "stop") {
4702
- yield { text: "", done: true };
4703
- }
4704
- }
4705
- }
4706
- /**
4707
- * Flush all accumulated pending tool calls as complete `ToolCall` objects.
4708
- */
4709
- *flushPendingToolCalls(pendingToolCalls) {
4710
- const sorted = [...pendingToolCalls.entries()].sort(
4711
- ([a], [b]) => a - b
4712
- );
4713
- for (const [, tc] of sorted) {
4714
- let args = {};
4715
- try {
4716
- args = JSON.parse(tc.argumentsJson);
4717
- } catch {
4718
- }
4719
- yield {
4720
- id: tc.id,
4721
- name: tc.name,
4722
- arguments: args
4723
- };
4724
- }
4725
- pendingToolCalls.clear();
4726
- }
4727
- /**
4728
- * Extract token usage from a parsed OpenAI response chunk.
4729
- * Usage data typically appears in the final chunk when `stream_options`
4730
- * includes `include_usage`, or in the non-streaming response.
4731
- * Returns `null` if no usage data is present.
4732
- */
4733
- extractUsage(parsed) {
4734
- const usage = parsed.usage;
4735
- if (!usage) return null;
4736
- return {
4737
- prompt: usage.prompt_tokens ?? 0,
4738
- completion: usage.completion_tokens ?? 0,
4739
- total: usage.total_tokens ?? 0
4740
- };
4741
- }
4742
- /**
4743
- * Check whether a parsed OpenAI chunk indicates the response was
4744
- * blocked by a content filter.
4745
- *
4746
- * OpenAI signals content filtering through:
4747
- * - `choices[].finish_reason === 'content_filter'`
4748
- * - `choices[].content_filter_results` with `filtered: true`
4749
- */
4750
- isContentFiltered(parsed) {
4751
- const choices = parsed.choices;
4752
- if (!choices || choices.length === 0) return false;
4753
- return choices.some((choice) => {
4754
- if (choice.finish_reason === "content_filter") return true;
4755
- const filterResults = choice.content_filter_results;
4756
- if (filterResults) {
4757
- return Object.values(filterResults).some((r) => r.filtered === true);
4758
- }
4759
- return false;
4760
- });
4761
- }
4762
- /**
4763
- * Translate an HTTP error response from OpenAI into the appropriate
4764
- * GuideKit error class.
4765
- */
4766
- async handleHttpError(response) {
4767
- let errorBody = "";
4768
- try {
4769
- errorBody = await response.text();
4770
- } catch {
4771
- }
4772
- const status = response.status;
4773
- if (status === 401 || status === 403) {
4774
- throw new AuthenticationError({
4775
- code: ErrorCodes.AUTH_INVALID_KEY,
4776
- message: `OpenAI API authentication failed (${status}): ${errorBody}`,
4777
- provider: "openai",
4778
- suggestion: "Verify your OpenAI API key is correct and has not expired."
4779
- });
4780
- }
4781
- if (status === 429) {
4782
- const retryAfterHeader = response.headers.get("retry-after");
4783
- const retryAfterMs = retryAfterHeader ? parseInt(retryAfterHeader, 10) * 1e3 : 6e4;
4784
- throw new RateLimitError({
4785
- code: ErrorCodes.RATE_LIMIT_PROVIDER,
4786
- message: `OpenAI API rate limit exceeded (429): ${errorBody}`,
4787
- provider: "openai",
4788
- recoverable: true,
4789
- suggestion: `Rate limited by OpenAI. Retry after ${Math.ceil(retryAfterMs / 1e3)}s.`,
4790
- retryAfterMs
4791
- });
4792
- }
4793
- if (status >= 500) {
4794
- throw new NetworkError({
4795
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
4796
- message: `OpenAI API server error (${status}): ${errorBody}`,
4797
- provider: "openai",
4798
- suggestion: "The OpenAI API is experiencing issues. Please try again later."
4799
- });
4800
- }
4801
- throw new NetworkError({
4802
- code: ErrorCodes.NETWORK_CONNECTION_LOST,
4803
- message: `OpenAI API request failed (${status}): ${errorBody}`,
4804
- provider: "openai",
4805
- suggestion: "Check the request parameters and try again."
4806
- });
4807
- }
4808
- };
4809
4466
  var DEFAULT_GEMINI_MODEL = "gemini-2.5-flash";
4810
4467
  var DEFAULT_TIMEOUT_MS2 = 15e3;
4811
4468
  var GEMINI_BASE_URL = "https://generativelanguage.googleapis.com/v1beta/models";
@@ -4815,16 +4472,26 @@ ${recapLines.join("\n")}`,
4815
4472
  { category: "HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold: "BLOCK_ONLY_HIGH" },
4816
4473
  { category: "HARM_CATEGORY_DANGEROUS_CONTENT", threshold: "BLOCK_ONLY_HIGH" }
4817
4474
  ];
4818
- function emptyUsage() {
4475
+ function emptyUsage2() {
4819
4476
  return { prompt: 0, completion: 0, total: 0 };
4820
4477
  }
4821
4478
  var GeminiAdapter = class {
4822
4479
  apiKey;
4823
4480
  model;
4481
+ /**
4482
+ * Token usage extracted from the most recent `parseResponse` call.
4483
+ * Updated as each SSE chunk is parsed; the final value reflects the
4484
+ * cumulative usage metadata sent by Gemini (typically in the last chunk).
4485
+ */
4486
+ _lastUsage = emptyUsage2();
4824
4487
  constructor(config) {
4825
4488
  this.apiKey = config.apiKey;
4826
4489
  this.model = config.model ?? DEFAULT_GEMINI_MODEL;
4827
4490
  }
4491
+ /** Token usage from the most recent parseResponse call. */
4492
+ get lastUsage() {
4493
+ return this._lastUsage;
4494
+ }
4828
4495
  // -----------------------------------------------------------------------
4829
4496
  // LLMProviderAdapter implementation
4830
4497
  // -----------------------------------------------------------------------
@@ -4839,7 +4506,11 @@ ${recapLines.join("\n")}`,
4839
4506
  functionDeclarations: tools.map((tool) => ({
4840
4507
  name: tool.name,
4841
4508
  description: tool.description,
4842
- parameters: tool.parameters
4509
+ parameters: {
4510
+ type: "object",
4511
+ properties: { ...tool.parameters },
4512
+ required: tool.required ?? []
4513
+ }
4843
4514
  }))
4844
4515
  }
4845
4516
  ];
@@ -4861,11 +4532,16 @@ ${recapLines.join("\n")}`,
4861
4532
  * The Gemini `streamGenerateContent?alt=sse` endpoint sends each chunk
4862
4533
  * as a JSON object prefixed by `data: `. We parse line-by-line, extract
4863
4534
  * text parts and function call parts, and yield the appropriate types.
4535
+ *
4536
+ * This method also:
4537
+ * - Detects content filtering and throws `ContentFilterError`.
4538
+ * - Tracks token usage (accessible via `lastUsage` after iteration).
4864
4539
  */
4865
4540
  async *parseResponse(stream) {
4866
4541
  const reader = stream.getReader();
4867
4542
  const decoder = new TextDecoder();
4868
4543
  let buffer = "";
4544
+ this._lastUsage = emptyUsage2();
4869
4545
  try {
4870
4546
  while (true) {
4871
4547
  const { done, value } = await reader.read();
@@ -4884,6 +4560,18 @@ ${recapLines.join("\n")}`,
4884
4560
  } catch {
4885
4561
  continue;
4886
4562
  }
4563
+ if (this.isContentFiltered(parsed)) {
4564
+ throw new ContentFilterError({
4565
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
4566
+ message: "Response was blocked by provider content safety filter.",
4567
+ provider: "gemini",
4568
+ suggestion: "Rephrase your question or adjust safety settings."
4569
+ });
4570
+ }
4571
+ const chunkUsage = this.extractUsage(parsed);
4572
+ if (chunkUsage) {
4573
+ this._lastUsage = chunkUsage;
4574
+ }
4887
4575
  yield* this.extractChunks(parsed);
4888
4576
  }
4889
4577
  }
@@ -4892,8 +4580,21 @@ ${recapLines.join("\n")}`,
4892
4580
  if (jsonStr !== "" && jsonStr !== "[DONE]") {
4893
4581
  try {
4894
4582
  const parsed = JSON.parse(jsonStr);
4583
+ if (this.isContentFiltered(parsed)) {
4584
+ throw new ContentFilterError({
4585
+ code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
4586
+ message: "Response was blocked by provider content safety filter.",
4587
+ provider: "gemini",
4588
+ suggestion: "Rephrase your question or adjust safety settings."
4589
+ });
4590
+ }
4591
+ const chunkUsage = this.extractUsage(parsed);
4592
+ if (chunkUsage) {
4593
+ this._lastUsage = chunkUsage;
4594
+ }
4895
4595
  yield* this.extractChunks(parsed);
4896
- } catch {
4596
+ } catch (error) {
4597
+ if (error instanceof ContentFilterError) throw error;
4897
4598
  }
4898
4599
  }
4899
4600
  }
@@ -4924,15 +4625,21 @@ ${recapLines.join("\n")}`,
4924
4625
  /**
4925
4626
  * Build and execute a streaming request to the Gemini API.
4926
4627
  * Returns the raw `ReadableStream` for the response body together with
4927
- * a promise that resolves to token usage extracted from the final chunk.
4628
+ * the raw Response object.
4629
+ *
4630
+ * Note: The Gemini API key is passed as a URL query parameter (`key=`).
4631
+ * This is inherent to the Gemini REST SSE endpoint design; the key is
4632
+ * transmitted over HTTPS so it remains encrypted in transit. (H3)
4928
4633
  */
4929
4634
  async streamRequest(params) {
4635
+ const contentsArray = params.contents;
4636
+ const fullContents = params.userMessage ? [...contentsArray, { role: "user", parts: [{ text: params.userMessage }] }] : contentsArray;
4930
4637
  const url = `${GEMINI_BASE_URL}/${this.model}:streamGenerateContent?alt=sse&key=${this.apiKey}`;
4931
4638
  const body = {
4932
4639
  systemInstruction: {
4933
4640
  parts: [{ text: params.systemPrompt }]
4934
4641
  },
4935
- contents: params.contents,
4642
+ contents: fullContents,
4936
4643
  safetySettings: DEFAULT_SAFETY_SETTINGS,
4937
4644
  generationConfig: {
4938
4645
  temperature: 0.7,
@@ -5000,7 +4707,7 @@ ${recapLines.join("\n")}`,
5000
4707
  return { stream: response.body, response };
5001
4708
  }
5002
4709
  // -----------------------------------------------------------------------
5003
- // Internal helpers
4710
+ // Public helpers (LLMProviderAdapter interface)
5004
4711
  // -----------------------------------------------------------------------
5005
4712
  /**
5006
4713
  * Extract `TextChunk` and `ToolCall` items from a single parsed Gemini
@@ -5173,7 +4880,8 @@ ${recapLines.join("\n")}`,
5173
4880
  updateConfig(config) {
5174
4881
  this._config = config;
5175
4882
  this._adapter = this.createAdapter(config);
5176
- this.log(`Config updated: provider=${config.provider}`);
4883
+ const label = "provider" in config ? config.provider : "custom adapter";
4884
+ this.log(`Config updated: ${label}`);
5177
4885
  }
5178
4886
  /** Get the current provider adapter. */
5179
4887
  get adapter() {
@@ -5184,139 +4892,42 @@ ${recapLines.join("\n")}`,
5184
4892
  // -----------------------------------------------------------------------
5185
4893
  /**
5186
4894
  * Execute a streaming LLM request and collect the results.
4895
+ *
4896
+ * This method is fully adapter-agnostic: it delegates streaming,
4897
+ * response parsing, content-filter detection, and usage extraction
4898
+ * entirely to the active `LLMProviderAdapter`. No provider-specific
4899
+ * SSE parsing lives in the orchestrator.
5187
4900
  */
5188
4901
  async executeStream(params, _isRetry) {
5189
- const geminiAdapter = this._adapter;
5190
- const historyContents = geminiAdapter.formatConversation(params.history);
5191
- const contents = [
5192
- ...historyContents,
5193
- { role: "user", parts: [{ text: params.userMessage }] }
5194
- ];
5195
- const tools = params.tools && params.tools.length > 0 ? geminiAdapter.formatTools(params.tools) : void 0;
5196
- const { stream } = await geminiAdapter.streamRequest({
4902
+ const adapter = this._adapter;
4903
+ const historyContents = adapter.formatConversation(params.history);
4904
+ const tools = params.tools && params.tools.length > 0 ? adapter.formatTools(params.tools) : void 0;
4905
+ const { stream } = await adapter.streamRequest({
5197
4906
  systemPrompt: params.systemPrompt,
5198
- contents,
4907
+ contents: historyContents,
4908
+ userMessage: params.userMessage,
5199
4909
  tools,
5200
4910
  signal: params.signal
5201
4911
  });
5202
4912
  let fullText = "";
5203
4913
  const toolCalls = [];
5204
- let usage = emptyUsage();
5205
- let wasContentFiltered = false;
5206
- const reader = stream.getReader();
5207
- const decoder = new TextDecoder();
5208
- let buffer = "";
5209
- try {
5210
- while (true) {
5211
- const { done, value } = await reader.read();
5212
- if (done) break;
5213
- buffer += decoder.decode(value, { stream: true });
5214
- const lines = buffer.split("\n");
5215
- buffer = lines.pop() ?? "";
5216
- for (const line of lines) {
5217
- const trimmed = line.trim();
5218
- if (!trimmed.startsWith("data:")) continue;
5219
- const jsonStr = trimmed.slice(5).trim();
5220
- if (jsonStr === "" || jsonStr === "[DONE]") continue;
5221
- let parsed;
5222
- try {
5223
- parsed = JSON.parse(jsonStr);
5224
- } catch {
5225
- continue;
5226
- }
5227
- if (geminiAdapter.isContentFiltered(parsed)) {
5228
- wasContentFiltered = true;
5229
- break;
5230
- }
5231
- const chunkUsage = geminiAdapter.extractUsage(parsed);
5232
- if (chunkUsage) {
5233
- usage = chunkUsage;
5234
- }
5235
- const candidates = parsed.candidates;
5236
- if (!candidates || candidates.length === 0) continue;
5237
- for (const candidate of candidates) {
5238
- const content = candidate.content;
5239
- if (!content?.parts) continue;
5240
- const finishReason = candidate.finishReason;
5241
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
5242
- for (const part of content.parts) {
5243
- if (typeof part.text === "string") {
5244
- fullText += part.text;
5245
- const chunk = { text: part.text, done: isDone };
5246
- this.callbacks.onChunk?.(chunk);
5247
- }
5248
- if (part.functionCall) {
5249
- const fc = part.functionCall;
5250
- const toolCall = {
5251
- id: fc.name,
5252
- name: fc.name,
5253
- arguments: fc.args ?? {}
5254
- };
5255
- toolCalls.push(toolCall);
5256
- this.callbacks.onToolCall?.(toolCall);
5257
- }
5258
- }
5259
- }
4914
+ for await (const item of adapter.parseResponse(stream)) {
4915
+ if ("name" in item && "arguments" in item) {
4916
+ const toolCall = item;
4917
+ toolCalls.push(toolCall);
4918
+ this.callbacks.onToolCall?.(toolCall);
4919
+ } else {
4920
+ const chunk = item;
4921
+ if (chunk.text) {
4922
+ fullText += chunk.text;
5260
4923
  }
5261
- if (wasContentFiltered) break;
4924
+ this.callbacks.onChunk?.(chunk);
5262
4925
  }
5263
- if (!wasContentFiltered && buffer.trim().startsWith("data:")) {
5264
- const jsonStr = buffer.trim().slice(5).trim();
5265
- if (jsonStr !== "" && jsonStr !== "[DONE]") {
5266
- try {
5267
- const parsed = JSON.parse(jsonStr);
5268
- if (geminiAdapter.isContentFiltered(parsed)) {
5269
- wasContentFiltered = true;
5270
- } else {
5271
- const chunkUsage = geminiAdapter.extractUsage(parsed);
5272
- if (chunkUsage) usage = chunkUsage;
5273
- const candidates = parsed.candidates;
5274
- if (candidates) {
5275
- for (const candidate of candidates) {
5276
- const content = candidate.content;
5277
- if (!content?.parts) continue;
5278
- const finishReason = candidate.finishReason;
5279
- const isDone = finishReason === "STOP" || finishReason === "MAX_TOKENS";
5280
- for (const part of content.parts) {
5281
- if (typeof part.text === "string") {
5282
- fullText += part.text;
5283
- const chunk = {
5284
- text: part.text,
5285
- done: isDone
5286
- };
5287
- this.callbacks.onChunk?.(chunk);
5288
- }
5289
- if (part.functionCall) {
5290
- const fc = part.functionCall;
5291
- const toolCall = {
5292
- id: fc.name,
5293
- name: fc.name,
5294
- arguments: fc.args ?? {}
5295
- };
5296
- toolCalls.push(toolCall);
5297
- this.callbacks.onToolCall?.(toolCall);
5298
- }
5299
- }
5300
- }
5301
- }
5302
- }
5303
- } catch {
5304
- }
5305
- }
5306
- }
5307
- } finally {
5308
- reader.releaseLock();
5309
- }
5310
- if (wasContentFiltered) {
5311
- throw new ContentFilterError({
5312
- code: ErrorCodes.CONTENT_FILTER_TRIGGERED,
5313
- message: "Response was blocked by Gemini content safety filter.",
5314
- provider: "gemini",
5315
- suggestion: "Rephrase your question or adjust safety settings."
5316
- });
5317
4926
  }
5318
- if (fullText.length > 0) {
5319
- this.callbacks.onChunk?.({ text: "", done: true });
4927
+ this.callbacks.onChunk?.({ text: "", done: true });
4928
+ let usage = emptyUsage2();
4929
+ if ("lastUsage" in adapter) {
4930
+ usage = adapter.lastUsage;
5320
4931
  }
5321
4932
  if (usage.total > 0) {
5322
4933
  this.callbacks.onTokenUsage?.(usage);
@@ -5328,25 +4939,30 @@ ${recapLines.join("\n")}`,
5328
4939
  }
5329
4940
  /**
5330
4941
  * Create the appropriate adapter for the given config.
5331
- * Currently only Gemini is implemented; other providers will be added
5332
- * as the SDK evolves.
4942
+ *
4943
+ * Built-in providers:
4944
+ * - `'gemini'` — uses the bundled `GeminiAdapter`.
4945
+ *
4946
+ * Custom adapters:
4947
+ * - Pass `{ adapter: myAdapter }` to use any `LLMProviderAdapter`.
4948
+ * Example: `llm: { adapter: new OpenAIAdapter({ ... }) }`
5333
4949
  */
5334
4950
  createAdapter(config) {
4951
+ if ("adapter" in config) {
4952
+ return config.adapter;
4953
+ }
5335
4954
  switch (config.provider) {
5336
4955
  case "gemini":
5337
4956
  return new GeminiAdapter(config);
5338
- case "openai":
5339
- return new OpenAIAdapter(config);
5340
4957
  default:
5341
4958
  throw new Error(
5342
- `LLM provider "${config.provider}" is not yet supported. Currently only "gemini" and "openai" are implemented.`
4959
+ `LLM provider "${config.provider}" is not yet supported. Use { adapter: yourAdapter } for custom providers.`
5343
4960
  );
5344
4961
  }
5345
4962
  }
5346
4963
  /** Convenience accessor for the current provider name. */
5347
4964
  get providerName() {
5348
- if (this._config.provider === "gemini") return "gemini";
5349
- if (this._config.provider === "openai") return "openai";
4965
+ if ("provider" in this._config) return this._config.provider;
5350
4966
  return void 0;
5351
4967
  }
5352
4968
  /** Log a debug message if debug mode is enabled. */
@@ -5497,7 +5113,7 @@ ${recapLines.join("\n")}`,
5497
5113
  break;
5498
5114
  }
5499
5115
  }
5500
- if (rounds >= this.maxRounds && allToolCalls.length > 0) {
5116
+ if (rounds >= this.maxRounds) {
5501
5117
  this.log(
5502
5118
  `Max rounds (${this.maxRounds}) reached. Returning current text.`
5503
5119
  );
@@ -5600,6 +5216,19 @@ ${recapLines.join("\n")}`,
5600
5216
  return s.value;
5601
5217
  }
5602
5218
  const tc = toolCalls[i];
5219
+ if (!tc) {
5220
+ const errorMsg2 = s.reason instanceof Error ? s.reason.message : String(s.reason);
5221
+ return {
5222
+ toolCallId: `unknown-${i}`,
5223
+ record: {
5224
+ name: "unknown",
5225
+ args: {},
5226
+ result: void 0,
5227
+ durationMs: 0,
5228
+ error: errorMsg2
5229
+ }
5230
+ };
5231
+ }
5603
5232
  const errorMsg = s.reason instanceof Error ? s.reason.message : String(s.reason);
5604
5233
  return {
5605
5234
  toolCallId: tc.id,
@@ -6783,7 +6412,277 @@ ${callDescriptions}` : callDescriptions;
6783
6412
  }
6784
6413
  }
6785
6414
  };
6786
- var LOG_PREFIX7 = "[GuideKit:TTS]";
6415
+ var LOG_PREFIX7 = "[GuideKit:ElevenLabs-STT]";
6416
+ var ELEVENLABS_STT_ENDPOINT = "wss://api.elevenlabs.io/v1/speech-to-text/realtime";
6417
+ var DEFAULT_LANGUAGE2 = "en";
6418
+ var INACTIVITY_TIMEOUT_S = 30;
6419
+ var SAMPLE_RATE = 16e3;
6420
/**
 * Convert float PCM samples to signed 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1] and then scaled asymmetrically
 * (negative values by 32768, non-negative values by 32767) so both ends of
 * the Int16 range are reachable. Fractions are truncated by the typed-array
 * store; NaN samples become 0.
 *
 * @param {Float32Array} float32 - Source samples.
 * @returns {Int16Array} A new array with one converted sample per input sample.
 */
function float32ToInt162(float32) {
  return Int16Array.from(float32, (sample) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    return clamped < 0 ? clamped * 32768 : clamped * 32767;
  });
}
6428
/**
 * Base64-encode the raw bytes of an Int16Array (platform byte order).
 *
 * Only the bytes the view actually covers are encoded: `byteOffset` and
 * `byteLength` are honoured, so a `subarray()` of a larger buffer does not
 * leak or append the rest of the backing ArrayBuffer.
 *
 * @param {Int16Array} int16 - Samples to encode.
 * @returns {string} Base64 text of the view's bytes.
 */
function int16ToBase64(int16) {
  const bytes = new Uint8Array(int16.buffer, int16.byteOffset, int16.byteLength);
  // Spread in bounded chunks: passing a very large array as call arguments
  // can exceed the engine's argument limit.
  const CHUNK_SIZE = 8192;
  let binary = "";
  for (let offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
    binary += String.fromCharCode(...bytes.subarray(offset, offset + CHUNK_SIZE));
  }
  return btoa(binary);
}
6438
/**
 * Streaming speech-to-text adapter for the ElevenLabs realtime endpoint.
 *
 * Audio is pushed with `sendAudio()` as base64-encoded JSON messages over a
 * WebSocketManager connection; transcripts are delivered to callbacks
 * registered with `onTranscript()`.
 */
var ElevenLabsSTT = class {
  // ---- Configuration -------------------------------------------------------
  /** API key, sent as the `xi_api_key` query parameter. */
  apiKey;
  /** Language code, sent as the `language` query parameter. */
  language;
  /** When true, `log()` writes to `console.debug`. */
  debugEnabled;
  // ---- Internal state ------------------------------------------------------
  /** WebSocketManager created by `connect()`; null before that and after `destroy()`. */
  wsManager = null;
  _connected = false;
  _suspended = false;
  /** Registered transcript callbacks. */
  transcriptCallbacks = /* @__PURE__ */ new Set();
  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------
  /**
   * @param {{ apiKey: string, language?: string, debug?: boolean }} options
   */
  constructor(options) {
    this.apiKey = options.apiKey;
    this.language = options.language ?? DEFAULT_LANGUAGE2;
    this.debugEnabled = options.debug ?? false;
    this.log("ElevenLabsSTT created", { language: this.language });
  }
  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------
  /** Whether the WebSocket is currently connected and ready. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Open a WebSocket connection to the ElevenLabs realtime STT endpoint.
   *
   * Returns immediately (without connecting) when already connected or when
   * no global `WebSocket` exists. Otherwise returns whatever the
   * WebSocketManager's `connect()` returns.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof WebSocket === "undefined") {
      this.log("WebSocket API not available (SSR?) \u2014 cannot connect");
      return;
    }
    const url = this.buildUrl();
    // Never log the real key: the URL holds it in form-encoded shape, so a
    // plain string replace of the raw key can miss it.
    this.log("Connecting to", this.redactUrl(url));
    this.wsManager = new WebSocketManager({
      url,
      protocols: [],
      debug: this.debugEnabled,
      label: "ElevenLabs-STT"
    });
    this.wsManager.onOpen(() => {
      this._connected = true;
      this.log("Connected");
    });
    this.wsManager.onMessage((event) => {
      this.handleMessage(event);
    });
    this.wsManager.onClose((code, reason) => {
      this.log("Connection closed", { code, reason });
      this.cleanup();
    });
    this.wsManager.onError((event) => {
      this.log("WebSocket error", event);
    });
    return this.wsManager.connect();
  }
  /**
   * Send audio data for transcription.
   *
   * Float32Array input is converted to Int16 first; anything else is passed
   * to the base64 encoder as-is. Dropped silently while disconnected or
   * suspended.
   */
  sendAudio(audioData) {
    if (!this._connected || !this.wsManager || this._suspended) {
      return;
    }
    const int16 = audioData instanceof Float32Array ? float32ToInt162(audioData) : audioData;
    const base64 = int16ToBase64(int16);
    this.wsManager.send(
      JSON.stringify({
        type: "input_audio_chunk",
        audio: base64,
        sample_rate: SAMPLE_RATE
      })
    );
  }
  /**
   * Register a callback to receive transcript events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onTranscript(callback) {
    this.transcriptCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.transcriptCallbacks.delete(callback);
    };
  }
  /**
   * Gracefully close the connection.
   *
   * Sends a `commit_audio` message first (best effort) and then closes the
   * socket. Does nothing when not connected.
   */
  close() {
    if (!this._connected || !this.wsManager) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Sending commit_audio and closing");
    try {
      this.wsManager.send(JSON.stringify({ type: "commit_audio" }));
    } catch (err) {
      // Best effort only: the socket is closed regardless.
      this.log("Failed to send commit_audio", err);
    }
    this.wsManager.close();
    this.cleanup();
  }
  /** Force-destroy the connection and drop all transcript subscribers. */
  destroy() {
    this.log("Destroying");
    if (this.wsManager) {
      this.wsManager.destroy();
      this.wsManager = null;
    }
    this.cleanup();
    this.transcriptCallbacks.clear();
  }
  /**
   * Suspend the adapter: subsequent `sendAudio` calls are dropped. The
   * WebSocket itself is left untouched.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this.log("Suspended");
  }
  /** Resume after a prior `suspend()`. */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this.log("Resumed");
  }
  // -------------------------------------------------------------------------
  // Message handling
  // -------------------------------------------------------------------------
  /**
   * Parse an incoming JSON message and emit transcript events.
   *
   * - `partial_transcript`: interim result, `isFinal = false`
   * - `committed_transcript`: final result, `isFinal = true`
   *
   * Non-string payloads, unparsable JSON and JSON that is not an object
   * (e.g. `null`) are ignored instead of throwing.
   */
  handleMessage(event) {
    if (typeof event.data !== "string") {
      return;
    }
    let parsed;
    try {
      parsed = JSON.parse(event.data);
    } catch {
      this.log("Failed to parse message", event.data);
      return;
    }
    if (parsed === null || typeof parsed !== "object") {
      // `JSON.parse("null")` succeeds; indexing the result would throw.
      this.log("Ignoring non-object message", parsed);
      return;
    }
    const type = parsed["type"];
    if (type === "committed_transcript" || type === "partial_transcript") {
      this.handleTranscriptMessage(parsed, type === "committed_transcript");
    } else {
      this.log("Received message", type, parsed);
    }
  }
  /**
   * Extract transcript data from a transcript message and notify subscribers.
   * Messages whose `result.text` is missing, blank or not a string are dropped.
   */
  handleTranscriptMessage(parsed, isFinal) {
    const result = parsed["result"];
    const rawText = result?.text;
    const text = typeof rawText === "string" ? rawText : "";
    const confidence = result?.confidence ?? 0;
    if (text.trim() === "") {
      return;
    }
    const transcriptEvent = {
      text,
      isFinal,
      confidence,
      timestamp: Date.now()
    };
    this.log(
      isFinal ? "Final transcript:" : "Interim transcript:",
      text,
      `(${(confidence * 100).toFixed(1)}%)`
    );
    this.emitTranscript(transcriptEvent);
  }
  // -------------------------------------------------------------------------
  // Subscriber notification
  // -------------------------------------------------------------------------
  /**
   * Emit a transcript event to all registered callbacks. A throwing callback
   * is logged and does not stop delivery to the others.
   */
  emitTranscript(event) {
    for (const cb of this.transcriptCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX7, "Transcript callback threw:", err);
      }
    }
  }
  // -------------------------------------------------------------------------
  // URL building
  // -------------------------------------------------------------------------
  /** Build the streaming STT endpoint URL with auth query params. */
  buildUrl() {
    const params = new URLSearchParams({
      xi_api_key: this.apiKey,
      language: this.language,
      inactivity_timeout: String(INACTIVITY_TIMEOUT_S)
    });
    return `${ELEVENLABS_STT_ENDPOINT}?${params.toString()}`;
  }
  /**
   * Return `url` with the `xi_api_key` query value replaced by `***`, for
   * logging. Works on the parsed query so encoded keys are masked too.
   */
  redactUrl(url) {
    const redacted = new URL(url);
    if (redacted.searchParams.has("xi_api_key")) {
      redacted.searchParams.set("xi_api_key", "***");
    }
    return redacted.toString();
  }
  // -------------------------------------------------------------------------
  // Cleanup
  // -------------------------------------------------------------------------
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
  }
  // -------------------------------------------------------------------------
  // Logging
  // -------------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX7, ...args);
    }
  }
};
6685
+ var LOG_PREFIX8 = "[GuideKit:TTS]";
6787
6686
  var DEFAULT_VOICE_ID = "21m00Tcm4TlvDq8ikWAM";
6788
6687
  var DEFAULT_MODEL_ID = "eleven_flash_v2_5";
6789
6688
  var DEFAULT_STABILITY = 0.5;
@@ -7025,47 +6924,701 @@ ${callDescriptions}` : callDescriptions;
7025
6924
  this.log("Failed to parse message", event.data);
7026
6925
  return;
7027
6926
  }
7028
- if (parsed["error"] !== void 0) {
7029
- this.log("ElevenLabs error:", parsed["error"]);
7030
- return;
6927
+ if (parsed["error"] !== void 0) {
6928
+ this.log("ElevenLabs error:", parsed["error"]);
6929
+ return;
6930
+ }
6931
+ if (parsed["audio"] === void 0 || parsed["audio"] === null) {
6932
+ this.log("Non-audio message received", parsed);
6933
+ return;
6934
+ }
6935
+ const audioBase64 = parsed["audio"];
6936
+ const isFinal = parsed["isFinal"] === true;
6937
+ if (!audioBase64 || audioBase64.length === 0) {
6938
+ if (isFinal) {
6939
+ this.emitAudio({
6940
+ audio: new ArrayBuffer(0),
6941
+ isFinal: true,
6942
+ timestamp: Date.now()
6943
+ });
6944
+ }
6945
+ return;
6946
+ }
6947
+ let audioBuffer;
6948
+ try {
6949
+ audioBuffer = base64ToArrayBuffer(audioBase64);
6950
+ } catch (err) {
6951
+ this.log("Failed to decode base64 audio", err);
6952
+ return;
6953
+ }
6954
+ const audioEvent = {
6955
+ audio: audioBuffer,
6956
+ isFinal,
6957
+ timestamp: Date.now()
6958
+ };
6959
+ this.log(
6960
+ isFinal ? "Final audio chunk:" : "Audio chunk:",
6961
+ `${audioBuffer.byteLength} bytes`
6962
+ );
6963
+ this.emitAudio(audioEvent);
6964
+ }
6965
+ // -----------------------------------------------------------------------
6966
+ // Subscriber notification
6967
+ // -----------------------------------------------------------------------
6968
+ /**
6969
+ * Emit an audio event to all registered callbacks.
6970
+ *
6971
+ * Errors thrown by individual callbacks are caught and logged so one
6972
+ * misbehaving subscriber does not prevent others from receiving the event.
6973
+ */
6974
+ emitAudio(event) {
6975
+ for (const cb of this.audioCallbacks) {
6976
+ try {
6977
+ cb(event);
6978
+ } catch (err) {
6979
+ console.error(LOG_PREFIX8, "Audio callback threw:", err);
6980
+ }
6981
+ }
6982
+ }
6983
+ // -----------------------------------------------------------------------
6984
+ // URL building
6985
+ // -----------------------------------------------------------------------
6986
+ /** Build the ElevenLabs streaming TTS endpoint URL. */
6987
+ buildUrl() {
6988
+ const params = new URLSearchParams({
6989
+ model_id: this.modelId
6990
+ });
6991
+ return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
6992
+ }
6993
+ // -----------------------------------------------------------------------
6994
+ // Cleanup
6995
+ // -----------------------------------------------------------------------
6996
+ /** Reset internal state after disconnection. */
6997
+ cleanup() {
6998
+ this._connected = false;
6999
+ this.bosSent = false;
7000
+ }
7001
+ // -----------------------------------------------------------------------
7002
+ // Logging
7003
+ // -----------------------------------------------------------------------
7004
+ /** Conditional debug logging. */
7005
+ log(...args) {
7006
+ if (this.debugEnabled) {
7007
+ console.debug(LOG_PREFIX8, ...args);
7008
+ }
7009
+ }
7010
+ };
7011
+ var LOG_PREFIX9 = "[GuideKit:WebSpeech-STT]";
7012
+ var DEFAULT_LANGUAGE3 = "en-US";
7013
/**
 * Speech-to-text adapter backed by the browser's Web Speech API
 * (`SpeechRecognition` / `webkitSpeechRecognition`).
 *
 * The browser captures microphone audio itself, so `sendAudio()` is a no-op
 * kept for interface parity with the WebSocket-based adapters.
 */
var WebSpeechSTT = class {
  // ---- Configuration -------------------------------------------------------
  language;
  continuous;
  interimResultsEnabled;
  debugEnabled;
  // ---- Internal state ------------------------------------------------------
  /** The recognition instance of the current session, or null. */
  recognition = null;
  _connected = false;
  _suspended = false;
  /**
   * Whether we intentionally stopped recognition. Used to distinguish
   * between intentional stop and unexpected end (for auto-restart in
   * continuous mode).
   */
  _intentionalStop = false;
  /** Registered transcript callbacks. */
  transcriptCallbacks = /* @__PURE__ */ new Set();
  // -------------------------------------------------------------------------
  // Constructor
  // -------------------------------------------------------------------------
  /**
   * @param {{ language?: string, continuous?: boolean, interimResults?: boolean, debug?: boolean }} [options]
   */
  constructor(options = {}) {
    this.language = options.language ?? DEFAULT_LANGUAGE3;
    this.continuous = options.continuous ?? true;
    this.interimResultsEnabled = options.interimResults ?? true;
    this.debugEnabled = options.debug ?? false;
    this.log("WebSpeechSTT created", {
      language: this.language,
      continuous: this.continuous,
      interimResults: this.interimResultsEnabled
    });
  }
  // -------------------------------------------------------------------------
  // Static methods
  // -------------------------------------------------------------------------
  /**
   * Check whether SpeechRecognition is available. Returns false when there
   * is no `window` (e.g. SSR).
   */
  static isSupported() {
    if (typeof window === "undefined") return false;
    return typeof window["SpeechRecognition"] !== "undefined" || typeof globalThis.webkitSpeechRecognition !== "undefined";
  }
  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------
  /** Whether recognition is currently active and connected. */
  get isConnected() {
    return this._connected;
  }
  /**
   * Start speech recognition.
   *
   * Creates a fresh SpeechRecognition instance and begins listening. The
   * returned promise resolves on the instance's `start` event and rejects on
   * its first `error` event or when `start()` throws. Returns without doing
   * anything when already connected or when there is no `window`.
   *
   * Any instance left over from an earlier attempt is detached and aborted
   * first, and the handlers installed here ignore events once their instance
   * is no longer the current one, so a late `end` from an old session cannot
   * disturb a new one.
   *
   * @throws {Error} When the SpeechRecognition API is not available.
   */
  async connect() {
    if (this._connected) {
      this.log("Already connected \u2014 skipping");
      return;
    }
    if (typeof window === "undefined") {
      this.log("SSR environment detected \u2014 cannot connect");
      return;
    }
    const SpeechRecognitionClass = this.resolveSpeechRecognition();
    if (!SpeechRecognitionClass) {
      throw new Error(
        "Web Speech API (SpeechRecognition) is not supported in this browser."
      );
    }
    this.discardStaleRecognition();
    const recognition = new SpeechRecognitionClass();
    this.recognition = recognition;
    recognition.lang = this.language;
    recognition.continuous = this.continuous;
    recognition.interimResults = this.interimResultsEnabled;
    recognition.maxAlternatives = 1;
    const isCurrent = () => this.recognition === recognition;
    recognition.onstart = () => {
      if (!isCurrent()) return;
      this._connected = true;
      this._intentionalStop = false;
      this.log("Recognition started");
    };
    // Results are forwarded even after close() released the instance, so
    // pending final results of a gracefully stopped session still arrive.
    recognition.onresult = (event) => {
      this.handleResult(event);
    };
    recognition.onerror = (event) => {
      if (!isCurrent()) return;
      this.handleError(event);
    };
    recognition.onend = () => {
      this.log("Recognition ended");
      if (!isCurrent()) return;
      const wasConnected = this._connected;
      this._connected = false;
      if (this.continuous && !this._intentionalStop && !this._suspended && wasConnected) {
        this.log("Auto-restarting continuous recognition");
        try {
          recognition.start();
        } catch {
          this.log("Failed to auto-restart recognition");
        }
      }
    };
    return new Promise((resolve, reject) => {
      const onStart = () => {
        detach();
        resolve();
      };
      const onError = (event) => {
        detach();
        reject(new Error(`SpeechRecognition error: ${event.error} \u2014 ${event.message}`));
      };
      const detach = () => {
        recognition.removeEventListener("start", onStart);
        recognition.removeEventListener("error", onError);
      };
      recognition.addEventListener("start", onStart, { once: true });
      recognition.addEventListener("error", onError, { once: true });
      try {
        recognition.start();
      } catch (err) {
        detach();
        reject(err);
      }
    });
  }
  /**
   * Send audio data. No-op: the browser captures microphone audio itself.
   * Provided for interface compatibility with WebSocket-based STT adapters
   * (DeepgramSTT, ElevenLabsSTT).
   */
  sendAudio(_audioData) {
  }
  /**
   * Register a callback to receive transcript events.
   *
   * @returns An unsubscribe function. Calling it more than once is safe.
   */
  onTranscript(callback) {
    this.transcriptCallbacks.add(callback);
    let removed = false;
    return () => {
      if (removed) return;
      removed = true;
      this.transcriptCallbacks.delete(callback);
    };
  }
  /**
   * Gracefully stop recognition.
   *
   * Calls `stop()` on the instance and then releases it. The instance keeps
   * its handlers, so results it still delivers reach subscribers, but a
   * later `resume()` can no longer restart the closed session.
   */
  close() {
    const recognition = this.recognition;
    if (!recognition) {
      this.log("Not connected \u2014 nothing to close");
      return;
    }
    this.log("Closing recognition");
    this._intentionalStop = true;
    try {
      recognition.stop();
    } catch (err) {
      this.log("stop() failed while closing", err);
    }
    this.recognition = null;
    this.cleanup();
  }
  /** Force-destroy the recognition and drop all transcript subscribers. */
  destroy() {
    this.log("Destroying");
    this._intentionalStop = true;
    if (this.recognition) {
      try {
        this.recognition.abort();
      } catch (err) {
        this.log("abort() failed while destroying", err);
      }
      this.recognition.onresult = null;
      this.recognition.onerror = null;
      this.recognition.onend = null;
      this.recognition.onstart = null;
      this.recognition = null;
    }
    this.cleanup();
    this.transcriptCallbacks.clear();
  }
  /**
   * Suspend the adapter (e.g. when the device goes offline).
   *
   * Stops recognition if it is running and marks the adapter as suspended
   * so that auto-restart does not trigger.
   */
  suspend() {
    if (this._suspended) return;
    this._suspended = true;
    this._intentionalStop = true;
    if (this.recognition && this._connected) {
      try {
        this.recognition.stop();
      } catch (err) {
        this.log("stop() failed while suspending", err);
      }
    }
    this.log("Suspended");
  }
  /**
   * Resume after a prior `suspend()`. Restarts the current recognition
   * instance when one exists and is not running; after `close()` or
   * `destroy()` there is none, so nothing is restarted.
   */
  resume() {
    if (!this._suspended) return;
    this._suspended = false;
    this._intentionalStop = false;
    this.log("Resumed");
    if (this.recognition && !this._connected) {
      try {
        this.recognition.start();
      } catch {
        this.log("Failed to restart recognition after resume");
      }
    }
  }
  // -------------------------------------------------------------------------
  // Result handling
  // -------------------------------------------------------------------------
  /**
   * Handle SpeechRecognition result events.
   *
   * Only results from `event.resultIndex` onward are processed, and only the
   * first alternative of each. Blank transcripts are skipped. A confidence
   * that is not greater than 0 is reported as 0.85.
   */
  handleResult(event) {
    for (let i = event.resultIndex; i < event.results.length; i++) {
      const result = event.results[i];
      if (!result) continue;
      const alternative = result[0];
      if (!alternative) continue;
      const transcript = alternative.transcript;
      if (!transcript || transcript.trim() === "") continue;
      const isFinal = result.isFinal;
      const confidence = alternative.confidence > 0 ? alternative.confidence : 0.85;
      const transcriptEvent = {
        text: transcript,
        isFinal,
        confidence,
        timestamp: Date.now()
      };
      this.log(
        isFinal ? "Final transcript:" : "Interim transcript:",
        transcript,
        `(${(confidence * 100).toFixed(1)}%)`
      );
      this.emitTranscript(transcriptEvent);
    }
  }
  // -------------------------------------------------------------------------
  // Error handling
  // -------------------------------------------------------------------------
  /**
   * Handle SpeechRecognition errors.
   *
   * `no-speech`, `aborted` and `network` are only logged. `not-allowed`,
   * `service-not-allowed` and `language-not-supported` additionally set the
   * intentional-stop flag so the `onend` handler does not auto-restart.
   */
  handleError(event) {
    const errorType = event.error;
    this.log("Recognition error:", errorType, event.message);
    if (errorType === "no-speech" || errorType === "aborted") {
      this.log("Non-fatal error \u2014 will recover");
      return;
    }
    if (errorType === "network") {
      this.log("Network error \u2014 recognition may auto-restart");
      return;
    }
    if (errorType === "not-allowed" || errorType === "service-not-allowed" || errorType === "language-not-supported") {
      this._intentionalStop = true;
      this.log("Fatal recognition error \u2014 stopping");
    }
  }
  // -------------------------------------------------------------------------
  // Subscriber notification
  // -------------------------------------------------------------------------
  /**
   * Emit a transcript event to all registered callbacks. A throwing callback
   * is logged and does not stop delivery to the others.
   */
  emitTranscript(event) {
    for (const cb of this.transcriptCallbacks) {
      try {
        cb(event);
      } catch (err) {
        console.error(LOG_PREFIX9, "Transcript callback threw:", err);
      }
    }
  }
  // -------------------------------------------------------------------------
  // SpeechRecognition resolution
  // -------------------------------------------------------------------------
  /**
   * Resolve the SpeechRecognition constructor, with the webkit-prefixed
   * fallback. Returns null if not available.
   */
  resolveSpeechRecognition() {
    if (typeof window === "undefined") return null;
    const win = window;
    if (typeof win["SpeechRecognition"] !== "undefined") {
      return win["SpeechRecognition"];
    }
    if (typeof globalThis.webkitSpeechRecognition !== "undefined") {
      return globalThis.webkitSpeechRecognition;
    }
    return null;
  }
  // -------------------------------------------------------------------------
  // Cleanup
  // -------------------------------------------------------------------------
  /**
   * Detach and abort a recognition instance left over from an earlier
   * `connect()` so it cannot keep listening or fire handlers.
   */
  discardStaleRecognition() {
    const stale = this.recognition;
    if (!stale) return;
    stale.onstart = null;
    stale.onresult = null;
    stale.onerror = null;
    stale.onend = null;
    try {
      stale.abort();
    } catch (err) {
      this.log("abort() failed on stale recognition", err);
    }
    this.recognition = null;
  }
  /** Reset internal state after disconnection. */
  cleanup() {
    this._connected = false;
  }
  // -------------------------------------------------------------------------
  // Logging
  // -------------------------------------------------------------------------
  /** Conditional debug logging. */
  log(...args) {
    if (this.debugEnabled) {
      console.debug(LOG_PREFIX9, ...args);
    }
  }
};
7347
+ var LOG_PREFIX10 = "[GuideKit:WebSpeech-TTS]";
7348
+ var DEFAULT_RATE = 1;
7349
+ var DEFAULT_PITCH = 1;
7350
+ var DEFAULT_LANGUAGE4 = "en-US";
7351
+ var WebSpeechTTS = class {
7352
+ // ---- Configuration -------------------------------------------------------
7353
+ voiceName;
7354
+ rate;
7355
+ pitch;
7356
+ language;
7357
+ debugEnabled;
7358
+ // ---- Internal state ------------------------------------------------------
7359
+ _connected = false;
7360
+ _suspended = false;
7361
+ /** Cached voice object resolved from voiceName. */
7362
+ _resolvedVoice = null;
7363
+ /** Whether voices have been loaded (they load async in some browsers). */
7364
+ _voicesLoaded = false;
7365
+ /** Registered audio-event callbacks. */
7366
+ audioCallbacks = /* @__PURE__ */ new Set();
7367
+ // -------------------------------------------------------------------------
7368
+ // Constructor
7369
+ // -------------------------------------------------------------------------
7370
+ constructor(options = {}) {
7371
+ this.voiceName = options.voice ?? null;
7372
+ this.rate = options.rate ?? DEFAULT_RATE;
7373
+ this.pitch = options.pitch ?? DEFAULT_PITCH;
7374
+ this.language = options.language ?? DEFAULT_LANGUAGE4;
7375
+ this.debugEnabled = options.debug ?? false;
7376
+ this.log("WebSpeechTTS created", {
7377
+ voice: this.voiceName,
7378
+ rate: this.rate,
7379
+ pitch: this.pitch,
7380
+ language: this.language
7381
+ });
7382
+ }
7383
+ // -------------------------------------------------------------------------
7384
+ // Static methods
7385
+ // -------------------------------------------------------------------------
7386
+ /**
7387
+ * Check whether the Web Speech API SpeechSynthesis is supported in the
7388
+ * current environment. Safe to call in SSR (returns false).
7389
+ */
7390
+ static isSupported() {
7391
+ if (typeof window === "undefined") return false;
7392
+ return typeof window.speechSynthesis !== "undefined";
7393
+ }
7394
+ // -------------------------------------------------------------------------
7395
+ // Public API
7396
+ // -------------------------------------------------------------------------
7397
+ /** Whether the adapter is connected (ready for speech). */
7398
+ get isConnected() {
7399
+ return this._connected;
7400
+ }
7401
+ /**
7402
+ * Initialize the adapter.
7403
+ *
7404
+ * Loads available voices and resolves the requested voice name. Voice
7405
+ * loading is async in some browsers (notably Chrome) so we wait for
7406
+ * the `voiceschanged` event if needed.
7407
+ */
7408
+ async connect() {
7409
+ if (this._connected) {
7410
+ this.log("Already connected \u2014 skipping");
7411
+ return;
7412
+ }
7413
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
7414
+ this.log("SpeechSynthesis not available \u2014 cannot connect");
7415
+ return;
7416
+ }
7417
+ await this.loadVoices();
7418
+ if (this.voiceName) {
7419
+ this._resolvedVoice = this.findVoice(this.voiceName);
7420
+ if (this._resolvedVoice) {
7421
+ this.log("Resolved voice:", this._resolvedVoice.name);
7422
+ } else {
7423
+ this.log("Requested voice not found:", this.voiceName, "\u2014 using browser default");
7424
+ }
7425
+ }
7426
+ this._connected = true;
7427
+ this.log("Connected");
7428
+ }
7429
+ /**
7430
+ * Speak the given text using the browser's speech synthesis engine.
7431
+ *
7432
+ * Returns a Promise that resolves when the utterance completes or is
7433
+ * cancelled. Rejects if an error occurs during synthesis.
7434
+ *
7435
+ * Also emits audio events to registered callbacks for VoicePipeline
7436
+ * compatibility.
7437
+ */
7438
+ speak(text) {
7439
+ if (!this._connected || this._suspended) {
7440
+ this.log("Cannot speak \u2014 not connected or suspended");
7441
+ return;
7442
+ }
7443
+ if (!text || !text.trim()) {
7444
+ return;
7445
+ }
7446
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
7447
+ return;
7448
+ }
7449
+ const synth = window.speechSynthesis;
7450
+ const utterance = new SpeechSynthesisUtterance(text);
7451
+ utterance.lang = this.language;
7452
+ utterance.rate = this.rate;
7453
+ utterance.pitch = this.pitch;
7454
+ if (this._resolvedVoice) {
7455
+ utterance.voice = this._resolvedVoice;
7456
+ }
7457
+ utterance.onstart = () => {
7458
+ this.log("Utterance started:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
7459
+ this.emitAudio({
7460
+ audio: new ArrayBuffer(0),
7461
+ isFinal: false,
7462
+ timestamp: Date.now()
7463
+ });
7464
+ };
7465
+ utterance.onend = () => {
7466
+ this.log("Utterance ended");
7467
+ this.emitAudio({
7468
+ audio: new ArrayBuffer(0),
7469
+ isFinal: true,
7470
+ timestamp: Date.now()
7471
+ });
7472
+ };
7473
+ utterance.onerror = (event) => {
7474
+ if (event.error === "canceled") {
7475
+ this.log("Utterance cancelled");
7476
+ this.emitAudio({
7477
+ audio: new ArrayBuffer(0),
7478
+ isFinal: true,
7479
+ timestamp: Date.now()
7480
+ });
7481
+ return;
7482
+ }
7483
+ this.log("Utterance error:", event.error);
7484
+ this.emitAudio({
7485
+ audio: new ArrayBuffer(0),
7486
+ isFinal: true,
7487
+ timestamp: Date.now()
7488
+ });
7489
+ };
7490
+ this.log("Speaking:", text.slice(0, 80) + (text.length > 80 ? "..." : ""));
7491
+ synth.speak(utterance);
7492
+ }
7493
+ /**
7494
+ * Flush / finalize the current utterance.
7495
+ *
7496
+ * No-op for Web Speech API since each speak() call is a complete
7497
+ * utterance. Provided for interface compatibility with ElevenLabsTTS.
7498
+ */
7499
+ flush() {
7500
+ }
7501
+ /**
7502
+ * Register a callback to receive audio output events.
7503
+ *
7504
+ * For Web Speech API, these events have empty audio buffers and are
7505
+ * used to signal utterance start/end for VoicePipeline state management.
7506
+ *
7507
+ * @returns An unsubscribe function. Calling it more than once is safe.
7508
+ */
7509
+ onAudio(callback) {
7510
+ this.audioCallbacks.add(callback);
7511
+ let removed = false;
7512
+ return () => {
7513
+ if (removed) return;
7514
+ removed = true;
7515
+ this.audioCallbacks.delete(callback);
7516
+ };
7517
+ }
7518
+ /** Stop current speech synthesis and cancel any queued utterances. */
7519
+ stop() {
7520
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
7521
+ return;
7522
+ }
7523
+ this.log("Stopping speech synthesis");
7524
+ window.speechSynthesis.cancel();
7525
+ }
7526
+ /** Gracefully close the adapter. */
7527
+ close() {
7528
+ this.log("Closing");
7529
+ this.stop();
7530
+ this.cleanup();
7531
+ }
7532
+ /** Force-destroy the adapter. */
7533
+ destroy() {
7534
+ this.log("Destroying");
7535
+ this.stop();
7536
+ this.cleanup();
7537
+ this.audioCallbacks.clear();
7538
+ }
7539
+ /**
7540
+ * Suspend the adapter (e.g. when the device goes offline).
7541
+ *
7542
+ * Pauses any active speech synthesis and marks the adapter as suspended.
7543
+ */
7544
+ suspend() {
7545
+ if (this._suspended) return;
7546
+ this._suspended = true;
7547
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
7548
+ window.speechSynthesis.pause();
7549
+ }
7550
+ this.log("Suspended");
7551
+ }
7552
+ /**
7553
+ * Resume after a prior `suspend()`.
7554
+ */
7555
+ resume() {
7556
+ if (!this._suspended) return;
7557
+ this._suspended = false;
7558
+ if (typeof window !== "undefined" && typeof window.speechSynthesis !== "undefined") {
7559
+ window.speechSynthesis.resume();
7031
7560
  }
7032
- if (parsed["audio"] === void 0 || parsed["audio"] === null) {
7033
- this.log("Non-audio message received", parsed);
7561
+ this.log("Resumed");
7562
+ }
7563
+ // -------------------------------------------------------------------------
7564
+ // Voice loading
7565
+ // -------------------------------------------------------------------------
7566
+ /**
7567
+ * Load available voices from the browser.
7568
+ *
7569
+ * In Chrome and some other browsers, voices load asynchronously after
7570
+ * the page loads. We wait for the `voiceschanged` event with a timeout.
7571
+ */
7572
+ async loadVoices() {
7573
+ if (this._voicesLoaded) return;
7574
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") return;
7575
+ const synth = window.speechSynthesis;
7576
+ let voices = synth.getVoices();
7577
+ if (voices.length > 0) {
7578
+ this._voicesLoaded = true;
7579
+ this.log("Voices loaded:", voices.length, "available");
7034
7580
  return;
7035
7581
  }
7036
- const audioBase64 = parsed["audio"];
7037
- const isFinal = parsed["isFinal"] === true;
7038
- if (!audioBase64 || audioBase64.length === 0) {
7039
- if (isFinal) {
7040
- this.emitAudio({
7041
- audio: new ArrayBuffer(0),
7042
- isFinal: true,
7043
- timestamp: Date.now()
7044
- });
7045
- }
7046
- return;
7582
+ await new Promise((resolve) => {
7583
+ const onVoicesChanged = () => {
7584
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
7585
+ clearTimeout(timeout);
7586
+ voices = synth.getVoices();
7587
+ this._voicesLoaded = true;
7588
+ this.log("Voices loaded (async):", voices.length, "available");
7589
+ resolve();
7590
+ };
7591
+ const timeout = setTimeout(() => {
7592
+ synth.removeEventListener("voiceschanged", onVoicesChanged);
7593
+ this._voicesLoaded = true;
7594
+ this.log("Voices loading timed out \u2014 proceeding with defaults");
7595
+ resolve();
7596
+ }, 2e3);
7597
+ synth.addEventListener("voiceschanged", onVoicesChanged);
7598
+ });
7599
+ }
7600
+ /**
7601
+ * Find a voice by name (case-insensitive partial match).
7602
+ */
7603
+ findVoice(name) {
7604
+ if (typeof window === "undefined" || typeof window.speechSynthesis === "undefined") {
7605
+ return null;
7047
7606
  }
7048
- let audioBuffer;
7049
- try {
7050
- audioBuffer = base64ToArrayBuffer(audioBase64);
7051
- } catch (err) {
7052
- this.log("Failed to decode base64 audio", err);
7053
- return;
7607
+ const voices = window.speechSynthesis.getVoices();
7608
+ const lowerName = name.toLowerCase();
7609
+ const exact = voices.find((v) => v.name.toLowerCase() === lowerName);
7610
+ if (exact) return exact;
7611
+ const partial = voices.find((v) => v.name.toLowerCase().includes(lowerName));
7612
+ if (partial) return partial;
7613
+ if (lowerName.includes("-") || lowerName.length <= 5) {
7614
+ const langMatch = voices.find((v) => v.lang.toLowerCase().startsWith(lowerName));
7615
+ if (langMatch) return langMatch;
7054
7616
  }
7055
- const audioEvent = {
7056
- audio: audioBuffer,
7057
- isFinal,
7058
- timestamp: Date.now()
7059
- };
7060
- this.log(
7061
- isFinal ? "Final audio chunk:" : "Audio chunk:",
7062
- `${audioBuffer.byteLength} bytes`
7063
- );
7064
- this.emitAudio(audioEvent);
7617
+ return null;
7065
7618
  }
7066
- // -----------------------------------------------------------------------
7619
+ // -------------------------------------------------------------------------
7067
7620
  // Subscriber notification
7068
- // -----------------------------------------------------------------------
7621
+ // -------------------------------------------------------------------------
7069
7622
  /**
7070
7623
  * Emit an audio event to all registered callbacks.
7071
7624
  *
@@ -7077,39 +7630,28 @@ ${callDescriptions}` : callDescriptions;
7077
7630
  try {
7078
7631
  cb(event);
7079
7632
  } catch (err) {
7080
- console.error(LOG_PREFIX7, "Audio callback threw:", err);
7633
+ console.error(LOG_PREFIX10, "Audio callback threw:", err);
7081
7634
  }
7082
7635
  }
7083
7636
  }
7084
- // -----------------------------------------------------------------------
7085
- // URL building
7086
- // -----------------------------------------------------------------------
7087
- /** Build the ElevenLabs streaming TTS endpoint URL. */
7088
- buildUrl() {
7089
- const params = new URLSearchParams({
7090
- model_id: this.modelId
7091
- });
7092
- return `wss://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
7093
- }
7094
- // -----------------------------------------------------------------------
7637
+ // -------------------------------------------------------------------------
7095
7638
  // Cleanup
7096
- // -----------------------------------------------------------------------
7097
- /** Reset internal state after disconnection. */
7639
+ // -------------------------------------------------------------------------
7640
+ /** Reset internal state. */
7098
7641
  cleanup() {
7099
7642
  this._connected = false;
7100
- this.bosSent = false;
7101
7643
  }
7102
- // -----------------------------------------------------------------------
7644
+ // -------------------------------------------------------------------------
7103
7645
  // Logging
7104
- // -----------------------------------------------------------------------
7646
+ // -------------------------------------------------------------------------
7105
7647
  /** Conditional debug logging. */
7106
7648
  log(...args) {
7107
7649
  if (this.debugEnabled) {
7108
- console.debug(LOG_PREFIX7, ...args);
7650
+ console.debug(LOG_PREFIX10, ...args);
7109
7651
  }
7110
7652
  }
7111
7653
  };
7112
- var LOG_PREFIX8 = "[GuideKit:Voice]";
7654
+ var LOG_PREFIX11 = "[GuideKit:Voice]";
7113
7655
  var JITTER_BUFFER_MS = 150;
7114
7656
  var ECHO_WINDOW_MS = 3e3;
7115
7657
  var ECHO_OVERLAP_THRESHOLD = 0.6;
@@ -7217,17 +7759,42 @@ ${callDescriptions}` : callDescriptions;
7217
7759
  cause: err instanceof Error ? err : void 0
7218
7760
  });
7219
7761
  }
7220
- this._stt = new DeepgramSTT({
7221
- apiKey: this._sttConfig.apiKey,
7222
- model: this._sttConfig.model,
7223
- debug: this._debug
7224
- });
7225
- this._tts = new ElevenLabsTTS({
7226
- apiKey: this._ttsConfig.apiKey,
7227
- voiceId: this._ttsConfig.voiceId,
7228
- modelId: this._ttsConfig.modelId,
7229
- debug: this._debug
7230
- });
7762
+ if (this._sttConfig.provider === "deepgram") {
7763
+ this._stt = new DeepgramSTT({
7764
+ apiKey: this._sttConfig.apiKey,
7765
+ model: this._sttConfig.model,
7766
+ debug: this._debug
7767
+ });
7768
+ } else if (this._sttConfig.provider === "elevenlabs") {
7769
+ this._stt = new ElevenLabsSTT({
7770
+ apiKey: this._sttConfig.apiKey,
7771
+ language: this._sttConfig.language,
7772
+ debug: this._debug
7773
+ });
7774
+ } else {
7775
+ this._stt = new WebSpeechSTT({
7776
+ language: this._sttConfig.language,
7777
+ continuous: this._sttConfig.continuous,
7778
+ interimResults: this._sttConfig.interimResults,
7779
+ debug: this._debug
7780
+ });
7781
+ }
7782
+ if (this._ttsConfig.provider === "elevenlabs") {
7783
+ this._tts = new ElevenLabsTTS({
7784
+ apiKey: this._ttsConfig.apiKey,
7785
+ voiceId: this._ttsConfig.voiceId,
7786
+ modelId: "modelId" in this._ttsConfig ? this._ttsConfig.modelId : void 0,
7787
+ debug: this._debug
7788
+ });
7789
+ } else {
7790
+ this._tts = new WebSpeechTTS({
7791
+ voice: this._ttsConfig.voice,
7792
+ rate: this._ttsConfig.rate,
7793
+ pitch: this._ttsConfig.pitch,
7794
+ language: this._ttsConfig.language,
7795
+ debug: this._debug
7796
+ });
7797
+ }
7231
7798
  this._log("Initialization complete");
7232
7799
  }
7233
7800
  // ────────────────────────────────────────────────────────────────────
@@ -7367,10 +7934,11 @@ ${callDescriptions}` : callDescriptions;
7367
7934
  // ────────────────────────────────────────────────────────────────────
7368
7935
  // speak()
7369
7936
  // ────────────────────────────────────────────────────────────────────
7370
- /** Speak text via ElevenLabs TTS. */
7937
+ /** Speak text via TTS (ElevenLabs or Web Speech API). */
7371
7938
  async speak(text) {
7372
7939
  if (this._destroyed || !text.trim()) return;
7373
- if (!this._tts || !this._audioContext) {
7940
+ const isWebSpeechTTS = this._tts instanceof WebSpeechTTS;
7941
+ if (!this._tts || !this._audioContext && !isWebSpeechTTS) {
7374
7942
  this._log("TTS or AudioContext not available \u2014 cannot speak");
7375
7943
  this._bus.emit("voice:degraded", { reason: "TTS not available", fallback: "text" });
7376
7944
  this._setState("idle");
@@ -7414,11 +7982,24 @@ ${callDescriptions}` : callDescriptions;
7414
7982
  }
7415
7983
  resolve();
7416
7984
  };
7417
- this._unsubTTSAudio = this._tts.onAudio((event) => {
7418
- this._handleTTSAudio(event, done);
7419
- });
7420
- this._tts.speak(text);
7421
- this._tts.flush();
7985
+ if (isWebSpeechTTS) {
7986
+ this._unsubTTSAudio = this._tts.onAudio(
7987
+ (event) => {
7988
+ if (event.isFinal) {
7989
+ done();
7990
+ }
7991
+ }
7992
+ );
7993
+ this._tts.speak(text);
7994
+ } else {
7995
+ this._unsubTTSAudio = this._tts.onAudio(
7996
+ (event) => {
7997
+ this._handleTTSAudio(event, done);
7998
+ }
7999
+ );
8000
+ this._tts.speak(text);
8001
+ this._tts.flush();
8002
+ }
7422
8003
  });
7423
8004
  }
7424
8005
  // ────────────────────────────────────────────────────────────────────
@@ -7447,7 +8028,9 @@ ${callDescriptions}` : callDescriptions;
7447
8028
  this._pendingLLMAbort.abort();
7448
8029
  this._pendingLLMAbort = null;
7449
8030
  }
7450
- if (this._tts?.isConnected) {
8031
+ if (this._tts instanceof WebSpeechTTS) {
8032
+ this._tts.stop();
8033
+ } else if (this._tts?.isConnected) {
7451
8034
  this._tts.close();
7452
8035
  }
7453
8036
  }
@@ -7536,7 +8119,7 @@ ${callDescriptions}` : callDescriptions;
7536
8119
  try {
7537
8120
  cb(next, prev);
7538
8121
  } catch (err) {
7539
- console.error(LOG_PREFIX8, "State change callback threw:", err);
8122
+ console.error(LOG_PREFIX11, "State change callback threw:", err);
7540
8123
  }
7541
8124
  }
7542
8125
  }
@@ -7667,7 +8250,7 @@ ${callDescriptions}` : callDescriptions;
7667
8250
  try {
7668
8251
  cb(text, isFinal);
7669
8252
  } catch (err) {
7670
- console.error(LOG_PREFIX8, "Transcript callback threw:", err);
8253
+ console.error(LOG_PREFIX11, "Transcript callback threw:", err);
7671
8254
  }
7672
8255
  }
7673
8256
  if (isFinal && this._state === "listening") {
@@ -7770,8 +8353,14 @@ ${callDescriptions}` : callDescriptions;
7770
8353
  * sequential playback via AudioBufferSourceNode.
7771
8354
  */
7772
8355
  _decodeAndSchedule(audioData, onDone) {
8356
+ let onDoneCalled = false;
8357
+ const safeOnDone = onDone ? () => {
8358
+ if (onDoneCalled) return;
8359
+ onDoneCalled = true;
8360
+ onDone();
8361
+ } : void 0;
7773
8362
  if (!this._audioContext || this._state !== "speaking") {
7774
- onDone?.();
8363
+ safeOnDone?.();
7775
8364
  return;
7776
8365
  }
7777
8366
  const ctx = this._audioContext;
@@ -7780,7 +8369,7 @@ ${callDescriptions}` : callDescriptions;
7780
8369
  copy,
7781
8370
  (decodedBuffer) => {
7782
8371
  if (this._state !== "speaking" || !this._audioContext) {
7783
- onDone?.();
8372
+ safeOnDone?.();
7784
8373
  return;
7785
8374
  }
7786
8375
  const source = ctx.createBufferSource();
@@ -7793,8 +8382,8 @@ ${callDescriptions}` : callDescriptions;
7793
8382
  if (this._lastScheduledSource === source) {
7794
8383
  this._lastScheduledSource = null;
7795
8384
  }
7796
- if (onDone) {
7797
- onDone();
8385
+ if (safeOnDone) {
8386
+ safeOnDone();
7798
8387
  }
7799
8388
  };
7800
8389
  const now = ctx.currentTime;
@@ -7810,7 +8399,7 @@ ${callDescriptions}` : callDescriptions;
7810
8399
  },
7811
8400
  (err) => {
7812
8401
  this._log("Failed to decode audio chunk:", err);
7813
- onDone?.();
8402
+ safeOnDone?.();
7814
8403
  }
7815
8404
  );
7816
8405
  }
@@ -7869,11 +8458,11 @@ ${callDescriptions}` : callDescriptions;
7869
8458
  // ════════════════════════════════════════════════════════════════════
7870
8459
  _log(...args) {
7871
8460
  if (this._debug) {
7872
- console.debug(LOG_PREFIX8, ...args);
8461
+ console.debug(LOG_PREFIX11, ...args);
7873
8462
  }
7874
8463
  }
7875
8464
  };
7876
- var LOG_PREFIX9 = "[GuideKit:Visual]";
8465
+ var LOG_PREFIX12 = "[GuideKit:Visual]";
7877
8466
  var DEFAULT_OVERLAY_COLOR = "rgba(0, 0, 0, 0.5)";
7878
8467
  var DEFAULT_SPOTLIGHT_COLOR = "#4a9eed";
7879
8468
  var DEFAULT_ANIMATION_DURATION = 300;
@@ -8790,14 +9379,14 @@ ${callDescriptions}` : callDescriptions;
8790
9379
  if (!this.debug) return;
8791
9380
  if (typeof console !== "undefined") {
8792
9381
  if (data) {
8793
- console.log(`${LOG_PREFIX9} ${message}`, data);
9382
+ console.log(`${LOG_PREFIX12} ${message}`, data);
8794
9383
  } else {
8795
- console.log(`${LOG_PREFIX9} ${message}`);
9384
+ console.log(`${LOG_PREFIX12} ${message}`);
8796
9385
  }
8797
9386
  }
8798
9387
  }
8799
9388
  };
8800
- var LOG_PREFIX10 = "[GuideKit:Awareness]";
9389
+ var LOG_PREFIX13 = "[GuideKit:Awareness]";
8801
9390
  var DEFAULT_IDLE_TIMEOUT_MS = 6e4;
8802
9391
  var DEFAULT_DWELL_TIMEOUT_MS = 8e3;
8803
9392
  var DEFAULT_RAGE_CLICK_THRESHOLD = 3;
@@ -9159,11 +9748,11 @@ ${callDescriptions}` : callDescriptions;
9159
9748
  /** Conditional debug logging. */
9160
9749
  log(...args) {
9161
9750
  if (this.debugEnabled) {
9162
- console.debug(LOG_PREFIX10, ...args);
9751
+ console.debug(LOG_PREFIX13, ...args);
9163
9752
  }
9164
9753
  }
9165
9754
  };
9166
- var LOG_PREFIX11 = "[GuideKit:Proactive]";
9755
+ var LOG_PREFIX14 = "[GuideKit:Proactive]";
9167
9756
  var STORAGE_KEY = "guidekit:visited";
9168
9757
  var SEVEN_DAYS_MS = 7 * 24 * 60 * 60 * 1e3;
9169
9758
  var DWELL_COOLDOWNS = [3e4, 6e4, 12e4];
@@ -9201,7 +9790,7 @@ ${callDescriptions}` : callDescriptions;
9201
9790
  set quietMode(value) {
9202
9791
  this._quietMode = value;
9203
9792
  if (this.debug) {
9204
- console.debug(LOG_PREFIX11, `Quiet mode ${value ? "enabled" : "disabled"}`);
9793
+ console.debug(LOG_PREFIX14, `Quiet mode ${value ? "enabled" : "disabled"}`);
9205
9794
  }
9206
9795
  }
9207
9796
  // ---- Lifecycle -----------------------------------------------------------
@@ -9231,7 +9820,7 @@ ${callDescriptions}` : callDescriptions;
9231
9820
  })
9232
9821
  );
9233
9822
  if (this.debug) {
9234
- console.debug(LOG_PREFIX11, "Started \u2014 subscribed to awareness & dom events");
9823
+ console.debug(LOG_PREFIX14, "Started \u2014 subscribed to awareness & dom events");
9235
9824
  }
9236
9825
  }
9237
9826
  /** Unsubscribe all bus listeners and clear internal state. */
@@ -9246,7 +9835,7 @@ ${callDescriptions}` : callDescriptions;
9246
9835
  this.formTimers.clear();
9247
9836
  this.started = false;
9248
9837
  if (this.debug) {
9249
- console.debug(LOG_PREFIX11, "Stopped \u2014 all listeners removed");
9838
+ console.debug(LOG_PREFIX14, "Stopped \u2014 all listeners removed");
9250
9839
  }
9251
9840
  }
9252
9841
  /** Alias for {@link stop}. */
@@ -9281,7 +9870,7 @@ ${callDescriptions}` : callDescriptions;
9281
9870
  }, FORM_ABANDON_MS);
9282
9871
  this.formTimers.set(formSelector, timer);
9283
9872
  if (this.debug) {
9284
- console.debug(LOG_PREFIX11, `Form interaction started: ${formSelector}`);
9873
+ console.debug(LOG_PREFIX14, `Form interaction started: ${formSelector}`);
9285
9874
  }
9286
9875
  }
9287
9876
  /** Reset all cooldowns and internal tracking state (useful for testing). */
@@ -9295,7 +9884,7 @@ ${callDescriptions}` : callDescriptions;
9295
9884
  }
9296
9885
  this.formTimers.clear();
9297
9886
  if (this.debug) {
9298
- console.debug(LOG_PREFIX11, "All cooldowns and state reset");
9887
+ console.debug(LOG_PREFIX14, "All cooldowns and state reset");
9299
9888
  }
9300
9889
  }
9301
9890
  // ---- Internal handlers ---------------------------------------------------
@@ -9312,22 +9901,23 @@ ${callDescriptions}` : callDescriptions;
9312
9901
  message: "First-time visitor detected. Show a visual greeting (no audio)."
9313
9902
  }, "greeting");
9314
9903
  if (this.debug) {
9315
- console.debug(LOG_PREFIX11, "First visit \u2014 greeting triggered");
9904
+ console.debug(LOG_PREFIX14, "First visit \u2014 greeting triggered");
9316
9905
  }
9317
9906
  return;
9318
9907
  }
9319
9908
  const visitedAt = parseInt(visited, 10);
9320
- if (!Number.isNaN(visitedAt)) {
9321
- const elapsed = Date.now() - visitedAt;
9322
- if (elapsed <= SEVEN_DAYS_MS && this.debug) {
9323
- console.debug(LOG_PREFIX11, "Return visitor within 7 days \u2014 silent");
9324
- } else if (this.debug) {
9325
- console.debug(LOG_PREFIX11, "Return visitor after 7 days");
9326
- }
9909
+ if (Number.isNaN(visitedAt)) {
9910
+ return;
9911
+ }
9912
+ const elapsed = Date.now() - visitedAt;
9913
+ if (elapsed <= SEVEN_DAYS_MS && this.debug) {
9914
+ console.debug(LOG_PREFIX14, "Return visitor within 7 days \u2014 silent");
9915
+ } else if (this.debug) {
9916
+ console.debug(LOG_PREFIX14, "Return visitor after 7 days");
9327
9917
  }
9328
9918
  } catch {
9329
9919
  if (this.debug) {
9330
- console.warn(LOG_PREFIX11, "localStorage unavailable \u2014 skipping greeting check");
9920
+ console.warn(LOG_PREFIX14, "localStorage unavailable \u2014 skipping greeting check");
9331
9921
  }
9332
9922
  }
9333
9923
  }
@@ -9345,7 +9935,7 @@ ${callDescriptions}` : callDescriptions;
9345
9935
  const count = this.dwellCounts.get(sectionId) ?? 0;
9346
9936
  if (count >= DWELL_COOLDOWNS.length + 1) {
9347
9937
  if (this.debug) {
9348
- console.debug(LOG_PREFIX11, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
9938
+ console.debug(LOG_PREFIX14, `Dwell cap reached for section "${sectionId}" \u2014 suppressed`);
9349
9939
  }
9350
9940
  return;
9351
9941
  }
@@ -9355,7 +9945,7 @@ ${callDescriptions}` : callDescriptions;
9355
9945
  const lastFired = this.cooldowns.get(key) ?? 0;
9356
9946
  if (Date.now() - lastFired < cooldownMs) {
9357
9947
  if (this.debug) {
9358
- console.debug(LOG_PREFIX11, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
9948
+ console.debug(LOG_PREFIX14, `Dwell cooldown active for "${sectionId}" \u2014 suppressed`);
9359
9949
  }
9360
9950
  return;
9361
9951
  }
@@ -9371,7 +9961,7 @@ ${callDescriptions}` : callDescriptions;
9371
9961
  const sectionKey = selector;
9372
9962
  if (this.frustrationFired.has(sectionKey)) {
9373
9963
  if (this.debug) {
9374
- console.debug(LOG_PREFIX11, `Frustration already fired for "${selector}" \u2014 suppressed`);
9964
+ console.debug(LOG_PREFIX14, `Frustration already fired for "${selector}" \u2014 suppressed`);
9375
9965
  }
9376
9966
  return;
9377
9967
  }
@@ -9387,7 +9977,7 @@ ${callDescriptions}` : callDescriptions;
9387
9977
  const key = "navigation-commentary";
9388
9978
  if (this.isCooldownActive(key, NAVIGATION_COOLDOWN_MS)) {
9389
9979
  if (this.debug) {
9390
- console.debug(LOG_PREFIX11, "Navigation cooldown active \u2014 suppressed");
9980
+ console.debug(LOG_PREFIX14, "Navigation cooldown active \u2014 suppressed");
9391
9981
  }
9392
9982
  return;
9393
9983
  }
@@ -9410,7 +10000,7 @@ ${callDescriptions}` : callDescriptions;
9410
10000
  fireTrigger(partial, cooldownKey) {
9411
10001
  if (this._quietMode) {
9412
10002
  if (this.debug) {
9413
- console.debug(LOG_PREFIX11, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
10003
+ console.debug(LOG_PREFIX14, `Quiet mode \u2014 suppressed trigger: ${partial.type}`);
9414
10004
  }
9415
10005
  return;
9416
10006
  }
@@ -9420,13 +10010,13 @@ ${callDescriptions}` : callDescriptions;
9420
10010
  };
9421
10011
  this.cooldowns.set(cooldownKey, trigger.timestamp);
9422
10012
  if (this.debug) {
9423
- console.debug(LOG_PREFIX11, "Trigger fired:", trigger.type, trigger);
10013
+ console.debug(LOG_PREFIX14, "Trigger fired:", trigger.type, trigger);
9424
10014
  }
9425
10015
  if (this.onTrigger) {
9426
10016
  try {
9427
10017
  this.onTrigger(trigger);
9428
10018
  } catch (err) {
9429
- console.error(LOG_PREFIX11, "onTrigger callback error:", err);
10019
+ console.error(LOG_PREFIX14, "onTrigger callback error:", err);
9430
10020
  }
9431
10021
  }
9432
10022
  }
@@ -9437,7 +10027,7 @@ ${callDescriptions}` : callDescriptions;
9437
10027
  return Date.now() - lastFired < cooldownMs;
9438
10028
  }
9439
10029
  };
9440
- var LOG_PREFIX12 = "[GuideKit:RateLimiter]";
10030
+ var LOG_PREFIX15 = "[GuideKit:RateLimiter]";
9441
10031
  var DEFAULT_MAX_LLM_CALLS_PER_MINUTE = 10;
9442
10032
  var DEFAULT_MAX_STT_MINUTES_PER_SESSION = 60;
9443
10033
  var DEFAULT_MAX_TTS_CHARS_PER_SESSION = 5e4;
@@ -9532,7 +10122,19 @@ ${callDescriptions}` : callDescriptions;
9532
10122
  get sttMinutesUsed() {
9533
10123
  let totalMs = this.sttMs;
9534
10124
  if (this.sttStartedAt !== null) {
9535
- totalMs += Date.now() - this.sttStartedAt;
10125
+ const activeMs = Date.now() - this.sttStartedAt;
10126
+ const maxSessionMs = this.maxSTTMinutesPerSession * 6e4;
10127
+ const maxActiveMs = maxSessionMs * 2;
10128
+ if (activeMs > maxActiveMs) {
10129
+ console.warn(
10130
+ `${LOG_PREFIX15} STT stream running for ${Math.round(activeMs / 6e4)}min without sttStop() \u2014 capping at 2x session limit (${this.maxSTTMinutesPerSession * 2}min).`
10131
+ );
10132
+ this.sttMs += maxActiveMs;
10133
+ this.sttStartedAt = null;
10134
+ totalMs = this.sttMs;
10135
+ } else {
10136
+ totalMs += activeMs;
10137
+ }
9536
10138
  }
9537
10139
  return totalMs / 6e4;
9538
10140
  }
@@ -9604,7 +10206,7 @@ ${callDescriptions}` : callDescriptions;
9604
10206
  }
9605
10207
  log(...args) {
9606
10208
  if (this.debug) {
9607
- console.debug(LOG_PREFIX12, ...args);
10209
+ console.debug(LOG_PREFIX15, ...args);
9608
10210
  }
9609
10211
  }
9610
10212
  };
@@ -9835,7 +10437,7 @@ ${callDescriptions}` : callDescriptions;
9835
10437
  pt: pt2
9836
10438
  };
9837
10439
  var SUPPORTED_LOCALE_CODES = new Set(Object.keys(BUILTIN_LOCALES));
9838
- var LOG_PREFIX13 = "[GuideKit:I18n]";
10440
+ var LOG_PREFIX16 = "[GuideKit:I18n]";
9839
10441
  function isSupportedLocale(code) {
9840
10442
  return SUPPORTED_LOCALE_CODES.has(code);
9841
10443
  }
@@ -9873,7 +10475,7 @@ ${callDescriptions}` : callDescriptions;
9873
10475
  this.strings = strings;
9874
10476
  this.resolvedLocale = resolvedLocale;
9875
10477
  if (this.debug) {
9876
- console.debug(`${LOG_PREFIX13} Initialized with locale "${this.resolvedLocale}"`);
10478
+ console.debug(`${LOG_PREFIX16} Initialized with locale "${this.resolvedLocale}"`);
9877
10479
  }
9878
10480
  }
9879
10481
  // -------------------------------------------------------------------------
@@ -9884,9 +10486,9 @@ ${callDescriptions}` : callDescriptions;
9884
10486
  const value = this.strings[key];
9885
10487
  if (value === void 0) {
9886
10488
  if (this.debug) {
9887
- console.warn(`${LOG_PREFIX13} Missing translation key "${key}"`);
10489
+ console.warn(`${LOG_PREFIX16} Missing translation key "${key}"`);
9888
10490
  }
9889
- return en[key] ?? key;
10491
+ return en[key] ?? (typeof process !== "undefined" && process.env?.NODE_ENV === "production" ? key : `[MISSING: ${key}]`);
9890
10492
  }
9891
10493
  return value;
9892
10494
  }
@@ -9900,7 +10502,7 @@ ${callDescriptions}` : callDescriptions;
9900
10502
  this.strings = strings;
9901
10503
  this.resolvedLocale = resolvedLocale;
9902
10504
  if (this.debug) {
9903
- console.debug(`${LOG_PREFIX13} Locale changed to "${this.resolvedLocale}"`);
10505
+ console.debug(`${LOG_PREFIX16} Locale changed to "${this.resolvedLocale}"`);
9904
10506
  }
9905
10507
  }
9906
10508
  /** The current resolved locale code (e.g. 'en', 'fr', or 'custom'). */
@@ -9920,7 +10522,7 @@ ${callDescriptions}` : callDescriptions;
9920
10522
  if (locale === "auto") {
9921
10523
  const detected = detectLocaleFromDocument();
9922
10524
  if (this.debug) {
9923
- console.debug(`${LOG_PREFIX13} Auto-detected locale "${detected}"`);
10525
+ console.debug(`${LOG_PREFIX16} Auto-detected locale "${detected}"`);
9924
10526
  }
9925
10527
  return {
9926
10528
  strings: BUILTIN_LOCALES[detected],
@@ -9935,7 +10537,7 @@ ${callDescriptions}` : callDescriptions;
9935
10537
  }
9936
10538
  if (this.debug) {
9937
10539
  console.warn(
9938
- `${LOG_PREFIX13} Unknown locale "${String(locale)}", falling back to "en"`
10540
+ `${LOG_PREFIX16} Unknown locale "${String(locale)}", falling back to "en"`
9939
10541
  );
9940
10542
  }
9941
10543
  return {
@@ -9944,7 +10546,7 @@ ${callDescriptions}` : callDescriptions;
9944
10546
  };
9945
10547
  }
9946
10548
  };
9947
- var LOG_PREFIX14 = "[GuideKit:Auth]";
10549
+ var LOG_PREFIX17 = "[GuideKit:Auth]";
9948
10550
  var REFRESH_THRESHOLD = 0.8;
9949
10551
  var MAX_RETRY_ATTEMPTS = 3;
9950
10552
  var RETRY_BASE_MS = 1e3;
@@ -10223,7 +10825,7 @@ ${callDescriptions}` : callDescriptions;
10223
10825
  }
10224
10826
  log(message) {
10225
10827
  if (this.debug) {
10226
- console.debug(`${LOG_PREFIX14} ${message}`);
10828
+ console.debug(`${LOG_PREFIX17} ${message}`);
10227
10829
  }
10228
10830
  }
10229
10831
  };
@@ -10353,6 +10955,11 @@ ${callDescriptions}` : callDescriptions;
10353
10955
  debug: this._debug
10354
10956
  });
10355
10957
  await this.tokenManager.start();
10958
+ if (!this._options.llm) {
10959
+ console.warn(
10960
+ "[GuideKit] tokenEndpoint provided without llm config. The session token handles auth only \u2014 llm: { provider, apiKey } is still required for LLM calls. See: https://guidekit.dev/docs/provider#token-endpoint"
10961
+ );
10962
+ }
10356
10963
  this.resourceManager.register({
10357
10964
  name: "token-manager",
10358
10965
  cleanup: () => this.tokenManager?.destroy()
@@ -10475,21 +11082,50 @@ ${callDescriptions}` : callDescriptions;
10475
11082
  }
10476
11083
  });
10477
11084
  this.registerBuiltinTools();
10478
- if (this._options.stt && this._options.tts) {
10479
- const sttConfig = this._options.stt;
10480
- const ttsConfig = this._options.tts;
10481
- if (sttConfig.provider === "deepgram" && ttsConfig.provider === "elevenlabs") {
11085
+ {
11086
+ const sttConfig = this._options.stt ?? { provider: "web-speech" };
11087
+ const ttsConfig = this._options.tts ?? { provider: "web-speech" };
11088
+ let voiceSttConfig;
11089
+ let voiceTtsConfig;
11090
+ if (sttConfig.provider === "deepgram") {
11091
+ voiceSttConfig = {
11092
+ provider: "deepgram",
11093
+ apiKey: sttConfig.apiKey,
11094
+ model: sttConfig.model
11095
+ };
11096
+ } else if (sttConfig.provider === "elevenlabs") {
11097
+ voiceSttConfig = {
11098
+ provider: "elevenlabs",
11099
+ apiKey: sttConfig.apiKey,
11100
+ language: sttConfig.language
11101
+ };
11102
+ } else {
11103
+ voiceSttConfig = {
11104
+ provider: "web-speech",
11105
+ language: sttConfig.language,
11106
+ continuous: sttConfig.continuous,
11107
+ interimResults: sttConfig.interimResults
11108
+ };
11109
+ }
11110
+ if (ttsConfig.provider === "elevenlabs") {
11111
+ voiceTtsConfig = {
11112
+ provider: "elevenlabs",
11113
+ apiKey: ttsConfig.apiKey,
11114
+ voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
11115
+ };
11116
+ } else {
11117
+ voiceTtsConfig = {
11118
+ provider: "web-speech",
11119
+ voice: ttsConfig.voice,
11120
+ rate: ttsConfig.rate,
11121
+ pitch: ttsConfig.pitch,
11122
+ language: ttsConfig.language
11123
+ };
11124
+ }
11125
+ try {
10482
11126
  this.voicePipeline = new VoicePipeline({
10483
- sttConfig: {
10484
- provider: "deepgram",
10485
- apiKey: sttConfig.apiKey,
10486
- model: "model" in sttConfig ? sttConfig.model : void 0
10487
- },
10488
- ttsConfig: {
10489
- provider: "elevenlabs",
10490
- apiKey: ttsConfig.apiKey,
10491
- voiceId: "voiceId" in ttsConfig ? ttsConfig.voiceId : void 0
10492
- },
11127
+ sttConfig: voiceSttConfig,
11128
+ ttsConfig: voiceTtsConfig,
10493
11129
  debug: this._debug
10494
11130
  });
10495
11131
  this.voicePipeline.onStateChange((state, previous) => {
@@ -10522,6 +11158,11 @@ ${callDescriptions}` : callDescriptions;
10522
11158
  name: "voice-pipeline",
10523
11159
  cleanup: () => this.voicePipeline?.destroy()
10524
11160
  });
11161
+ } catch (_err) {
11162
+ this.voicePipeline = null;
11163
+ if (this._debug) {
11164
+ console.debug("[GuideKit:Core] Voice pipeline unavailable in this environment");
11165
+ }
10525
11166
  }
10526
11167
  }
10527
11168
  const session = this.contextManager.restoreSession();
@@ -10646,7 +11287,7 @@ ${callDescriptions}` : callDescriptions;
10646
11287
  return responseText;
10647
11288
  } catch (error) {
10648
11289
  const err = error instanceof GuideKitError ? error : new GuideKitError({
10649
- code: "UNKNOWN",
11290
+ code: ErrorCodes.UNKNOWN,
10650
11291
  message: error instanceof Error ? error.message : "Unknown error",
10651
11292
  recoverable: false,
10652
11293
  suggestion: "Check the console for details."
@@ -10902,172 +11543,11 @@ ${callDescriptions}` : callDescriptions;
10902
11543
  };
10903
11544
  }
10904
11545
  /**
10905
- * Register all built-in tool handlers with the ToolExecutor.
10906
- * Called once during init() after VisualGuidance and all subsystems are ready.
11546
+ * Unified built-in tool specifications: single source of truth for both
11547
+ * tool definitions (sent to LLM) and handler registration.
10907
11548
  */
10908
- registerBuiltinTools() {
10909
- if (!this.toolExecutor) return;
10910
- this.toolExecutor.registerTool({
10911
- name: "highlight",
10912
- execute: async (args) => {
10913
- const sectionId = args.sectionId;
10914
- const selector = args.selector;
10915
- const tooltip = args.tooltip;
10916
- const position = args.position;
10917
- const result = this.highlight({ sectionId, selector, tooltip, position });
10918
- return { success: result };
10919
- }
10920
- });
10921
- this.toolExecutor.registerTool({
10922
- name: "dismissHighlight",
10923
- execute: async () => {
10924
- this.dismissHighlight();
10925
- return { success: true };
10926
- }
10927
- });
10928
- this.toolExecutor.registerTool({
10929
- name: "scrollToSection",
10930
- execute: async (args) => {
10931
- const sectionId = args.sectionId;
10932
- const offset = args.offset;
10933
- this.scrollToSection(sectionId, offset);
10934
- return { success: true };
10935
- }
10936
- });
10937
- this.toolExecutor.registerTool({
10938
- name: "navigate",
10939
- execute: async (args) => {
10940
- const href = args.href;
10941
- const result = await this.navigate(href);
10942
- return { success: result, navigatedTo: result ? href : null };
10943
- }
10944
- });
10945
- this.toolExecutor.registerTool({
10946
- name: "startTour",
10947
- execute: async (args) => {
10948
- const sectionIds = args.sectionIds;
10949
- const mode = args.mode ?? "manual";
10950
- this.startTour(sectionIds, mode);
10951
- return { success: true, steps: sectionIds.length };
10952
- }
10953
- });
10954
- this.toolExecutor.registerTool({
10955
- name: "readPageContent",
10956
- execute: async (args) => {
10957
- const sectionId = args.sectionId;
10958
- const query = args.query;
10959
- const model = this._currentPageModel;
10960
- if (!model) return { error: "No page model available" };
10961
- if (sectionId) {
10962
- const section = model.sections.find((s) => s.id === sectionId);
10963
- if (section) {
10964
- const contentMapResult = await this.contextManager.getContent(sectionId);
10965
- return {
10966
- sectionId: section.id,
10967
- label: section.label,
10968
- summary: section.summary,
10969
- contentMap: contentMapResult
10970
- };
10971
- }
10972
- return { error: `Section "${sectionId}" not found` };
10973
- }
10974
- if (query) {
10975
- const queryLower = query.toLowerCase();
10976
- const matches = model.sections.filter(
10977
- (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
10978
- );
10979
- return {
10980
- query,
10981
- results: matches.slice(0, 5).map((s) => ({
10982
- sectionId: s.id,
10983
- label: s.label,
10984
- snippet: s.summary?.slice(0, 200)
10985
- }))
10986
- };
10987
- }
10988
- return { error: "Provide either sectionId or query" };
10989
- }
10990
- });
10991
- this.toolExecutor.registerTool({
10992
- name: "getVisibleSections",
10993
- execute: async () => {
10994
- const model = this._currentPageModel;
10995
- if (!model) return { sections: [] };
10996
- return {
10997
- sections: model.sections.slice(0, 10).map((s) => ({
10998
- id: s.id,
10999
- label: s.label,
11000
- selector: s.selector,
11001
- score: s.score
11002
- }))
11003
- };
11004
- }
11005
- });
11006
- this.toolExecutor.registerTool({
11007
- name: "clickElement",
11008
- execute: async (args) => {
11009
- if (typeof document === "undefined") return { success: false, error: "Not in browser" };
11010
- const selector = args.selector;
11011
- const el = document.querySelector(selector);
11012
- if (!el) return { success: false, error: `Element not found: ${selector}` };
11013
- if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
11014
- const clickableRules = this._options.options?.clickableSelectors;
11015
- const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
11016
- try {
11017
- return el.matches(pattern);
11018
- } catch {
11019
- return selector === pattern;
11020
- }
11021
- }) ?? false;
11022
- if (!isInDevAllowList) {
11023
- const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
11024
- try {
11025
- return el.matches(pattern);
11026
- } catch {
11027
- return false;
11028
- }
11029
- });
11030
- if (defaultDenied) {
11031
- return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
11032
- }
11033
- }
11034
- if (clickableRules?.deny?.length) {
11035
- const denied = clickableRules.deny.some((pattern) => {
11036
- try {
11037
- return el.matches(pattern);
11038
- } catch {
11039
- return selector === pattern;
11040
- }
11041
- });
11042
- if (denied) {
11043
- return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
11044
- }
11045
- }
11046
- if (clickableRules?.allow?.length && !isInDevAllowList) {
11047
- return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
11048
- }
11049
- el.click();
11050
- return { success: true };
11051
- }
11052
- });
11053
- this.toolExecutor.registerTool({
11054
- name: "executeCustomAction",
11055
- execute: async (args) => {
11056
- const actionId = args.actionId;
11057
- const params = args.params ?? {};
11058
- const action = this.customActions.get(actionId);
11059
- if (!action) return { error: `Unknown action: ${actionId}` };
11060
- try {
11061
- const result = await action.handler(params);
11062
- return { success: true, result };
11063
- } catch (err) {
11064
- return { success: false, error: err instanceof Error ? err.message : String(err) };
11065
- }
11066
- }
11067
- });
11068
- }
11069
- getToolDefinitions() {
11070
- const builtinTools = [
11549
+ getBuiltinToolSpecs() {
11550
+ return [
11071
11551
  {
11072
11552
  name: "highlight",
11073
11553
  description: "Spotlight an element on the page to draw the user's attention. Use sectionId to highlight a page section, or selector for a specific CSS selector. Optionally add a tooltip with explanation text.",
@@ -11077,13 +11557,27 @@ ${callDescriptions}` : callDescriptions;
11077
11557
  tooltip: { type: "string", description: "Text to show in tooltip" },
11078
11558
  position: { type: "string", enum: ["top", "bottom", "left", "right", "auto"], description: "Tooltip position" }
11079
11559
  },
11080
- schemaVersion: 1
11560
+ required: [],
11561
+ schemaVersion: 1,
11562
+ execute: async (args) => {
11563
+ const sectionId = args.sectionId;
11564
+ const selector = args.selector;
11565
+ const tooltip = args.tooltip;
11566
+ const position = args.position;
11567
+ const result = this.highlight({ sectionId, selector, tooltip, position });
11568
+ return { success: result };
11569
+ }
11081
11570
  },
11082
11571
  {
11083
11572
  name: "dismissHighlight",
11084
11573
  description: "Remove the current spotlight overlay.",
11085
11574
  parameters: {},
11086
- schemaVersion: 1
11575
+ required: [],
11576
+ schemaVersion: 1,
11577
+ execute: async () => {
11578
+ this.dismissHighlight();
11579
+ return { success: true };
11580
+ }
11087
11581
  },
11088
11582
  {
11089
11583
  name: "scrollToSection",
@@ -11092,7 +11586,14 @@ ${callDescriptions}` : callDescriptions;
11092
11586
  sectionId: { type: "string", description: "ID of the section to scroll to" },
11093
11587
  offset: { type: "number", description: "Pixel offset for sticky headers" }
11094
11588
  },
11095
- schemaVersion: 1
11589
+ required: ["sectionId"],
11590
+ schemaVersion: 1,
11591
+ execute: async (args) => {
11592
+ const sectionId = args.sectionId;
11593
+ const offset = args.offset;
11594
+ this.scrollToSection(sectionId, offset);
11595
+ return { success: true };
11596
+ }
11096
11597
  },
11097
11598
  {
11098
11599
  name: "navigate",
@@ -11100,7 +11601,13 @@ ${callDescriptions}` : callDescriptions;
11100
11601
  parameters: {
11101
11602
  href: { type: "string", description: "URL or path to navigate to (same-origin only)" }
11102
11603
  },
11103
- schemaVersion: 1
11604
+ required: ["href"],
11605
+ schemaVersion: 1,
11606
+ execute: async (args) => {
11607
+ const href = args.href;
11608
+ const result = await this.navigate(href);
11609
+ return { success: result, navigatedTo: result ? href : null };
11610
+ }
11104
11611
  },
11105
11612
  {
11106
11613
  name: "startTour",
@@ -11109,7 +11616,14 @@ ${callDescriptions}` : callDescriptions;
11109
11616
  sectionIds: { type: "array", items: { type: "string" }, description: "Section IDs in tour order" },
11110
11617
  mode: { type: "string", enum: ["auto", "manual"], description: "auto advances automatically; manual waits for user" }
11111
11618
  },
11112
- schemaVersion: 1
11619
+ required: ["sectionIds"],
11620
+ schemaVersion: 1,
11621
+ execute: async (args) => {
11622
+ const sectionIds = args.sectionIds;
11623
+ const mode = args.mode ?? "manual";
11624
+ this.startTour(sectionIds, mode);
11625
+ return { success: true, steps: sectionIds.length };
11626
+ }
11113
11627
  },
11114
11628
  {
11115
11629
  name: "readPageContent",
@@ -11118,13 +11632,61 @@ ${callDescriptions}` : callDescriptions;
11118
11632
  sectionId: { type: "string", description: "Section ID to read" },
11119
11633
  query: { type: "string", description: "Keyword to search for across sections" }
11120
11634
  },
11121
- schemaVersion: 1
11635
+ required: [],
11636
+ schemaVersion: 1,
11637
+ execute: async (args) => {
11638
+ const sectionId = args.sectionId;
11639
+ const query = args.query;
11640
+ const model = this._currentPageModel;
11641
+ if (!model) return { error: "No page model available" };
11642
+ if (sectionId) {
11643
+ const section = model.sections.find((s) => s.id === sectionId);
11644
+ if (section) {
11645
+ const contentMapResult = await this.contextManager.getContent(sectionId);
11646
+ return {
11647
+ sectionId: section.id,
11648
+ label: section.label,
11649
+ summary: section.summary,
11650
+ contentMap: contentMapResult
11651
+ };
11652
+ }
11653
+ return { error: `Section "${sectionId}" not found` };
11654
+ }
11655
+ if (query) {
11656
+ const queryLower = query.toLowerCase();
11657
+ const matches = model.sections.filter(
11658
+ (s) => s.label?.toLowerCase().includes(queryLower) || s.summary?.toLowerCase().includes(queryLower)
11659
+ );
11660
+ return {
11661
+ query,
11662
+ results: matches.slice(0, 5).map((s) => ({
11663
+ sectionId: s.id,
11664
+ label: s.label,
11665
+ snippet: s.summary?.slice(0, 200)
11666
+ }))
11667
+ };
11668
+ }
11669
+ return { error: "Provide either sectionId or query" };
11670
+ }
11122
11671
  },
11123
11672
  {
11124
11673
  name: "getVisibleSections",
11125
11674
  description: "Get the list of sections currently visible in the user viewport.",
11126
11675
  parameters: {},
11127
- schemaVersion: 1
11676
+ required: [],
11677
+ schemaVersion: 1,
11678
+ execute: async () => {
11679
+ const model = this._currentPageModel;
11680
+ if (!model) return { sections: [] };
11681
+ return {
11682
+ sections: model.sections.slice(0, 10).map((s) => ({
11683
+ id: s.id,
11684
+ label: s.label,
11685
+ selector: s.selector,
11686
+ score: s.score
11687
+ }))
11688
+ };
11689
+ }
11128
11690
  },
11129
11691
  {
11130
11692
  name: "clickElement",
@@ -11132,7 +11694,52 @@ ${callDescriptions}` : callDescriptions;
11132
11694
  parameters: {
11133
11695
  selector: { type: "string", description: "CSS selector of the element to click" }
11134
11696
  },
11135
- schemaVersion: 1
11697
+ required: ["selector"],
11698
+ schemaVersion: 1,
11699
+ execute: async (args) => {
11700
+ if (typeof document === "undefined") return { success: false, error: "Not in browser" };
11701
+ const selector = args.selector;
11702
+ const el = document.querySelector(selector);
11703
+ if (!el) return { success: false, error: `Element not found: ${selector}` };
11704
+ if (!(el instanceof HTMLElement)) return { success: false, error: "Element is not clickable" };
11705
+ const clickableRules = this._options.options?.clickableSelectors;
11706
+ const isInDevAllowList = clickableRules?.allow?.some((pattern) => {
11707
+ try {
11708
+ return el.matches(pattern);
11709
+ } catch {
11710
+ return selector === pattern;
11711
+ }
11712
+ }) ?? false;
11713
+ if (!isInDevAllowList) {
11714
+ const defaultDenied = DEFAULT_CLICK_DENY.some((pattern) => {
11715
+ try {
11716
+ return el.matches(pattern);
11717
+ } catch {
11718
+ return false;
11719
+ }
11720
+ });
11721
+ if (defaultDenied) {
11722
+ return { success: false, error: `Selector "${selector}" matches the default deny list. Add it to clickableSelectors.allow to override.` };
11723
+ }
11724
+ }
11725
+ if (clickableRules?.deny?.length) {
11726
+ const denied = clickableRules.deny.some((pattern) => {
11727
+ try {
11728
+ return el.matches(pattern);
11729
+ } catch {
11730
+ return selector === pattern;
11731
+ }
11732
+ });
11733
+ if (denied) {
11734
+ return { success: false, error: `Selector "${selector}" is blocked by the deny list.` };
11735
+ }
11736
+ }
11737
+ if (clickableRules?.allow?.length && !isInDevAllowList) {
11738
+ return { success: false, error: `Selector "${selector}" is not in the allowed clickable selectors list.` };
11739
+ }
11740
+ el.click();
11741
+ return { success: true };
11742
+ }
11136
11743
  },
11137
11744
  {
11138
11745
  name: "executeCustomAction",
@@ -11141,9 +11748,37 @@ ${callDescriptions}` : callDescriptions;
11141
11748
  actionId: { type: "string", description: "ID of the custom action" },
11142
11749
  params: { type: "object", description: "Parameters for the action" }
11143
11750
  },
11144
- schemaVersion: 1
11751
+ required: ["actionId"],
11752
+ schemaVersion: 1,
11753
+ execute: async (args) => {
11754
+ const actionId = args.actionId;
11755
+ const params = args.params ?? {};
11756
+ const action = this.customActions.get(actionId);
11757
+ if (!action) return { error: `Unknown action: ${actionId}` };
11758
+ try {
11759
+ const result = await action.handler(params);
11760
+ return { success: true, result };
11761
+ } catch (err) {
11762
+ return { success: false, error: err instanceof Error ? err.message : String(err) };
11763
+ }
11764
+ }
11145
11765
  }
11146
11766
  ];
11767
+ }
11768
+ /**
11769
+ * Register all built-in tool handlers with the ToolExecutor.
11770
+ * Called once during init() after VisualGuidance and all subsystems are ready.
11771
+ */
11772
+ registerBuiltinTools() {
11773
+ if (!this.toolExecutor) return;
11774
+ for (const spec of this.getBuiltinToolSpecs()) {
11775
+ this.toolExecutor.registerTool({ name: spec.name, execute: spec.execute });
11776
+ }
11777
+ }
11778
+ getToolDefinitions() {
11779
+ const builtinTools = this.getBuiltinToolSpecs().map(
11780
+ ({ execute: _execute, ...def }) => def
11781
+ );
11147
11782
  for (const [actionId, action] of this.customActions) {
11148
11783
  builtinTools.push({
11149
11784
  name: `action_${actionId}`,
@@ -11608,7 +12243,7 @@ ${callDescriptions}` : callDescriptions;
11608
12243
  );
11609
12244
  }
11610
12245
  }
11611
- var VERSION = "0.1.0-beta.1";
12246
+ var VERSION = "0.1.0-beta.2";
11612
12247
  return __toCommonJS(index_exports);
11613
12248
  })();
11614
12249
  /*! Bundled license information: