cursor-buddy 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1000,6 +1000,8 @@ var ScreenCaptureService = class {
1000
1000
  * Preparation is allowed to run ahead of playback so server synthesis can
1001
1001
  * overlap with the currently playing segment, but the returned playback tasks
1002
1002
  * still execute one-by-one in enqueue order.
1003
+ *
1004
+ * Supports pausing after the current segment completes (for approval flows).
1003
1005
  */
1004
1006
  var TTSPlaybackQueue = class {
1005
1007
  error = null;
@@ -1009,6 +1011,10 @@ var TTSPlaybackQueue = class {
1009
1011
  playbackChain = Promise.resolve();
1010
1012
  prepare;
1011
1013
  signal;
1014
+ isPaused = false;
1015
+ pausePromise = null;
1016
+ pauseResolver = null;
1017
+ pendingSegments = [];
1012
1018
  constructor(options) {
1013
1019
  this.onError = options.onError;
1014
1020
  this.onPlaybackStart = options.onPlaybackStart;
@@ -1021,11 +1027,15 @@ var TTSPlaybackQueue = class {
1021
1027
  enqueue(text) {
1022
1028
  const normalizedText = text.trim();
1023
1029
  if (!normalizedText || this.error || this.signal?.aborted) return;
1030
+ if (this.isPaused) return;
1024
1031
  const preparedPlaybackTask = this.prepare(normalizedText, this.signal);
1032
+ this.pendingSegments.push(normalizedText);
1025
1033
  preparedPlaybackTask.catch((error) => {
1026
1034
  this.fail(toError(error));
1027
1035
  });
1028
1036
  this.playbackChain = this.playbackChain.then(async () => {
1037
+ if (this.signal?.aborted) return;
1038
+ if (this.pausePromise) await this.pausePromise;
1029
1039
  if (this.signal?.aborted) return;
1030
1040
  const play = await preparedPlaybackTask;
1031
1041
  if (this.signal?.aborted) return;
@@ -1034,11 +1044,43 @@ var TTSPlaybackQueue = class {
1034
1044
  this.onPlaybackStart?.();
1035
1045
  }
1036
1046
  await play();
1047
+ const index = this.pendingSegments.indexOf(normalizedText);
1048
+ if (index !== -1) this.pendingSegments.splice(index, 1);
1037
1049
  }).catch((error) => {
1038
1050
  this.fail(toError(error));
1039
1051
  });
1040
1052
  }
1041
1053
  /**
1054
+ * Pause the queue after the currently playing segment completes.
1055
+ * New segments enqueued while paused will be discarded.
1056
+ */
1057
+ pauseAfterCurrent() {
1058
+ if (this.isPaused) return;
1059
+ this.isPaused = true;
1060
+ this.pausePromise = new Promise((resolve) => {
1061
+ this.pauseResolver = resolve;
1062
+ });
1063
+ this.pendingSegments = [];
1064
+ }
1065
+ /**
1066
+ * Resume playback after a pause.
1067
+ */
1068
+ resume() {
1069
+ if (!this.isPaused) return;
1070
+ this.isPaused = false;
1071
+ if (this.pauseResolver) {
1072
+ this.pauseResolver();
1073
+ this.pauseResolver = null;
1074
+ this.pausePromise = null;
1075
+ }
1076
+ }
1077
+ /**
1078
+ * Check if the queue is currently paused.
1079
+ */
1080
+ isPausedState() {
1081
+ return this.isPaused;
1082
+ }
1083
+ /**
1042
1084
  * Wait until every queued segment has either played or the queue failed.
1043
1085
  */
1044
1086
  async waitForCompletion() {
@@ -1442,12 +1484,12 @@ function createStateMachine(initial = "idle") {
1442
1484
  };
1443
1485
  }
1444
1486
  //#endregion
1445
- //#region src/core/utils/ui-stream-parser.ts
1487
+ //#region src/core/stream/parser.ts
1446
1488
  /**
1447
1489
  * Parse a single line from the UI message stream.
1448
1490
  * The stream format is SSE with "data: " prefix followed by JSON.
1449
1491
  */
1450
- function parseUIStreamLine(line) {
1492
+ function parseStreamLine(line) {
1451
1493
  const trimmed = line.trim();
1452
1494
  if (!trimmed) return null;
1453
1495
  let jsonStr = trimmed;
@@ -1458,17 +1500,46 @@ function parseUIStreamLine(line) {
1458
1500
  switch (chunk.type) {
1459
1501
  case "text-delta": return {
1460
1502
  type: "text-delta",
1461
- delta: chunk.delta ?? ""
1503
+ delta: typeof chunk.delta === "string" ? chunk.delta : ""
1462
1504
  };
1463
- case "tool-input-available": return {
1464
- type: "tool-input-available",
1465
- toolName: chunk.toolName ?? "",
1466
- input: chunk.input
1505
+ case "tool-call": return {
1506
+ type: "tool-call",
1507
+ toolCallId: typeof chunk.toolCallId === "string" ? chunk.toolCallId : "",
1508
+ toolName: typeof chunk.toolName === "string" ? chunk.toolName : "",
1509
+ args: chunk.args
1510
+ };
1511
+ case "tool-approval-request": return {
1512
+ type: "tool-approval-request",
1513
+ approvalId: typeof chunk.approvalId === "string" ? chunk.approvalId : "",
1514
+ toolCallId: typeof chunk.toolCallId === "string" ? chunk.toolCallId : "",
1515
+ toolName: typeof chunk.toolName === "string" ? chunk.toolName : "",
1516
+ args: chunk.args
1517
+ };
1518
+ case "tool-result": return {
1519
+ type: "tool-result",
1520
+ toolCallId: typeof chunk.toolCallId === "string" ? chunk.toolCallId : "",
1521
+ result: chunk.result
1522
+ };
1523
+ case "tool-result-error": return {
1524
+ type: "tool-result-error",
1525
+ toolCallId: typeof chunk.toolCallId === "string" ? chunk.toolCallId : "",
1526
+ error: typeof chunk.error === "string" ? chunk.error : "Unknown error"
1467
1527
  };
1468
1528
  case "finish": return { type: "finish" };
1469
1529
  case "error": return {
1470
1530
  type: "error",
1471
- errorText: chunk.errorText ?? "Unknown error"
1531
+ errorText: typeof chunk.errorText === "string" ? chunk.errorText : "Unknown error"
1532
+ };
1533
+ case "tool-input-available": return {
1534
+ type: "tool-call",
1535
+ toolCallId: typeof chunk.toolCallId === "string" ? chunk.toolCallId : `legacy-${Date.now()}`,
1536
+ toolName: typeof chunk.toolName === "string" ? chunk.toolName : "",
1537
+ args: chunk.input
1538
+ };
1539
+ case "tool-output-available": return {
1540
+ type: "tool-result",
1541
+ toolCallId: typeof chunk.toolCallId === "string" ? chunk.toolCallId : "",
1542
+ result: chunk.output
1472
1543
  };
1473
1544
  default: return { type: "unknown" };
1474
1545
  }
@@ -1477,24 +1548,23 @@ function parseUIStreamLine(line) {
1477
1548
  }
1478
1549
  }
1479
1550
  /**
1480
- * Check if a tool call is a point tool call with valid input.
1551
+ * Parse multiple lines from the stream buffer
1481
1552
  */
1482
- function isPointToolCall(chunk) {
1483
- return chunk.type === "tool-input-available" && chunk.toolName === "point" && chunk.input != null && typeof chunk.input === "object" && "elementId" in chunk.input && "label" in chunk.input;
1553
function parseStreamBuffer(buffer) {
  const pieces = buffer.split("\n");
  // Whatever follows the last newline is an unfinished line; hand it back so
  // the caller can prepend it to the next read.
  const remainder = pieces.pop() ?? "";
  // parseStreamLine yields a falsy value for lines that carry no chunk;
  // those are dropped here.
  const chunks = pieces
    .map((line) => parseStreamLine(line))
    .filter(Boolean);
  return { chunks, remainder };
}
1485
1566
  //#endregion
1486
- //#region src/core/utils/response-processor.ts
1487
- const COMMON_ABBREVIATIONS = [
1488
- "mr.",
1489
- "mrs.",
1490
- "ms.",
1491
- "dr.",
1492
- "prof.",
1493
- "sr.",
1494
- "jr.",
1495
- "e.g.",
1496
- "i.e."
1497
- ];
1567
+ //#region src/core/speech/sentences.ts
1498
1568
  const CLOSING_PUNCTUATION = new Set([
1499
1569
  "\"",
1500
1570
  "'",
@@ -1512,8 +1582,6 @@ function isLikelySentenceBoundary(text, index) {
1512
1582
  const previousChar = text[index - 1] ?? "";
1513
1583
  const nextChar = text[index + 1] ?? "";
1514
1584
  if (/\d/.test(previousChar) && /\d/.test(nextChar)) return false;
1515
- const lookback = text.slice(Math.max(0, index - 10), index + 1).toLowerCase();
1516
- if (COMMON_ABBREVIATIONS.some((abbreviation) => lookback.endsWith(abbreviation))) return false;
1517
1585
  return true;
1518
1586
  }
1519
1587
  function findBoundaryEnd(text, start) {
@@ -1526,16 +1594,17 @@ function findBoundaryEnd(text, start) {
1526
1594
  if (!isLikelySentenceBoundary(text, index)) continue;
1527
1595
  let end = index + 1;
1528
1596
  while (end < text.length && CLOSING_PUNCTUATION.has(text[end] ?? "")) end++;
1529
- if (end < text.length) {
1530
- const nextChar = text[end] ?? "";
1531
- if (!/\s/.test(nextChar) && !/[A-Z0-9]/.test(nextChar)) continue;
1532
- }
1597
+ if (end < text.length && !/\s/.test(text[end] ?? "")) continue;
1533
1598
  while (end < text.length && /\s/.test(text[end] ?? "")) end++;
1534
1599
  return end;
1535
1600
  }
1536
1601
  return null;
1537
1602
  }
1538
- function extractCompletedSegments(text) {
1603
+ /**
1604
+ * Extract completed sentences from text.
1605
+ * Returns the consumed length and extracted segments.
1606
+ */
1607
+ function extractCompletedSentences(text) {
1539
1608
  const segments = [];
1540
1609
  let consumedLength = 0;
1541
1610
  while (consumedLength < text.length) {
@@ -1551,59 +1620,32 @@ function extractCompletedSegments(text) {
1551
1620
  };
1552
1621
  }
1553
1622
  /**
1554
- * Processes a streaming AI SDK UI message stream response.
1555
- * Extracts text for display/TTS and captures point tool calls.
1623
+ * Buffer that accumulates text and emits complete sentences for TTS.
1624
+ * Coalesces short segments to avoid choppy speech.
1556
1625
  */
1557
- var ProgressiveResponseProcessor = class {
1558
- consumedTextLength = 0;
1626
+ var SentenceBuffer = class {
1627
+ text = "";
1628
+ consumedLength = 0;
1559
1629
  pendingShortSegment = "";
1560
- rawText = "";
1561
- buffer = "";
1562
- pointToolCall = null;
1563
1630
  /**
1564
- * Push raw stream data and extract text chunks and tool calls.
1565
- * The UI message stream format is newline-delimited JSON.
1631
+ * Add text to the buffer and extract any complete sentences.
1566
1632
  */
1567
- push(chunk) {
1568
- this.buffer += chunk;
1569
- const lines = this.buffer.split("\n");
1570
- this.buffer = lines.pop() ?? "";
1571
- const newTextParts = [];
1572
- for (const line of lines) {
1573
- const parsed = parseUIStreamLine(line);
1574
- if (!parsed) continue;
1575
- if (parsed.type === "text-delta") newTextParts.push(parsed.delta);
1576
- else if (isPointToolCall(parsed)) {
1577
- if (!this.pointToolCall) this.pointToolCall = parsed.input;
1578
- }
1579
- }
1580
- if (newTextParts.length > 0) this.rawText += newTextParts.join("");
1581
- const { consumedLength, segments } = extractCompletedSegments(this.rawText.slice(this.consumedTextLength));
1582
- this.consumedTextLength += consumedLength;
1583
- return {
1584
- visibleText: this.rawText,
1585
- speechSegments: this.coalesceSegments(segments),
1586
- pointToolCall: this.pointToolCall
1587
- };
1633
+ push(delta) {
1634
+ this.text += delta;
1635
+ const { consumedLength, segments } = extractCompletedSentences(this.text.slice(this.consumedLength));
1636
+ this.consumedLength += consumedLength;
1637
+ return this.coalesceSegments(segments);
1588
1638
  }
1589
1639
  /**
1590
- * Finalize processing and return any remaining text/tool call.
1640
+ * Flush any remaining text as a final segment.
1591
1641
  */
1592
- finish() {
1593
- if (this.buffer) {
1594
- const parsed = parseUIStreamLine(this.buffer);
1595
- if (parsed?.type === "text-delta") this.rawText += parsed.delta;
1596
- else if (parsed && isPointToolCall(parsed) && !this.pointToolCall) this.pointToolCall = parsed.input;
1597
- this.buffer = "";
1598
- }
1599
- const trailingText = this.rawText.slice(this.consumedTextLength).trim();
1600
- const finalSegmentParts = [this.pendingShortSegment, trailingText].filter(Boolean);
1642
+ flush() {
1643
+ const trailingText = this.text.slice(this.consumedLength).trim();
1644
+ const finalParts = [this.pendingShortSegment, trailingText].filter(Boolean);
1601
1645
  this.pendingShortSegment = "";
1602
- return {
1603
- finalResponseText: this.rawText.trim(),
1604
- speechSegments: finalSegmentParts.length ? [finalSegmentParts.join(" ").trim()] : [],
1605
- pointToolCall: this.pointToolCall
1606
- };
1646
+ this.text = "";
1647
+ this.consumedLength = 0;
1648
+ return finalParts.length ? finalParts.join(" ").trim() : "";
1607
1649
  }
1608
1650
  coalesceSegments(segments) {
1609
1651
  const speechSegments = [];
@@ -1622,6 +1664,342 @@ var ProgressiveResponseProcessor = class {
1622
1664
  }
1623
1665
  };
1624
1666
  //#endregion
1667
+ //#region src/core/stream/processor.ts
1668
/**
 * Processes a streaming AI SDK UI message stream.
 *
 * Raw text is buffered until whole lines are available, each line is parsed
 * into a typed chunk, and every chunk is forwarded to the matching callback:
 * `onTextDelta`, `onSpeechSegment`, `onToolCall`, `onApprovalRequest`,
 * `onToolResult`, `onToolError`, `onFinish` and `onError`.
 * Callbacks that were not supplied are skipped instead of throwing.
 */
var StreamProcessor = class {
  /** Event handlers supplied by the owner of the stream. */
  callbacks;
  /** Turns text deltas into speech segments. */
  sentenceBuffer;
  /** Trailing partial line carried over between processChunk() calls. */
  buffer = "";
  /** Concatenation of every text delta seen so far. */
  responseText = "";
  /** Most recent approval request, or undefined when none was received. */
  pendingApproval = void 0;

  /**
   * @param callbacks Object holding any subset of the handlers listed on the class.
   */
  constructor(callbacks) {
    this.callbacks = callbacks ?? {};
    this.sentenceBuffer = new SentenceBuffer();
  }

  /**
   * Process a raw chunk from the stream.
   * Complete lines are dispatched immediately; an unfinished trailing line is
   * kept until more data arrives or finish() is called.
   */
  processChunk(chunk) {
    this.buffer += chunk;
    const { chunks, remainder } = parseStreamBuffer(this.buffer);
    this.buffer = remainder;
    for (const parsed of chunks) this.handleParsedChunk(parsed);
  }

  /**
   * Finalize processing and return the turn result.
   *
   * @returns `{ responseText, requiresApprovalContinuation, pendingApproval }`
   *   where `responseText` is trimmed and `requiresApprovalContinuation` is
   *   true when an approval request was seen.
   */
  finish() {
    if (this.buffer) {
      // The stream ended without a final newline: terminate the buffered text
      // so it is parsed as one last complete line.
      const { chunks } = parseStreamBuffer(`${this.buffer}\n`);
      for (const parsed of chunks) this.handleParsedChunk(parsed);
      this.buffer = "";
    }
    const remainingText = this.sentenceBuffer.flush();
    if (remainingText) this.callbacks.onSpeechSegment?.(remainingText);
    return {
      responseText: this.responseText.trim(),
      requiresApprovalContinuation: this.pendingApproval !== void 0,
      pendingApproval: this.pendingApproval,
    };
  }

  /**
   * Get the response text accumulated so far (untrimmed).
   */
  getResponseText() {
    return this.responseText;
  }

  /**
   * Route one parsed chunk to its callback. Chunks of type "unknown" (and any
   * other unhandled type) are ignored.
   */
  handleParsedChunk(chunk) {
    const { callbacks } = this;
    switch (chunk.type) {
      // Braces give the case its own scope so nothing declared here is
      // visible to the other cases.
      case "text-delta": {
        this.responseText += chunk.delta;
        callbacks.onTextDelta?.(chunk.delta);
        for (const sentence of this.sentenceBuffer.push(chunk.delta)) {
          callbacks.onSpeechSegment?.(sentence);
        }
        break;
      }
      case "tool-call":
        callbacks.onToolCall?.({
          toolCallId: chunk.toolCallId,
          toolName: chunk.toolName,
          args: chunk.args,
        });
        break;
      case "tool-approval-request": {
        const request = {
          approvalId: chunk.approvalId,
          toolCallId: chunk.toolCallId,
          toolName: chunk.toolName,
          args: chunk.args,
        };
        this.pendingApproval = request;
        // The handler gets its own copy, so the stored request is a distinct object.
        callbacks.onApprovalRequest?.({ ...request });
        break;
      }
      case "tool-result":
        callbacks.onToolResult?.({
          toolCallId: chunk.toolCallId,
          result: chunk.result,
        });
        break;
      case "tool-result-error":
        callbacks.onToolError?.({
          toolCallId: chunk.toolCallId,
          error: chunk.error,
        });
        break;
      case "finish":
        callbacks.onFinish?.();
        break;
      case "error":
        callbacks.onError?.(chunk.errorText);
        break;
      default:
        break;
    }
  }
};
1765
+ //#endregion
1766
+ //#region src/core/tools/labels.ts
1767
/**
 * Upper-case the first character of `str` and leave the rest untouched.
 * Empty (or otherwise falsy) input is handed back unchanged.
 */
function capitalize(str) {
  if (str) {
    const head = str.charAt(0);
    const tail = str.slice(1);
    return head.toUpperCase() + tail;
  }
  return str;
}
1774
/**
 * Convert a tool name to a lower-case, space-separated, human-readable form.
 *
 * Handles snake_case, kebab-case, camelCase/PascalCase, acronym boundaries
 * and digit-to-capital boundaries, e.g.
 *   "web_search"  -> "web search"
 *   "createNote"  -> "create note"
 *   "web-search"  -> "web search"
 *   "HTMLParser"  -> "html parser"
 *   "oauth2Token" -> "oauth2 token"
 * Runs of separators collapse to a single space and leading/trailing
 * separators are removed, so the result never starts or ends with a space.
 *
 * @param {string} toolName Raw tool identifier.
 * @returns {string} Humanized name.
 */
function humanizeToolName(toolName) {
  return toolName
    // snake_case / kebab-case / whitespace runs -> one space
    .replace(/[_\-\s]+/g, " ")
    // lower-or-digit followed by a capital: "createNote" -> "create Note"
    .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
    // end of an acronym followed by a capitalised word: "HTMLParser" -> "HTML Parser"
    .replace(/([A-Z]+)([A-Z][a-z])/g, "$1 $2")
    .trim()
    .toLowerCase();
}
1781
/**
 * Generate a default label for a tool based on its name and status.
 *
 * @param {string} toolName Raw tool identifier; humanized before use.
 * @param {string} status One of "pending", "awaiting_approval", "approved",
 *   "denied", "completed" or "failed".
 * @returns {string} The label. An unrecognised status falls back to the
 *   capitalized tool name, so the result is always a string.
 */
function generateToolLabel(toolName, status) {
  const humanName = humanizeToolName(toolName);
  const title = capitalize(humanName);
  switch (status) {
    // Both states read as "work in progress".
    case "pending":
    case "approved":
      return `${title}...`;
    case "awaiting_approval":
      return `Approve ${humanName}?`;
    case "denied":
      return "Cancelled";
    case "failed":
      return `${title} failed`;
    case "completed":
    default:
      return title;
  }
}
1795
/**
 * Resolve the label for a tool call using the display config.
 *
 * Candidates are tried in order: the entry for `toolName`, then the `"*"`
 * wildcard entry. An entry's `label` may be a static value or a function
 * called as `label(args, status)`. A candidate is skipped when it has no
 * label, or when its label (static or computed) is falsy, so a label
 * function may return nothing for statuses it does not care about.
 * When no candidate supplies a label the auto-generated one is used.
 *
 * @param {string} toolName Tool identifier used as the config key.
 * @param {*} args Tool arguments, passed through to label functions.
 * @param {string} status Current tool call status.
 * @param {object} [config] Display config keyed by tool name or "*".
 * @returns {*} The configured label, or the generated fallback.
 */
function resolveToolLabel(toolName, args, status, config) {
  for (const entry of [config?.[toolName], config?.["*"]]) {
    if (!entry?.label) continue;
    // Invoke through the entry so `this` inside a label method stays the entry.
    const label = typeof entry.label === "function" ? entry.label(args, status) : entry.label;
    if (label) return label;
  }
  return generateToolLabel(toolName, status);
}
1812
+ //#endregion
1813
+ //#region src/core/tools/manager.ts
1814
/**
 * Manages tool call state, display timing, and approval flow.
 *
 * `callbacks.onChange()` is invoked after every state change and
 * `callbacks.onApprovalResponse(approvalId, approved)` is awaited when a call
 * is approved or denied. `displayConfig` maps tool names (or `"*"` as a
 * wildcard) to display options; this class reads `mode`, `minDisplayTime`
 * and `onError` from it and passes it on to `resolveToolLabel`.
 *
 * Calls that reach a terminal state (completed, failed, denied) are removed
 * by a timer, and `getActiveToolCalls()` uses the same deadline as that
 * timer, so a finished call stays visible until it is actually removed.
 */
var ToolCallManager = class {
  /** Tool calls keyed by tool call id. */
  toolCalls = new Map();
  displayConfig;
  callbacks;
  /** Pending removal timers keyed by tool call id. */
  removalTimers = new Map();
  /** Timestamp (ms since epoch) at which each pending removal fires. */
  removalDeadlines = new Map();

  /**
   * @param callbacks `{ onChange, onApprovalResponse }`.
   * @param displayConfig Optional per-tool display options; defaults to `{}`.
   */
  constructor(callbacks, displayConfig) {
    this.callbacks = callbacks;
    this.displayConfig = displayConfig ?? {};
  }

  /**
   * Update the display configuration. A nullish value resets it to `{}`.
   */
  setDisplayConfig(config) {
    this.displayConfig = config ?? {};
  }

  /**
   * Handle a new tool call from the stream. The call starts as "pending".
   */
  handleToolCall(event) {
    // A timer left over from an earlier call with the same id must not
    // delete the call registered here.
    this.clearRemovalTimer(event.toolCallId);
    const status = "pending";
    this.toolCalls.set(event.toolCallId, {
      id: event.toolCallId,
      toolName: event.toolName,
      args: event.args,
      status,
      label: resolveToolLabel(event.toolName, event.args, status, this.displayConfig),
      enteredQueueAt: Date.now(),
    });
    this.callbacks.onChange();
  }

  /**
   * Handle an approval request for a tool call. An already-known call is
   * moved to "awaiting_approval"; an unknown one is created in that state.
   */
  handleApprovalRequest(event) {
    const status = "awaiting_approval";
    const existing = this.toolCalls.get(event.toolCallId);
    if (existing) {
      existing.status = status;
      existing.approvalId = event.approvalId;
      existing.label = resolveToolLabel(existing.toolName, existing.args, status, this.displayConfig);
    } else {
      this.toolCalls.set(event.toolCallId, {
        id: event.toolCallId,
        toolName: event.toolName,
        args: event.args,
        status,
        label: resolveToolLabel(event.toolName, event.args, status, this.displayConfig),
        approvalId: event.approvalId,
        enteredQueueAt: Date.now(),
      });
    }
    this.callbacks.onChange();
  }

  /**
   * Handle a successful tool result. Unknown ids are ignored.
   */
  handleToolResult(event) {
    const toolCall = this.toolCalls.get(event.toolCallId);
    if (!toolCall) return;
    toolCall.status = "completed";
    toolCall.result = event.result;
    toolCall.label = resolveToolLabel(toolCall.toolName, toolCall.args, "completed", this.displayConfig);
    this.scheduleRemoval(toolCall);
    this.callbacks.onChange();
  }

  /**
   * Handle a tool execution error. Unknown ids are ignored.
   *
   * The tool's configured `onError(error, args)` hook, when present, may
   * return `{ hide: true }` to drop the call immediately or `{ label }` to
   * override the failure label.
   */
  handleToolError(event) {
    const toolCall = this.toolCalls.get(event.toolCallId);
    if (!toolCall) return;
    const errorResult = this.getConfigFor(toolCall.toolName)?.onError?.(event.error, toolCall.args);
    if (errorResult && "hide" in errorResult && errorResult.hide) {
      this.clearRemovalTimer(event.toolCallId);
      this.toolCalls.delete(event.toolCallId);
      this.callbacks.onChange();
      return;
    }
    toolCall.status = "failed";
    toolCall.error = event.error;
    toolCall.label = errorResult && "label" in errorResult
      ? errorResult.label
      : resolveToolLabel(toolCall.toolName, toolCall.args, "failed", this.displayConfig);
    this.scheduleRemoval(toolCall);
    this.callbacks.onChange();
  }

  /**
   * Approve a pending tool call. Does nothing unless the call exists, is
   * awaiting approval and carries an approval id.
   */
  async approve(toolCallId) {
    const toolCall = this.toolCalls.get(toolCallId);
    if (!toolCall || toolCall.status !== "awaiting_approval") return;
    if (!toolCall.approvalId) return;
    toolCall.status = "approved";
    toolCall.label = resolveToolLabel(toolCall.toolName, toolCall.args, "approved", this.displayConfig);
    this.callbacks.onChange();
    await this.callbacks.onApprovalResponse(toolCall.approvalId, true);
  }

  /**
   * Deny a pending tool call. Same preconditions as approve(); the denied
   * call is scheduled for removal.
   */
  async deny(toolCallId) {
    const toolCall = this.toolCalls.get(toolCallId);
    if (!toolCall || toolCall.status !== "awaiting_approval") return;
    if (!toolCall.approvalId) return;
    toolCall.status = "denied";
    toolCall.label = resolveToolLabel(toolCall.toolName, toolCall.args, "denied", this.displayConfig);
    this.scheduleRemoval(toolCall);
    this.callbacks.onChange();
    await this.callbacks.onApprovalResponse(toolCall.approvalId, false);
  }

  /**
   * Manually dismiss a tool call bubble.
   */
  dismiss(toolCallId) {
    this.clearRemovalTimer(toolCallId);
    this.toolCalls.delete(toolCallId);
    this.callbacks.onChange();
  }

  /**
   * Get a tool call by ID.
   */
  getToolCall(id) {
    return this.toolCalls.get(id);
  }

  /**
   * Get all tool calls.
   */
  getToolCalls() {
    return Array.from(this.toolCalls.values());
  }

  /**
   * Get active (visible, non-expired) tool calls.
   *
   * Tools configured with `mode: "hidden"` are never active. Calls that are
   * pending, approved or awaiting approval are always active. Calls in a
   * terminal state are active until their scheduled removal deadline.
   */
  getActiveToolCalls() {
    const now = Date.now();
    return Array.from(this.toolCalls.values()).filter((toolCall) => {
      const config = this.getConfigFor(toolCall.toolName);
      if (config?.mode === "hidden") return false;
      const { status } = toolCall;
      if (status === "awaiting_approval" || status === "pending" || status === "approved") return true;
      const deadline = this.removalDeadlines.get(toolCall.id);
      if (deadline !== undefined) return now < deadline;
      // No removal scheduled: fall back to the window measured from creation.
      const minTime = config?.minDisplayTime ?? 1500;
      return now - toolCall.enteredQueueAt < minTime + 300;
    });
  }

  /**
   * Get the tool call awaiting approval, if any; otherwise null.
   */
  getPendingApproval() {
    for (const toolCall of this.toolCalls.values()) {
      if (toolCall.status === "awaiting_approval") return toolCall;
    }
    return null;
  }

  /**
   * Clear all tool calls and timers.
   */
  reset() {
    for (const timer of this.removalTimers.values()) clearTimeout(timer);
    this.removalTimers.clear();
    this.removalDeadlines.clear();
    this.toolCalls.clear();
    this.callbacks.onChange();
  }

  /** Display options for a tool, falling back to the "*" wildcard entry. */
  getConfigFor(toolName) {
    return this.displayConfig[toolName] ?? this.displayConfig["*"];
  }

  /**
   * Schedule removal of a finished call: once `minDisplayTime` (default
   * 1500 ms) has passed since it entered the queue, plus a fixed 300 ms.
   * NOTE(review): the 300 ms looks like an exit-animation allowance; confirm
   * against the UI code.
   */
  scheduleRemoval(toolCall) {
    this.clearRemovalTimer(toolCall.id);
    const minTime = this.getConfigFor(toolCall.toolName)?.minDisplayTime ?? 1500;
    const now = Date.now();
    const remaining = Math.max(0, minTime - (now - toolCall.enteredQueueAt)) + 300;
    this.removalDeadlines.set(toolCall.id, now + remaining);
    const timer = setTimeout(() => {
      this.toolCalls.delete(toolCall.id);
      this.removalTimers.delete(toolCall.id);
      this.removalDeadlines.delete(toolCall.id);
      this.callbacks.onChange();
    }, remaining);
    this.removalTimers.set(toolCall.id, timer);
  }

  /** Cancel the pending removal (timer and deadline) for a call, if any. */
  clearRemovalTimer(toolCallId) {
    const existing = this.removalTimers.get(toolCallId);
    if (existing !== undefined) {
      clearTimeout(existing);
      this.removalTimers.delete(toolCallId);
    }
    this.removalDeadlines.delete(toolCallId);
  }
};
2002
+ //#endregion
1625
2003
  //#region src/core/client.ts
1626
2004
  async function readErrorMessage(response, fallbackMessage) {
1627
2005
  try {
@@ -1640,8 +2018,10 @@ async function readErrorMessage(response, fallbackMessage) {
1640
2018
  * Manages the complete voice interaction flow:
1641
2019
  * idle -> listening -> processing -> responding -> idle
1642
2020
  *
1643
- * Supports interruption: pressing hotkey during any state aborts
1644
- * in-flight work and immediately transitions to listening.
2021
+ * Supports:
2022
+ * - Interruption via hotkey
2023
+ * - Tool call display with approval flow
2024
+ * - Point tool for cursor movement
1645
2025
  */
1646
2026
  var CursorBuddyClient = class {
1647
2027
  endpoint;
@@ -1653,14 +2033,17 @@ var CursorBuddyClient = class {
1653
2033
  screenCapture;
1654
2034
  pointerController;
1655
2035
  stateMachine;
2036
+ toolManager;
1656
2037
  liveTranscript = "";
1657
2038
  transcript = "";
1658
2039
  response = "";
1659
2040
  error = null;
1660
2041
  abortController = null;
1661
- historyCommittedForTurn = false;
1662
2042
  speechProviderForTurn = null;
1663
2043
  screenshotPromise = null;
2044
+ currentScreenshot = null;
2045
+ pendingApprovalResolver = null;
2046
+ playbackQueue = null;
1664
2047
  cachedSnapshot;
1665
2048
  listeners = /* @__PURE__ */ new Set();
1666
2049
  constructor(endpoint, options = {}, services = {}) {
@@ -1673,6 +2056,10 @@ var CursorBuddyClient = class {
1673
2056
  this.screenCapture = services.screenCapture ?? new ScreenCaptureService();
1674
2057
  this.pointerController = services.pointerController ?? new PointerController();
1675
2058
  this.stateMachine = createStateMachine();
2059
+ this.toolManager = new ToolCallManager({
2060
+ onChange: () => this.notify(),
2061
+ onApprovalResponse: async () => {}
2062
+ }, options.toolDisplay);
1676
2063
  this.cachedSnapshot = this.buildSnapshot();
1677
2064
  this.voiceCapture.onLevel((level) => $audioLevel.set(level));
1678
2065
  this.liveTranscription.onPartial((text) => {
@@ -1686,19 +2073,16 @@ var CursorBuddyClient = class {
1686
2073
  });
1687
2074
  this.pointerController.subscribe(() => this.notify());
1688
2075
  }
1689
- /**
1690
- * Start listening for voice input.
1691
- * Aborts any in-flight work from previous session.
1692
- */
1693
2076
  startListening() {
1694
2077
  this.abort();
1695
2078
  this.liveTranscript = "";
1696
2079
  this.transcript = "";
1697
2080
  this.response = "";
1698
2081
  this.error = null;
1699
- this.historyCommittedForTurn = false;
1700
2082
  this.speechProviderForTurn = null;
2083
+ this.currentScreenshot = null;
1701
2084
  this.pointerController.release();
2085
+ this.toolManager.reset();
1702
2086
  this.stateMachine.transition({ type: "HOTKEY_PRESSED" });
1703
2087
  this.notify();
1704
2088
  this.abortController = new AbortController();
@@ -1711,9 +2095,6 @@ var CursorBuddyClient = class {
1711
2095
  this.handleError(toError(error, "Failed to start listening"));
1712
2096
  });
1713
2097
  }
1714
- /**
1715
- * Stop listening and process the voice input.
1716
- */
1717
2098
  async stopListening() {
1718
2099
  if (this.stateMachine.getState() !== "listening") return;
1719
2100
  this.stateMachine.transition({ type: "HOTKEY_RELEASED" });
@@ -1728,14 +2109,8 @@ var CursorBuddyClient = class {
1728
2109
  };
1729
2110
  try {
1730
2111
  const [audioBlob, browserTranscript] = await Promise.all([this.voiceCapture.stop(), this.stopLiveTranscription()]);
1731
- let screenshot;
1732
- try {
1733
- if (!this.screenshotPromise) throw new Error("Screenshot was not started");
1734
- screenshot = await this.screenshotPromise;
1735
- } catch (screenshotError) {
1736
- const errorMessage = screenshotError instanceof Error ? `Failed to capture screenshot: ${screenshotError.message}` : "Failed to capture screenshot";
1737
- throw new Error(errorMessage);
1738
- }
2112
+ if (!this.screenshotPromise) throw new Error("Screenshot was not started");
2113
+ this.currentScreenshot = await this.screenshotPromise;
1739
2114
  if (turnFailure) throw turnFailure;
1740
2115
  if (signal?.aborted) return;
1741
2116
  const transcript = await this.resolveTranscript(browserTranscript, audioBlob, signal);
@@ -1746,47 +2121,18 @@ var CursorBuddyClient = class {
1746
2121
  this.options.onTranscript?.(transcript);
1747
2122
  this.notify();
1748
2123
  this.prepareSpeechMode();
1749
- const { cleanResponse, pointToolCall, playbackQueue } = await this.chatAndSpeak(transcript, screenshot, signal, {
1750
- onFailure: failTurn,
1751
- onPlaybackStart: () => {
1752
- this.stateMachine.transition({ type: "RESPONSE_STARTED" });
1753
- }
1754
- });
2124
+ const messages = [...$conversationHistory.get(), {
2125
+ role: "user",
2126
+ content: transcript
2127
+ }];
2128
+ const { responseText, updatedMessages } = await this.processChatLoop(messages, this.currentScreenshot, signal, failTurn);
1755
2129
  if (turnFailure) throw turnFailure;
1756
2130
  if (signal?.aborted) return;
1757
- this.options.onResponse?.(cleanResponse);
1758
- let pointTarget = null;
1759
- if (pointToolCall) {
1760
- const element = screenshot.elementRegistry.get(pointToolCall.elementId);
1761
- if (element) {
1762
- const rect = element.getBoundingClientRect();
1763
- pointTarget = {
1764
- x: Math.round(rect.left + rect.width / 2),
1765
- y: Math.round(rect.top + rect.height / 2),
1766
- label: pointToolCall.label
1767
- };
1768
- }
1769
- }
1770
- if (pointTarget) {
1771
- this.options.onPoint?.(pointTarget);
1772
- this.pointerController.pointAt(pointTarget);
1773
- }
1774
- await playbackQueue.waitForCompletion();
2131
+ this.options.onResponse?.(responseText);
2132
+ if (this.playbackQueue) await this.playbackQueue.waitForCompletion();
1775
2133
  if (turnFailure) throw turnFailure;
1776
2134
  if (signal?.aborted) return;
1777
- const newHistory = [
1778
- ...$conversationHistory.get(),
1779
- {
1780
- role: "user",
1781
- content: transcript
1782
- },
1783
- {
1784
- role: "assistant",
1785
- content: cleanResponse
1786
- }
1787
- ];
1788
- $conversationHistory.set(newHistory);
1789
- this.historyCommittedForTurn = true;
2135
+ $conversationHistory.set(updatedMessages);
1790
2136
  this.stateMachine.transition({ type: "TTS_COMPLETE" });
1791
2137
  } catch (err) {
1792
2138
  if (turnFailure) {
@@ -1797,16 +2143,10 @@ var CursorBuddyClient = class {
1797
2143
  this.handleError(toError(err));
1798
2144
  }
1799
2145
  }
1800
- /**
1801
- * Enable or disable the buddy.
1802
- */
1803
2146
  setEnabled(enabled) {
1804
2147
  $isEnabled.set(enabled);
1805
2148
  this.notify();
1806
2149
  }
1807
- /**
1808
- * Manually point at coordinates.
1809
- */
1810
2150
  pointAt(x, y, label) {
1811
2151
  this.pointerController.pointAt({
1812
2152
  x,
@@ -1814,50 +2154,48 @@ var CursorBuddyClient = class {
1814
2154
  label
1815
2155
  });
1816
2156
  }
1817
- /**
1818
- * Dismiss the current pointing target.
1819
- */
1820
2157
  dismissPointing() {
1821
2158
  this.pointerController.release();
1822
2159
  }
1823
- /**
1824
- * Reset to idle state and stop any in-progress work.
1825
- */
1826
2160
  reset() {
1827
2161
  this.abort();
1828
2162
  this.liveTranscript = "";
1829
2163
  this.transcript = "";
1830
2164
  this.response = "";
1831
2165
  this.error = null;
1832
- this.historyCommittedForTurn = false;
2166
+ this.currentScreenshot = null;
1833
2167
  this.pointerController.release();
2168
+ this.toolManager.reset();
1834
2169
  this.stateMachine.reset();
1835
2170
  this.notify();
1836
2171
  }
1837
- /**
1838
- * Update buddy position to follow cursor.
1839
- * Call this on cursor position changes.
1840
- */
1841
2172
  updateCursorPosition() {
1842
2173
  this.pointerController.updateFollowPosition();
1843
2174
  }
1844
- /**
1845
- * Subscribe to state changes.
1846
- */
2175
+ async approveToolCall(id) {
2176
+ if (this.pendingApprovalResolver) {
2177
+ this.pendingApprovalResolver(true);
2178
+ this.pendingApprovalResolver = null;
2179
+ }
2180
+ await this.toolManager.approve(id);
2181
+ }
2182
+ async denyToolCall(id) {
2183
+ if (this.pendingApprovalResolver) {
2184
+ this.pendingApprovalResolver(false);
2185
+ this.pendingApprovalResolver = null;
2186
+ }
2187
+ await this.toolManager.deny(id);
2188
+ }
2189
+ dismissToolCall(id) {
2190
+ this.toolManager.dismiss(id);
2191
+ }
1847
2192
  subscribe(listener) {
1848
2193
  this.listeners.add(listener);
1849
2194
  return () => this.listeners.delete(listener);
1850
2195
  }
1851
- /**
1852
- * Get current state snapshot for React's useSyncExternalStore.
1853
- * Returns a cached object to ensure referential stability.
1854
- */
1855
2196
  getSnapshot() {
1856
2197
  return this.cachedSnapshot;
1857
2198
  }
1858
- /**
1859
- * Build a new snapshot object.
1860
- */
1861
2199
  buildSnapshot() {
1862
2200
  return {
1863
2201
  state: this.stateMachine.getState(),
@@ -1866,11 +2204,13 @@ var CursorBuddyClient = class {
1866
2204
  response: this.response,
1867
2205
  error: this.error,
1868
2206
  isPointing: this.pointerController.isPointing(),
1869
- isEnabled: $isEnabled.get()
2207
+ isEnabled: $isEnabled.get(),
2208
+ toolCalls: this.toolManager.getToolCalls(),
2209
+ activeToolCalls: this.toolManager.getActiveToolCalls(),
2210
+ pendingApproval: this.toolManager.getPendingApproval()
1870
2211
  };
1871
2212
  }
1872
2213
  abort() {
1873
- this.commitPartialHistory();
1874
2214
  this.abortController?.abort();
1875
2215
  this.abortController = null;
1876
2216
  this.screenshotPromise = null;
@@ -1879,102 +2219,169 @@ var CursorBuddyClient = class {
1879
2219
  this.audioPlayback.stop();
1880
2220
  this.browserSpeech.stop();
1881
2221
  this.speechProviderForTurn = null;
2222
+ this.pendingApprovalResolver = null;
2223
+ this.toolManager.reset();
1882
2224
  $audioLevel.set(0);
1883
2225
  }
1884
2226
  /**
1885
- * Commit partial turn to history when interrupted.
1886
- * Only commits if we have both transcript and response,
1887
- * and haven't already committed for this turn.
2227
+ * Process chat with approval loop.
2228
+ * Returns when the turn is complete (no pending approvals).
1888
2229
  */
1889
- commitPartialHistory() {
1890
- if (this.historyCommittedForTurn) return;
1891
- if (!this.transcript || !this.response) return;
1892
- const newHistory = [
1893
- ...$conversationHistory.get(),
1894
- {
1895
- role: "user",
1896
- content: this.transcript
2230
+ async processChatLoop(messages, screenshot, signal, onFailure) {
2231
+ let currentMessages = [...messages];
2232
+ let fullResponseText = "";
2233
+ let hasStartedPlayback = false;
2234
+ this.playbackQueue = new TTSPlaybackQueue({
2235
+ onError: onFailure,
2236
+ onPlaybackStart: () => {
2237
+ if (!hasStartedPlayback) {
2238
+ hasStartedPlayback = true;
2239
+ this.stateMachine.transition({ type: "RESPONSE_STARTED" });
2240
+ }
1897
2241
  },
1898
- {
1899
- role: "assistant",
1900
- content: this.response
1901
- }
1902
- ];
1903
- $conversationHistory.set(newHistory);
1904
- this.historyCommittedForTurn = true;
1905
- }
1906
- async transcribe(blob, signal) {
1907
- const formData = new FormData();
1908
- formData.append("audio", blob, "recording.wav");
1909
- const response = await fetch(`${this.endpoint}/transcribe`, {
1910
- method: "POST",
1911
- body: formData,
2242
+ prepare: (text, currentSignal) => this.prepareSpeechSegment(text, currentSignal),
1912
2243
  signal
1913
2244
  });
1914
- if (!response.ok) throw new Error(await readErrorMessage(response, "Transcription failed"));
1915
- const { text } = await response.json();
1916
- return text;
2245
+ const shouldStreamSpeech = this.isSpeechStreamingEnabled();
2246
+ while (true) {
2247
+ if (signal?.aborted) break;
2248
+ let currentScreenshot = screenshot;
2249
+ if (currentMessages.length > messages.length) currentScreenshot = await this.screenCapture.capture();
2250
+ const response = await this.fetchChatStream(currentMessages, currentScreenshot, signal);
2251
+ const { responseText, requiresApprovalContinuation, pendingApproval } = await this.consumeStream(response, currentScreenshot, shouldStreamSpeech, signal);
2252
+ fullResponseText = responseText;
2253
+ this.response = responseText;
2254
+ this.notify();
2255
+ currentMessages = [...currentMessages, {
2256
+ role: "assistant",
2257
+ content: responseText
2258
+ }];
2259
+ if (!requiresApprovalContinuation || !pendingApproval) break;
2260
+ this.playbackQueue.pauseAfterCurrent();
2261
+ const approved = await this.waitForApproval();
2262
+ this.playbackQueue.resume();
2263
+ currentMessages = [...currentMessages, {
2264
+ role: "tool",
2265
+ content: [{
2266
+ type: "tool-approval-response",
2267
+ approvalId: pendingApproval.approvalId,
2268
+ approved
2269
+ }]
2270
+ }];
2271
+ }
2272
+ return {
2273
+ responseText: fullResponseText,
2274
+ updatedMessages: currentMessages
2275
+ };
1917
2276
  }
1918
- /**
1919
- * Stream the chat response, keep the visible text updated, and feed complete
1920
- * speech segments into the TTS queue as soon as they are ready.
1921
- */
1922
- async chatAndSpeak(transcript, screenshot, signal, options) {
1923
- const history = $conversationHistory.get();
2277
+ async fetchChatStream(messages, screenshot, signal) {
1924
2278
  const response = await fetch(`${this.endpoint}/chat`, {
1925
2279
  method: "POST",
1926
2280
  headers: { "Content-Type": "application/json" },
1927
2281
  body: JSON.stringify({
2282
+ messages,
1928
2283
  screenshot: screenshot.imageData,
1929
2284
  capture: {
1930
2285
  width: screenshot.width,
1931
2286
  height: screenshot.height
1932
2287
  },
1933
- transcript,
1934
- history,
1935
2288
  domSnapshot: screenshot.domSnapshot
1936
2289
  }),
1937
2290
  signal
1938
2291
  });
1939
- if (!response.ok) throw new Error("Chat request failed");
2292
+ if (!response.ok) throw new Error(await readErrorMessage(response, "Chat request failed"));
2293
+ return response;
2294
+ }
2295
+ async consumeStream(response, screenshot, shouldStreamSpeech, signal) {
1940
2296
  const reader = response.body?.getReader();
1941
2297
  if (!reader) throw new Error("No response body");
1942
2298
  const decoder = new TextDecoder();
1943
- const responseProcessor = new ProgressiveResponseProcessor();
1944
- const playbackQueue = new TTSPlaybackQueue({
1945
- onError: options.onFailure,
1946
- onPlaybackStart: options.onPlaybackStart,
1947
- prepare: (text, currentSignal) => this.prepareSpeechSegment(text, currentSignal),
1948
- signal
2299
+ const state = { pointToolCall: null };
2300
+ const processor = new StreamProcessor({
2301
+ onTextDelta: () => {},
2302
+ onSpeechSegment: (text) => {
2303
+ if (shouldStreamSpeech && this.playbackQueue) this.playbackQueue.enqueue(text);
2304
+ },
2305
+ onToolCall: (event) => {
2306
+ if (event.toolName === "point") {
2307
+ const input = event.args;
2308
+ if (input && typeof input.elementId === "number" && typeof input.label === "string") state.pointToolCall = {
2309
+ elementId: input.elementId,
2310
+ label: input.label
2311
+ };
2312
+ } else {
2313
+ this.toolManager.handleToolCall(event);
2314
+ this.options.onToolCall?.({
2315
+ id: event.toolCallId,
2316
+ toolName: event.toolName,
2317
+ args: event.args
2318
+ });
2319
+ }
2320
+ },
2321
+ onApprovalRequest: (event) => {
2322
+ this.toolManager.handleApprovalRequest(event);
2323
+ },
2324
+ onToolResult: (event) => {
2325
+ const toolCall = this.toolManager.getToolCall(event.toolCallId);
2326
+ this.toolManager.handleToolResult(event);
2327
+ if (toolCall) this.options.onToolResult?.({
2328
+ id: event.toolCallId,
2329
+ toolName: toolCall.toolName,
2330
+ result: event.result
2331
+ });
2332
+ },
2333
+ onToolError: (event) => {
2334
+ this.toolManager.handleToolError(event);
2335
+ },
2336
+ onFinish: () => {},
2337
+ onError: (error) => {
2338
+ throw new Error(error);
2339
+ }
1949
2340
  });
1950
- const shouldStreamSpeech = this.isSpeechStreamingEnabled();
1951
2341
  while (true) {
1952
2342
  const { done, value } = await reader.read();
1953
2343
  if (done) break;
1954
2344
  const chunk = decoder.decode(value, { stream: true });
1955
- const { speechSegments, visibleText } = responseProcessor.push(chunk);
1956
- if (shouldStreamSpeech) for (const speechSegment of speechSegments) playbackQueue.enqueue(speechSegment);
1957
- this.updateResponse(visibleText);
2345
+ processor.processChunk(chunk);
2346
+ this.response = processor.getResponseText();
2347
+ this.notify();
1958
2348
  }
1959
2349
  const trailingChunk = decoder.decode();
1960
- if (trailingChunk) {
1961
- const { speechSegments, visibleText } = responseProcessor.push(trailingChunk);
1962
- if (shouldStreamSpeech) for (const speechSegment of speechSegments) playbackQueue.enqueue(speechSegment);
1963
- this.updateResponse(visibleText);
2350
+ if (trailingChunk) processor.processChunk(trailingChunk);
2351
+ const result = processor.finish();
2352
+ if (state.pointToolCall !== null) {
2353
+ const pointCall = state.pointToolCall;
2354
+ const element = screenshot.elementRegistry.get(pointCall.elementId);
2355
+ if (element) {
2356
+ const rect = element.getBoundingClientRect();
2357
+ const target = {
2358
+ x: Math.round(rect.left + rect.width / 2),
2359
+ y: Math.round(rect.top + rect.height / 2),
2360
+ label: pointCall.label
2361
+ };
2362
+ this.options.onPoint?.(target);
2363
+ this.pointerController.pointAt(target);
2364
+ }
1964
2365
  }
1965
- const finalizedResponse = responseProcessor.finish();
1966
- if (shouldStreamSpeech) for (const speechSegment of finalizedResponse.speechSegments) playbackQueue.enqueue(speechSegment);
1967
- else playbackQueue.enqueue(finalizedResponse.finalResponseText);
1968
- this.updateResponse(finalizedResponse.finalResponseText);
1969
- return {
1970
- cleanResponse: finalizedResponse.finalResponseText,
1971
- pointToolCall: finalizedResponse.pointToolCall,
1972
- playbackQueue
1973
- };
2366
+ if (!shouldStreamSpeech && this.playbackQueue) this.playbackQueue.enqueue(result.responseText);
2367
+ return result;
2368
+ }
2369
+ waitForApproval() {
2370
+ return new Promise((resolve) => {
2371
+ this.pendingApprovalResolver = resolve;
2372
+ });
2373
+ }
2374
+ async transcribe(blob, signal) {
2375
+ const formData = new FormData();
2376
+ formData.append("audio", blob, "recording.wav");
2377
+ const response = await fetch(`${this.endpoint}/transcribe`, {
2378
+ method: "POST",
2379
+ body: formData,
2380
+ signal
2381
+ });
2382
+ if (!response.ok) throw new Error(await readErrorMessage(response, "Transcription failed"));
2383
+ return (await response.json()).text;
1974
2384
  }
1975
- /**
1976
- * Request server-side TTS audio for one text segment.
1977
- */
1978
2385
  async synthesizeSpeech(text, signal) {
1979
2386
  const response = await fetch(`${this.endpoint}/tts`, {
1980
2387
  method: "POST",
@@ -1985,16 +2392,6 @@ var CursorBuddyClient = class {
1985
2392
  if (!response.ok) throw new Error(await readErrorMessage(response, "TTS request failed"));
1986
2393
  return response.blob();
1987
2394
  }
1988
- /**
1989
- * Resolve the initial speech provider for this turn.
1990
- *
1991
- * Decision tree:
1992
- * 1. In `server` mode, always synthesize on the server.
1993
- * 2. In `browser` mode, require browser speech support up front.
1994
- * 3. In `auto` mode, prefer browser speech when available and keep that
1995
- * choice cached so later segments stay on the same provider unless a
1996
- * browser failure forces a one-way fallback to the server.
1997
- */
1998
2395
  prepareSpeechMode() {
1999
2396
  const speechMode = this.getSpeechMode();
2000
2397
  if (speechMode === "browser" && !this.browserSpeech.isAvailable()) throw new Error("Browser speech is not supported");
@@ -2008,13 +2405,6 @@ var CursorBuddyClient = class {
2008
2405
  }
2009
2406
  this.speechProviderForTurn = this.browserSpeech.isAvailable() ? "browser" : "server";
2010
2407
  }
2011
- /**
2012
- * Prepare a playback task for one text segment.
2013
- *
2014
- * The queue calls this eagerly so server synthesis can overlap with the
2015
- * currently playing segment, but the returned task is still executed in the
2016
- * original enqueue order.
2017
- */
2018
2408
  async prepareSpeechSegment(text, signal) {
2019
2409
  switch (this.getSpeechMode()) {
2020
2410
  case "server": return this.prepareServerSpeechTask(text, signal);
@@ -2022,27 +2412,13 @@ var CursorBuddyClient = class {
2022
2412
  default: return this.prepareAutoSpeechTask(text, signal);
2023
2413
  }
2024
2414
  }
2025
- /**
2026
- * Synthesize server audio immediately and return a playback task that reuses
2027
- * the prepared blob later.
2028
- */
2029
2415
  async prepareServerSpeechTask(text, signal) {
2030
2416
  const blob = await this.synthesizeSpeech(text, signal);
2031
2417
  return () => this.audioPlayback.play(blob, signal);
2032
2418
  }
2033
- /**
2034
- * Return a browser playback task for one text segment.
2035
- */
2036
- async prepareBrowserSpeechTask(text, signal) {
2037
- return () => this.browserSpeech.speak(text, signal);
2419
+ async prepareBrowserSpeechTask(_text, signal) {
2420
+ return () => this.browserSpeech.speak(_text, signal);
2038
2421
  }
2039
- /**
2040
- * Prepare a playback task for `auto` mode.
2041
- *
2042
- * We prefer the browser for low latency, but if browser speech fails for any
2043
- * segment we permanently switch the remainder of the turn to server TTS so
2044
- * later segments do not keep retrying the failing browser path.
2045
- */
2046
2422
  async prepareAutoSpeechTask(text, signal) {
2047
2423
  if (this.getAutoSpeechProvider() === "server") return this.prepareServerSpeechTask(text, signal);
2048
2424
  return async () => {
@@ -2052,17 +2428,13 @@ var CursorBuddyClient = class {
2052
2428
  }
2053
2429
  try {
2054
2430
  await this.browserSpeech.speak(text, signal);
2055
- } catch (error) {
2431
+ } catch {
2056
2432
  if (signal?.aborted) return;
2057
2433
  this.speechProviderForTurn = "server";
2058
2434
  await (await this.prepareServerSpeechTask(text, signal))();
2059
2435
  }
2060
2436
  };
2061
2437
  }
2062
- /**
2063
- * Read the current provider choice for `auto` mode, lazily defaulting to the
2064
- * browser when supported and the server otherwise.
2065
- */
2066
2438
  getAutoSpeechProvider() {
2067
2439
  if (this.speechProviderForTurn) return this.speechProviderForTurn;
2068
2440
  this.speechProviderForTurn = this.browserSpeech.isAvailable() ? "browser" : "server";
@@ -2078,42 +2450,21 @@ var CursorBuddyClient = class {
2078
2450
  this.options.onError?.(err);
2079
2451
  this.notify();
2080
2452
  }
2081
- /**
2082
- * Resolve the effective transcription mode for the current client.
2083
- */
2084
2453
  getTranscriptionMode() {
2085
2454
  return this.options.transcription?.mode ?? "auto";
2086
2455
  }
2087
- /**
2088
- * Resolve the effective speech mode for the current client.
2089
- */
2090
2456
  getSpeechMode() {
2091
2457
  return this.options.speech?.mode ?? "server";
2092
2458
  }
2093
- /**
2094
- * Decide whether speech should start before the full chat response is ready.
2095
- */
2096
2459
  isSpeechStreamingEnabled() {
2097
2460
  return this.options.speech?.allowStreaming ?? false;
2098
2461
  }
2099
- /**
2100
- * Decide whether this turn should attempt browser speech recognition.
2101
- */
2102
2462
  shouldAttemptBrowserTranscription() {
2103
2463
  return this.getTranscriptionMode() !== "server";
2104
2464
  }
2105
- /**
2106
- * Decide whether browser speech recognition is mandatory for this turn.
2107
- */
2108
2465
  isBrowserTranscriptionRequired() {
2109
2466
  return this.getTranscriptionMode() === "browser";
2110
2467
  }
2111
- /**
2112
- * Start the recorder and browser speech recognition together.
2113
- *
2114
- * The recorder always runs so we keep waveform updates and preserve a raw
2115
- * audio backup for server fallback in `auto` mode.
2116
- */
2117
2468
  async beginListeningSession(signal) {
2118
2469
  const shouldAttemptBrowser = this.shouldAttemptBrowserTranscription();
2119
2470
  const isBrowserTranscriptionAvailable = shouldAttemptBrowser && this.liveTranscription.isAvailable();
@@ -2126,10 +2477,6 @@ var CursorBuddyClient = class {
2126
2477
  if (browserTranscriptionResult.status === "rejected" && this.isBrowserTranscriptionRequired()) throw toError(browserTranscriptionResult.reason, "Browser transcription failed to start");
2127
2478
  if (browserTranscriptionResult.status === "rejected") this.liveTranscription.dispose();
2128
2479
  }
2129
- /**
2130
- * Stop browser speech recognition and return the best final transcript it
2131
- * produced for this turn.
2132
- */
2133
2480
  async stopLiveTranscription() {
2134
2481
  if (!this.shouldAttemptBrowserTranscription() || !this.liveTranscription.isAvailable()) return "";
2135
2482
  try {
@@ -2139,25 +2486,12 @@ var CursorBuddyClient = class {
2139
2486
  return "";
2140
2487
  }
2141
2488
  }
2142
- /**
2143
- * Choose the transcript that should drive the turn.
2144
- *
2145
- * Decision tree:
2146
- * 1. Use the browser transcript when it is available.
2147
- * 2. In browser-only mode, fail if the browser produced nothing usable.
2148
- * 3. In auto/server modes, fall back to the recorded audio upload.
2149
- */
2150
2489
  async resolveTranscript(browserTranscript, audioBlob, signal) {
2151
2490
  const normalizedBrowserTranscript = browserTranscript.trim();
2152
2491
  if (normalizedBrowserTranscript) return normalizedBrowserTranscript;
2153
2492
  if (this.getTranscriptionMode() === "browser") throw new Error("Browser transcription did not produce a final transcript");
2154
2493
  return this.transcribe(audioBlob, signal);
2155
2494
  }
2156
- updateResponse(text) {
2157
- if (this.response === text) return;
2158
- this.response = text;
2159
- this.notify();
2160
- }
2161
2495
  notify() {
2162
2496
  this.cachedSnapshot = this.buildSnapshot();
2163
2497
  this.listeners.forEach((listener) => listener());
@@ -2166,4 +2500,4 @@ var CursorBuddyClient = class {
2166
2500
  //#endregion
2167
2501
  export { $buddyScale as a, $buddyRotation as i, $audioLevel as n, $cursorPosition as o, $buddyPosition as r, $pointingTarget as s, CursorBuddyClient as t };
2168
2502
 
2169
- //# sourceMappingURL=client-CliXcNch.mjs.map
2503
+ //# sourceMappingURL=client-D7kFGsuH.mjs.map