@auticlabs/bulut 1.2.2 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/embed.js CHANGED
@@ -729,16 +729,15 @@ const MAX_LINKS = 20;
729
729
  const MAX_INTERACTABLES = 24;
730
730
  const MAX_HEADINGS = 10;
731
731
  const MAX_TEXT_SNIPPETS = 4;
732
- const MAX_OUTER_HTML_DIGEST = 760;
733
732
  const MAX_CACHED_PAGES = 20;
734
733
  const MAX_PAGE_SCAN_ELEMENTS = 2e3;
735
734
  const MAX_EVENT_HINTS_PER_ELEMENT = 4;
736
735
  const MAX_BRANCH_SAMPLES = 4;
737
736
  const MAX_BRANCH_DEPTH = 2;
738
- const MAX_CONTEXT_SUMMARY_CHARS = 3400;
739
- const MAX_CONTEXT_WITH_HISTORY_CHARS = 4200;
740
- const PAGE_CONTEXT_CACHE_VERSION = 2;
741
- const PAGE_CONTEXT_CACHE_KEY = "auticbot_page_context_cache_v2";
737
+ const MAX_STYLESHEET_SELECTORS = 80;
738
+ const MAX_STYLESHEET_RULES = 700;
739
+ const PAGE_CONTEXT_CACHE_VERSION = 3;
740
+ const PAGE_CONTEXT_CACHE_KEY = "auticbot_page_context_cache_v3";
742
741
  const NON_CONTENT_TAGS = /* @__PURE__ */ new Set([
743
742
  "script",
744
743
  "style",
@@ -814,22 +813,10 @@ const ARIA_INTERACTION_ATTRS = [
814
813
  "aria-selected"
815
814
  ];
816
815
  const DATA_INTERACTION_PATTERN = /(action|click|press|toggle|target|trigger|nav|open|close|menu|modal|command|submit)/i;
816
// Matches stylesheet selectors that hint at interactivity: interaction
// pseudo-classes, form/control tag names, role/aria/data attribute selectors
// and common button/link class prefixes.
// The anchor alternative requires `a` to stand alone as a type selector
// (start of selector, or after whitespace, a combinator or "("; and not
// followed by a word character or "-"). A bare `a\b` also matched every
// identifier that merely ends in "a" (".media", ".a-frame").
// Lookahead only: no lookbehind is used, to keep the literal parseable by
// older regex engines.
const STYLESHEET_SELECTOR_PATTERN = /(:hover|:focus|:active|button|(?:^|[\s>+~(])a(?![\w-])|input|textarea|select|\[role=|\[aria-|\[data-|\.btn|\.link)/i;
817
817
  const pageContextCache = /* @__PURE__ */ new Map();
818
818
  let cacheHydrated = false;
819
819
  const normalizeWhitespace = (value) => value.replace(/\s+/g, " ").trim();
820
- const truncate = (value, maxChars) => {
821
- if (value.length <= maxChars) {
822
- return value;
823
- }
824
- const suffix = "\n...[truncated]";
825
- return `${value.slice(0, Math.max(0, maxChars - suffix.length))}${suffix}`;
826
- };
827
- const truncateInline = (value, maxChars) => {
828
- if (value.length <= maxChars) {
829
- return value;
830
- }
831
- return `${value.slice(0, Math.max(0, maxChars - 3))}...`;
832
- };
833
820
  const canonicalUrl = (rawUrl) => {
834
821
  try {
835
822
  return new URL(rawUrl, rawUrl).href;
@@ -863,9 +850,9 @@ const parseTabIndex = (value) => {
863
850
  const parsed = Number.parseInt(value, 10);
864
851
  return Number.isNaN(parsed) ? null : parsed;
865
852
  };
866
/**
 * Reduces an id/class-like string to a compact token.
 *
 * Runs of whitespace become a single "-", then every character outside
 * [a-zA-Z0-9_-] is dropped. The result may be the empty string.
 *
 * @param {string} value - Raw identifier text.
 * @returns {string} The sanitized token.
 */
const compactToken = (value) => {
  const hyphenated = value.replace(/\s+/g, "-");
  return hyphenated.replace(/[^a-zA-Z0-9_-]/g, "");
};
870
857
  const getElementDepth = (element) => {
871
858
  let depth = 0;
@@ -950,16 +937,13 @@ const buildSummaryWithHistory = (current) => {
950
937
  const historySection = [
951
938
  "Recent Page Memory:",
952
939
  ...recentPages.map((entry) => {
953
- const compactSummary = normalizeWhitespace(entry.summary).slice(0, 180);
940
+ const compactSummary = normalizeWhitespace(entry.summary);
954
941
  return `- ${entry.url} :: ${compactSummary}`;
955
942
  })
956
943
  ].join("\n");
957
- return truncate(
958
- `${current.summary}
944
+ return `${current.summary}
959
945
 
960
- ${historySection}`,
961
- MAX_CONTEXT_WITH_HISTORY_CHARS
962
- );
946
+ ${historySection}`;
963
947
  };
964
948
  const isVisible = (element) => {
965
949
  if (element.getAttribute("aria-hidden") === "true") {
@@ -988,20 +972,25 @@ const escapeCssValue = (value) => {
988
972
  }
989
973
  return value.replace(/([ #;&,.+*~':"!^$\[\]()=>|\/@])/g, "\\$1");
990
974
  };
991
- const buildSelector = (element) => {
975
+ const buildSelectorSegment = (element) => {
992
976
  const tag = element.tagName.toLowerCase();
993
977
  if (element.id) {
994
978
  return `#${escapeCssValue(element.id)}`;
995
979
  }
996
- const name = element.getAttribute("name");
997
- if (name) {
998
- return `${tag}[name="${escapeCssValue(name)}"]`;
999
- }
1000
- const ariaLabel = element.getAttribute("aria-label");
1001
- if (ariaLabel) {
1002
- return `${tag}[aria-label="${escapeCssValue(ariaLabel)}"]`;
980
+ const attrCandidates = [
981
+ ["name", element.getAttribute("name")],
982
+ ["data-testid", element.getAttribute("data-testid")],
983
+ ["data-test-id", element.getAttribute("data-test-id")],
984
+ ["aria-label", element.getAttribute("aria-label")],
985
+ ["role", element.getAttribute("role")],
986
+ ["type", element.getAttribute("type")]
987
+ ];
988
+ for (const [attrName, attrValue] of attrCandidates) {
989
+ if (attrValue) {
990
+ return `${tag}[${attrName}="${escapeCssValue(attrValue)}"]`;
991
+ }
1003
992
  }
1004
- const classes = Array.from(element.classList).filter(Boolean).slice(0, 2).map((className) => `.${escapeCssValue(className)}`).join("");
993
+ const classes = Array.from(element.classList).filter(Boolean).slice(0, 3).map((className) => `.${escapeCssValue(className)}`).join("");
1005
994
  if (classes) {
1006
995
  return `${tag}${classes}`;
1007
996
  }
@@ -1015,6 +1004,21 @@ const buildSelector = (element) => {
1015
1004
  const index = siblingsOfTag.indexOf(element) + 1;
1016
1005
  return `${tag}:nth-of-type(${index})`;
1017
1006
  };
1007
/**
 * Builds a selector path for an element by walking up its ancestors.
 *
 * One segment is produced per visited node via buildSelectorSegment. The
 * walk visits at most four nodes and stops early as soon as a segment
 * starts with "#". Segments are joined outermost-first with " > ".
 *
 * @param {Element} element - Element to describe.
 * @returns {string} The joined selector path ("" when element is falsy).
 */
const buildSelector = (element) => {
  const innermostFirst = [];
  let node = element;
  for (let hops = 0; node && hops < 4; hops += 1) {
    const piece = buildSelectorSegment(node);
    innermostFirst.push(piece);
    if (piece.startsWith("#")) {
      break;
    }
    node = node.parentElement;
  }
  // Collected child-to-ancestor; the selector reads ancestor-to-child.
  return innermostFirst.reverse().join(" > ");
};
1018
1022
  const getElementLabel = (element) => {
1019
1023
  const text = normalizeWhitespace(
1020
1024
  (element instanceof HTMLElement ? element.innerText : element.textContent) || ""
@@ -1026,7 +1030,7 @@ const getElementLabel = (element) => {
1026
1030
  );
1027
1031
  const name = normalizeWhitespace(element.getAttribute("name") || "");
1028
1032
  const value = element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || element instanceof HTMLButtonElement ? normalizeWhitespace(element.value || "") : "";
1029
- const classHint = Array.from(element.classList).map((item) => compactToken(item, 16)).find(Boolean);
1033
+ const classHint = Array.from(element.classList).map((item) => compactToken(item)).find(Boolean);
1030
1034
  const fallback = element.id && `#${element.id}` || classHint && `.${classHint}` || buildSelector(element);
1031
1035
  const label = text || ariaLabel || title || placeholder || value || name || fallback;
1032
1036
  if (element.tagName.toLowerCase() === "input") {
@@ -1058,23 +1062,35 @@ const getAriaInteractionHints = (element) => ARIA_INTERACTION_ATTRS.filter((attr
1058
1062
  const getDataInteractionHints = (element) => element.getAttributeNames().filter(
1059
1063
  (attrName) => attrName.startsWith("data-") && DATA_INTERACTION_PATTERN.test(attrName)
1060
1064
  ).slice(0, 2).map((attrName) => attrName.replace("data-", ""));
1061
/**
 * Turns a computed-style object into a list of "property:value" signal strings.
 *
 * A property is reported only when its value is truthy and differs from the
 * neutral value listed for it (display and position have no neutral value,
 * so any truthy value is reported). Output order follows the table below.
 *
 * @param {CSSStyleDeclaration} style - Style object to inspect.
 * @returns {string[]} De-duplicated signal strings.
 */
const getComputedStyleSignals = (style) => {
  // [label in the output, current value, neutral value that is not reported]
  const checks = [
    ["cursor", style.cursor, "auto"],
    ["display", style.display, null],
    ["position", style.position, null],
    ["z-index", style.zIndex, "auto"],
    ["pointer-events", style.pointerEvents, "auto"],
    ["visibility", style.visibility, "visible"],
    ["opacity", style.opacity, "1"]
  ];
  const signals = checks
    .filter(([, current, neutral]) => current && current !== neutral)
    .map(([label, current]) => `${label}:${current}`);
  return [...new Set(signals)];
};
1074
1090
  const buildBlueprintToken = (element) => {
1075
1091
  const tag = element.tagName.toLowerCase();
1076
1092
  const idToken = element.id ? `#${compactToken(element.id)}` : "";
1077
- const classToken = Array.from(element.classList).map((item) => compactToken(item, 16)).find(Boolean);
1093
+ const classToken = Array.from(element.classList).map((item) => compactToken(item)).find(Boolean);
1078
1094
  return `${tag}${idToken}${classToken ? `.${classToken}` : ""}`;
1079
1095
  };
1080
1096
  const buildBranchDigest = (element, depth) => {
@@ -1094,7 +1110,7 @@ const collectDomBranchDigest = () => {
1094
1110
  const root = document.body ?? document.documentElement;
1095
1111
  const topLevelNodes = Array.from(root.children).filter((child) => !NON_CONTENT_TAGS.has(child.tagName.toLowerCase())).filter((child) => isVisible(child)).slice(0, MAX_BRANCH_SAMPLES);
1096
1112
  return topLevelNodes.map(
1097
- (child) => truncateInline(buildBranchDigest(child, MAX_BRANCH_DEPTH), 140)
1113
+ (child) => buildBranchDigest(child, MAX_BRANCH_DEPTH)
1098
1114
  );
1099
1115
  };
1100
1116
  const formatSection = (title, lines) => {
@@ -1110,7 +1126,7 @@ const buildOuterHtmlDigest = () => {
1110
1126
  const raw = ((_a = document.body) == null ? void 0 : _a.outerHTML) || document.documentElement.outerHTML;
1111
1127
  const withoutScripts = raw.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<noscript[\s\S]*?<\/noscript>/gi, "").replace(/<!--[\s\S]*?-->/g, "").replace(/\s+/g, " ").trim();
1112
1128
  const structural = withoutScripts.replace(/>[^<]*</g, "><").replace(/\s+/g, " ").trim();
1113
- return truncate(structural, MAX_OUTER_HTML_DIGEST);
1129
+ return structural;
1114
1130
  };
1115
1131
  const collectTextSnippets = () => {
1116
1132
  const root = document.querySelector("main, article, [role='main']") ?? document.body;
@@ -1125,12 +1141,11 @@ const collectTextSnippets = () => {
1125
1141
  if (!text || text.length < 20) {
1126
1142
  continue;
1127
1143
  }
1128
- const compact = truncateInline(text, 180);
1129
- if (seen.has(compact)) {
1144
+ if (seen.has(text)) {
1130
1145
  continue;
1131
1146
  }
1132
- seen.add(compact);
1133
- snippets.push(`- ${compact}`);
1147
+ seen.add(text);
1148
+ snippets.push(`- ${text}`);
1134
1149
  if (snippets.length >= MAX_TEXT_SNIPPETS) {
1135
1150
  break;
1136
1151
  }
@@ -1161,6 +1176,51 @@ const collectLandmarkSnapshot = () => {
1161
1176
  ({ label, selector }) => `- ${label}: ${document.querySelectorAll(selector).length}`
1162
1177
  );
1163
1178
  };
1179
/**
 * Walks a CSS rule list and records selectors that match
 * STYLESHEET_SELECTOR_PATTERN.
 *
 * Style rules contribute their comma-separated selector parts (whitespace
 * normalized, empty parts dropped). Any other rule exposing `cssRules` is
 * descended into recursively; failures while reading or walking such a
 * nested list are ignored so one unreadable group does not abort the scan.
 * The walk stops once MAX_STYLESHEET_RULES rules have been visited or
 * MAX_STYLESHEET_SELECTORS selectors have been collected.
 *
 * @param {Iterable<CSSRule>} rules - Rule list to scan.
 * @param {Set<string>} selectors - Accumulator, mutated in place.
 * @param {{count: number}} scanned - Shared visited-rule counter, mutated in place.
 * @returns {void}
 */
const collectSelectorsFromRuleList = (rules, selectors, scanned) => {
  const budgetSpent = () => scanned.count >= MAX_STYLESHEET_RULES || selectors.size >= MAX_STYLESHEET_SELECTORS;
  for (const rule of Array.from(rules)) {
    if (budgetSpent()) {
      return;
    }
    scanned.count += 1;
    if (rule instanceof CSSStyleRule) {
      const candidates = rule.selectorText
        .split(",")
        .map((part) => normalizeWhitespace(part))
        .filter((part) => Boolean(part) && STYLESHEET_SELECTOR_PATTERN.test(part));
      for (const candidate of candidates) {
        selectors.add(candidate);
        if (selectors.size >= MAX_STYLESHEET_SELECTORS) {
          return;
        }
      }
    } else if ("cssRules" in rule) {
      try {
        collectSelectorsFromRuleList(rule.cssRules, selectors, scanned);
      } catch {
        // Best effort: skip a nested rule list that cannot be read or walked.
      }
    }
  }
};
1207
/**
 * Collects interaction-related selectors from the document's stylesheets.
 *
 * Each stylesheet's rules are handed to collectSelectorsFromRuleList until
 * the rule or selector cap is reached. Stylesheets without `cssRules`, or
 * whose rules throw while being read or walked, are skipped.
 *
 * @returns {string[]} One "- <selector>" line per collected selector.
 */
const collectStylesheetSelectors = () => {
  const found = new Set();
  const scanned = { count: 0 };
  for (const sheet of Array.from(document.styleSheets)) {
    const capReached = scanned.count >= MAX_STYLESHEET_RULES || found.size >= MAX_STYLESHEET_SELECTORS;
    if (capReached) {
      break;
    }
    try {
      if (sheet.cssRules) {
        collectSelectorsFromRuleList(sheet.cssRules, found, scanned);
      }
    } catch {
      // Best effort: a stylesheet whose rules cannot be read is skipped.
    }
  }
  return Array.from(found, (selector) => `- ${selector}`);
};
1164
1224
  const collectPageSignalSnapshot = () => {
1165
1225
  const allElements = Array.from(document.querySelectorAll("*"));
1166
1226
  const sampledElements = allElements.slice(0, MAX_PAGE_SCAN_ELEMENTS);
@@ -1172,6 +1232,7 @@ const collectPageSignalSnapshot = () => {
1172
1232
  const eventCounts = /* @__PURE__ */ new Map();
1173
1233
  const displayCounts = /* @__PURE__ */ new Map();
1174
1234
  const positionCounts = /* @__PURE__ */ new Map();
1235
+ const styleSignalCounts = /* @__PURE__ */ new Map();
1175
1236
  let visibleElements = 0;
1176
1237
  let maxDepth = 0;
1177
1238
  let semanticInteractables = 0;
@@ -1207,6 +1268,10 @@ const collectPageSignalSnapshot = () => {
1207
1268
  if (TRACKED_POSITION_VALUES.has(style.position)) {
1208
1269
  bumpCount(positionCounts, style.position);
1209
1270
  }
1271
+ const computedStyleSignals = getComputedStyleSignals(style);
1272
+ for (const styleSignal of computedStyleSignals) {
1273
+ bumpCount(styleSignalCounts, styleSignal);
1274
+ }
1210
1275
  const eventHints = getEventHints(element);
1211
1276
  if (eventHints.length > 0) {
1212
1277
  for (const eventName of eventHints) {
@@ -1241,7 +1306,7 @@ const collectPageSignalSnapshot = () => {
1241
1306
  if (tag === "a" && href && !href.startsWith("#") && !href.startsWith("javascript:")) {
1242
1307
  const absoluteHref = toAbsoluteUrl(href);
1243
1308
  const label2 = getElementLabel(element) || absoluteHref;
1244
- const line2 = `- ${truncateInline(label2, 90)} -> ${truncateInline(absoluteHref, 140)}`;
1309
+ const line2 = `- ${label2} -> ${absoluteHref}`;
1245
1310
  if (!linkSet.has(line2)) {
1246
1311
  linkSet.add(line2);
1247
1312
  links.push(line2);
@@ -1257,8 +1322,8 @@ const collectPageSignalSnapshot = () => {
1257
1322
  nonSemanticInteractables += 1;
1258
1323
  }
1259
1324
  const selector = buildSelector(element);
1260
- const label = truncateInline(getElementLabel(element), 90);
1261
- const styleHints = getStyleHints(style);
1325
+ const label = getElementLabel(element);
1326
+ const styleSignals = computedStyleSignals;
1262
1327
  const signalTokens = [];
1263
1328
  if (eventHints.length > 0) {
1264
1329
  signalTokens.push(`evt:${eventHints.join("|")}`);
@@ -1275,16 +1340,13 @@ const collectPageSignalSnapshot = () => {
1275
1340
  if (ariaHints.length > 0) {
1276
1341
  signalTokens.push(`aria:${ariaHints.join("|")}`);
1277
1342
  }
1278
- if (styleHints.length > 0) {
1279
- signalTokens.push(`css:${styleHints.join("|")}`);
1343
+ if (styleSignals.length > 0) {
1344
+ signalTokens.push(`css:${styleSignals.join("|")}`);
1280
1345
  } else if (hasPointerCursor) {
1281
1346
  signalTokens.push("css:cursor:pointer");
1282
1347
  }
1283
1348
  const signalBlock = signalTokens.length > 0 ? ` [${signalTokens.join("; ")}]` : "";
1284
- const line = truncateInline(
1285
- `- ${tag} ${selector}${signalBlock} (${label})`,
1286
- 240
1287
- );
1349
+ const line = `- ${tag} ${selector}${signalBlock} (${label})`;
1288
1350
  const score = eventHints.length * 5 + (isNativeInteractive ? 5 : 0) + (isRoleInteractive ? 4 : 0) + (hasTabStop ? 2 : 0) + (hasPointerCursor ? 2 : 0) + (dataHints.length > 0 ? 2 : 0) + (ariaHints.length > 0 ? 1 : 0) + (isContentEditable ? 2 : 0);
1289
1351
  const existing = interactableCandidates.get(line);
1290
1352
  if (!existing || score > existing.score) {
@@ -1302,8 +1364,9 @@ const collectPageSignalSnapshot = () => {
1302
1364
  `- listener hints: ${formatTopCounts(eventCounts, 8)}`,
1303
1365
  `- interaction cues: tabindex>=0=${tabStopElements}, pointer-cursor=${pointerCursorElements}, data-hints=${dataHintElements}, aria-hints=${ariaHintElements}`,
1304
1366
  `- role hints: ${formatTopCounts(interactiveRoleCounts, 8)}`,
1367
+ `- css footprint: ${formatTopCounts(styleSignalCounts, 10)}`,
1305
1368
  "- listener scope: inline/on* handlers are detected directly; addEventListener handlers are inferred via cues."
1306
- ].map((line) => truncateInline(line, 250));
1369
+ ];
1307
1370
  const branchDigest = collectDomBranchDigest();
1308
1371
  const pageBlueprint = [
1309
1372
  `- nodes: total=${allElements.length}, scanned=${sampledElements.length}, visible=${visibleElements}, max-depth=${maxDepth}${allElements.length > sampledElements.length ? ", sampling=on" : ""}`,
@@ -1311,15 +1374,16 @@ const collectPageSignalSnapshot = () => {
1311
1374
  `- role density: ${formatTopCounts(roleCounts, 8)}`,
1312
1375
  `- layout density: display(${formatTopCounts(displayCounts, 6)}), position(${formatTopCounts(positionCounts, 4)})`,
1313
1376
  `- branch digest: ${branchDigest.length > 0 ? branchDigest.join(" || ") : "none"}`
1314
- ].map((line) => truncateInline(line, 260));
1377
+ ];
1315
1378
  return {
1316
1379
  links: links.slice(0, MAX_LINKS),
1317
1380
  interactables,
1318
1381
  interactionSignals,
1382
+ styleSelectors: collectStylesheetSelectors(),
1319
1383
  pageBlueprint
1320
1384
  };
1321
1385
  };
1322
- const buildPageContextSummary = (input, maxChars = MAX_CONTEXT_SUMMARY_CHARS) => {
1386
+ const buildPageContextSummary = (input) => {
1323
1387
  const sections = [
1324
1388
  formatSection("Meta", [
1325
1389
  `- URL: ${input.url || "unknown"}`,
@@ -1329,6 +1393,7 @@ const buildPageContextSummary = (input, maxChars = MAX_CONTEXT_SUMMARY_CHARS) =>
1329
1393
  formatSection("Headings", input.headings),
1330
1394
  formatSection("Landmark Snapshot", input.landmarks),
1331
1395
  formatSection("Interaction Signals", input.interactionSignals),
1396
+ formatSection("Stylesheet Selector Snapshot", input.styleSelectors),
1332
1397
  formatSection("Compressed Page Blueprint", input.pageBlueprint),
1333
1398
  formatSection("Top Links", input.links),
1334
1399
  formatSection("Top Interactables", input.interactables),
@@ -1337,7 +1402,7 @@ const buildPageContextSummary = (input, maxChars = MAX_CONTEXT_SUMMARY_CHARS) =>
1337
1402
  `- ${input.outerHtmlDigest || "unavailable"}`
1338
1403
  ])
1339
1404
  ];
1340
- return truncate(sections.join("\n\n"), maxChars);
1405
+ return sections.join("\n\n");
1341
1406
  };
1342
1407
  const getPageContext = () => {
1343
1408
  if (typeof window === "undefined" || typeof document === "undefined") {
@@ -1361,7 +1426,7 @@ const getPageContext = () => {
1361
1426
  console.info(`[Autic] context cache miss url=${url}`);
1362
1427
  const snapshot = collectPageSignalSnapshot();
1363
1428
  const headings = Array.from(document.querySelectorAll("h1, h2, h3")).filter((element) => isVisible(element)).map(
1364
- (element) => `- ${truncateInline(normalizeWhitespace(element.textContent || ""), 120)}`
1429
+ (element) => `- ${normalizeWhitespace(element.textContent || "")}`
1365
1430
  ).filter((line) => line !== "- ").slice(0, MAX_HEADINGS);
1366
1431
  const summary = buildPageContextSummary({
1367
1432
  url,
@@ -1372,6 +1437,7 @@ const getPageContext = () => {
1372
1437
  links: snapshot.links,
1373
1438
  interactables: snapshot.interactables,
1374
1439
  interactionSignals: snapshot.interactionSignals,
1440
+ styleSelectors: snapshot.styleSelectors,
1375
1441
  pageBlueprint: snapshot.pageBlueprint,
1376
1442
  textSnippets: collectTextSnippets(),
1377
1443
  outerHtmlDigest: buildOuterHtmlDigest()
@@ -1516,6 +1582,18 @@ const getCursorPosition = (cursor) => ({
1516
1582
  const setCursorVisibility = (cursor, visible) => {
1517
1583
  cursor.style.opacity = visible ? "1" : "0";
1518
1584
  };
1585
/**
 * Hides the agent cursor element, if it exists, and persists that state.
 *
 * Does nothing when `window` or `document` is undefined, or when the node
 * with id AGENT_CURSOR_ID is missing or is not an HTMLElement. Otherwise the
 * cursor's current position is read, the cursor is made invisible, and the
 * position is persisted together with `visible = false`.
 *
 * @returns {void}
 */
const hideAgentCursor = () => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  if (!hasDom) {
    return;
  }
  const cursorElement = document.getElementById(AGENT_CURSOR_ID);
  if (cursorElement instanceof HTMLElement) {
    // Read the position before hiding so the last known coordinates are stored.
    const position = getCursorPosition(cursorElement);
    setCursorVisibility(cursorElement, false);
    persistCursorState(position.x, position.y, false);
  }
};
1519
1597
  let cursorHoverTrackingInitialized = false;
1520
1598
  const initializeCursorHoverTracking = () => {
1521
1599
  if (cursorHoverTrackingInitialized) {
@@ -1954,6 +2032,9 @@ if (typeof document !== "undefined") {
1954
2032
  }
1955
2033
  }
1956
2034
  const TTS_WS_RETRY_DELAYS_MS = [250, 750, 1500];
2035
+ const BULUT_AUDIO_STOP_EVENT = "bulut:audio-stop";
2036
+ const activeAudioElements = /* @__PURE__ */ new Set();
2037
+ let audioPlaybackGeneration = 0;
1957
2038
  const normalizeBaseUrl = (baseUrl) => {
1958
2039
  const trimmed = baseUrl.trim().replace(/\/+$/, "");
1959
2040
  if (/^https?:\/\//i.test(trimmed)) {
@@ -2017,6 +2098,27 @@ const parseErrorBody = async (response) => {
2017
2098
  const sleep = (ms) => new Promise((resolve) => {
2018
2099
  setTimeout(resolve, ms);
2019
2100
  });
2101
/**
 * Adds an audio element to the set of elements that stopActiveAudioPlayback
 * will tear down.
 *
 * @param {HTMLAudioElement} audioElement - Element that is about to play.
 * @returns {void}
 */
const registerActiveAudioElement = (audioElement) => {
  void activeAudioElements.add(audioElement);
};

/**
 * Removes an audio element from the active set (no-op if it is not present).
 *
 * @param {HTMLAudioElement} audioElement - Element that finished or was torn down.
 * @returns {void}
 */
const unregisterActiveAudioElement = (audioElement) => {
  void activeAudioElements.delete(audioElement);
};

/**
 * Tells whether the playback generation counter has changed since the given
 * snapshot was taken.
 *
 * @param {number} generationAtStart - Value previously returned by getAudioPlaybackGeneration.
 * @returns {boolean} True when the current generation differs from the snapshot.
 */
const wasPlaybackStoppedAfter = (generationAtStart) => {
  const currentGeneration = audioPlaybackGeneration;
  return currentGeneration !== generationAtStart;
};

/**
 * Returns the current playback generation counter.
 *
 * @returns {number} The current generation.
 */
const getAudioPlaybackGeneration = () => {
  return audioPlaybackGeneration;
};
2109
/**
 * Stops every registered audio element and invalidates in-flight playback.
 *
 * The generation counter is bumped first, so code holding an older snapshot
 * sees wasPlaybackStoppedAfter() return true. Each element then receives the
 * BULUT_AUDIO_STOP_EVENT, is paused, has its `src` removed and is reloaded.
 * A failure on one element is ignored and does not prevent the remaining
 * elements from being stopped. Elements are not removed from the set here.
 *
 * @returns {void}
 */
const stopActiveAudioPlayback = () => {
  audioPlaybackGeneration += 1;
  // Iterate over a snapshot: event listeners may mutate the live set.
  [...activeAudioElements].forEach((audioElement) => {
    try {
      audioElement.dispatchEvent(new Event(BULUT_AUDIO_STOP_EVENT));
      audioElement.pause();
      audioElement.removeAttribute("src");
      audioElement.load();
    } catch {
      // Best effort: keep stopping the other elements.
    }
  });
};
2020
2122
  const base64ToUint8Array = (base64) => {
2021
2123
  const cleanBase64 = base64.replace(/^data:audio\/\w+;base64,/, "");
2022
2124
  const binaryString = atob(cleanBase64);
@@ -2072,20 +2174,31 @@ const waitForPlaybackEnd = async (audioElement) => {
2072
2174
  cleanup();
2073
2175
  reject(new Error("Ses oynatma hatası oluştu."));
2074
2176
  };
2177
+ const onForcedStop = () => {
2178
+ cleanup();
2179
+ resolve();
2180
+ };
2075
2181
  const cleanup = () => {
2076
2182
  window.clearInterval(watchdog);
2077
2183
  audioElement.removeEventListener("ended", onEnded);
2078
2184
  audioElement.removeEventListener("error", onError);
2185
+ audioElement.removeEventListener(BULUT_AUDIO_STOP_EVENT, onForcedStop);
2079
2186
  };
2080
2187
  audioElement.addEventListener("ended", onEnded);
2081
2188
  audioElement.addEventListener("error", onError);
2189
+ audioElement.addEventListener(BULUT_AUDIO_STOP_EVENT, onForcedStop);
2082
2190
  });
2083
2191
  };
2084
2192
  const playBufferedAudio = async (chunks, mimeType, sampleRate = 16e3, onAudioStateChange) => {
2193
+ const playbackGeneration = getAudioPlaybackGeneration();
2085
2194
  if (chunks.length === 0) {
2086
2195
  onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2087
2196
  return;
2088
2197
  }
2198
+ if (wasPlaybackStoppedAfter(playbackGeneration)) {
2199
+ onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2200
+ return;
2201
+ }
2089
2202
  const totalBytes = chunks.reduce((acc, c2) => acc + c2.byteLength, 0);
2090
2203
  console.log(`[Bulut] Playing buffered audio: ${chunks.length} chunks, ${totalBytes} bytes, type=${mimeType}`);
2091
2204
  onAudioStateChange == null ? void 0 : onAudioStateChange("fallback");
@@ -2122,10 +2235,15 @@ const playBufferedAudio = async (chunks, mimeType, sampleRate = 16e3, onAudioSta
2122
2235
  const audioElement = new Audio();
2123
2236
  const objectUrl = URL.createObjectURL(blob);
2124
2237
  try {
2238
+ registerActiveAudioElement(audioElement);
2125
2239
  audioElement.preload = "auto";
2126
2240
  audioElement.autoplay = true;
2127
2241
  audioElement.setAttribute("playsinline", "true");
2128
2242
  audioElement.src = objectUrl;
2243
+ if (wasPlaybackStoppedAfter(playbackGeneration)) {
2244
+ onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2245
+ return;
2246
+ }
2129
2247
  await audioElement.play();
2130
2248
  onAudioStateChange == null ? void 0 : onAudioStateChange("playing");
2131
2249
  await waitForPlaybackEnd(audioElement);
@@ -2135,6 +2253,7 @@ const playBufferedAudio = async (chunks, mimeType, sampleRate = 16e3, onAudioSta
2135
2253
  onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2136
2254
  throw err;
2137
2255
  } finally {
2256
+ unregisterActiveAudioElement(audioElement);
2138
2257
  audioElement.pause();
2139
2258
  audioElement.removeAttribute("src");
2140
2259
  audioElement.load();
@@ -2175,6 +2294,7 @@ async function transcribeAudio(baseUrl, file, projectId, sessionId, language, on
2175
2294
  }
2176
2295
  const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2177
2296
  const wsUrl = toWebSocketUrl(baseUrl, "/chat/stt/ws");
2297
+ console.info("[Bulut] STT WS connecting to", wsUrl);
2178
2298
  const socket = new WebSocket(wsUrl);
2179
2299
  let seq = 0;
2180
2300
  let finalText = "";
@@ -2197,6 +2317,7 @@ const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2197
2317
  const rejectAll = (error) => {
2198
2318
  if (settled) return;
2199
2319
  settled = true;
2320
+ console.warn("[Bulut] STT WS rejected:", error.message);
2200
2321
  rejectStart == null ? void 0 : rejectStart(error);
2201
2322
  rejectDone == null ? void 0 : rejectDone(error);
2202
2323
  };
@@ -2210,15 +2331,9 @@ const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2210
2331
  session_id: finalSessionId
2211
2332
  });
2212
2333
  };
2213
- const enqueueSend = (payload) => {
2214
- sendQueue = sendQueue.then(() => {
2215
- if (stopped || socket.readyState !== WebSocket.OPEN) return;
2216
- socket.send(JSON.stringify(payload));
2217
- });
2218
- return sendQueue;
2219
- };
2220
2334
  socket.onopen = () => {
2221
2335
  var _a;
2336
+ console.info("[Bulut] STT WS connected, sending start");
2222
2337
  (_a = events.onRequestSent) == null ? void 0 : _a.call(events);
2223
2338
  socket.send(
2224
2339
  JSON.stringify({
@@ -2235,6 +2350,7 @@ const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2235
2350
  const payload = parseSttWsEventPayload(String(event.data));
2236
2351
  if (!payload) return;
2237
2352
  if (payload.type === "start_ack" && typeof payload.session_id === "string") {
2353
+ console.info("[Bulut] STT WS start_ack received, session:", payload.session_id);
2238
2354
  finalSessionId = payload.session_id;
2239
2355
  (_a = events.onSessionId) == null ? void 0 : _a.call(events, payload.session_id);
2240
2356
  resolveStart == null ? void 0 : resolveStart();
@@ -2248,6 +2364,7 @@ const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2248
2364
  return;
2249
2365
  }
2250
2366
  if (payload.type === "final" && typeof payload.text === "string") {
2367
+ console.info("[Bulut] STT WS final text received:", payload.text.slice(0, 80));
2251
2368
  finalText = payload.text;
2252
2369
  if (typeof payload.session_id === "string") {
2253
2370
  finalSessionId = payload.session_id;
@@ -2256,20 +2373,24 @@ const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2256
2373
  return;
2257
2374
  }
2258
2375
  if (payload.type === "done") {
2376
+ console.info("[Bulut] STT WS done");
2259
2377
  resolveDoneIfPossible();
2260
2378
  socket.close();
2261
2379
  return;
2262
2380
  }
2263
2381
  if (payload.type === "error") {
2382
+ console.error("[Bulut] STT WS server error:", payload.error);
2264
2383
  const err = buildError(payload.error || "stt_ws_error", payload.retryable !== false);
2265
2384
  rejectAll(err);
2266
2385
  socket.close();
2267
2386
  }
2268
2387
  };
2269
- socket.onerror = () => {
2388
+ socket.onerror = (ev) => {
2389
+ console.error("[Bulut] STT WS transport error", ev);
2270
2390
  rejectAll(buildError("stt_ws_transport_error", true));
2271
2391
  };
2272
- socket.onclose = () => {
2392
+ socket.onclose = (ev) => {
2393
+ console.info("[Bulut] STT WS closed code=%d reason=%s", ev.code, ev.reason);
2273
2394
  if (settled) return;
2274
2395
  if (finalText && finalSessionId) {
2275
2396
  resolveDoneIfPossible();
@@ -2278,23 +2399,30 @@ const startSttWebSocketStream = (baseUrl, config, events = {}) => {
2278
2399
  rejectAll(buildError("stt_ws_closed_before_done", true));
2279
2400
  };
2280
2401
  return {
2281
- pushChunk: async (chunk) => {
2282
- if (stopped || chunk.size === 0) return;
2283
- await startPromise;
2284
- if (stopped) return;
2285
- const audio = await blobToBase64(chunk);
2286
- seq += 1;
2287
- await enqueueSend({
2288
- type: "chunk",
2289
- seq,
2290
- audio
2402
+ pushChunk: (chunk) => {
2403
+ if (stopped || chunk.size === 0) return Promise.resolve();
2404
+ sendQueue = sendQueue.then(async () => {
2405
+ if (stopped) return;
2406
+ await startPromise;
2407
+ if (stopped) return;
2408
+ const audio = await blobToBase64(chunk);
2409
+ seq += 1;
2410
+ if (stopped || socket.readyState !== WebSocket.OPEN) return;
2411
+ console.debug("[Bulut] STT WS sending chunk seq=%d size=%d", seq, chunk.size);
2412
+ socket.send(JSON.stringify({ type: "chunk", seq, audio }));
2291
2413
  });
2414
+ return sendQueue;
2292
2415
  },
2293
- stop: async () => {
2294
- await startPromise;
2295
- if (!stopped) {
2296
- await enqueueSend({ type: "stop" });
2297
- }
2416
+ stop: () => {
2417
+ console.info("[Bulut] STT WS stop requested, draining %d pending chunks", seq);
2418
+ sendQueue = sendQueue.then(async () => {
2419
+ await startPromise;
2420
+ if (stopped) return;
2421
+ if (socket.readyState === WebSocket.OPEN) {
2422
+ console.info("[Bulut] STT WS sending stop after seq=%d", seq);
2423
+ socket.send(JSON.stringify({ type: "stop" }));
2424
+ }
2425
+ });
2298
2426
  return donePromise;
2299
2427
  },
2300
2428
  cancel: () => {
@@ -2499,6 +2627,50 @@ const collectTtsViaWebSocket = async (baseUrl, assistantText, voice, accessibili
2499
2627
  }
2500
2628
  throw buildError("tts_ws_exhausted", true);
2501
2629
  };
2630
/**
 * Synthesizes `text` to speech and plays it.
 *
 * Audio is collected over the TTS WebSocket first; if that fails, the SSE
 * transport is tried. If playback was stopped (generation counter changed)
 * while the audio was being collected, nothing is played.
 *
 * State callback sequence: "rendering" is reported before collection, and
 * "done" is always reported afterwards - either here (collection failed,
 * playback was stopped, or no audio chunks came back) or by
 * playBufferedAudio.
 *
 * @param {string} baseUrl - Backend base URL passed through to the TTS collectors.
 * @param {string} text - Text to speak; blank text is a no-op.
 * @param {string} voice - Voice identifier passed through to the TTS collectors.
 * @param {boolean} accessibilityMode - Passed through to the TTS collectors.
 * @param {(state: string) => void} [onAudioStateChange] - Optional state listener.
 * @returns {Promise<void>}
 * @throws Rethrows the SSE collector's error when both transports fail, and
 *   any error raised by playBufferedAudio.
 */
const speakText = async (baseUrl, text, voice, accessibilityMode, onAudioStateChange) => {
  const trimmed = text.trim();
  if (!trimmed) return;
  const notify = (state) => {
    if (onAudioStateChange != null) {
      onAudioStateChange(state);
    }
  };
  const playbackGeneration = getAudioPlaybackGeneration();
  console.info(`[Bulut] speakText start (${trimmed.length} chars)`);
  notify("rendering");
  // NOTE(review): presumably the collectors poll this predicate to abort
  // early - confirm against collectTtsViaWebSocket / collectTtsViaSse.
  const neverStopped = () => false;
  const noop = () => {
  };
  let ttsResult;
  try {
    try {
      ttsResult = await collectTtsViaWebSocket(
        baseUrl,
        trimmed,
        voice,
        accessibilityMode,
        neverStopped,
        noop
      );
    } catch (wsError) {
      // Surface the reason for the fallback instead of swallowing it.
      console.warn("[Bulut] speakText TTS WS failed, falling back to SSE:", wsError);
      ttsResult = await collectTtsViaSse(
        baseUrl,
        trimmed,
        voice,
        accessibilityMode,
        neverStopped,
        noop
      );
    }
  } catch (error) {
    // Both transports failed: leave the listener in a terminal state rather
    // than stuck on "rendering", then let the caller see the error.
    notify("done");
    throw error;
  }
  if (wasPlaybackStoppedAfter(playbackGeneration)) {
    notify("done");
    return;
  }
  if (ttsResult.chunks.length > 0) {
    await playBufferedAudio(
      ttsResult.chunks,
      ttsResult.mimeType,
      ttsResult.sampleRate,
      onAudioStateChange
    );
  } else {
    notify("done");
  }
};
2502
2674
  const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config, events, executeTool) => {
2503
2675
  let isStopped = false;
2504
2676
  let activeSocket = null;
@@ -2534,6 +2706,7 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2534
2706
  activeSocket = socket;
2535
2707
  let finalReply = "";
2536
2708
  let resolved = false;
2709
+ let accumulatedDelta = "";
2537
2710
  const finish = (reply) => {
2538
2711
  if (resolved) return;
2539
2712
  resolved = true;
@@ -2557,7 +2730,7 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2557
2730
  }));
2558
2731
  };
2559
2732
  socket.onmessage = async (event) => {
2560
- var _a2, _b2, _c2, _d2, _e, _f, _g, _h;
2733
+ var _a2, _b2, _c2, _d2, _e, _f, _g, _h, _i;
2561
2734
  let data;
2562
2735
  try {
2563
2736
  data = JSON.parse(String(event.data));
@@ -2580,12 +2753,17 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2580
2753
  return;
2581
2754
  }
2582
2755
  if (msgType === "reply_delta" && typeof data.delta === "string") {
2756
+ accumulatedDelta += data.delta;
2583
2757
  (_c2 = events.onAssistantDelta) == null ? void 0 : _c2.call(events, data.delta);
2584
2758
  return;
2585
2759
  }
2586
2760
  if (msgType === "tool_calls" && Array.isArray(data.calls)) {
2587
2761
  const calls = data.calls;
2588
- (_d2 = events.onToolCalls) == null ? void 0 : _d2.call(events, calls);
2762
+ if (accumulatedDelta.trim()) {
2763
+ (_d2 = events.onIntermediateReply) == null ? void 0 : _d2.call(events, accumulatedDelta.trim());
2764
+ }
2765
+ accumulatedDelta = "";
2766
+ (_e = events.onToolCalls) == null ? void 0 : _e.call(events, calls);
2589
2767
  const results = [];
2590
2768
  for (const call of calls) {
2591
2769
  const isNavigate = call.tool === "navigate";
@@ -2608,7 +2786,7 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2608
2786
  if (isNavigate) {
2609
2787
  clearPendingAgentResume();
2610
2788
  }
2611
- (_e = events.onToolResult) == null ? void 0 : _e.call(events, call.call_id, call.tool, result.result);
2789
+ (_f = events.onToolResult) == null ? void 0 : _f.call(events, call.call_id, call.tool, result.result);
2612
2790
  results.push(result);
2613
2791
  }
2614
2792
  if (socket.readyState === WebSocket.OPEN) {
@@ -2621,9 +2799,9 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2621
2799
  }
2622
2800
  if (msgType === "agent_done") {
2623
2801
  finalReply = data.final_reply || "";
2624
- (_f = events.onAssistantDone) == null ? void 0 : _f.call(events, finalReply);
2802
+ (_g = events.onAssistantDone) == null ? void 0 : _g.call(events, finalReply);
2625
2803
  if (typeof data.session_id === "string") {
2626
- (_g = events.onSessionId) == null ? void 0 : _g.call(events, data.session_id);
2804
+ (_h = events.onSessionId) == null ? void 0 : _h.call(events, data.session_id);
2627
2805
  }
2628
2806
  finish(finalReply);
2629
2807
  return;
@@ -2631,7 +2809,7 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2631
2809
  if (msgType === "error") {
2632
2810
  const errMsg = data.error || "Agent error";
2633
2811
  errorEmitted = true;
2634
- (_h = events.onError) == null ? void 0 : _h.call(events, errMsg);
2812
+ (_i = events.onError) == null ? void 0 : _i.call(events, errMsg);
2635
2813
  fail(new Error(errMsg));
2636
2814
  return;
2637
2815
  }
@@ -2650,6 +2828,9 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2650
2828
  });
2651
2829
  activeSocket = null;
2652
2830
  if (isStopped || !assistantText) {
2831
+ if (!isStopped) {
2832
+ hideAgentCursor();
2833
+ }
2653
2834
  return resolve();
2654
2835
  }
2655
2836
  console.info(
@@ -2694,6 +2875,9 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2694
2875
  } else {
2695
2876
  (_c = events.onAudioStateChange) == null ? void 0 : _c.call(events, "done");
2696
2877
  }
2878
+ if (!isStopped) {
2879
+ hideAgentCursor();
2880
+ }
2697
2881
  resolve();
2698
2882
  } catch (err) {
2699
2883
  if (!errorEmitted) {
@@ -2713,6 +2897,7 @@ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config,
2713
2897
  return {
2714
2898
  stop: () => {
2715
2899
  isStopped = true;
2900
+ stopActiveAudioPlayback();
2716
2901
  if (activeReader) {
2717
2902
  activeReader.cancel().catch(() => {
2718
2903
  });
@@ -2744,6 +2929,7 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2744
2929
  let finalReply = "";
2745
2930
  let resolved = false;
2746
2931
  let effectiveSessionId = sessionId || "";
2932
+ let accumulatedDelta = "";
2747
2933
  const finish = (reply) => {
2748
2934
  if (resolved) return;
2749
2935
  resolved = true;
@@ -2766,7 +2952,7 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2766
2952
  }));
2767
2953
  };
2768
2954
  socket.onmessage = async (event) => {
2769
- var _a2, _b2, _c2, _d, _e, _f, _g, _h;
2955
+ var _a2, _b2, _c2, _d, _e, _f, _g, _h, _i;
2770
2956
  let data;
2771
2957
  try {
2772
2958
  data = JSON.parse(String(event.data));
@@ -2788,12 +2974,17 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2788
2974
  return;
2789
2975
  }
2790
2976
  if (msgType === "reply_delta" && typeof data.delta === "string") {
2977
+ accumulatedDelta += data.delta;
2791
2978
  (_c2 = events.onAssistantDelta) == null ? void 0 : _c2.call(events, data.delta);
2792
2979
  return;
2793
2980
  }
2794
2981
  if (msgType === "tool_calls" && Array.isArray(data.calls)) {
2795
2982
  const calls = data.calls;
2796
- (_d = events.onToolCalls) == null ? void 0 : _d.call(events, calls);
2983
+ if (accumulatedDelta.trim()) {
2984
+ (_d = events.onIntermediateReply) == null ? void 0 : _d.call(events, accumulatedDelta.trim());
2985
+ }
2986
+ accumulatedDelta = "";
2987
+ (_e = events.onToolCalls) == null ? void 0 : _e.call(events, calls);
2797
2988
  const results = [];
2798
2989
  for (const call of calls) {
2799
2990
  const isNavigate = call.tool === "navigate";
@@ -2816,7 +3007,7 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2816
3007
  if (isNavigate) {
2817
3008
  clearPendingAgentResume();
2818
3009
  }
2819
- (_e = events.onToolResult) == null ? void 0 : _e.call(events, call.call_id, call.tool, result.result);
3010
+ (_f = events.onToolResult) == null ? void 0 : _f.call(events, call.call_id, call.tool, result.result);
2820
3011
  results.push(result);
2821
3012
  }
2822
3013
  if (socket.readyState === WebSocket.OPEN) {
@@ -2829,9 +3020,9 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2829
3020
  }
2830
3021
  if (msgType === "agent_done") {
2831
3022
  finalReply = data.final_reply || "";
2832
- (_f = events.onAssistantDone) == null ? void 0 : _f.call(events, finalReply);
3023
+ (_g = events.onAssistantDone) == null ? void 0 : _g.call(events, finalReply);
2833
3024
  if (typeof data.session_id === "string") {
2834
- (_g = events.onSessionId) == null ? void 0 : _g.call(events, data.session_id);
3025
+ (_h = events.onSessionId) == null ? void 0 : _h.call(events, data.session_id);
2835
3026
  }
2836
3027
  finish(finalReply);
2837
3028
  return;
@@ -2839,7 +3030,7 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2839
3030
  if (msgType === "error") {
2840
3031
  const errMsg = data.error || "Agent error";
2841
3032
  errorEmitted = true;
2842
- (_h = events.onError) == null ? void 0 : _h.call(events, errMsg);
3033
+ (_i = events.onError) == null ? void 0 : _i.call(events, errMsg);
2843
3034
  fail(new Error(errMsg));
2844
3035
  return;
2845
3036
  }
@@ -2853,7 +3044,12 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2853
3044
  socket.onclose = () => finish(finalReply);
2854
3045
  });
2855
3046
  activeSocket = null;
2856
- if (isStopped || !assistantText) return resolve();
3047
+ if (isStopped || !assistantText) {
3048
+ if (!isStopped) {
3049
+ hideAgentCursor();
3050
+ }
3051
+ return resolve();
3052
+ }
2857
3053
  (_a = events.onAudioStateChange) == null ? void 0 : _a.call(events, "rendering");
2858
3054
  let ttsResult;
2859
3055
  try {
@@ -2890,6 +3086,9 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2890
3086
  } else {
2891
3087
  (_b = events.onAudioStateChange) == null ? void 0 : _b.call(events, "done");
2892
3088
  }
3089
+ if (!isStopped) {
3090
+ hideAgentCursor();
3091
+ }
2893
3092
  resolve();
2894
3093
  } catch (err) {
2895
3094
  if (!errorEmitted) {
@@ -2909,6 +3108,7 @@ const agentTextChatStream = (baseUrl, userText, projectId, sessionId, config, ev
2909
3108
  return {
2910
3109
  stop: () => {
2911
3110
  isStopped = true;
3111
+ stopActiveAudioPlayback();
2912
3112
  if (activeReader) activeReader.cancel().catch(() => {
2913
3113
  });
2914
3114
  if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
@@ -2954,6 +3154,7 @@ Sayfa bağlamı: ${pageContext}`
2954
3154
  activeSocket = socket;
2955
3155
  let finalReply = "";
2956
3156
  let resolved = false;
3157
+ let accumulatedDelta = "";
2957
3158
  const finish = (reply) => {
2958
3159
  if (resolved) return;
2959
3160
  resolved = true;
@@ -2978,7 +3179,7 @@ Sayfa bağlamı: ${pageContext}`
2978
3179
  }));
2979
3180
  };
2980
3181
  socket.onmessage = async (event) => {
2981
- var _a2, _b2, _c2, _d, _e, _f, _g, _h;
3182
+ var _a2, _b2, _c2, _d, _e, _f, _g, _h, _i;
2982
3183
  let data;
2983
3184
  try {
2984
3185
  data = JSON.parse(String(event.data));
@@ -3000,12 +3201,17 @@ Sayfa bağlamı: ${pageContext}`
3000
3201
  return;
3001
3202
  }
3002
3203
  if (msgType === "reply_delta" && typeof data.delta === "string") {
3204
+ accumulatedDelta += data.delta;
3003
3205
  (_c2 = events.onAssistantDelta) == null ? void 0 : _c2.call(events, data.delta);
3004
3206
  return;
3005
3207
  }
3006
3208
  if (msgType === "tool_calls" && Array.isArray(data.calls)) {
3007
3209
  const calls = data.calls;
3008
- (_d = events.onToolCalls) == null ? void 0 : _d.call(events, calls);
3210
+ if (accumulatedDelta.trim()) {
3211
+ (_d = events.onIntermediateReply) == null ? void 0 : _d.call(events, accumulatedDelta.trim());
3212
+ }
3213
+ accumulatedDelta = "";
3214
+ (_e = events.onToolCalls) == null ? void 0 : _e.call(events, calls);
3009
3215
  const results = [];
3010
3216
  for (const call of calls) {
3011
3217
  const isNavigate = call.tool === "navigate";
@@ -3028,7 +3234,7 @@ Sayfa bağlamı: ${pageContext}`
3028
3234
  if (isNavigate) {
3029
3235
  clearPendingAgentResume();
3030
3236
  }
3031
- (_e = events.onToolResult) == null ? void 0 : _e.call(events, call.call_id, call.tool, result.result);
3237
+ (_f = events.onToolResult) == null ? void 0 : _f.call(events, call.call_id, call.tool, result.result);
3032
3238
  results.push(result);
3033
3239
  }
3034
3240
  if (socket.readyState === WebSocket.OPEN) {
@@ -3038,9 +3244,9 @@ Sayfa bağlamı: ${pageContext}`
3038
3244
  }
3039
3245
  if (msgType === "agent_done") {
3040
3246
  finalReply = data.final_reply || "";
3041
- (_f = events.onAssistantDone) == null ? void 0 : _f.call(events, finalReply);
3247
+ (_g = events.onAssistantDone) == null ? void 0 : _g.call(events, finalReply);
3042
3248
  if (typeof data.session_id === "string") {
3043
- (_g = events.onSessionId) == null ? void 0 : _g.call(events, data.session_id);
3249
+ (_h = events.onSessionId) == null ? void 0 : _h.call(events, data.session_id);
3044
3250
  }
3045
3251
  finish(finalReply);
3046
3252
  return;
@@ -3048,7 +3254,7 @@ Sayfa bağlamı: ${pageContext}`
3048
3254
  if (msgType === "error") {
3049
3255
  const errMsg = data.error || "Agent error";
3050
3256
  errorEmitted = true;
3051
- (_h = events.onError) == null ? void 0 : _h.call(events, errMsg);
3257
+ (_i = events.onError) == null ? void 0 : _i.call(events, errMsg);
3052
3258
  fail(new Error(errMsg));
3053
3259
  return;
3054
3260
  }
@@ -3062,7 +3268,12 @@ Sayfa bağlamı: ${pageContext}`
3062
3268
  socket.onclose = () => finish(finalReply);
3063
3269
  });
3064
3270
  activeSocket = null;
3065
- if (isStopped || !assistantText) return resolve();
3271
+ if (isStopped || !assistantText) {
3272
+ if (!isStopped) {
3273
+ hideAgentCursor();
3274
+ }
3275
+ return resolve();
3276
+ }
3066
3277
  console.info(`[Bulut] TTS start mode=resume voice=${resumeState.voice}`);
3067
3278
  (_a = events.onAudioStateChange) == null ? void 0 : _a.call(events, "rendering");
3068
3279
  let ttsResult;
@@ -3103,6 +3314,9 @@ Sayfa bağlamı: ${pageContext}`
3103
3314
  } else {
3104
3315
  (_b = events.onAudioStateChange) == null ? void 0 : _b.call(events, "done");
3105
3316
  }
3317
+ if (!isStopped) {
3318
+ hideAgentCursor();
3319
+ }
3106
3320
  resolve();
3107
3321
  } catch (err) {
3108
3322
  if (!errorEmitted) {
@@ -3122,6 +3336,7 @@ Sayfa bağlamı: ${pageContext}`
3122
3336
  return {
3123
3337
  stop: () => {
3124
3338
  isStopped = true;
3339
+ stopActiveAudioPlayback();
3125
3340
  if (activeReader) activeReader.cancel().catch(() => {
3126
3341
  });
3127
3342
  if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
@@ -3214,6 +3429,9 @@ const resolveStatusText = (flags) => {
3214
3429
  if (flags.isBusy) return STATUS_LABELS.loading;
3215
3430
  return STATUS_LABELS.ready;
3216
3431
  };
3432
// Returns true when a status label should be shown: either an explicit
// `statusOverride` is set, or at least one activity flag on `flags` is truthy
// (busy, recording, transcribing, thinking, rendering audio, playing audio,
// or running tools). The result is always coerced to a boolean.
+ const hasActiveStatus = (flags, statusOverride) => Boolean(
3433
+ statusOverride || flags.isBusy || flags.isRecording || flags.isTranscribing || flags.isThinking || flags.isRenderingAudio || flags.isPlayingAudio || flags.isRunningTools
3434
+ );
3217
3435
  const formatDurationMs = (durationMs) => {
3218
3436
  const totalSeconds = Math.max(0, Math.floor(durationMs / 1e3));
3219
3437
  const minutes = Math.floor(totalSeconds / 60).toString().padStart(2, "0");
@@ -3291,15 +3509,18 @@ const ChatWindow = ({
3291
3509
  const [isRunningTools, setIsRunningTools] = d(false);
3292
3510
  const [recordingDurationMs, setRecordingDurationMs] = d(0);
3293
3511
  const [statusOverride, setStatusOverride] = d(null);
3294
- const resolvedStatusText = resolveStatusText({
3512
+ const statusFlags = {
3295
3513
  isBusy,
3296
3514
  isRecording,
3297
3515
  isTranscribing,
3298
3516
  isThinking,
3517
+ isRenderingAudio,
3299
3518
  isPlayingAudio,
3300
3519
  isRunningTools
3301
- });
3302
- const statusText = statusOverride ?? resolvedStatusText;
3520
+ };
3521
+ const resolvedStatusText = resolveStatusText(statusFlags);
3522
+ const showStatus = hasActiveStatus(statusFlags, statusOverride);
3523
+ const statusText = showStatus ? statusOverride ?? resolvedStatusText : STATUS_LABELS.ready;
3303
3524
  const isBusyRef = A(isBusy);
3304
3525
  const isRecordingRef = A(isRecording);
3305
3526
  const nextMessageIdRef = A(getNextMessageId(messages));
@@ -3343,6 +3564,8 @@ const ChatWindow = ({
3343
3564
  const liveTranscriptionMessageIdRef = A(null);
3344
3565
  const liveTranscriptionTextRef = A("");
3345
3566
  const autoListenSuppressedRef = A(false);
3567
+ const requestEpochRef = A(0);
3568
+ const sttSendCuePlayedRef = A(false);
3346
3569
  y(() => {
3347
3570
  isBusyRef.current = isBusy;
3348
3571
  }, [isBusy]);
@@ -3366,7 +3589,7 @@ const ChatWindow = ({
3366
3589
  onPreviewChange((lastAssistant2 == null ? void 0 : lastAssistant2.text) ?? getGreetingText(config.agentName));
3367
3590
  return;
3368
3591
  }
3369
- if (isBusy || isTranscribing || isThinking || isRunningTools) {
3592
+ if (showStatus) {
3370
3593
  const st = statusOverride ?? resolveStatusText({
3371
3594
  isBusy,
3372
3595
  isRecording,
@@ -3389,11 +3612,27 @@ const ChatWindow = ({
3389
3612
  isPlayingAudio,
3390
3613
  isRenderingAudio,
3391
3614
  statusOverride,
3615
+ showStatus,
3392
3616
  messages
3393
3617
  ]);
3394
3618
  const playSfx = (name) => {
3395
3619
  playCue(name);
3396
3620
  };
3621
// Starts a new request epoch: increments the counter held in
// `requestEpochRef` and returns the new value. Callers keep the returned
// number and later compare it against the ref to detect that a newer request
// (or an invalidation) has superseded theirs.
+ const beginRequestEpoch = () => {
3622
+ requestEpochRef.current += 1;
3623
+ return requestEpochRef.current;
3624
+ };
3625
// Increments the counter in `requestEpochRef` without returning it, so every
// epoch value captured earlier no longer equals the current one.
+ const invalidateRequestEpoch = () => {
3626
+ requestEpochRef.current += 1;
3627
+ };
3628
// True when `epoch` is strictly equal to the value currently stored in
// `requestEpochRef`, i.e. the counter has not been incremented since `epoch`
// was captured.
+ const isCurrentRequestEpoch = (epoch) => requestEpochRef.current === epoch;
3629
// Plays the "sent" sound cue at most once per arming: the first call sets
// `sttSendCuePlayedRef.current` to true and plays the cue; later calls return
// early until that ref is set back to false elsewhere.
+ const playSttSentCueOnce = () => {
3630
+ if (sttSendCuePlayedRef.current) {
3631
+ return;
3632
+ }
3633
+ sttSendCuePlayedRef.current = true;
3634
+ playSfx("sent");
3635
+ };
3397
3636
  y(() => {
3398
3637
  if (typeof localStorage !== "undefined") {
3399
3638
  localStorage.setItem(STORAGE_KEY, JSON.stringify(messages));
@@ -3497,9 +3736,11 @@ const ChatWindow = ({
3497
3736
  };
3498
3737
  y(
3499
3738
  () => () => {
3739
+ invalidateRequestEpoch();
3500
3740
  clearMicHoldTimeout();
3501
3741
  pendingStopAfterStartRef.current = false;
3502
3742
  stopActiveStream();
3743
+ stopActiveAudioPlayback();
3503
3744
  cancelActiveSttWs();
3504
3745
  cleanupVAD();
3505
3746
  stopStreamTracks();
@@ -3529,6 +3770,7 @@ const ChatWindow = ({
3529
3770
  localStorage.setItem(SESSION_ID_KEY, resumeState.sessionId);
3530
3771
  }
3531
3772
  }
3773
+ const requestEpoch = beginRequestEpoch();
3532
3774
  setIsBusy(true);
3533
3775
  setIsRunningTools(true);
3534
3776
  setStatusOverride(STATUS_LABELS.thinking);
@@ -3547,6 +3789,7 @@ const ChatWindow = ({
3547
3789
  freshPageContext,
3548
3790
  {
3549
3791
  onSessionId: (sid) => {
3792
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3550
3793
  if (sid && sid !== sessionIdRef.current) {
3551
3794
  sessionIdRef.current = sid;
3552
3795
  if (typeof localStorage !== "undefined") {
@@ -3555,6 +3798,7 @@ const ChatWindow = ({
3555
3798
  }
3556
3799
  },
3557
3800
  onAssistantDelta: (delta) => {
3801
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3558
3802
  setIsRunningTools(false);
3559
3803
  setIsThinking(true);
3560
3804
  setStatusOverride(null);
@@ -3572,6 +3816,7 @@ const ChatWindow = ({
3572
3816
  }
3573
3817
  },
3574
3818
  onAssistantDone: (assistantText) => {
3819
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3575
3820
  playSfx("completed");
3576
3821
  setStatusOverride(null);
3577
3822
  setIsThinking(false);
@@ -3590,7 +3835,18 @@ const ChatWindow = ({
3590
3835
  );
3591
3836
  }
3592
3837
  },
3838
+ onIntermediateReply: (text) => {
3839
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3840
+ void speakText(
3841
+ config.backendBaseUrl,
3842
+ text,
3843
+ config.voice,
3844
+ accessibilityMode,
3845
+ (state) => handleAudioStateChange(state, requestEpoch)
3846
+ ).catch((err) => console.warn("[Bulut] intermediate TTS failed", err));
3847
+ },
3593
3848
  onToolCalls: (calls) => {
3849
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3594
3850
  if (calls.length > 0) {
3595
3851
  playSfx("toolCall");
3596
3852
  }
@@ -3616,12 +3872,16 @@ const ChatWindow = ({
3616
3872
  onToolResult: () => {
3617
3873
  },
3618
3874
  onIteration: () => {
3875
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3619
3876
  playSfx("thinking");
3620
3877
  setIsThinking(true);
3621
3878
  setStatusOverride(STATUS_LABELS.thinking);
3622
3879
  },
3623
- onAudioStateChange: handleAudioStateChange,
3880
+ onAudioStateChange: (state) => {
3881
+ handleAudioStateChange(state, requestEpoch);
3882
+ },
3624
3883
  onError: (err) => {
3884
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3625
3885
  setStatusOverride(null);
3626
3886
  appendMessage(`Hata: ${err}`, false);
3627
3887
  }
@@ -3631,6 +3891,7 @@ const ChatWindow = ({
3631
3891
  activeStreamControllerRef.current = controller;
3632
3892
  controller.done.catch(() => {
3633
3893
  }).finally(() => {
3894
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3634
3895
  setIsBusy(false);
3635
3896
  setIsRunningTools(false);
3636
3897
  setIsThinking(false);
@@ -3684,7 +3945,10 @@ const ChatWindow = ({
3684
3945
  liveTranscriptionMessageIdRef.current = null;
3685
3946
  liveTranscriptionTextRef.current = "";
3686
3947
  };
3687
- const handleAudioStateChange = (state) => {
3948
+ const handleAudioStateChange = (state, requestEpoch) => {
3949
+ if (typeof requestEpoch === "number" && !isCurrentRequestEpoch(requestEpoch)) {
3950
+ return;
3951
+ }
3688
3952
  if (state === "rendering") {
3689
3953
  setIsRenderingAudio(true);
3690
3954
  setIsPlayingAudio(false);
@@ -3708,7 +3972,10 @@ const ChatWindow = ({
3708
3972
  setIsRenderingAudio(false);
3709
3973
  setIsPlayingAudio(false);
3710
3974
  };
3711
- const finalizeStreamCycle = () => {
3975
+ const finalizeStreamCycle = (requestEpoch) => {
3976
+ if (typeof requestEpoch === "number" && !isCurrentRequestEpoch(requestEpoch)) {
3977
+ return;
3978
+ }
3712
3979
  awaitingAssistantResponseRef.current = false;
3713
3980
  setStatusOverride(null);
3714
3981
  setIsBusy(false);
@@ -3742,6 +4009,7 @@ const ChatWindow = ({
3742
4009
  appendMessage("Ses kaydı metne dönüştürülemedi. Lütfen tekrar deneyin.", false);
3743
4010
  return;
3744
4011
  }
4012
+ const requestEpoch = beginRequestEpoch();
3745
4013
  setIsBusy(true);
3746
4014
  setIsTranscribing(false);
3747
4015
  setIsThinking(true);
@@ -3777,6 +4045,7 @@ const ChatWindow = ({
3777
4045
  },
3778
4046
  {
3779
4047
  onSessionId: (sid) => {
4048
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3780
4049
  if (sid && sid !== sessionIdRef.current) {
3781
4050
  sessionIdRef.current = sid;
3782
4051
  if (typeof localStorage !== "undefined") {
@@ -3785,6 +4054,7 @@ const ChatWindow = ({
3785
4054
  }
3786
4055
  },
3787
4056
  onAssistantDelta: (delta) => {
4057
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3788
4058
  setIsTranscribing(false);
3789
4059
  setIsThinking(true);
3790
4060
  setIsRunningTools(false);
@@ -3806,6 +4076,7 @@ const ChatWindow = ({
3806
4076
  }
3807
4077
  },
3808
4078
  onAssistantDone: (assistantText) => {
4079
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3809
4080
  playSfx("completed");
3810
4081
  awaitingAssistantResponseRef.current = false;
3811
4082
  setStatusOverride(null);
@@ -3825,7 +4096,18 @@ const ChatWindow = ({
3825
4096
  );
3826
4097
  }
3827
4098
  },
4099
+ onIntermediateReply: (text) => {
4100
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
4101
+ void speakText(
4102
+ config.backendBaseUrl,
4103
+ text,
4104
+ config.voice,
4105
+ accessibilityMode,
4106
+ (state) => handleAudioStateChange(state, requestEpoch)
4107
+ ).catch((err) => console.warn("[Bulut] intermediate TTS failed", err));
4108
+ },
3828
4109
  onToolCalls: (calls) => {
4110
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3829
4111
  if (calls.length > 0) {
3830
4112
  playSfx("toolCall");
3831
4113
  }
@@ -3851,12 +4133,16 @@ const ChatWindow = ({
3851
4133
  onToolResult: () => {
3852
4134
  },
3853
4135
  onIteration: () => {
4136
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3854
4137
  playSfx("thinking");
3855
4138
  setIsThinking(true);
3856
4139
  setStatusOverride(STATUS_LABELS.thinking);
3857
4140
  },
3858
- onAudioStateChange: handleAudioStateChange,
4141
+ onAudioStateChange: (state) => {
4142
+ handleAudioStateChange(state, requestEpoch);
4143
+ },
3859
4144
  onError: (err) => {
4145
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3860
4146
  awaitingAssistantResponseRef.current = false;
3861
4147
  setStatusOverride(null);
3862
4148
  appendMessage(`Hata: ${err}`, false);
@@ -3867,13 +4153,14 @@ const ChatWindow = ({
3867
4153
  activeStreamControllerRef.current = controller;
3868
4154
  await controller.done;
3869
4155
  } catch (error) {
4156
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3870
4157
  awaitingAssistantResponseRef.current = false;
3871
4158
  setStatusOverride(null);
3872
4159
  if (error instanceof Error) {
3873
4160
  appendMessage(`Hata: ${error.message}`, false);
3874
4161
  }
3875
4162
  } finally {
3876
- finalizeStreamCycle();
4163
+ finalizeStreamCycle(requestEpoch);
3877
4164
  }
3878
4165
  };
3879
4166
  const handleAudioBlob = async (blob) => {
@@ -3881,6 +4168,7 @@ const ChatWindow = ({
3881
4168
  appendMessage("Hata: Project ID yapılandırılmamış.", false);
3882
4169
  return;
3883
4170
  }
4171
+ const requestEpoch = beginRequestEpoch();
3884
4172
  setIsBusy(true);
3885
4173
  setIsTranscribing(true);
3886
4174
  setIsThinking(false);
@@ -3919,9 +4207,11 @@ const ChatWindow = ({
3919
4207
  },
3920
4208
  {
3921
4209
  onSttRequestSent: () => {
3922
- playSfx("sent");
4210
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
4211
+ playSttSentCueOnce();
3923
4212
  },
3924
4213
  onTranscription: (data) => {
4214
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3925
4215
  if (data.session_id && data.session_id !== sessionIdRef.current) {
3926
4216
  sessionIdRef.current = data.session_id;
3927
4217
  if (typeof localStorage !== "undefined") {
@@ -3944,6 +4234,7 @@ const ChatWindow = ({
3944
4234
  setStatusOverride(STATUS_LABELS.thinking);
3945
4235
  },
3946
4236
  onSessionId: (sid) => {
4237
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3947
4238
  if (sid && sid !== sessionIdRef.current) {
3948
4239
  sessionIdRef.current = sid;
3949
4240
  if (typeof localStorage !== "undefined") {
@@ -3952,6 +4243,7 @@ const ChatWindow = ({
3952
4243
  }
3953
4244
  },
3954
4245
  onAssistantDelta: (delta) => {
4246
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3955
4247
  setIsTranscribing(false);
3956
4248
  setIsThinking(true);
3957
4249
  setIsRunningTools(false);
@@ -3973,6 +4265,7 @@ const ChatWindow = ({
3973
4265
  }
3974
4266
  },
3975
4267
  onAssistantDone: (assistantText) => {
4268
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3976
4269
  playSfx("completed");
3977
4270
  awaitingAssistantResponseRef.current = false;
3978
4271
  setStatusOverride(null);
@@ -3993,7 +4286,18 @@ const ChatWindow = ({
3993
4286
  );
3994
4287
  }
3995
4288
  },
4289
+ onIntermediateReply: (text) => {
4290
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
4291
+ void speakText(
4292
+ config.backendBaseUrl,
4293
+ text,
4294
+ config.voice,
4295
+ accessibilityMode,
4296
+ (state) => handleAudioStateChange(state, requestEpoch)
4297
+ ).catch((err) => console.warn("[Bulut] intermediate TTS failed", err));
4298
+ },
3996
4299
  onToolCalls: (calls) => {
4300
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
3997
4301
  if (calls.length > 0) {
3998
4302
  playSfx("toolCall");
3999
4303
  }
@@ -4019,12 +4323,16 @@ const ChatWindow = ({
4019
4323
  onToolResult: (_callId, _toolName, _result) => {
4020
4324
  },
4021
4325
  onIteration: (_iteration, _maxIterations) => {
4326
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
4022
4327
  playSfx("thinking");
4023
4328
  setIsThinking(true);
4024
4329
  setStatusOverride(STATUS_LABELS.thinking);
4025
4330
  },
4026
- onAudioStateChange: handleAudioStateChange,
4331
+ onAudioStateChange: (state) => {
4332
+ handleAudioStateChange(state, requestEpoch);
4333
+ },
4027
4334
  onError: (err) => {
4335
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
4028
4336
  awaitingAssistantResponseRef.current = false;
4029
4337
  setStatusOverride(null);
4030
4338
  appendMessage(`Hata: ${err}`, false);
@@ -4035,10 +4343,11 @@ const ChatWindow = ({
4035
4343
  activeStreamControllerRef.current = controller;
4036
4344
  await controller.done;
4037
4345
  } catch (error) {
4346
+ if (!isCurrentRequestEpoch(requestEpoch)) return;
4038
4347
  awaitingAssistantResponseRef.current = false;
4039
4348
  setStatusOverride(null);
4040
4349
  } finally {
4041
- finalizeStreamCycle();
4350
+ finalizeStreamCycle(requestEpoch);
4042
4351
  }
4043
4352
  };
4044
4353
  const stopRecording = (options) => {
@@ -4132,6 +4441,7 @@ const ChatWindow = ({
4132
4441
  recorderRef.current = recorder;
4133
4442
  audioChunksRef.current = [];
4134
4443
  clearLiveUserTranscriptionState();
4444
+ sttSendCuePlayedRef.current = false;
4135
4445
  const sttMimeType = (recorder.mimeType || recorderOptions.mimeType || "audio/webm").split(";")[0].trim() || "audio/webm";
4136
4446
  const sttWsController = startSttWebSocketStream(
4137
4447
  config.backendBaseUrl,
@@ -4142,9 +4452,6 @@ const ChatWindow = ({
4142
4452
  mimeType: sttMimeType
4143
4453
  },
4144
4454
  {
4145
- onRequestSent: () => {
4146
- playSfx("sent");
4147
- },
4148
4455
  onSessionId: (sid) => {
4149
4456
  if (!sid || sid === sessionIdRef.current) {
4150
4457
  return;
@@ -4210,6 +4517,7 @@ const ChatWindow = ({
4210
4517
  setStatusOverride(STATUS_LABELS.transcribing);
4211
4518
  try {
4212
4519
  if (currentSttWs) {
4520
+ playSttSentCueOnce();
4213
4521
  const sttResult = await currentSttWs.stop();
4214
4522
  if (sttResult.session_id && sttResult.session_id !== sessionIdRef.current) {
4215
4523
  sessionIdRef.current = sttResult.session_id;
@@ -4226,11 +4534,12 @@ const ChatWindow = ({
4226
4534
  }
4227
4535
  } catch (error) {
4228
4536
  console.warn(
4229
- `[Bulut] STT WS finalization failed, falling back to /chat/stt: ${error instanceof Error ? error.message : String(error)}`
4537
+ `[Bulut] STT WS finalization failed, falling back to HTTP POST /chat/stt: ${error instanceof Error ? error.message : String(error)}`
4230
4538
  );
4231
4539
  } finally {
4232
4540
  clearLiveUserTranscriptionState();
4233
4541
  }
4542
+ console.info("[Bulut] Using HTTP POST fallback for STT (streaming WS did not succeed)");
4234
4543
  setStatusOverride(STATUS_LABELS.thinking);
4235
4544
  await handleAudioBlob(blob);
4236
4545
  };
@@ -4333,9 +4642,12 @@ const ChatWindow = ({
4333
4642
  handleMicPointerUp(event);
4334
4643
  };
4335
4644
  const handleRestart = () => {
4645
+ invalidateRequestEpoch();
4646
+ sttSendCuePlayedRef.current = false;
4336
4647
  resetMicGesture();
4337
4648
  pendingStopAfterStartRef.current = false;
4338
4649
  stopActiveStream();
4650
+ stopActiveAudioPlayback();
4339
4651
  cancelActiveSttWs();
4340
4652
  if (recorderRef.current && recorderRef.current.state !== "inactive") {
4341
4653
  stopRecording({ discard: true });
@@ -4369,7 +4681,10 @@ const ChatWindow = ({
4369
4681
  return () => window.clearTimeout(timer);
4370
4682
  }, [accessibilityMode]);
4371
4683
  const stopTask = () => {
4684
+ invalidateRequestEpoch();
4685
+ sttSendCuePlayedRef.current = false;
4372
4686
  stopActiveStream();
4687
+ stopActiveAudioPlayback();
4373
4688
  cancelActiveSttWs();
4374
4689
  stopRecording({ discard: true });
4375
4690
  cleanupVAD();
@@ -4384,6 +4699,7 @@ const ChatWindow = ({
4384
4699
  void startRecording("vad");
4385
4700
  },
4386
4701
  cancelRecording: () => {
4702
+ stopActiveAudioPlayback();
4387
4703
  cancelActiveSttWs();
4388
4704
  const recorder = recorderRef.current;
4389
4705
  if (recorder && recorder.state !== "inactive") {
@@ -4642,7 +4958,7 @@ const ChatWindow = ({
4642
4958
  message.id
4643
4959
  )) }) }),
4644
4960
  /* @__PURE__ */ u$1("div", { style: footerStyle, children: [
4645
- /* @__PURE__ */ u$1("div", { style: { ...statusPanelStyle, transition: "opacity 0.2s ease-out" }, children: statusText !== STATUS_LABELS.ready ? /* @__PURE__ */ u$1("span", { className: "bulut-status-dots", title: statusText, children: statusText }) : onAccessibilityToggle ? /* @__PURE__ */ u$1(
4961
+ /* @__PURE__ */ u$1("div", { style: { ...statusPanelStyle, transition: "opacity 0.2s ease-out" }, children: showStatus ? /* @__PURE__ */ u$1("span", { className: "bulut-status-dots", title: statusText, children: statusText }) : onAccessibilityToggle ? /* @__PURE__ */ u$1(
4646
4962
  "div",
4647
4963
  {
4648
4964
  style: {