@auticlabs/bulut 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/embed.js CHANGED
@@ -719,1154 +719,417 @@ const ChatButton = ({
719
719
  ] }) })
720
720
  ] });
721
721
  };
722
- const TTS_WS_RETRY_DELAYS_MS = [250, 750, 1500];
723
- const FORCED_TTS_VOICE = "zeynep";
724
- const normalizeBaseUrl = (baseUrl) => {
725
- const trimmed = baseUrl.trim().replace(/\/+$/, "");
726
- if (/^https?:\/\//i.test(trimmed)) {
727
- return trimmed;
722
+ const MAX_LINKS = 20;
723
+ const MAX_INTERACTABLES = 24;
724
+ const MAX_HEADINGS = 10;
725
+ const MAX_TEXT_SNIPPETS = 4;
726
+ const MAX_OUTER_HTML_DIGEST = 760;
727
+ const MAX_CACHED_PAGES = 20;
728
+ const MAX_PAGE_SCAN_ELEMENTS = 2e3;
729
+ const MAX_EVENT_HINTS_PER_ELEMENT = 4;
730
+ const MAX_BRANCH_SAMPLES = 4;
731
+ const MAX_BRANCH_DEPTH = 2;
732
+ const MAX_CONTEXT_SUMMARY_CHARS = 3400;
733
+ const MAX_CONTEXT_WITH_HISTORY_CHARS = 4200;
734
+ const PAGE_CONTEXT_CACHE_VERSION = 2;
735
+ const PAGE_CONTEXT_CACHE_KEY = "auticbot_page_context_cache_v2";
736
+ const NON_CONTENT_TAGS = /* @__PURE__ */ new Set([
737
+ "script",
738
+ "style",
739
+ "noscript",
740
+ "template",
741
+ "link",
742
+ "meta"
743
+ ]);
744
+ const NATIVE_INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
745
+ "a",
746
+ "button",
747
+ "input",
748
+ "textarea",
749
+ "select",
750
+ "summary",
751
+ "details",
752
+ "option"
753
+ ]);
754
+ const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
755
+ "button",
756
+ "link",
757
+ "tab",
758
+ "menuitem",
759
+ "option",
760
+ "checkbox",
761
+ "radio",
762
+ "switch",
763
+ "combobox",
764
+ "textbox",
765
+ "searchbox",
766
+ "slider",
767
+ "spinbutton",
768
+ "treeitem"
769
+ ]);
770
+ const TRACKED_DISPLAY_VALUES = /* @__PURE__ */ new Set([
771
+ "block",
772
+ "inline",
773
+ "inline-block",
774
+ "flex",
775
+ "inline-flex",
776
+ "grid",
777
+ "inline-grid"
778
+ ]);
779
+ const TRACKED_POSITION_VALUES = /* @__PURE__ */ new Set([
780
+ "relative",
781
+ "absolute",
782
+ "fixed",
783
+ "sticky"
784
+ ]);
785
+ const EVENT_HINT_NAMES = [
786
+ "click",
787
+ "dblclick",
788
+ "mousedown",
789
+ "mouseup",
790
+ "pointerdown",
791
+ "pointerup",
792
+ "touchstart",
793
+ "touchend",
794
+ "keydown",
795
+ "keyup",
796
+ "keypress",
797
+ "input",
798
+ "change",
799
+ "submit",
800
+ "focus",
801
+ "blur"
802
+ ];
803
+ const ARIA_INTERACTION_ATTRS = [
804
+ "aria-controls",
805
+ "aria-expanded",
806
+ "aria-haspopup",
807
+ "aria-pressed",
808
+ "aria-selected"
809
+ ];
810
+ const DATA_INTERACTION_PATTERN = /(action|click|press|toggle|target|trigger|nav|open|close|menu|modal|command|submit)/i;
811
+ const pageContextCache = /* @__PURE__ */ new Map();
812
+ let cacheHydrated = false;
813
+ const normalizeWhitespace = (value) => value.replace(/\s+/g, " ").trim();
814
+ const truncate = (value, maxChars) => {
815
+ if (value.length <= maxChars) {
816
+ return value;
728
817
  }
729
- return `https://${trimmed}`;
730
- };
731
- const toWebSocketUrl = (baseUrl, path2) => {
732
- const normalized = normalizeBaseUrl(baseUrl);
733
- const url = new URL(normalized);
734
- url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
735
- url.pathname = `${url.pathname.replace(/\/$/, "")}${path2}`;
736
- url.search = "";
737
- url.hash = "";
738
- return url.toString();
818
+ const suffix = "\n...[truncated]";
819
+ return `${value.slice(0, Math.max(0, maxChars - suffix.length))}${suffix}`;
739
820
  };
740
- const createRequestId = () => {
741
- if (typeof crypto !== "undefined" && "randomUUID" in crypto) {
742
- return crypto.randomUUID();
821
+ const truncateInline = (value, maxChars) => {
822
+ if (value.length <= maxChars) {
823
+ return value;
743
824
  }
744
- return `tts-${Date.now()}-${Math.random().toString(16).slice(2)}`;
825
+ return `${value.slice(0, Math.max(0, maxChars - 3))}...`;
745
826
  };
746
- const parseTtsWsEventPayload = (value) => {
827
+ const canonicalUrl = (rawUrl) => {
747
828
  try {
748
- if (typeof value !== "string") {
749
- return null;
750
- }
751
- return JSON.parse(value);
829
+ return new URL(rawUrl, rawUrl).href;
752
830
  } catch {
753
- return null;
831
+ return rawUrl;
754
832
  }
755
833
  };
756
- const shouldAcceptAudioSeq = (incomingSeq, highestSeqSeen) => incomingSeq > highestSeqSeen;
757
- const shouldFallbackToSse = (error) => {
758
- if (typeof error === "object" && error !== null && "retryable" in error) {
759
- return Boolean(error.retryable);
834
+ const isCacheEntry = (value) => {
835
+ if (typeof value !== "object" || value === null) {
836
+ return false;
760
837
  }
761
- return true;
838
+ const obj = value;
839
+ return typeof obj.url === "string" && typeof obj.summary === "string" && Array.isArray(obj.links) && Array.isArray(obj.interactables) && typeof obj.capturedAt === "number" && typeof obj.version === "number";
762
840
  };
763
- const parseErrorBody = async (response) => {
764
- try {
765
- const data2 = await response.json();
766
- const detail = data2.detail;
767
- if (typeof detail === "string") return detail;
768
- if (detail && typeof detail === "object") return JSON.stringify(detail);
769
- return data2.error || data2.message || response.statusText;
770
- } catch {
771
- return response.statusText;
841
+ const bumpCount = (map, key) => {
842
+ if (!key) {
843
+ return;
772
844
  }
845
+ map.set(key, (map.get(key) ?? 0) + 1);
773
846
  };
774
- const sleep = (ms) => new Promise((resolve) => {
775
- setTimeout(resolve, ms);
776
- });
777
- const base64ToUint8Array = (base64) => {
778
- const cleanBase64 = base64.replace(/^data:audio\/\w+;base64,/, "");
779
- const binaryString = atob(cleanBase64);
780
- const bytes = new Uint8Array(binaryString.length);
781
- for (let i = 0; i < binaryString.length; i += 1) {
782
- bytes[i] = binaryString.charCodeAt(i);
847
+ const formatTopCounts = (map, maxItems) => {
848
+ if (map.size === 0) {
849
+ return "none";
783
850
  }
784
- return bytes;
851
+ return Array.from(map.entries()).sort((a2, b) => b[1] - a2[1] || a2[0].localeCompare(b[0])).slice(0, maxItems).map(([name, count]) => `${name}*${count}`).join(", ");
785
852
  };
786
- const createWavHeader = (length, sampleRate = 16e3) => {
787
- const buffer = new ArrayBuffer(44);
788
- const view = new DataView(buffer);
789
- const channels = 1;
790
- view.setUint32(0, 1380533830, false);
791
- view.setUint32(4, 36 + length, true);
792
- view.setUint32(8, 1463899717, false);
793
- view.setUint32(12, 1718449184, false);
794
- view.setUint32(16, 16, true);
795
- view.setUint16(20, 1, true);
796
- view.setUint16(22, channels, true);
797
- view.setUint32(24, sampleRate, true);
798
- view.setUint32(28, sampleRate * channels * 2, true);
799
- view.setUint16(32, channels * 2, true);
800
- view.setUint16(34, 16, true);
801
- view.setUint32(36, 1684108385, false);
802
- view.setUint32(40, length, true);
803
- return new Uint8Array(buffer);
853
+ const parseTabIndex = (value) => {
854
+ if (value === null) {
855
+ return null;
856
+ }
857
+ const parsed = Number.parseInt(value, 10);
858
+ return Number.isNaN(parsed) ? null : parsed;
804
859
  };
805
- const waitForPlaybackEnd = async (audioElement) => {
806
- if (audioElement.ended) {
807
- return;
860
+ const compactToken = (value, maxChars = 18) => {
861
+ const compact = value.replace(/\s+/g, "-").replace(/[^a-zA-Z0-9_-]/g, "");
862
+ return compact ? truncateInline(compact, maxChars) : "";
863
+ };
864
+ const getElementDepth = (element) => {
865
+ let depth = 0;
866
+ let cursor = element;
867
+ while (cursor == null ? void 0 : cursor.parentElement) {
868
+ depth += 1;
869
+ cursor = cursor.parentElement;
870
+ if (cursor === document.body) {
871
+ break;
872
+ }
808
873
  }
809
- await new Promise((resolve, reject) => {
810
- const watchdog = window.setInterval(() => {
811
- if (!audioElement.ended) {
812
- console.info("[Bulut] playback watchdog: still playing...");
813
- }
814
- }, 3e4);
815
- const onEnded = () => {
816
- cleanup();
817
- resolve();
818
- };
819
- const onError = () => {
820
- cleanup();
821
- reject(new Error("Ses oynatma hatası oluştu."));
822
- };
823
- const cleanup = () => {
824
- window.clearInterval(watchdog);
825
- audioElement.removeEventListener("ended", onEnded);
826
- audioElement.removeEventListener("error", onError);
827
- };
828
- audioElement.addEventListener("ended", onEnded);
829
- audioElement.addEventListener("error", onError);
830
- });
874
+ return depth;
831
875
  };
832
- const playBufferedAudio = async (chunks, mimeType, sampleRate = 16e3, onAudioStateChange) => {
833
- if (chunks.length === 0) {
834
- onAudioStateChange == null ? void 0 : onAudioStateChange("done");
876
+ const getPrimaryRole = (element) => {
877
+ const rawRole = normalizeWhitespace(element.getAttribute("role") || "").toLowerCase().split(" ")[0];
878
+ return rawRole || "";
879
+ };
880
+ const hydrateCacheFromStorage = () => {
881
+ if (cacheHydrated || typeof sessionStorage === "undefined") {
835
882
  return;
836
883
  }
837
- const totalBytes = chunks.reduce((acc, c2) => acc + c2.byteLength, 0);
838
- console.log(`[Bulut] Playing buffered audio: ${chunks.length} chunks, ${totalBytes} bytes, type=${mimeType}`);
839
- onAudioStateChange == null ? void 0 : onAudioStateChange("fallback");
840
- const blobParts = chunks.map((chunk) => {
841
- const copied = new Uint8Array(chunk.byteLength);
842
- copied.set(chunk);
843
- return copied.buffer;
844
- });
845
- let detectedMime = mimeType;
846
- if (chunks.length > 0 && chunks[0].length >= 4) {
847
- const header = Array.from(chunks[0].slice(0, 4)).map((b) => b.toString(16).padStart(2, "0").toUpperCase()).join(" ");
848
- console.log(`[Bulut] Audio header (hex): ${header}`);
849
- if (header.startsWith("49 44 33")) {
850
- detectedMime = "audio/mpeg";
851
- } else if (header.startsWith("FF F3") || header.startsWith("FF F2")) {
852
- detectedMime = "audio/mpeg";
853
- } else if (header.startsWith("52 49 46 46")) {
854
- detectedMime = "audio/wav";
855
- } else if (header.startsWith("1A 45 DF A3")) {
856
- detectedMime = "audio/webm";
884
+ cacheHydrated = true;
885
+ try {
886
+ const raw = sessionStorage.getItem(PAGE_CONTEXT_CACHE_KEY);
887
+ if (!raw) {
888
+ return;
889
+ }
890
+ const parsed = JSON.parse(raw);
891
+ if (!Array.isArray(parsed)) {
892
+ return;
893
+ }
894
+ for (const value of parsed) {
895
+ if (!isCacheEntry(value)) {
896
+ continue;
897
+ }
898
+ if (value.version !== PAGE_CONTEXT_CACHE_VERSION) {
899
+ continue;
900
+ }
901
+ pageContextCache.set(value.url, value);
902
+ }
903
+ if (pageContextCache.size > 0) {
904
+ console.info(
905
+ `[Autic] context cache restored entries=${pageContextCache.size}`
906
+ );
857
907
  }
908
+ } catch (error) {
909
+ console.warn("[Autic] context cache restore failed", error);
858
910
  }
859
- let safeMimeType = detectedMime && detectedMime.includes("/") ? detectedMime : "audio/mpeg";
860
- let finalBlobParts = blobParts;
861
- if (mimeType === "audio/pcm") {
862
- const totalLength = chunks.reduce((acc, c2) => acc + c2.byteLength, 0);
863
- const header = createWavHeader(totalLength, sampleRate);
864
- finalBlobParts = [header.buffer, ...blobParts];
865
- safeMimeType = "audio/wav";
866
- console.log(`[Bulut] Wrapped raw PCM in WAV (rate=${sampleRate})`);
911
+ };
912
+ const persistCacheToStorage = () => {
913
+ if (typeof sessionStorage === "undefined") {
914
+ return;
867
915
  }
868
- console.log(`[Bulut] Creating blob with type: ${safeMimeType} (original: ${mimeType})`);
869
- const blob = new Blob(finalBlobParts, { type: safeMimeType });
870
- const audioElement = new Audio();
871
- const objectUrl = URL.createObjectURL(blob);
872
916
  try {
873
- audioElement.preload = "auto";
874
- audioElement.autoplay = true;
875
- audioElement.setAttribute("playsinline", "true");
876
- audioElement.src = objectUrl;
877
- await audioElement.play();
878
- onAudioStateChange == null ? void 0 : onAudioStateChange("playing");
879
- await waitForPlaybackEnd(audioElement);
880
- onAudioStateChange == null ? void 0 : onAudioStateChange("done");
881
- } catch (err) {
882
- console.error(`[Bulut] Playback failed: ${err}`, { mimeType: safeMimeType, size: blob.size });
883
- onAudioStateChange == null ? void 0 : onAudioStateChange("done");
884
- throw err;
885
- } finally {
886
- audioElement.pause();
887
- audioElement.removeAttribute("src");
888
- audioElement.load();
889
- URL.revokeObjectURL(objectUrl);
917
+ const serialized = JSON.stringify(
918
+ Array.from(pageContextCache.values()).sort(
919
+ (a2, b) => a2.capturedAt - b.capturedAt
920
+ )
921
+ );
922
+ sessionStorage.setItem(PAGE_CONTEXT_CACHE_KEY, serialized);
923
+ } catch (error) {
924
+ console.warn("[Autic] context cache persist failed", error);
890
925
  }
891
926
  };
892
- const parseSseEventPayload = (eventBlock) => {
893
- const dataLines = eventBlock.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.startsWith("data:")).map((line) => line.slice(5).trimStart());
894
- if (dataLines.length === 0) {
895
- return null;
927
+ const pruneOldestCacheEntries = () => {
928
+ if (pageContextCache.size <= MAX_CACHED_PAGES) {
929
+ return;
896
930
  }
897
- const dataStr = dataLines.join("\n");
898
- if (dataStr === "[DONE]") {
899
- return { type: "done" };
931
+ const sorted = Array.from(pageContextCache.values()).sort(
932
+ (a2, b) => a2.capturedAt - b.capturedAt
933
+ );
934
+ const overflow = sorted.length - MAX_CACHED_PAGES;
935
+ for (let i = 0; i < overflow; i += 1) {
936
+ pageContextCache.delete(sorted[i].url);
937
+ }
938
+ };
939
+ const buildSummaryWithHistory = (current) => {
940
+ const recentPages = Array.from(pageContextCache.values()).filter((entry) => entry.url !== current.url).sort((a2, b) => b.capturedAt - a2.capturedAt).slice(0, 3);
941
+ if (recentPages.length === 0) {
942
+ return current.summary;
943
+ }
944
+ const historySection = [
945
+ "Recent Page Memory:",
946
+ ...recentPages.map((entry) => {
947
+ const compactSummary = normalizeWhitespace(entry.summary).slice(0, 180);
948
+ return `- ${entry.url} :: ${compactSummary}`;
949
+ })
950
+ ].join("\n");
951
+ return truncate(
952
+ `${current.summary}
953
+
954
+ ${historySection}`,
955
+ MAX_CONTEXT_WITH_HISTORY_CHARS
956
+ );
957
+ };
958
+ const isVisible = (element) => {
959
+ if (element.getAttribute("aria-hidden") === "true") {
960
+ return false;
961
+ }
962
+ if (element instanceof HTMLElement && element.hidden) {
963
+ return false;
900
964
  }
965
+ const style = window.getComputedStyle(element);
966
+ if (style.display === "none" || style.visibility === "hidden") {
967
+ return false;
968
+ }
969
+ const rect = element.getBoundingClientRect();
970
+ return rect.width > 0 && rect.height > 0;
971
+ };
972
+ const toAbsoluteUrl = (href) => {
901
973
  try {
902
- return JSON.parse(dataStr);
903
- } catch (error) {
904
- console.warn("Error parsing SSE chunk:", error);
905
- return null;
974
+ return new URL(href, window.location.href).href;
975
+ } catch {
976
+ return href;
906
977
  }
907
978
  };
908
- const isAudioSsePayload = (payload) => typeof payload.audio === "string" && (payload.type === void 0 || payload.type === "audio");
909
- async function transcribeAudio(baseUrl, file, projectId, sessionId, language) {
910
- const url = `${normalizeBaseUrl(baseUrl)}/chat/stt`;
911
- const formData = new FormData();
912
- formData.append("file", file);
913
- formData.append("project_id", projectId);
914
- if (sessionId) formData.append("session_id", sessionId);
915
- formData.append("language", language);
916
- const response = await fetch(url, { method: "POST", body: formData });
917
- if (!response.ok) {
918
- throw new Error(await parseErrorBody(response));
979
+ const escapeCssValue = (value) => {
980
+ if (typeof CSS !== "undefined" && typeof CSS.escape === "function") {
981
+ return CSS.escape(value);
919
982
  }
920
- return response.json();
921
- }
922
- const buildError = (message, retryable = true) => {
923
- const error = new Error(message);
924
- error.retryable = retryable;
925
- return error;
983
+ return value.replace(/([ #;&,.+*~':"!^$\[\]()=>|\/@])/g, "\\$1");
926
984
  };
927
- const collectTtsViaSse = async (baseUrl, assistantText, accessibilityMode, isStopped, setReader) => {
928
- var _a;
929
- const ttsFormData = new FormData();
930
- ttsFormData.append("text", assistantText);
931
- ttsFormData.append("voice", FORCED_TTS_VOICE);
932
- ttsFormData.append("accessibility_mode", String(accessibilityMode));
933
- const ttsResponse = await fetch(`${normalizeBaseUrl(baseUrl)}/chat/tts`, {
934
- method: "POST",
935
- body: ttsFormData
936
- });
937
- if (!ttsResponse.ok) {
938
- throw buildError(await parseErrorBody(ttsResponse), false);
985
+ const buildSelector = (element) => {
986
+ const tag = element.tagName.toLowerCase();
987
+ if (element.id) {
988
+ return `#${escapeCssValue(element.id)}`;
939
989
  }
940
- const reader = (_a = ttsResponse.body) == null ? void 0 : _a.getReader();
941
- if (!reader) {
942
- throw buildError("TTS response body is not readable", false);
990
+ const name = element.getAttribute("name");
991
+ if (name) {
992
+ return `${tag}[name="${escapeCssValue(name)}"]`;
943
993
  }
944
- setReader(reader);
945
- const chunks = [];
946
- let mimeType = "audio/mpeg";
947
- let sampleRate = 16e3;
948
- const decoder = new TextDecoder();
949
- let buffer = "";
950
- while (true) {
951
- if (isStopped()) {
952
- break;
994
+ const ariaLabel = element.getAttribute("aria-label");
995
+ if (ariaLabel) {
996
+ return `${tag}[aria-label="${escapeCssValue(ariaLabel)}"]`;
997
+ }
998
+ const classes = Array.from(element.classList).filter(Boolean).slice(0, 2).map((className) => `.${escapeCssValue(className)}`).join("");
999
+ if (classes) {
1000
+ return `${tag}${classes}`;
1001
+ }
1002
+ const parent2 = element.parentElement;
1003
+ if (!parent2) {
1004
+ return tag;
1005
+ }
1006
+ const siblingsOfTag = Array.from(parent2.children).filter(
1007
+ (sibling) => sibling.tagName === element.tagName
1008
+ );
1009
+ const index = siblingsOfTag.indexOf(element) + 1;
1010
+ return `${tag}:nth-of-type(${index})`;
1011
+ };
1012
+ const getElementLabel = (element) => {
1013
+ const text = normalizeWhitespace(
1014
+ (element instanceof HTMLElement ? element.innerText : element.textContent) || ""
1015
+ );
1016
+ const ariaLabel = normalizeWhitespace(element.getAttribute("aria-label") || "");
1017
+ const title = normalizeWhitespace(element.getAttribute("title") || "");
1018
+ const placeholder = normalizeWhitespace(
1019
+ element.getAttribute("placeholder") || ""
1020
+ );
1021
+ const name = normalizeWhitespace(element.getAttribute("name") || "");
1022
+ const value = element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || element instanceof HTMLButtonElement ? normalizeWhitespace(element.value || "") : "";
1023
+ const classHint = Array.from(element.classList).map((item) => compactToken(item, 16)).find(Boolean);
1024
+ const fallback = element.id && `#${element.id}` || classHint && `.${classHint}` || buildSelector(element);
1025
+ const label = text || ariaLabel || title || placeholder || value || name || fallback;
1026
+ if (element.tagName.toLowerCase() === "input") {
1027
+ const inputType = element.getAttribute("type") || "text";
1028
+ return `${inputType} ${label || "input"}`;
1029
+ }
1030
+ return label || "untitled";
1031
+ };
1032
+ const getEventHints = (element) => {
1033
+ const record = element;
1034
+ const eventHints = [];
1035
+ for (const eventName of EVENT_HINT_NAMES) {
1036
+ const handlerKey = `on${eventName}`;
1037
+ const hasInlineHandler = Boolean(element.getAttribute(handlerKey));
1038
+ const hasPropertyHandler = typeof record[handlerKey] === "function";
1039
+ if (!hasInlineHandler && !hasPropertyHandler) {
1040
+ continue;
953
1041
  }
954
- const { done, value } = await reader.read();
955
- if (done) {
1042
+ eventHints.push(eventName);
1043
+ if (eventHints.length >= MAX_EVENT_HINTS_PER_ELEMENT) {
956
1044
  break;
957
1045
  }
958
- buffer += decoder.decode(value, { stream: true });
959
- const blocks = buffer.split(/\r?\n\r?\n/);
960
- buffer = blocks.pop() || "";
961
- for (const block of blocks) {
962
- const payload = parseSseEventPayload(block);
963
- if (!payload) {
964
- continue;
965
- }
966
- if (isAudioSsePayload(payload)) {
967
- const format = payload.format || "mp3";
968
- mimeType = payload.mime_type || (format === "webm" ? "audio/webm" : "audio/mpeg");
969
- chunks.push(base64ToUint8Array(payload.audio));
970
- if (payload.sample_rate) {
971
- sampleRate = payload.sample_rate;
972
- }
973
- }
974
- }
975
1046
  }
976
- reader.releaseLock();
977
- setReader(void 0);
978
- return { chunks, mimeType, sampleRate };
1047
+ return eventHints;
979
1048
  };
980
- const collectTtsViaWebSocket = async (baseUrl, assistantText, accessibilityMode, isStopped, setSocket) => {
981
- const wsUrl = toWebSocketUrl(baseUrl, "/chat/tts/ws");
982
- const requestId = createRequestId();
983
- const chunks = [];
984
- let mimeType = "audio/mpeg";
985
- let sampleRate = 16e3;
986
- let highestSeqSeen = 0;
987
- const connectOnce = () => new Promise((resolve, reject) => {
988
- if (isStopped()) {
989
- reject(buildError("stream_stopped", false));
990
- return;
1049
+ const getAriaInteractionHints = (element) => ARIA_INTERACTION_ATTRS.filter((attrName) => element.hasAttribute(attrName)).map(
1050
+ (attrName) => attrName.replace("aria-", "")
1051
+ );
1052
+ const getDataInteractionHints = (element) => element.getAttributeNames().filter(
1053
+ (attrName) => attrName.startsWith("data-") && DATA_INTERACTION_PATTERN.test(attrName)
1054
+ ).slice(0, 2).map((attrName) => attrName.replace("data-", ""));
1055
+ const getStyleHints = (style) => {
1056
+ const styleHints = [];
1057
+ if (style.cursor === "pointer") {
1058
+ styleHints.push("cursor:pointer");
1059
+ }
1060
+ if (style.display === "flex" || style.display === "grid" || style.display === "inline-flex" || style.display === "inline-grid") {
1061
+ styleHints.push(`display:${style.display}`);
1062
+ }
1063
+ if (style.position === "fixed" || style.position === "sticky") {
1064
+ styleHints.push(`position:${style.position}`);
1065
+ }
1066
+ return styleHints.slice(0, 2);
1067
+ };
1068
+ const buildBlueprintToken = (element) => {
1069
+ const tag = element.tagName.toLowerCase();
1070
+ const idToken = element.id ? `#${compactToken(element.id)}` : "";
1071
+ const classToken = Array.from(element.classList).map((item) => compactToken(item, 16)).find(Boolean);
1072
+ return `${tag}${idToken}${classToken ? `.${classToken}` : ""}`;
1073
+ };
1074
+ const buildBranchDigest = (element, depth) => {
1075
+ const token = buildBlueprintToken(element);
1076
+ if (depth <= 0) {
1077
+ return token;
1078
+ }
1079
+ const children = Array.from(element.children).filter((child) => !NON_CONTENT_TAGS.has(child.tagName.toLowerCase())).filter((child) => isVisible(child));
1080
+ if (children.length === 0) {
1081
+ return token;
1082
+ }
1083
+ const sampled = children.slice(0, 3).map((child) => buildBranchDigest(child, depth - 1));
1084
+ const overflow = children.length > sampled.length ? `+${children.length - sampled.length}` : "";
1085
+ return `${token}>${sampled.join("+")}${overflow}`;
1086
+ };
1087
+ const collectDomBranchDigest = () => {
1088
+ const root = document.body ?? document.documentElement;
1089
+ const topLevelNodes = Array.from(root.children).filter((child) => !NON_CONTENT_TAGS.has(child.tagName.toLowerCase())).filter((child) => isVisible(child)).slice(0, MAX_BRANCH_SAMPLES);
1090
+ return topLevelNodes.map(
1091
+ (child) => truncateInline(buildBranchDigest(child, MAX_BRANCH_DEPTH), 140)
1092
+ );
1093
+ };
1094
+ const formatSection = (title, lines) => {
1095
+ if (lines.length === 0) {
1096
+ return `${title}:
1097
+ - none`;
1098
+ }
1099
+ return `${title}:
1100
+ ${lines.join("\n")}`;
1101
+ };
1102
+ const buildOuterHtmlDigest = () => {
1103
+ var _a;
1104
+ const raw = ((_a = document.body) == null ? void 0 : _a.outerHTML) || document.documentElement.outerHTML;
1105
+ const withoutScripts = raw.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<noscript[\s\S]*?<\/noscript>/gi, "").replace(/<!--[\s\S]*?-->/g, "").replace(/\s+/g, " ").trim();
1106
+ const structural = withoutScripts.replace(/>[^<]*</g, "><").replace(/\s+/g, " ").trim();
1107
+ return truncate(structural, MAX_OUTER_HTML_DIGEST);
1108
+ };
1109
+ const collectTextSnippets = () => {
1110
+ const root = document.querySelector("main, article, [role='main']") ?? document.body;
1111
+ const snippets = [];
1112
+ const seen = /* @__PURE__ */ new Set();
1113
+ const candidates = Array.from(root.querySelectorAll("p, li, h1, h2, h3"));
1114
+ for (const node of candidates) {
1115
+ if (!isVisible(node)) {
1116
+ continue;
991
1117
  }
992
- let done = false;
993
- let finalError = null;
994
- const socket = new WebSocket(wsUrl);
995
- setSocket(socket);
996
- const finalize = (mode, error) => {
997
- socket.onopen = null;
998
- socket.onmessage = null;
999
- socket.onerror = null;
1000
- socket.onclose = null;
1001
- setSocket(null);
1002
- if (mode === "resolve") {
1003
- resolve();
1004
- return;
1005
- }
1006
- reject(error || buildError("tts_ws_closed", true));
1007
- };
1008
- socket.onopen = () => {
1009
- console.info(
1010
- `[Bulut] TTS WS connected request_id=${requestId} resume_seq=${highestSeqSeen}`
1011
- );
1012
- socket.send(
1013
- JSON.stringify({
1014
- type: "start",
1015
- request_id: requestId,
1016
- text: assistantText,
1017
- voice: FORCED_TTS_VOICE,
1018
- accessibility_mode: accessibilityMode,
1019
- last_seq: highestSeqSeen
1020
- })
1021
- );
1022
- };
1023
- socket.onmessage = (event) => {
1024
- const payload = parseTtsWsEventPayload(String(event.data));
1025
- if (!payload) {
1026
- console.warn("[Bulut] TTS WS invalid JSON payload");
1027
- return;
1028
- }
1029
- if (payload.type === "audio" && typeof payload.audio === "string") {
1030
- const seq = typeof payload.seq === "number" ? payload.seq : 0;
1031
- if (shouldAcceptAudioSeq(seq, highestSeqSeen)) {
1032
- chunks.push(base64ToUint8Array(payload.audio));
1033
- highestSeqSeen = seq;
1034
- if (payload.mime_type) {
1035
- mimeType = payload.mime_type;
1036
- }
1037
- if (typeof payload.sample_rate === "number") {
1038
- sampleRate = payload.sample_rate;
1039
- }
1040
- } else {
1041
- console.info(
1042
- `[Bulut] TTS WS duplicate chunk ignored request_id=${requestId} seq=${seq} seen=${highestSeqSeen}`
1043
- );
1044
- }
1045
- if (socket.readyState === WebSocket.OPEN) {
1046
- socket.send(
1047
- JSON.stringify({
1048
- type: "ack",
1049
- request_id: requestId,
1050
- last_seq: highestSeqSeen
1051
- })
1052
- );
1053
- }
1054
- return;
1055
- }
1056
- if (payload.type === "done") {
1057
- const streamLastSeq = typeof payload.last_seq === "number" ? payload.last_seq : highestSeqSeen;
1058
- if (streamLastSeq > highestSeqSeen) {
1059
- finalError = buildError("tts_ws_sequence_gap", true);
1060
- done = false;
1061
- socket.close();
1062
- return;
1063
- }
1064
- done = true;
1065
- socket.close();
1066
- return;
1067
- }
1068
- if (payload.type === "error") {
1069
- finalError = buildError(payload.error || "tts_ws_error", payload.retryable !== false);
1070
- done = false;
1071
- socket.close();
1072
- }
1073
- };
1074
- socket.onerror = () => {
1075
- if (!finalError) {
1076
- finalError = buildError("tts_ws_transport_error", true);
1077
- }
1078
- };
1079
- socket.onclose = () => {
1080
- if (isStopped()) {
1081
- finalize("reject", buildError("stream_stopped", false));
1082
- return;
1083
- }
1084
- if (done) {
1085
- finalize("resolve");
1086
- return;
1087
- }
1088
- finalize("reject", finalError || buildError("tts_ws_closed_before_done", true));
1089
- };
1090
- });
1091
- for (let attempt = 0; attempt <= TTS_WS_RETRY_DELAYS_MS.length; attempt += 1) {
1092
- if (attempt > 0) {
1093
- const delay = TTS_WS_RETRY_DELAYS_MS[attempt - 1];
1094
- console.warn(
1095
- `[Bulut] TTS WS retry attempt=${attempt} delay_ms=${delay} last_seq=${highestSeqSeen}`
1096
- );
1097
- await sleep(delay);
1118
+ const text = normalizeWhitespace(node.textContent || "");
1119
+ if (!text || text.length < 20) {
1120
+ continue;
1098
1121
  }
1099
- try {
1100
- await connectOnce();
1101
- return { chunks, mimeType, sampleRate };
1102
- } catch (error) {
1103
- const retryable = shouldFallbackToSse(error);
1104
- const message = error instanceof Error ? error.message : String(error);
1105
- console.warn(
1106
- `[Bulut] TTS WS attempt failed attempt=${attempt} retryable=${retryable} error=${message}`
1107
- );
1108
- if (!retryable || attempt === TTS_WS_RETRY_DELAYS_MS.length) {
1109
- throw error;
1110
- }
1122
+ const compact = truncateInline(text, 180);
1123
+ if (seen.has(compact)) {
1124
+ continue;
1125
+ }
1126
+ seen.add(compact);
1127
+ snippets.push(`- ${compact}`);
1128
+ if (snippets.length >= MAX_TEXT_SNIPPETS) {
1129
+ break;
1111
1130
  }
1112
1131
  }
1113
- throw buildError("tts_ws_exhausted", true);
1114
- };
1115
- const voiceChatStream = (baseUrl, audioFile, projectId, sessionId, config, events) => {
1116
- let isStopped = false;
1117
- let activeReader;
1118
- let activeSocket = null;
1119
- const donePromise = new Promise(async (resolve, reject) => {
1120
- var _a, _b, _c, _d, _e, _f, _g, _h;
1121
- try {
1122
- if (isStopped) return resolve();
1123
- const sttResult = await transcribeAudio(baseUrl, audioFile, projectId, sessionId, "tr");
1124
- const currentSessionId = sttResult.session_id;
1125
- const userText = sttResult.text;
1126
- (_a = events.onTranscription) == null ? void 0 : _a.call(events, {
1127
- session_id: currentSessionId,
1128
- user_text: userText
1129
- });
1130
- if (isStopped) return resolve();
1131
- const llmFormData = new FormData();
1132
- llmFormData.append("project_id", projectId);
1133
- llmFormData.append("session_id", currentSessionId);
1134
- llmFormData.append("user_text", userText);
1135
- llmFormData.append("model", config.model);
1136
- if (config.pageContext) llmFormData.append("page_context", config.pageContext);
1137
- llmFormData.append("accessibility_mode", String(Boolean(config.accessibilityMode)));
1138
- const llmResponse = await fetch(`${normalizeBaseUrl(baseUrl)}/chat/llm`, {
1139
- method: "POST",
1140
- body: llmFormData
1141
- });
1142
- if (!llmResponse.ok) {
1143
- throw new Error(await parseErrorBody(llmResponse));
1144
- }
1145
- activeReader = (_b = llmResponse.body) == null ? void 0 : _b.getReader();
1146
- if (!activeReader) throw new Error("LLM response body is not readable");
1147
- const decoder = new TextDecoder();
1148
- let buffer = "";
1149
- let assistantText = "";
1150
- while (true) {
1151
- if (isStopped) break;
1152
- const { done, value } = await activeReader.read();
1153
- if (done) break;
1154
- buffer += decoder.decode(value, { stream: true });
1155
- const chunks = buffer.split(/\r?\n\r?\n/);
1156
- buffer = chunks.pop() || "";
1157
- for (const chunk of chunks) {
1158
- const data2 = parseSseEventPayload(chunk);
1159
- if (!data2) continue;
1160
- if (data2.type === "session" && data2.session_id) {
1161
- (_c = events.onTranscription) == null ? void 0 : _c.call(events, {
1162
- session_id: data2.session_id,
1163
- user_text: sttResult.text
1164
- });
1165
- continue;
1166
- }
1167
- if (data2.type === "llm_delta" && typeof data2.delta === "string") {
1168
- (_d = events.onAssistantDelta) == null ? void 0 : _d.call(events, data2.delta);
1169
- continue;
1170
- }
1171
- if (data2.type === "llm_done") {
1172
- assistantText = data2.assistant_text || "";
1173
- (_e = events.onAssistantDone) == null ? void 0 : _e.call(events, assistantText);
1174
- continue;
1175
- }
1176
- if (data2.type === "error") {
1177
- throw new Error(data2.error || "LLM Error");
1178
- }
1179
- }
1180
- }
1181
- if (activeReader) {
1182
- activeReader.releaseLock();
1183
- activeReader = void 0;
1184
- }
1185
- if (isStopped || !assistantText) {
1186
- return resolve();
1187
- }
1188
- console.info(
1189
- `[Bulut] TTS start mode=voice requested_voice=${config.voice} forced_voice=${FORCED_TTS_VOICE} accessibility_mode=${Boolean(config.accessibilityMode)}`
1190
- );
1191
- (_f = events.onAudioStateChange) == null ? void 0 : _f.call(events, "rendering");
1192
- let ttsResult;
1193
- try {
1194
- ttsResult = await collectTtsViaWebSocket(
1195
- baseUrl,
1196
- assistantText,
1197
- Boolean(config.accessibilityMode),
1198
- () => isStopped,
1199
- (socket) => {
1200
- activeSocket = socket;
1201
- }
1202
- );
1203
- } catch (wsError) {
1204
- if (isStopped) {
1205
- return resolve();
1206
- }
1207
- console.warn(
1208
- `[Bulut] TTS WS failed, falling back to SSE: ${wsError instanceof Error ? wsError.message : String(wsError)}`
1209
- );
1210
- ttsResult = await collectTtsViaSse(
1211
- baseUrl,
1212
- assistantText,
1213
- Boolean(config.accessibilityMode),
1214
- () => isStopped,
1215
- (reader) => {
1216
- activeReader = reader;
1217
- }
1218
- );
1219
- }
1220
- if (!isStopped && ttsResult.chunks.length > 0) {
1221
- await playBufferedAudio(
1222
- ttsResult.chunks,
1223
- ttsResult.mimeType,
1224
- ttsResult.sampleRate,
1225
- events.onAudioStateChange
1226
- );
1227
- } else {
1228
- (_g = events.onAudioStateChange) == null ? void 0 : _g.call(events, "done");
1229
- }
1230
- resolve();
1231
- } catch (err) {
1232
- const msg = err instanceof Error ? err.message : String(err);
1233
- (_h = events.onError) == null ? void 0 : _h.call(events, msg);
1234
- reject(err);
1235
- } finally {
1236
- activeReader == null ? void 0 : activeReader.cancel().catch(() => {
1237
- });
1238
- if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
1239
- activeSocket.close();
1240
- }
1241
- activeSocket = null;
1242
- }
1243
- });
1244
- return {
1245
- stop: () => {
1246
- isStopped = true;
1247
- if (activeReader) {
1248
- activeReader.cancel().catch(() => {
1249
- });
1250
- }
1251
- if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
1252
- activeSocket.close();
1253
- }
1254
- },
1255
- done: donePromise
1256
- };
1257
- };
1258
- const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config, events, executeTool) => {
1259
- let isStopped = false;
1260
- let activeSocket = null;
1261
- let activeReader;
1262
- let errorEmitted = false;
1263
- const donePromise = new Promise(async (resolve, reject) => {
1264
- var _a, _b, _c, _d;
1265
- try {
1266
- if (isStopped) return resolve();
1267
- const sttResult = await transcribeAudio(
1268
- baseUrl,
1269
- audioFile,
1270
- projectId,
1271
- sessionId,
1272
- "tr"
1273
- );
1274
- const currentSessionId = sttResult.session_id;
1275
- const userText = sttResult.text;
1276
- (_a = events.onTranscription) == null ? void 0 : _a.call(events, {
1277
- session_id: currentSessionId,
1278
- user_text: userText
1279
- });
1280
- if (isStopped) return resolve();
1281
- const assistantText = await new Promise((agentResolve, agentReject) => {
1282
- if (isStopped) {
1283
- agentResolve("");
1284
- return;
1285
- }
1286
- const wsUrl = toWebSocketUrl(baseUrl, "/chat/agent/ws");
1287
- const socket = new WebSocket(wsUrl);
1288
- activeSocket = socket;
1289
- let finalReply = "";
1290
- let resolved = false;
1291
- const finish = (reply) => {
1292
- if (resolved) return;
1293
- resolved = true;
1294
- agentResolve(reply);
1295
- };
1296
- const fail = (error) => {
1297
- if (resolved) return;
1298
- resolved = true;
1299
- agentReject(error);
1300
- };
1301
- socket.onopen = () => {
1302
- console.info("[Bulut] Agent WS connected");
1303
- socket.send(JSON.stringify({
1304
- type: "start",
1305
- project_id: projectId,
1306
- session_id: currentSessionId,
1307
- user_text: userText,
1308
- model: config.model,
1309
- page_context: config.pageContext,
1310
- accessibility_mode: config.accessibilityMode
1311
- }));
1312
- };
1313
- socket.onmessage = async (event) => {
1314
- var _a2, _b2, _c2, _d2, _e, _f, _g, _h;
1315
- let data2;
1316
- try {
1317
- data2 = JSON.parse(String(event.data));
1318
- } catch {
1319
- console.warn("[Bulut] Agent WS invalid JSON");
1320
- return;
1321
- }
1322
- const msgType = data2.type;
1323
- if (msgType === "session" && typeof data2.session_id === "string") {
1324
- (_a2 = events.onSessionId) == null ? void 0 : _a2.call(events, data2.session_id);
1325
- return;
1326
- }
1327
- if (msgType === "iteration") {
1328
- (_b2 = events.onIteration) == null ? void 0 : _b2.call(
1329
- events,
1330
- data2.iteration,
1331
- data2.max_iterations
1332
- );
1333
- return;
1334
- }
1335
- if (msgType === "reply_delta" && typeof data2.delta === "string") {
1336
- (_c2 = events.onAssistantDelta) == null ? void 0 : _c2.call(events, data2.delta);
1337
- return;
1338
- }
1339
- if (msgType === "tool_calls" && Array.isArray(data2.calls)) {
1340
- const calls = data2.calls;
1341
- (_d2 = events.onToolCalls) == null ? void 0 : _d2.call(events, calls);
1342
- const results = [];
1343
- for (const call2 of calls) {
1344
- const result = await executeTool(call2);
1345
- (_e = events.onToolResult) == null ? void 0 : _e.call(events, call2.call_id, call2.tool, result.result);
1346
- results.push(result);
1347
- }
1348
- if (socket.readyState === WebSocket.OPEN) {
1349
- socket.send(JSON.stringify({
1350
- type: "tool_results",
1351
- results
1352
- }));
1353
- }
1354
- return;
1355
- }
1356
- if (msgType === "agent_done") {
1357
- finalReply = data2.final_reply || "";
1358
- (_f = events.onAssistantDone) == null ? void 0 : _f.call(events, finalReply);
1359
- if (typeof data2.session_id === "string") {
1360
- (_g = events.onSessionId) == null ? void 0 : _g.call(events, data2.session_id);
1361
- }
1362
- finish(finalReply);
1363
- return;
1364
- }
1365
- if (msgType === "error") {
1366
- const errMsg = data2.error || "Agent error";
1367
- errorEmitted = true;
1368
- (_h = events.onError) == null ? void 0 : _h.call(events, errMsg);
1369
- fail(new Error(errMsg));
1370
- return;
1371
- }
1372
- };
1373
- socket.onerror = () => {
1374
- var _a2;
1375
- console.error("[Bulut] Agent WS error");
1376
- errorEmitted = true;
1377
- (_a2 = events.onError) == null ? void 0 : _a2.call(events, "Agent WebSocket connection error");
1378
- fail(new Error("Agent WebSocket connection error"));
1379
- };
1380
- socket.onclose = () => {
1381
- console.info("[Bulut] Agent WS closed");
1382
- finish(finalReply);
1383
- };
1384
- });
1385
- activeSocket = null;
1386
- if (isStopped || !assistantText) {
1387
- return resolve();
1388
- }
1389
- console.info(
1390
- `[Bulut] TTS start mode=agent forced_voice=${FORCED_TTS_VOICE}`
1391
- );
1392
- (_b = events.onAudioStateChange) == null ? void 0 : _b.call(events, "rendering");
1393
- let ttsResult;
1394
- try {
1395
- ttsResult = await collectTtsViaWebSocket(
1396
- baseUrl,
1397
- assistantText,
1398
- Boolean(config.accessibilityMode),
1399
- () => isStopped,
1400
- (socket) => {
1401
- activeSocket = socket;
1402
- }
1403
- );
1404
- } catch (wsError) {
1405
- if (isStopped) return resolve();
1406
- console.warn(
1407
- `[Bulut] TTS WS failed, falling back to SSE: ${wsError instanceof Error ? wsError.message : String(wsError)}`
1408
- );
1409
- ttsResult = await collectTtsViaSse(
1410
- baseUrl,
1411
- assistantText,
1412
- Boolean(config.accessibilityMode),
1413
- () => isStopped,
1414
- (reader) => {
1415
- activeReader = reader;
1416
- }
1417
- );
1418
- }
1419
- if (!isStopped && ttsResult.chunks.length > 0) {
1420
- await playBufferedAudio(
1421
- ttsResult.chunks,
1422
- ttsResult.mimeType,
1423
- ttsResult.sampleRate,
1424
- events.onAudioStateChange
1425
- );
1426
- } else {
1427
- (_c = events.onAudioStateChange) == null ? void 0 : _c.call(events, "done");
1428
- }
1429
- resolve();
1430
- } catch (err) {
1431
- if (!errorEmitted) {
1432
- const msg = err instanceof Error ? err.message : String(err);
1433
- (_d = events.onError) == null ? void 0 : _d.call(events, msg);
1434
- }
1435
- reject(err);
1436
- } finally {
1437
- activeReader == null ? void 0 : activeReader.cancel().catch(() => {
1438
- });
1439
- if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
1440
- activeSocket.close();
1441
- }
1442
- activeSocket = null;
1443
- }
1444
- });
1445
- return {
1446
- stop: () => {
1447
- isStopped = true;
1448
- if (activeReader) {
1449
- activeReader.cancel().catch(() => {
1450
- });
1451
- }
1452
- if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
1453
- activeSocket.close();
1454
- }
1455
- },
1456
- done: donePromise
1457
- };
1458
- };
1459
- const MAX_LINKS = 20;
1460
- const MAX_INTERACTABLES = 24;
1461
- const MAX_HEADINGS = 10;
1462
- const MAX_TEXT_SNIPPETS = 4;
1463
- const MAX_OUTER_HTML_DIGEST = 760;
1464
- const MAX_CACHED_PAGES = 20;
1465
- const MAX_PAGE_SCAN_ELEMENTS = 2e3;
1466
- const MAX_EVENT_HINTS_PER_ELEMENT = 4;
1467
- const MAX_BRANCH_SAMPLES = 4;
1468
- const MAX_BRANCH_DEPTH = 2;
1469
- const MAX_CONTEXT_SUMMARY_CHARS = 3400;
1470
- const MAX_CONTEXT_WITH_HISTORY_CHARS = 4200;
1471
- const PAGE_CONTEXT_CACHE_VERSION = 2;
1472
- const PAGE_CONTEXT_CACHE_KEY = "auticbot_page_context_cache_v2";
1473
- const NON_CONTENT_TAGS = /* @__PURE__ */ new Set([
1474
- "script",
1475
- "style",
1476
- "noscript",
1477
- "template",
1478
- "link",
1479
- "meta"
1480
- ]);
1481
- const NATIVE_INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
1482
- "a",
1483
- "button",
1484
- "input",
1485
- "textarea",
1486
- "select",
1487
- "summary",
1488
- "details",
1489
- "option"
1490
- ]);
1491
- const INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
1492
- "button",
1493
- "link",
1494
- "tab",
1495
- "menuitem",
1496
- "option",
1497
- "checkbox",
1498
- "radio",
1499
- "switch",
1500
- "combobox",
1501
- "textbox",
1502
- "searchbox",
1503
- "slider",
1504
- "spinbutton",
1505
- "treeitem"
1506
- ]);
1507
- const TRACKED_DISPLAY_VALUES = /* @__PURE__ */ new Set([
1508
- "block",
1509
- "inline",
1510
- "inline-block",
1511
- "flex",
1512
- "inline-flex",
1513
- "grid",
1514
- "inline-grid"
1515
- ]);
1516
- const TRACKED_POSITION_VALUES = /* @__PURE__ */ new Set([
1517
- "relative",
1518
- "absolute",
1519
- "fixed",
1520
- "sticky"
1521
- ]);
1522
- const EVENT_HINT_NAMES = [
1523
- "click",
1524
- "dblclick",
1525
- "mousedown",
1526
- "mouseup",
1527
- "pointerdown",
1528
- "pointerup",
1529
- "touchstart",
1530
- "touchend",
1531
- "keydown",
1532
- "keyup",
1533
- "keypress",
1534
- "input",
1535
- "change",
1536
- "submit",
1537
- "focus",
1538
- "blur"
1539
- ];
1540
- const ARIA_INTERACTION_ATTRS = [
1541
- "aria-controls",
1542
- "aria-expanded",
1543
- "aria-haspopup",
1544
- "aria-pressed",
1545
- "aria-selected"
1546
- ];
1547
- const DATA_INTERACTION_PATTERN = /(action|click|press|toggle|target|trigger|nav|open|close|menu|modal|command|submit)/i;
1548
- const pageContextCache = /* @__PURE__ */ new Map();
1549
- let cacheHydrated = false;
1550
- const normalizeWhitespace = (value) => value.replace(/\s+/g, " ").trim();
1551
- const truncate = (value, maxChars) => {
1552
- if (value.length <= maxChars) {
1553
- return value;
1554
- }
1555
- const suffix = "\n...[truncated]";
1556
- return `${value.slice(0, Math.max(0, maxChars - suffix.length))}${suffix}`;
1557
- };
1558
- const truncateInline = (value, maxChars) => {
1559
- if (value.length <= maxChars) {
1560
- return value;
1561
- }
1562
- return `${value.slice(0, Math.max(0, maxChars - 3))}...`;
1563
- };
1564
- const canonicalUrl = (rawUrl) => {
1565
- try {
1566
- return new URL(rawUrl, rawUrl).href;
1567
- } catch {
1568
- return rawUrl;
1569
- }
1570
- };
1571
- const isCacheEntry = (value) => {
1572
- if (typeof value !== "object" || value === null) {
1573
- return false;
1574
- }
1575
- const obj = value;
1576
- return typeof obj.url === "string" && typeof obj.summary === "string" && Array.isArray(obj.links) && Array.isArray(obj.interactables) && typeof obj.capturedAt === "number" && typeof obj.version === "number";
1577
- };
1578
- const bumpCount = (map, key) => {
1579
- if (!key) {
1580
- return;
1581
- }
1582
- map.set(key, (map.get(key) ?? 0) + 1);
1583
- };
1584
- const formatTopCounts = (map, maxItems) => {
1585
- if (map.size === 0) {
1586
- return "none";
1587
- }
1588
- return Array.from(map.entries()).sort((a2, b) => b[1] - a2[1] || a2[0].localeCompare(b[0])).slice(0, maxItems).map(([name, count]) => `${name}*${count}`).join(", ");
1589
- };
1590
- const parseTabIndex = (value) => {
1591
- if (value === null) {
1592
- return null;
1593
- }
1594
- const parsed = Number.parseInt(value, 10);
1595
- return Number.isNaN(parsed) ? null : parsed;
1596
- };
1597
- const compactToken = (value, maxChars = 18) => {
1598
- const compact = value.replace(/\s+/g, "-").replace(/[^a-zA-Z0-9_-]/g, "");
1599
- return compact ? truncateInline(compact, maxChars) : "";
1600
- };
1601
- const getElementDepth = (element) => {
1602
- let depth = 0;
1603
- let cursor = element;
1604
- while (cursor == null ? void 0 : cursor.parentElement) {
1605
- depth += 1;
1606
- cursor = cursor.parentElement;
1607
- if (cursor === document.body) {
1608
- break;
1609
- }
1610
- }
1611
- return depth;
1612
- };
1613
- const getPrimaryRole = (element) => {
1614
- const rawRole = normalizeWhitespace(element.getAttribute("role") || "").toLowerCase().split(" ")[0];
1615
- return rawRole || "";
1616
- };
1617
- const hydrateCacheFromStorage = () => {
1618
- if (cacheHydrated || typeof sessionStorage === "undefined") {
1619
- return;
1620
- }
1621
- cacheHydrated = true;
1622
- try {
1623
- const raw = sessionStorage.getItem(PAGE_CONTEXT_CACHE_KEY);
1624
- if (!raw) {
1625
- return;
1626
- }
1627
- const parsed = JSON.parse(raw);
1628
- if (!Array.isArray(parsed)) {
1629
- return;
1630
- }
1631
- for (const value of parsed) {
1632
- if (!isCacheEntry(value)) {
1633
- continue;
1634
- }
1635
- if (value.version !== PAGE_CONTEXT_CACHE_VERSION) {
1636
- continue;
1637
- }
1638
- pageContextCache.set(value.url, value);
1639
- }
1640
- if (pageContextCache.size > 0) {
1641
- console.info(
1642
- `[Autic] context cache restored entries=${pageContextCache.size}`
1643
- );
1644
- }
1645
- } catch (error) {
1646
- console.warn("[Autic] context cache restore failed", error);
1647
- }
1648
- };
1649
- const persistCacheToStorage = () => {
1650
- if (typeof sessionStorage === "undefined") {
1651
- return;
1652
- }
1653
- try {
1654
- const serialized = JSON.stringify(
1655
- Array.from(pageContextCache.values()).sort(
1656
- (a2, b) => a2.capturedAt - b.capturedAt
1657
- )
1658
- );
1659
- sessionStorage.setItem(PAGE_CONTEXT_CACHE_KEY, serialized);
1660
- } catch (error) {
1661
- console.warn("[Autic] context cache persist failed", error);
1662
- }
1663
- };
1664
- const pruneOldestCacheEntries = () => {
1665
- if (pageContextCache.size <= MAX_CACHED_PAGES) {
1666
- return;
1667
- }
1668
- const sorted = Array.from(pageContextCache.values()).sort(
1669
- (a2, b) => a2.capturedAt - b.capturedAt
1670
- );
1671
- const overflow = sorted.length - MAX_CACHED_PAGES;
1672
- for (let i = 0; i < overflow; i += 1) {
1673
- pageContextCache.delete(sorted[i].url);
1674
- }
1675
- };
1676
- const buildSummaryWithHistory = (current) => {
1677
- const recentPages = Array.from(pageContextCache.values()).filter((entry) => entry.url !== current.url).sort((a2, b) => b.capturedAt - a2.capturedAt).slice(0, 3);
1678
- if (recentPages.length === 0) {
1679
- return current.summary;
1680
- }
1681
- const historySection = [
1682
- "Recent Page Memory:",
1683
- ...recentPages.map((entry) => {
1684
- const compactSummary = normalizeWhitespace(entry.summary).slice(0, 180);
1685
- return `- ${entry.url} :: ${compactSummary}`;
1686
- })
1687
- ].join("\n");
1688
- return truncate(
1689
- `${current.summary}
1690
-
1691
- ${historySection}`,
1692
- MAX_CONTEXT_WITH_HISTORY_CHARS
1693
- );
1694
- };
1695
- const isVisible = (element) => {
1696
- if (element.getAttribute("aria-hidden") === "true") {
1697
- return false;
1698
- }
1699
- if (element instanceof HTMLElement && element.hidden) {
1700
- return false;
1701
- }
1702
- const style = window.getComputedStyle(element);
1703
- if (style.display === "none" || style.visibility === "hidden") {
1704
- return false;
1705
- }
1706
- const rect = element.getBoundingClientRect();
1707
- return rect.width > 0 && rect.height > 0;
1708
- };
1709
- const toAbsoluteUrl = (href) => {
1710
- try {
1711
- return new URL(href, window.location.href).href;
1712
- } catch {
1713
- return href;
1714
- }
1715
- };
1716
- const escapeCssValue = (value) => {
1717
- if (typeof CSS !== "undefined" && typeof CSS.escape === "function") {
1718
- return CSS.escape(value);
1719
- }
1720
- return value.replace(/([ #;&,.+*~':"!^$\[\]()=>|\/@])/g, "\\$1");
1721
- };
1722
- const buildSelector = (element) => {
1723
- const tag = element.tagName.toLowerCase();
1724
- if (element.id) {
1725
- return `#${escapeCssValue(element.id)}`;
1726
- }
1727
- const name = element.getAttribute("name");
1728
- if (name) {
1729
- return `${tag}[name="${escapeCssValue(name)}"]`;
1730
- }
1731
- const ariaLabel = element.getAttribute("aria-label");
1732
- if (ariaLabel) {
1733
- return `${tag}[aria-label="${escapeCssValue(ariaLabel)}"]`;
1734
- }
1735
- const classes = Array.from(element.classList).filter(Boolean).slice(0, 2).map((className) => `.${escapeCssValue(className)}`).join("");
1736
- if (classes) {
1737
- return `${tag}${classes}`;
1738
- }
1739
- const parent2 = element.parentElement;
1740
- if (!parent2) {
1741
- return tag;
1742
- }
1743
- const siblingsOfTag = Array.from(parent2.children).filter(
1744
- (sibling) => sibling.tagName === element.tagName
1745
- );
1746
- const index = siblingsOfTag.indexOf(element) + 1;
1747
- return `${tag}:nth-of-type(${index})`;
1748
- };
1749
- const getElementLabel = (element) => {
1750
- const text = normalizeWhitespace(
1751
- (element instanceof HTMLElement ? element.innerText : element.textContent) || ""
1752
- );
1753
- const ariaLabel = normalizeWhitespace(element.getAttribute("aria-label") || "");
1754
- const title = normalizeWhitespace(element.getAttribute("title") || "");
1755
- const placeholder = normalizeWhitespace(
1756
- element.getAttribute("placeholder") || ""
1757
- );
1758
- const name = normalizeWhitespace(element.getAttribute("name") || "");
1759
- const value = element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || element instanceof HTMLButtonElement ? normalizeWhitespace(element.value || "") : "";
1760
- const classHint = Array.from(element.classList).map((item) => compactToken(item, 16)).find(Boolean);
1761
- const fallback = element.id && `#${element.id}` || classHint && `.${classHint}` || buildSelector(element);
1762
- const label = text || ariaLabel || title || placeholder || value || name || fallback;
1763
- if (element.tagName.toLowerCase() === "input") {
1764
- const inputType = element.getAttribute("type") || "text";
1765
- return `${inputType} ${label || "input"}`;
1766
- }
1767
- return label || "untitled";
1768
- };
1769
- const getEventHints = (element) => {
1770
- const record = element;
1771
- const eventHints = [];
1772
- for (const eventName of EVENT_HINT_NAMES) {
1773
- const handlerKey = `on${eventName}`;
1774
- const hasInlineHandler = Boolean(element.getAttribute(handlerKey));
1775
- const hasPropertyHandler = typeof record[handlerKey] === "function";
1776
- if (!hasInlineHandler && !hasPropertyHandler) {
1777
- continue;
1778
- }
1779
- eventHints.push(eventName);
1780
- if (eventHints.length >= MAX_EVENT_HINTS_PER_ELEMENT) {
1781
- break;
1782
- }
1783
- }
1784
- return eventHints;
1785
- };
1786
- const getAriaInteractionHints = (element) => ARIA_INTERACTION_ATTRS.filter((attrName) => element.hasAttribute(attrName)).map(
1787
- (attrName) => attrName.replace("aria-", "")
1788
- );
1789
- const getDataInteractionHints = (element) => element.getAttributeNames().filter(
1790
- (attrName) => attrName.startsWith("data-") && DATA_INTERACTION_PATTERN.test(attrName)
1791
- ).slice(0, 2).map((attrName) => attrName.replace("data-", ""));
1792
- const getStyleHints = (style) => {
1793
- const styleHints = [];
1794
- if (style.cursor === "pointer") {
1795
- styleHints.push("cursor:pointer");
1796
- }
1797
- if (style.display === "flex" || style.display === "grid" || style.display === "inline-flex" || style.display === "inline-grid") {
1798
- styleHints.push(`display:${style.display}`);
1799
- }
1800
- if (style.position === "fixed" || style.position === "sticky") {
1801
- styleHints.push(`position:${style.position}`);
1802
- }
1803
- return styleHints.slice(0, 2);
1804
- };
1805
- const buildBlueprintToken = (element) => {
1806
- const tag = element.tagName.toLowerCase();
1807
- const idToken = element.id ? `#${compactToken(element.id)}` : "";
1808
- const classToken = Array.from(element.classList).map((item) => compactToken(item, 16)).find(Boolean);
1809
- return `${tag}${idToken}${classToken ? `.${classToken}` : ""}`;
1810
- };
1811
- const buildBranchDigest = (element, depth) => {
1812
- const token = buildBlueprintToken(element);
1813
- if (depth <= 0) {
1814
- return token;
1815
- }
1816
- const children = Array.from(element.children).filter((child) => !NON_CONTENT_TAGS.has(child.tagName.toLowerCase())).filter((child) => isVisible(child));
1817
- if (children.length === 0) {
1818
- return token;
1819
- }
1820
- const sampled = children.slice(0, 3).map((child) => buildBranchDigest(child, depth - 1));
1821
- const overflow = children.length > sampled.length ? `+${children.length - sampled.length}` : "";
1822
- return `${token}>${sampled.join("+")}${overflow}`;
1823
- };
1824
- const collectDomBranchDigest = () => {
1825
- const root = document.body ?? document.documentElement;
1826
- const topLevelNodes = Array.from(root.children).filter((child) => !NON_CONTENT_TAGS.has(child.tagName.toLowerCase())).filter((child) => isVisible(child)).slice(0, MAX_BRANCH_SAMPLES);
1827
- return topLevelNodes.map(
1828
- (child) => truncateInline(buildBranchDigest(child, MAX_BRANCH_DEPTH), 140)
1829
- );
1830
- };
1831
- const formatSection = (title, lines) => {
1832
- if (lines.length === 0) {
1833
- return `${title}:
1834
- - none`;
1835
- }
1836
- return `${title}:
1837
- ${lines.join("\n")}`;
1838
- };
1839
- const buildOuterHtmlDigest = () => {
1840
- var _a;
1841
- const raw = ((_a = document.body) == null ? void 0 : _a.outerHTML) || document.documentElement.outerHTML;
1842
- const withoutScripts = raw.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<noscript[\s\S]*?<\/noscript>/gi, "").replace(/<!--[\s\S]*?-->/g, "").replace(/\s+/g, " ").trim();
1843
- const structural = withoutScripts.replace(/>[^<]*</g, "><").replace(/\s+/g, " ").trim();
1844
- return truncate(structural, MAX_OUTER_HTML_DIGEST);
1845
- };
1846
- const collectTextSnippets = () => {
1847
- const root = document.querySelector("main, article, [role='main']") ?? document.body;
1848
- const snippets = [];
1849
- const seen = /* @__PURE__ */ new Set();
1850
- const candidates = Array.from(root.querySelectorAll("p, li, h1, h2, h3"));
1851
- for (const node of candidates) {
1852
- if (!isVisible(node)) {
1853
- continue;
1854
- }
1855
- const text = normalizeWhitespace(node.textContent || "");
1856
- if (!text || text.length < 20) {
1857
- continue;
1858
- }
1859
- const compact = truncateInline(text, 180);
1860
- if (seen.has(compact)) {
1861
- continue;
1862
- }
1863
- seen.add(compact);
1864
- snippets.push(`- ${compact}`);
1865
- if (snippets.length >= MAX_TEXT_SNIPPETS) {
1866
- break;
1867
- }
1868
- }
1869
- return snippets;
1132
+ return snippets;
1870
1133
  };
1871
1134
  const collectLandmarkSnapshot = () => {
1872
1135
  const probes = [
@@ -1918,900 +1181,1899 @@ const collectPageSignalSnapshot = () => {
1918
1181
  if (NON_CONTENT_TAGS.has(tag)) {
1919
1182
  continue;
1920
1183
  }
1921
- if (!isVisible(element)) {
1184
+ if (!isVisible(element)) {
1185
+ continue;
1186
+ }
1187
+ visibleElements += 1;
1188
+ bumpCount(tagCounts, tag);
1189
+ const role = getPrimaryRole(element);
1190
+ if (role) {
1191
+ bumpCount(roleCounts, role);
1192
+ }
1193
+ const depth = getElementDepth(element);
1194
+ if (depth > maxDepth) {
1195
+ maxDepth = depth;
1196
+ }
1197
+ const style = window.getComputedStyle(element);
1198
+ if (TRACKED_DISPLAY_VALUES.has(style.display)) {
1199
+ bumpCount(displayCounts, style.display);
1200
+ }
1201
+ if (TRACKED_POSITION_VALUES.has(style.position)) {
1202
+ bumpCount(positionCounts, style.position);
1203
+ }
1204
+ const eventHints = getEventHints(element);
1205
+ if (eventHints.length > 0) {
1206
+ for (const eventName of eventHints) {
1207
+ bumpCount(eventCounts, eventName);
1208
+ }
1209
+ }
1210
+ const tabIndex = parseTabIndex(element.getAttribute("tabindex"));
1211
+ const hasTabStop = tabIndex !== null && tabIndex >= 0;
1212
+ if (hasTabStop) {
1213
+ tabStopElements += 1;
1214
+ }
1215
+ const hasPointerCursor = style.cursor === "pointer";
1216
+ if (hasPointerCursor) {
1217
+ pointerCursorElements += 1;
1218
+ }
1219
+ const dataHints = getDataInteractionHints(element);
1220
+ if (dataHints.length > 0) {
1221
+ dataHintElements += 1;
1222
+ }
1223
+ const ariaHints = getAriaInteractionHints(element);
1224
+ if (ariaHints.length > 0) {
1225
+ ariaHintElements += 1;
1226
+ }
1227
+ const isContentEditable = element.getAttribute("contenteditable") === "true";
1228
+ if (isContentEditable) {
1229
+ contentEditableElements += 1;
1230
+ }
1231
+ const href = element.getAttribute("href");
1232
+ const isNativeInteractive = NATIVE_INTERACTIVE_TAGS.has(tag) && (tag !== "a" || Boolean(href));
1233
+ const isRoleInteractive = INTERACTIVE_ROLES.has(role);
1234
+ const isDisabled = element.hasAttribute("disabled") || element.getAttribute("aria-disabled") === "true";
1235
+ if (tag === "a" && href && !href.startsWith("#") && !href.startsWith("javascript:")) {
1236
+ const absoluteHref = toAbsoluteUrl(href);
1237
+ const label2 = getElementLabel(element) || absoluteHref;
1238
+ const line2 = `- ${truncateInline(label2, 90)} -> ${truncateInline(absoluteHref, 140)}`;
1239
+ if (!linkSet.has(line2)) {
1240
+ linkSet.add(line2);
1241
+ links.push(line2);
1242
+ }
1243
+ }
1244
+ const hasInteractionSignals = isNativeInteractive || isRoleInteractive || isContentEditable || eventHints.length > 0 || hasTabStop || hasPointerCursor || dataHints.length > 0 || ariaHints.length > 0;
1245
+ if (!hasInteractionSignals || isDisabled) {
1246
+ continue;
1247
+ }
1248
+ if (isNativeInteractive) {
1249
+ semanticInteractables += 1;
1250
+ } else {
1251
+ nonSemanticInteractables += 1;
1252
+ }
1253
+ const selector = buildSelector(element);
1254
+ const label = truncateInline(getElementLabel(element), 90);
1255
+ const styleHints = getStyleHints(style);
1256
+ const signalTokens = [];
1257
+ if (eventHints.length > 0) {
1258
+ signalTokens.push(`evt:${eventHints.join("|")}`);
1259
+ }
1260
+ if (isRoleInteractive) {
1261
+ signalTokens.push(`role:${role}`);
1262
+ }
1263
+ if (hasTabStop) {
1264
+ signalTokens.push(`tab:${tabIndex}`);
1265
+ }
1266
+ if (dataHints.length > 0) {
1267
+ signalTokens.push(`data:${dataHints.join("|")}`);
1268
+ }
1269
+ if (ariaHints.length > 0) {
1270
+ signalTokens.push(`aria:${ariaHints.join("|")}`);
1271
+ }
1272
+ if (styleHints.length > 0) {
1273
+ signalTokens.push(`css:${styleHints.join("|")}`);
1274
+ } else if (hasPointerCursor) {
1275
+ signalTokens.push("css:cursor:pointer");
1276
+ }
1277
+ const signalBlock = signalTokens.length > 0 ? ` [${signalTokens.join("; ")}]` : "";
1278
+ const line = truncateInline(
1279
+ `- ${tag} ${selector}${signalBlock} (${label})`,
1280
+ 240
1281
+ );
1282
+ const score = eventHints.length * 5 + (isNativeInteractive ? 5 : 0) + (isRoleInteractive ? 4 : 0) + (hasTabStop ? 2 : 0) + (hasPointerCursor ? 2 : 0) + (dataHints.length > 0 ? 2 : 0) + (ariaHints.length > 0 ? 1 : 0) + (isContentEditable ? 2 : 0);
1283
+ const existing = interactableCandidates.get(line);
1284
+ if (!existing || score > existing.score) {
1285
+ interactableCandidates.set(line, { line, score, order });
1286
+ }
1287
+ }
1288
+ const interactables = Array.from(interactableCandidates.values()).sort((a2, b) => b.score - a2.score || a2.order - b.order).slice(0, MAX_INTERACTABLES).map((candidate) => candidate.line);
1289
+ const interactiveRoleCounts = new Map(
1290
+ Array.from(roleCounts.entries()).filter(
1291
+ ([role]) => INTERACTIVE_ROLES.has(role)
1292
+ )
1293
+ );
1294
+ const interactionSignals = [
1295
+ `- coverage: semantic=${semanticInteractables}, non-semantic=${nonSemanticInteractables}, contenteditable=${contentEditableElements}`,
1296
+ `- listener hints: ${formatTopCounts(eventCounts, 8)}`,
1297
+ `- interaction cues: tabindex>=0=${tabStopElements}, pointer-cursor=${pointerCursorElements}, data-hints=${dataHintElements}, aria-hints=${ariaHintElements}`,
1298
+ `- role hints: ${formatTopCounts(interactiveRoleCounts, 8)}`,
1299
+ "- listener scope: inline/on* handlers are detected directly; addEventListener handlers are inferred via cues."
1300
+ ].map((line) => truncateInline(line, 250));
1301
+ const branchDigest = collectDomBranchDigest();
1302
+ const pageBlueprint = [
1303
+ `- nodes: total=${allElements.length}, scanned=${sampledElements.length}, visible=${visibleElements}, max-depth=${maxDepth}${allElements.length > sampledElements.length ? ", sampling=on" : ""}`,
1304
+ `- tag density: ${formatTopCounts(tagCounts, 10)}`,
1305
+ `- role density: ${formatTopCounts(roleCounts, 8)}`,
1306
+ `- layout density: display(${formatTopCounts(displayCounts, 6)}), position(${formatTopCounts(positionCounts, 4)})`,
1307
+ `- branch digest: ${branchDigest.length > 0 ? branchDigest.join(" || ") : "none"}`
1308
+ ].map((line) => truncateInline(line, 260));
1309
+ return {
1310
+ links: links.slice(0, MAX_LINKS),
1311
+ interactables,
1312
+ interactionSignals,
1313
+ pageBlueprint
1314
+ };
1315
+ };
1316
+ const buildPageContextSummary = (input, maxChars = MAX_CONTEXT_SUMMARY_CHARS) => {
1317
+ const sections = [
1318
+ formatSection("Meta", [
1319
+ `- URL: ${input.url || "unknown"}`,
1320
+ `- Title: ${input.title || "unknown"}`,
1321
+ `- Lang: ${input.lang || "unknown"}`
1322
+ ]),
1323
+ formatSection("Headings", input.headings),
1324
+ formatSection("Landmark Snapshot", input.landmarks),
1325
+ formatSection("Interaction Signals", input.interactionSignals),
1326
+ formatSection("Compressed Page Blueprint", input.pageBlueprint),
1327
+ formatSection("Top Links", input.links),
1328
+ formatSection("Top Interactables", input.interactables),
1329
+ formatSection("Main Content Snippets", input.textSnippets),
1330
+ formatSection("OuterHTML Skeleton", [
1331
+ `- ${input.outerHtmlDigest || "unavailable"}`
1332
+ ])
1333
+ ];
1334
+ return truncate(sections.join("\n\n"), maxChars);
1335
+ };
1336
/**
 * Returns the page context (links, interactables, text summary) for the
 * current URL, using the page-context cache when an entry exists.
 *
 * Outside a browser (no `window` or `document`) an empty context is returned.
 * On a cache miss the page is scanned, the result is cached, pruned and
 * persisted, and then returned.
 *
 * @returns {{links: string[], interactables: string[], summary: string}}
 */
const getPageContext = () => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  if (!hasDom) {
    return { links: [], interactables: [], summary: "" };
  }
  hydrateCacheFromStorage();
  const url = canonicalUrl(window.location.href);
  // Shapes a cache entry into the public return value.
  const toResult = (cacheEntry) => ({
    links: cacheEntry.links,
    interactables: cacheEntry.interactables,
    summary: buildSummaryWithHistory(cacheEntry)
  });
  const cached = pageContextCache.get(url);
  if (cached) {
    console.info(`[Autic] context cache hit url=${url}`);
    return toResult(cached);
  }
  console.info(`[Autic] context cache miss url=${url}`);
  const snapshot = collectPageSignalSnapshot();
  const headingLines = [];
  for (const element of Array.from(document.querySelectorAll("h1, h2, h3"))) {
    if (!isVisible(element)) {
      continue;
    }
    const line = `- ${truncateInline(normalizeWhitespace(element.textContent || ""), 120)}`;
    // Headings with no text collapse to a bare bullet and are dropped.
    if (line !== "- ") {
      headingLines.push(line);
    }
  }
  const headings = headingLines.slice(0, MAX_HEADINGS);
  const landmarks = collectLandmarkSnapshot();
  const textSnippets = collectTextSnippets();
  const outerHtmlDigest = buildOuterHtmlDigest();
  const summary = buildPageContextSummary({
    url,
    title: document.title,
    lang: document.documentElement.lang,
    headings,
    landmarks,
    links: snapshot.links,
    interactables: snapshot.interactables,
    interactionSignals: snapshot.interactionSignals,
    pageBlueprint: snapshot.pageBlueprint,
    textSnippets,
    outerHtmlDigest
  });
  const entry = {
    url,
    summary,
    links: snapshot.links,
    interactables: snapshot.interactables,
    capturedAt: Date.now(),
    version: PAGE_CONTEXT_CACHE_VERSION
  };
  pageContextCache.set(url, entry);
  pruneOldestCacheEntries();
  persistCacheToStorage();
  console.info(
    `[Autic] context cache stored url=${url} size=${pageContextCache.size}`
  );
  return toResult(entry);
};
1393
// DOM id given to the agent cursor element and used to look it up again.
const AGENT_CURSOR_ID = "auticbot-agent-cursor";
// localStorage key under which the cursor position/visibility is persisted.
const CURSOR_STORAGE_KEY = "auticbot_agent_cursor_state";
// Duration of the cursor's left/top CSS transition; moveCursor waits this long.
const CURSOR_MOVE_DURATION_MS = 900;
// Default duration of the animated window scroll.
const SCROLL_DURATION_MS = 900;
// Timing function used for the cursor's left/top CSS transition.
const CURSOR_EASING = "cubic-bezier(0.4, 0, 0.2, 1)";
// The agent cursor is hidden when the real pointer comes within this distance.
const CURSOR_HOVER_RADIUS_PX = 14;
1399
// localStorage key holding the pending agent resume payload.
const RESUME_STORAGE_KEY = "bulut_agent_resume";
// A saved payload older than this many milliseconds is discarded.
const RESUME_TTL_MS = 6e4;

/**
 * Returns `localStorage`, or null when it is missing or access to it throws
 * (browsers may throw when storage is blocked for the page).
 */
const getResumeStorage = () => {
  try {
    if (typeof localStorage === "undefined" || localStorage === null) {
      return null;
    }
    return localStorage;
  } catch {
    return null;
  }
};

/**
 * Persists `state` together with a `savedAt` timestamp. Best-effort: storage
 * failures (quota, blocked storage, unserializable state) are ignored.
 *
 * @param {object} state - Resume payload; its own enumerable keys are copied.
 */
const savePendingAgentResume = (state) => {
  const storage = getResumeStorage();
  if (!storage) return;
  try {
    storage.setItem(
      RESUME_STORAGE_KEY,
      JSON.stringify({ ...state, savedAt: Date.now() })
    );
  } catch {
    // Deliberately best-effort: resuming is optional.
  }
};

/**
 * Reads the pending resume payload.
 *
 * Returns null (and clears the stored value) when the payload is unreadable,
 * is not a plain object, lacks a finite numeric `savedAt`, or is older than
 * RESUME_TTL_MS. Requiring a valid timestamp stops a malformed payload from
 * living forever: without it the age comparison is against NaN and never
 * reports the entry as stale.
 *
 * @returns {object|null} The stored payload (including `savedAt`) or null.
 */
const getPendingAgentResume = () => {
  const storage = getResumeStorage();
  if (!storage) return null;
  let parsed;
  try {
    const raw = storage.getItem(RESUME_STORAGE_KEY);
    if (!raw) return null;
    parsed = JSON.parse(raw);
  } catch {
    clearPendingAgentResume();
    return null;
  }
  const isRecord = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
  const savedAt = isRecord ? parsed.savedAt : undefined;
  const isFresh = typeof savedAt === "number" && Number.isFinite(savedAt) && Date.now() - savedAt <= RESUME_TTL_MS;
  if (!isFresh) {
    clearPendingAgentResume();
    return null;
  }
  return parsed;
};

/**
 * Removes the pending resume payload. Storage errors are ignored so callers
 * (including the error paths above) never throw from cleanup.
 */
const clearPendingAgentResume = () => {
  const storage = getResumeStorage();
  if (!storage) return;
  try {
    storage.removeItem(RESUME_STORAGE_KEY);
  } catch {
    // Nothing useful can be done if removal fails.
  }
};
1431
/** True for non-null, non-array values whose typeof is "object". */
const isObject$b = (value) => {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === "object";
};
1432
/** Returns the trimmed string, or undefined for non-strings and blank strings. */
const asString = (value) => {
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : undefined;
};
1433
/** Returns the value when it is a finite number, otherwise undefined. */
const asNumber = (value) => {
  const isFiniteNumber = typeof value === "number" && Number.isFinite(value);
  return isFiniteNumber ? value : undefined;
};
1434
/**
 * Picks the text most likely to be JSON out of a raw reply: the trimmed body
 * of the first ``` / ```json fenced block when it is non-empty, otherwise the
 * trimmed input.
 *
 * @param {string} raw
 * @returns {string}
 */
const extractJsonCandidate = (raw) => {
  const text = raw.trim();
  const fence = /```(?:json)?\s*([\s\S]*?)\s*```/i.exec(text);
  const inner = fence === null ? undefined : fence[1];
  // An empty fenced block falls back to the whole trimmed input.
  return inner ? inner.trim() : text;
};
1442
/**
 * Returns the substring from the first "{" up to its matching "}", tracking
 * brace depth and skipping braces inside double-quoted strings (honouring
 * backslash escapes). Returns null when there is no "{" or it never closes.
 *
 * @param {string} input
 * @returns {string|null}
 */
const extractFirstJsonObject = (input) => {
  const openAt = input.indexOf("{");
  if (openAt === -1) {
    return null;
  }
  let nesting = 0;
  let insideString = false;
  let escapeNext = false;
  for (let pos = openAt; pos < input.length; pos += 1) {
    const ch = input[pos];
    if (insideString) {
      if (escapeNext) {
        escapeNext = false;
      } else if (ch === "\\") {
        escapeNext = true;
      } else if (ch === '"') {
        insideString = false;
      }
      continue;
    }
    switch (ch) {
      case '"':
        insideString = true;
        break;
      case "{":
        nesting += 1;
        break;
      case "}":
        nesting -= 1;
        if (nesting === 0) {
          return input.slice(openAt, pos + 1);
        }
        break;
      default:
        break;
    }
  }
  return null;
};
1479
/**
 * Parses JSON out of a raw reply. The candidate from `extractJsonCandidate`
 * is parsed first; if that fails, the first balanced object found by
 * `extractFirstJsonObject` is parsed instead.
 *
 * @param {string} raw
 * @returns {*} The parsed value, or null when nothing parses.
 */
const parseJsonFromRaw = (raw) => {
  const candidate = extractJsonCandidate(raw);
  // Wraps JSON.parse so a failure can be told apart from a parsed `null`.
  const tryParse = (text) => {
    try {
      return { ok: true, value: JSON.parse(text) };
    } catch {
      return { ok: false, value: null };
    }
  };
  const direct = tryParse(candidate);
  if (direct.ok) {
    return direct.value;
  }
  const objectCandidate = extractFirstJsonObject(candidate);
  if (!objectCandidate) {
    return null;
  }
  const salvaged = tryParse(objectCandidate);
  return salvaged.ok ? salvaged.value : null;
};
1495
/**
 * Filters a raw tool-call list down to well-formed calls.
 *
 * Recognised tools: "interact" (action must be move/click/type/submit),
 * "navigate" (needs a non-blank url), "getPageContext", and "scroll" (needs a
 * non-blank selector). Non-object items, unknown tools and calls missing a
 * required field are dropped. Each accepted call is rebuilt from only the
 * fields listed below.
 *
 * @param {*} value - Expected to be an array; anything else yields [].
 * @returns {object[]} Sanitized tool calls in their original order.
 */
const sanitizeToolCalls = (value) => {
  if (!Array.isArray(value)) {
    return [];
  }
  const allowedActions = ["move", "click", "type", "submit"];
  const accepted = [];
  for (const entry of value) {
    if (!isObject$b(entry)) {
      continue;
    }
    switch (entry.tool) {
      case "interact": {
        const action = asString(entry.action);
        if (action && allowedActions.includes(action)) {
          accepted.push({
            tool: "interact",
            action,
            selector: asString(entry.selector),
            // Text is kept verbatim (not trimmed) so typed input is preserved.
            text: typeof entry.text === "string" ? entry.text : undefined,
            x: asNumber(entry.x),
            y: asNumber(entry.y)
          });
        }
        break;
      }
      case "navigate": {
        const url = asString(entry.url);
        if (url) {
          accepted.push({ tool: "navigate", url });
        }
        break;
      }
      case "getPageContext":
        accepted.push({ tool: "getPageContext" });
        break;
      case "scroll": {
        const selector = asString(entry.selector);
        if (selector) {
          accepted.push({ tool: "scroll", selector });
        }
        break;
      }
      default:
        break;
    }
  }
  return accepted;
};
1549
/**
 * Turns a raw agent reply into `{ reply, toolCalls }`.
 *
 * When the reply parses to a JSON object, `reply` is its trimmed `reply`
 * string (or "") and `toolCalls` comes from `tool_calls`, falling back to
 * `toolCalls`. Otherwise the whole trimmed text is the reply and there are no
 * tool calls.
 *
 * @param {string} raw
 * @returns {{reply: string, toolCalls: object[]}}
 */
const parseAgentResponse = (raw) => {
  const payload = parseJsonFromRaw(raw);
  if (isObject$b(payload)) {
    const reply = asString(payload.reply) || "";
    const rawCalls = payload.tool_calls ?? payload.toolCalls;
    return { reply, toolCalls: sanitizeToolCalls(rawCalls) };
  }
  return { reply: raw.trim(), toolCalls: [] };
};
1564
/** Limits `value` to the range [min2, max2]. */
const clamp = (value, min2, max2) => {
  const raisedToFloor = Math.max(min2, value);
  return Math.min(max2, raisedToFloor);
};
1565
/** Sine ease-in-out curve: maps progress 0..1 to an eased 0..1. */
const easeInOutSine = (progress) => {
  const cosine = Math.cos(Math.PI * progress);
  return -(cosine - 1) / 2;
};
1566
/** True when the rect's top is above the viewport or its bottom is below it. */
const isRectOutsideViewport = (rect, viewportHeight) => {
  if (rect.top < 0) {
    return true;
  }
  return rect.bottom > viewportHeight;
};
1567
/**
 * Computes the scroll offset that vertically centres a rect in the viewport,
 * limited to the range [0, max(0, maxScrollTop)].
 *
 * @param {number} currentScrollY - Current vertical scroll offset.
 * @param {number} rectTop - Rect top relative to the viewport.
 * @param {number} rectHeight
 * @param {number} viewportHeight
 * @param {number} maxScrollTop - Largest reachable scroll offset.
 * @returns {number}
 */
const computeCenteredScrollTop = (currentScrollY, rectTop, rectHeight, viewportHeight, maxScrollTop) => {
  // Space left above the rect once it sits in the middle of the viewport.
  const marginAbove = viewportHeight / 2 - rectHeight / 2;
  const desired = currentScrollY + rectTop - marginAbove;
  const upperBound = Math.max(0, maxScrollTop);
  return clamp(desired, 0, upperBound);
};
1571
/**
 * Smoothly scrolls the window vertically to `targetY` using an eased,
 * frame-driven animation, resolving once the final frame has been applied.
 *
 * Does nothing outside a browser, when the target is not a finite number, or
 * when the target is less than 1px from the current position. When
 * `durationMs` is not a positive finite number the window jumps straight to
 * the target: a zero duration would otherwise produce a NaN progress (and
 * `scrollTo(0, NaN)`), and an infinite one would never resolve.
 *
 * @param {number} targetY - Destination vertical scroll offset.
 * @param {number} [durationMs=SCROLL_DURATION_MS] - Animation length in ms.
 * @returns {Promise<void>}
 */
const animateWindowScrollTo = async (targetY, durationMs = SCROLL_DURATION_MS) => {
  if (typeof window === "undefined") {
    return;
  }
  const startY = window.scrollY;
  const delta = targetY - startY;
  if (!Number.isFinite(delta) || Math.abs(delta) < 1) {
    return;
  }
  if (!Number.isFinite(durationMs) || durationMs <= 0) {
    window.scrollTo(0, targetY);
    return;
  }
  await new Promise((resolve) => {
    // Fall back to a ~60fps timer when requestAnimationFrame is unavailable.
    const raf = window.requestAnimationFrame || ((callback) => window.setTimeout(() => callback(performance.now()), 16));
    const startTime = performance.now();
    const step = (now) => {
      const elapsed = now - startTime;
      const progress = clamp(elapsed / durationMs, 0, 1);
      const eased = easeInOutSine(progress);
      window.scrollTo(0, startY + delta * eased);
      if (progress < 1) {
        raf(step);
      } else {
        resolve();
      }
    };
    raf(step);
  });
};
1597
/**
 * Reads the persisted cursor state from localStorage.
 *
 * @returns {{url: string, x: number, y: number, visible: boolean}|null} The
 *   stored state, or null when storage is unavailable, empty, unreadable, or
 *   the payload lacks a string `url` and finite numeric `x`/`y`. `visible` is
 *   true unless the stored value is exactly false.
 */
const getPersistedCursorState = () => {
  if (typeof localStorage === "undefined") {
    return null;
  }
  const isFiniteNumber = (candidate) => typeof candidate === "number" && Number.isFinite(candidate);
  try {
    const raw = localStorage.getItem(CURSOR_STORAGE_KEY);
    if (!raw) {
      return null;
    }
    const { url, x, y, visible } = JSON.parse(raw);
    const isValid = typeof url === "string" && isFiniteNumber(x) && isFiniteNumber(y);
    return isValid ? { url, x, y, visible: visible !== false } : null;
  } catch {
    return null;
  }
};
1620
/**
 * Stores the cursor position and visibility, tagged with the current page
 * URL, in localStorage. Best-effort: does nothing without localStorage and
 * ignores any error raised while writing.
 *
 * @param {number} x2 - Cursor x position.
 * @param {number} y2 - Cursor y position.
 * @param {boolean} visible
 */
const persistCursorState = (x2, y2, visible) => {
  if (typeof localStorage === "undefined") {
    return;
  }
  try {
    const serialized = JSON.stringify({
      url: window.location.href,
      x: x2,
      y: y2,
      visible
    });
    localStorage.setItem(CURSOR_STORAGE_KEY, serialized);
  } catch {
  }
};
1635
/** Writes the cursor's `left`/`top` inline styles in pixels. */
const setCursorPosition = (cursor, x2, y2) => {
  const { style } = cursor;
  style.left = `${x2}px`;
  style.top = `${y2}px`;
};
1639
/**
 * Reads the cursor position back from its `left`/`top` inline styles.
 * Values that do not parse as a number count as 0.
 *
 * @returns {{x: number, y: number}}
 */
const getCursorPosition = (cursor) => {
  const readPx = (cssLength) => {
    const parsed = Number.parseFloat(cssLength);
    return parsed || 0;
  };
  return {
    x: readPx(cursor.style.left),
    y: readPx(cursor.style.top)
  };
};
1643
/** Shows or hides the cursor by setting its inline opacity to "1" or "0". */
const setCursorVisibility = (cursor, visible) => {
  if (visible) {
    cursor.style.opacity = "1";
  } else {
    cursor.style.opacity = "0";
  }
};
1646
// Set once the document-level mousemove listener has been registered.
let cursorHoverTrackingInitialized = false;

/**
 * Registers (at most once) a document `mousemove` listener that hides the
 * agent cursor when the real pointer comes within CURSOR_HOVER_RADIUS_PX of
 * it, and persists the hidden state.
 */
const initializeCursorHoverTracking = () => {
  if (cursorHoverTrackingInitialized) {
    return;
  }
  cursorHoverTrackingInitialized = true;
  const hideCursorWhenPointerIsNear = (event) => {
    const cursor = document.getElementById(AGENT_CURSOR_ID);
    // Only a currently visible cursor element needs hiding.
    const isShown = cursor instanceof HTMLElement && cursor.style.opacity === "1";
    if (!isShown) {
      return;
    }
    const position = getCursorPosition(cursor);
    // The cursor is positioned in page coordinates, so compare against pageX/pageY.
    const distance = Math.hypot(event.pageX - position.x, event.pageY - position.y);
    if (distance > CURSOR_HOVER_RADIUS_PX) {
      return;
    }
    setCursorVisibility(cursor, false);
    persistCursorState(position.x, position.y, false);
  };
  document.addEventListener("mousemove", hideCursorWhenPointerIsNear);
};
1670
/**
 * Restores the persisted position and visibility onto `cursor`, but only
 * when the state was saved for exactly the current `window.location.href`.
 */
const applyStoredCursorStateForCurrentUrl = (cursor) => {
  const stored = getPersistedCursorState();
  const isForThisPage = Boolean(stored) && stored.url === window.location.href;
  if (isForThisPage) {
    setCursorPosition(cursor, stored.x, stored.y);
    setCursorVisibility(cursor, stored.visible);
  }
};
1678
/**
 * Returns the agent cursor element, creating it on first use.
 *
 * A new cursor is a 25px circle, absolutely positioned and initially
 * transparent, appended to `document.body`. After creation, hover tracking
 * is started and any state stored for the current URL is applied.
 *
 * @returns {HTMLElement}
 */
const ensureCursor = () => {
  const existing = document.getElementById(AGENT_CURSOR_ID);
  if (existing) {
    initializeCursorHoverTracking();
    return existing;
  }
  const cursor = document.createElement("div");
  cursor.id = AGENT_CURSOR_ID;
  const width = 25;
  const baseColor = COLORS.primary;
  const border = 25 * 16 / 100;
  const moveTransition = `${CURSOR_MOVE_DURATION_MS}ms ${CURSOR_EASING}`;
  Object.assign(cursor.style, {
    position: "absolute",
    left: "0px",
    top: "0px",
    opacity: "0",
    width: `${width}px`,
    height: `${width}px`,
    borderRadius: "50%",
    background: baseColor,
    border: `${border}px solid #ffffff`,
    boxShadow: "0px 0px 10px rgba(0, 11, 26, 0.5)",
    boxSizing: "border-box",
    zIndex: "2147483647",
    // The dot is decorative and must never intercept real pointer input.
    pointerEvents: "none",
    // left/top describe the dot's centre, not its top-left corner.
    transform: "translate(-50%, -50%)",
    transition: `left ${moveTransition}, top ${moveTransition}, opacity 150ms ease-out`
  });
  document.body.appendChild(cursor);
  initializeCursorHoverTracking();
  applyStoredCursorStateForCurrentUrl(cursor);
  console.info(`[Autic] cursor created color=${baseColor} duration=${CURSOR_MOVE_DURATION_MS}ms`);
  return cursor;
};
1710
/**
 * Moves the agent cursor to (x2, y2), makes it visible, persists that state,
 * and resolves after CURSOR_MOVE_DURATION_MS, the length of the cursor's CSS
 * move transition.
 *
 * @param {number} x2
 * @param {number} y2
 * @returns {Promise<void>}
 */
const moveCursor = async (x2, y2) => {
  const cursor = ensureCursor();
  setCursorPosition(cursor, x2, y2);
  setCursorVisibility(cursor, true);
  persistCursorState(x2, y2, true);
  await new Promise((done) => {
    setTimeout(done, CURSOR_MOVE_DURATION_MS);
  });
};
1717
/**
 * Returns the element's centre in page coordinates (viewport rect plus the
 * current window scroll offsets).
 *
 * @returns {{x: number, y: number}}
 */
const getElementCenter = (element) => {
  const { left, top, width, height } = element.getBoundingClientRect();
  const x = left + window.scrollX + width / 2;
  const y = top + window.scrollY + height / 2;
  return { x, y };
};
1724
// Matches `<base>:contains('text')` / `<base>:contains("text")` at the end of
// a selector: group 1 is the base selector, group 3 the quoted text.
const CONTAINS_SELECTOR_PATTERN = /^(.*?):contains\((['"])(.*?)\2\)\s*$/;

/**
 * Resolves a selector to an element.
 *
 * Standard selectors go through `document.querySelector`. When the browser
 * rejects the selector and it ends in the non-standard `:contains("text")`,
 * the base selector (or `*` when empty) is queried and filtered by text.
 *
 * Among matching candidates the innermost one of the first matching branch
 * is returned. An ancestor's `textContent` includes all descendant text, so
 * taking the first match in document order would return the outermost
 * wrapper (for a bare `:contains(...)`, the root element) rather than the
 * element that actually carries the text.
 *
 * @param {string} selector
 * @returns {Element|null} The element, or null when nothing matches or the
 *   selector is invalid (a warning is logged for invalid selectors).
 */
const findElementBySelector = (selector) => {
  try {
    return document.querySelector(selector);
  } catch (error) {
    const containsMatch = selector.match(CONTAINS_SELECTOR_PATTERN);
    if (!containsMatch) {
      console.warn(`AuticBot selector invalid: ${selector}`, error);
      return null;
    }
    const baseSelector = (containsMatch[1] ?? "").trim() || "*";
    const expectedText = (containsMatch[3] ?? "").trim();
    if (!expectedText) {
      console.warn(`AuticBot selector contains empty text: ${selector}`);
      return null;
    }
    try {
      let best = null;
      for (const candidate of document.querySelectorAll(baseSelector)) {
        const text = candidate.textContent;
        if (typeof text !== "string" || !text.includes(expectedText)) {
          continue;
        }
        if (best === null) {
          best = candidate;
        } else if (best.contains(candidate)) {
          // A matching descendant is a more specific target than its ancestor.
          best = candidate;
        } else {
          // Candidates are in document order: a match outside `best` means
          // its subtree has been fully visited.
          break;
        }
      }
      return best;
    } catch (fallbackError) {
      console.warn(`AuticBot selector fallback invalid: ${selector}`, fallbackError);
      return null;
    }
  }
};
1755
/**
 * Resolves an interact call to a target.
 *
 * A selector that resolves to an HTMLElement yields that element with its
 * centre coordinates. Otherwise numeric `x`/`y` on the call are used as a
 * bare coordinate target. With neither, a warning is logged and null is
 * returned.
 *
 * @param {{selector?: string, x?: number, y?: number}} call2
 * @returns {{element?: HTMLElement, x: number, y: number}|null}
 */
const resolveTarget = (call2) => {
  const { selector, x: fallbackX, y: fallbackY } = call2;
  if (selector) {
    const selected = findElementBySelector(selector);
    if (selected instanceof HTMLElement) {
      const { x, y } = getElementCenter(selected);
      return { element: selected, x, y };
    }
    console.warn(`AuticBot interact: selector not found: ${selector}`);
  }
  const hasCoordinates = typeof fallbackX === "number" && typeof fallbackY === "number";
  if (!hasCoordinates) {
    console.warn("AuticBot interact: missing target selector or coordinates.", call2);
    return null;
  }
  return { x: fallbackX, y: fallbackY };
};
1777
/**
 * Dispatches a synthetic, bubbling, cancelable pointer/mouse event on
 * `element` at page position (x2, y2).
 *
 * Event types starting with "pointer" are created as `PointerEvent` (primary
 * mouse pointer) when the constructor exists, so they carry
 * `pointerId`/`pointerType` like real pointer events. Every other type, and
 * environments without `PointerEvent`, use `MouseEvent`.
 *
 * @param {Element} element - Event target.
 * @param {string} type - Event type, e.g. "mousedown" or "pointerdown".
 * @param {number} x2 - Page x coordinate.
 * @param {number} y2 - Page y coordinate.
 */
const dispatchMouseEvent = (element, type, x2, y2) => {
  const init = {
    bubbles: true,
    cancelable: true,
    view: window,
    // clientX/clientY are viewport-relative, so remove the scroll offsets.
    clientX: x2 - window.scrollX,
    clientY: y2 - window.scrollY
  };
  const wantsPointerEvent = typeof type === "string" && type.startsWith("pointer");
  const event = wantsPointerEvent && typeof PointerEvent === "function"
    ? new PointerEvent(type, { ...init, pointerId: 1, pointerType: "mouse", isPrimary: true })
    : new MouseEvent(type, init);
  element.dispatchEvent(event);
};
2053
- const buildPageContextSummary = (input, maxChars = MAX_CONTEXT_SUMMARY_CHARS) => {
2054
- const sections = [
2055
- formatSection("Meta", [
2056
- `- URL: ${input.url || "unknown"}`,
2057
- `- Title: ${input.title || "unknown"}`,
2058
- `- Lang: ${input.lang || "unknown"}`
2059
- ]),
2060
- formatSection("Headings", input.headings),
2061
- formatSection("Landmark Snapshot", input.landmarks),
2062
- formatSection("Interaction Signals", input.interactionSignals),
2063
- formatSection("Compressed Page Blueprint", input.pageBlueprint),
2064
- formatSection("Top Links", input.links),
2065
- formatSection("Top Interactables", input.interactables),
2066
- formatSection("Main Content Snippets", input.textSnippets),
2067
- formatSection("OuterHTML Skeleton", [
2068
- `- ${input.outerHtmlDigest || "unavailable"}`
2069
- ])
2070
- ];
2071
- return truncate(sections.join("\n\n"), maxChars);
1788
/**
 * Replaces the content of an input, textarea or contenteditable element with
 * `text` and fires the events a page would expect from user typing.
 *
 * For inputs and textareas the value is written through the `value` setter
 * defined on the native element prototype when available. An instance-level
 * `value` property installed by a framework's value tracking would otherwise
 * intercept a plain `element.value = text` and can hide the change from the
 * page's own change handlers. Falls back to direct assignment when the
 * prototype setter is missing or rejects the element.
 *
 * Other element types only log a warning.
 *
 * @param {HTMLElement} element - Target element.
 * @param {string} text - Text that replaces the current content.
 */
const typeIntoElement = (element, text) => {
  const tagName = element.tagName.toUpperCase();
  if (tagName === "INPUT" || tagName === "TEXTAREA") {
    element.focus();
    let nativeCtor;
    if (tagName === "INPUT") {
      nativeCtor = typeof HTMLInputElement === "undefined" ? undefined : HTMLInputElement;
    } else {
      nativeCtor = typeof HTMLTextAreaElement === "undefined" ? undefined : HTMLTextAreaElement;
    }
    const descriptor = nativeCtor ? Object.getOwnPropertyDescriptor(nativeCtor.prototype, "value") : undefined;
    let assigned = false;
    if (descriptor && typeof descriptor.set === "function") {
      try {
        descriptor.set.call(element, text);
        assigned = true;
      } catch {
        // e.g. an element from another document; use plain assignment below.
      }
    }
    if (!assigned) {
      element.value = text;
    }
    element.dispatchEvent(new Event("input", { bubbles: true }));
    element.dispatchEvent(new Event("change", { bubbles: true }));
    return;
  }
  if (element.isContentEditable) {
    element.focus();
    element.textContent = text;
    element.dispatchEvent(new Event("input", { bubbles: true }));
    return;
  }
  console.warn(
    "AuticBot interact: type action requires input, textarea, or contenteditable target."
  );
};
1807
/**
 * Submits the form associated with `element` via `requestSubmit()`.
 *
 * The form is the element itself when it is a FORM, the `form` property of a
 * BUTTON when set, or otherwise the closest ancestor form. Logs a warning
 * when no form can be found.
 *
 * @param {HTMLElement} element
 */
const submitElement = (element) => {
  let form;
  if (element.tagName === "FORM") {
    form = element;
  } else if (element.tagName === "BUTTON" && element.form) {
    form = element.form;
  } else {
    form = element.closest("form");
  }
  if (form) {
    form.requestSubmit();
    return;
  }
  console.warn("AuticBot interact: submit action requires a form target.");
};
1824
/**
 * Scrolls `element` into view through `slowScrollElementIntoViewWithMode`
 * without forcing a re-centre (nothing happens when it is already inside the
 * viewport).
 */
const slowScrollElementIntoView = async (element) => {
  const forceCenter = false;
  await slowScrollElementIntoViewWithMode(element, forceCenter);
};
1827
/**
 * Animates the window scroll so `element` is vertically centred.
 *
 * @param {HTMLElement} element
 * @param {boolean} forceCenter - When false, an element whose rect already
 *   lies inside the viewport is left alone; when true the scroll target is
 *   always computed.
 * @returns {Promise<void>}
 */
const slowScrollElementIntoViewWithMode = async (element, forceCenter) => {
  const rect = element.getBoundingClientRect();
  const viewportHeight = window.innerHeight;
  if (!(forceCenter || isRectOutsideViewport(rect, viewportHeight))) {
    return;
  }
  // Use the taller of body/documentElement as the document height.
  const documentHeight = Math.max(document.body.scrollHeight, document.documentElement.scrollHeight);
  const maxScrollTop = Math.max(0, documentHeight - viewportHeight);
  const targetY = computeCenteredScrollTop(
    window.scrollY,
    rect.top,
    rect.height,
    viewportHeight,
    maxScrollTop
  );
  await animateWindowScrollTo(targetY, SCROLL_DURATION_MS);
};
2073
- const getPageContext = () => {
2074
- if (typeof window === "undefined" || typeof document === "undefined") {
2075
- return {
2076
- links: [],
2077
- interactables: [],
2078
- summary: ""
2079
- };
1846
/**
 * Executes a scroll tool call: centres the element matched by
 * `call2.selector` in the viewport, then moves the agent cursor onto its
 * centre. Logs a warning and does nothing when the selector does not resolve
 * to an HTMLElement.
 *
 * @param {{selector: string}} call2
 * @returns {Promise<void>}
 */
const executeScroll = async (call2) => {
  const { selector } = call2;
  const selected = findElementBySelector(selector);
  if (selected instanceof HTMLElement) {
    await slowScrollElementIntoViewWithMode(selected, true);
    // The centre is measured after scrolling has finished.
    const { x, y } = getElementCenter(selected);
    await moveCursor(x, y);
    return;
  }
  console.warn(`AuticBot scroll: selector not found: ${selector}`);
};
1856
/**
 * Executes an interact tool call.
 *
 * The target is resolved first. For clicks on an element, the element is
 * scrolled into view and its centre re-measured. The agent cursor is then
 * moved to the target, and for anything but "move" the action is performed
 * on the element: "click" dispatches pointer/mouse down/up events followed
 * by `click()`, "type" enters `call2.text` (or ""), and any other action
 * submits.
 *
 * @param {{action: string, selector?: string, text?: string, x?: number, y?: number}} call2
 * @returns {Promise<void>}
 */
const executeInteract = async (call2) => {
  const target = resolveTarget(call2);
  if (!target) {
    return;
  }
  const { action } = call2;
  if (action === "click" && target.element) {
    await slowScrollElementIntoView(target.element);
    // Scrolling may have changed where the element sits; refresh the point.
    const refreshed = getElementCenter(target.element);
    target.x = refreshed.x;
    target.y = refreshed.y;
  }
  await moveCursor(target.x, target.y);
  if (action === "move") {
    return;
  }
  const { element } = target;
  if (!element) {
    console.warn("AuticBot interact: target element not available for action.", call2.action);
    return;
  }
  switch (action) {
    case "click":
      for (const eventType of ["pointerdown", "mousedown", "pointerup", "mouseup"]) {
        dispatchMouseEvent(element, eventType, target.x, target.y);
      }
      element.click();
      break;
    case "type":
      typeIntoElement(element, call2.text ?? "");
      break;
    default:
      submitElement(element);
      break;
  }
};
2130
- const AGENT_CURSOR_ID = "auticbot-agent-cursor";
2131
- const CURSOR_STORAGE_KEY = "auticbot_agent_cursor_state";
2132
- const CURSOR_MOVE_DURATION_MS = 900;
2133
- const SCROLL_DURATION_MS = 900;
2134
- const CURSOR_EASING = "cubic-bezier(0.4, 0, 0.2, 1)";
2135
- const CURSOR_HOVER_RADIUS_PX = 14;
2136
- const isObject$b = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
2137
- const asString = (value) => typeof value === "string" && value.trim() ? value.trim() : void 0;
2138
- const asNumber = (value) => typeof value === "number" && Number.isFinite(value) ? value : void 0;
2139
- const extractJsonCandidate = (raw) => {
2140
- const trimmed = raw.trim();
2141
- const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i);
2142
- if (fencedMatch == null ? void 0 : fencedMatch[1]) {
2143
- return fencedMatch[1].trim();
1889
/**
 * Reports whether `targetUrl` points at the current page, i.e. has the same
 * origin and pathname as `window.location.href` (query and hash are ignored).
 *
 * Relative targets ("/docs", "#section", "?tab=2") are resolved against the
 * current location, matching how the link-matching code resolves them. With
 * no base they could never be parsed and were always reported as a
 * different page.
 *
 * @param {string} targetUrl - Absolute or relative URL.
 * @returns {boolean} False when either URL cannot be parsed.
 */
const isSamePageNavigation = (targetUrl) => {
  try {
    const current = new URL(window.location.href);
    const target = new URL(targetUrl, current);
    return current.origin === target.origin && current.pathname === target.pathname;
  } catch {
    return false;
  }
};
2147
- const extractFirstJsonObject = (input) => {
2148
- const start = input.indexOf("{");
2149
- if (start < 0) {
2150
- return null;
1898
/**
 * Looks for an on-page element that appears to lead to `targetUrl`, so
 * navigation can be performed by clicking it.
 *
 * Strategies are tried in order and the first hit wins:
 *  1. an anchor whose resolved `href` equals the resolved target;
 *  2. an anchor with the same pathname, search and hash;
 *  3. an anchor with the same pathname;
 *  4. a link-like element whose raw `href`/`data-href` attribute equals the
 *     target, with or without a leading slash;
 *  5. a clickable element whose text, `aria-label` or `data-tab` matches the
 *     last path segment, a query value, or the hash of the target
 *     (case-insensitive; the text also matches by substring).
 * Steps 2-4 are skipped when the target cannot be parsed as a URL.
 *
 * @param {string} targetUrl
 * @returns {Element|null}
 */
const findMatchingLinkForTarget = (targetUrl) => {
  let parsedTarget = null;
  try {
    parsedTarget = new URL(targetUrl, window.location.href);
  } catch {
  }
  const linkCandidates = Array.from(
    document.querySelectorAll('a[href], [role="link"][href], [data-href]')
  );
  const anchors = linkCandidates.filter((el) => el instanceof HTMLAnchorElement);
  const targetHref = parsedTarget == null ? undefined : parsedTarget.href;
  const exactHrefMatch = anchors.find((anchor) => anchor.href === targetHref);
  if (exactHrefMatch) {
    return exactHrefMatch;
  }
  // Finds the first anchor whose resolved URL satisfies `predicate`; anchors
  // whose href cannot be parsed are skipped.
  const findAnchorByUrl = (predicate) => anchors.find((anchor) => {
    try {
      return predicate(new URL(anchor.href, window.location.href));
    } catch {
      return false;
    }
  });
  if (parsedTarget) {
    const sameLocation = findAnchorByUrl(
      (url) => url.pathname === parsedTarget.pathname && url.search === parsedTarget.search && url.hash === parsedTarget.hash
    );
    if (sameLocation) {
      return sameLocation;
    }
    const samePath = findAnchorByUrl((url) => url.pathname === parsedTarget.pathname);
    if (samePath) {
      return samePath;
    }
    const rawUrl = targetUrl.replace(/^\//, "");
    const acceptedHrefs = [targetUrl, rawUrl, `/${rawUrl}`];
    const rawAttributeMatch = linkCandidates.find((el) => {
      const href = el.getAttribute("href") || el.getAttribute("data-href") || "";
      return href !== "" && acceptedHrefs.includes(href);
    });
    if (rawAttributeMatch) {
      return rawAttributeMatch;
    }
  }
  const pathOnly = targetUrl.replace(/^https?:\/\/[^/]+/, "").replace(/[?#].*$/, "");
  const urlSegments = pathOnly.split("/").filter(Boolean);
  const lastSegment = urlSegments[urlSegments.length - 1] || "";
  if (!lastSegment) {
    return null;
  }
  const rawTerms = [lastSegment];
  if (parsedTarget) {
    for (const value of parsedTarget.searchParams.values()) {
      if (value) {
        rawTerms.push(value);
      }
    }
    if (parsedTarget.hash) {
      rawTerms.push(parsedTarget.hash.replace(/^#/, ""));
    }
  }
  const searchTerms = rawTerms.map((term) => term.toLowerCase());
  const clickables = Array.from(
    document.querySelectorAll(
      'a, button, [role="link"], [role="tab"], [role="button"], [data-tab], [onclick]'
    )
  );
  const labelMatch = clickables.find((el) => {
    const text = (el.textContent || "").trim().toLowerCase();
    const ariaLabel = (el.getAttribute("aria-label") || "").toLowerCase();
    const dataTab = (el.getAttribute("data-tab") || "").toLowerCase();
    return searchTerms.some(
      (term) => text === term || ariaLabel === term || dataTab === term || text.includes(term)
    );
  });
  return labelMatch ?? null;
};
1974
+ const executeNavigate = async (call2) => {
1975
+ try {
1976
+ const targetUrl = call2.url;
1977
+ let resolvedUrl;
1978
+ try {
1979
+ resolvedUrl = new URL(targetUrl, window.location.href).href;
1980
+ } catch {
1981
+ resolvedUrl = targetUrl;
1982
+ }
1983
+ const matchingElement = findMatchingLinkForTarget(targetUrl);
1984
+ if (matchingElement) {
1985
+ console.log("AuticBot navigate: clicking element", resolvedUrl, matchingElement.tagName);
1986
+ await slowScrollElementIntoView(matchingElement);
1987
+ const center = getElementCenter(matchingElement);
1988
+ await moveCursor(center.x, center.y);
1989
+ matchingElement.dispatchEvent(new MouseEvent("pointerdown", { bubbles: true, view: window }));
1990
+ matchingElement.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, view: window }));
1991
+ matchingElement.dispatchEvent(new MouseEvent("pointerup", { bubbles: true, view: window }));
1992
+ matchingElement.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, view: window }));
1993
+ matchingElement.click();
1994
+ return !isSamePageNavigation(resolvedUrl);
1995
+ }
1996
+ console.log("AuticBot navigate: no matching element found, using direct navigation", resolvedUrl);
1997
+ try {
1998
+ const parsed = new URL(resolvedUrl);
1999
+ if (parsed.origin === window.location.origin && parsed.pathname === window.location.pathname && parsed.hash) {
2000
+ window.location.hash = parsed.hash;
2001
+ return false;
2179
2002
  }
2180
- }
2181
- }
2182
- return null;
2183
- };
2184
- const parseJsonFromRaw = (raw) => {
2185
- const candidate = extractJsonCandidate(raw);
2186
- try {
2187
- return JSON.parse(candidate);
2188
- } catch {
2189
- const objectCandidate = extractFirstJsonObject(candidate);
2190
- if (!objectCandidate) {
2191
- return null;
2003
+ } catch {
2192
2004
  }
2193
2005
  try {
2194
- return JSON.parse(objectCandidate);
2006
+ const parsed = new URL(resolvedUrl);
2007
+ if (parsed.origin === window.location.origin) {
2008
+ const newPath = parsed.pathname + parsed.search + parsed.hash;
2009
+ window.history.pushState({}, "", newPath);
2010
+ window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));
2011
+ return false;
2012
+ }
2195
2013
  } catch {
2196
- return null;
2197
2014
  }
2015
+ window.location.href = resolvedUrl;
2016
+ return true;
2017
+ } catch (error) {
2018
+ console.warn("AuticBot navigate: error", call2.url, error);
2019
+ return false;
2198
2020
  }
2199
2021
  };
2200
- const sanitizeToolCalls = (value) => {
2201
- if (!Array.isArray(value)) {
2202
- return [];
2203
- }
2204
- const toolCalls = [];
2205
- for (const item of value) {
2206
- if (!isObject$b(item)) {
2207
- continue;
2208
- }
2209
- if (item.tool === "interact") {
2210
- const action = asString(item.action);
2211
- if (!action || !["move", "click", "type", "submit"].includes(action)) {
2212
- continue;
2213
- }
2214
- toolCalls.push({
2215
- tool: "interact",
2216
- action,
2217
- selector: asString(item.selector),
2218
- text: typeof item.text === "string" ? item.text : void 0,
2219
- x: asNumber(item.x),
2220
- y: asNumber(item.y)
2221
- });
2022
+ const executeGetPageContext = async () => {
2023
+ const context = getPageContext();
2024
+ console.info(
2025
+ `[Autic] getPageContext tool executed links=${context.links.length} interactables=${context.interactables.length} summary_len=${context.summary.length}`
2026
+ );
2027
+ };
2028
+ const executeToolCalls = async (toolCalls) => {
2029
+ for (const toolCall of toolCalls) {
2030
+ if (toolCall.tool === "interact") {
2031
+ await executeInteract(toolCall);
2222
2032
  continue;
2223
2033
  }
2224
- if (item.tool === "navigate") {
2225
- const url = asString(item.url);
2226
- if (!url) {
2227
- continue;
2228
- }
2229
- toolCalls.push({
2230
- tool: "navigate",
2231
- url
2232
- });
2034
+ if (toolCall.tool === "scroll") {
2035
+ await executeScroll(toolCall);
2233
2036
  continue;
2234
2037
  }
2235
- if (item.tool === "getPageContext") {
2236
- toolCalls.push({
2237
- tool: "getPageContext"
2238
- });
2038
+ if (toolCall.tool === "getPageContext") {
2039
+ await executeGetPageContext();
2239
2040
  continue;
2240
2041
  }
2241
- if (item.tool === "scroll") {
2242
- const selector = asString(item.selector);
2243
- if (!selector) {
2244
- continue;
2042
+ if (toolCall.tool === "navigate") {
2043
+ const terminalNavigation = await executeNavigate(toolCall);
2044
+ if (terminalNavigation) {
2045
+ break;
2245
2046
  }
2246
- toolCalls.push({
2247
- tool: "scroll",
2248
- selector
2249
- });
2250
2047
  }
2251
2048
  }
2252
- return toolCalls;
2253
- };
2254
- const parseAgentResponse = (raw) => {
2255
- const parsed = parseJsonFromRaw(raw);
2256
- if (!isObject$b(parsed)) {
2257
- return {
2258
- reply: raw.trim(),
2259
- toolCalls: []
2260
- };
2261
- }
2262
- const reply = asString(parsed.reply) || "";
2263
- const toolCalls = sanitizeToolCalls(parsed.tool_calls ?? parsed.toolCalls);
2264
- return {
2265
- reply,
2266
- toolCalls
2267
- };
2268
- };
2269
- const clamp = (value, min2, max2) => Math.min(max2, Math.max(min2, value));
2270
- const easeInOutSine = (progress) => -(Math.cos(Math.PI * progress) - 1) / 2;
2271
- const isRectOutsideViewport = (rect, viewportHeight) => rect.top < 0 || rect.bottom > viewportHeight;
2272
- const computeCenteredScrollTop = (currentScrollY, rectTop, rectHeight, viewportHeight, maxScrollTop) => {
2273
- const desired = currentScrollY + rectTop - (viewportHeight / 2 - rectHeight / 2);
2274
- return clamp(desired, 0, Math.max(0, maxScrollTop));
2275
- };
2276
- const animateWindowScrollTo = async (targetY, durationMs = SCROLL_DURATION_MS) => {
2277
- if (typeof window === "undefined") {
2278
- return;
2279
- }
2280
- const startY = window.scrollY;
2281
- const delta = targetY - startY;
2282
- if (Math.abs(delta) < 1) {
2283
- return;
2284
- }
2285
- await new Promise((resolve) => {
2286
- const raf = window.requestAnimationFrame || ((callback) => window.setTimeout(() => callback(performance.now()), 16));
2287
- const startTime = performance.now();
2288
- const step = (now) => {
2289
- const elapsed = now - startTime;
2290
- const progress = clamp(elapsed / durationMs, 0, 1);
2291
- const eased = easeInOutSine(progress);
2292
- window.scrollTo(0, startY + delta * eased);
2293
- if (progress < 1) {
2294
- raf(step);
2295
- } else {
2296
- resolve();
2297
- }
2298
- };
2299
- raf(step);
2300
- });
2301
2049
  };
2302
- const getPersistedCursorState = () => {
2303
- if (typeof localStorage === "undefined") {
2304
- return null;
2305
- }
2050
+ const executeSingleToolCall = async (call2) => {
2051
+ const callId = call2.call_id;
2306
2052
  try {
2307
- const raw = localStorage.getItem(CURSOR_STORAGE_KEY);
2308
- if (!raw) {
2309
- return null;
2053
+ if (call2.tool === "interact") {
2054
+ await executeInteract(call2);
2055
+ return {
2056
+ call_id: callId,
2057
+ result: `Etkileşim başarılı: ${call2.action}`
2058
+ };
2310
2059
  }
2311
- const parsed = JSON.parse(raw);
2312
- if (typeof parsed.url !== "string" || typeof parsed.x !== "number" || !Number.isFinite(parsed.x) || typeof parsed.y !== "number" || !Number.isFinite(parsed.y)) {
2313
- return null;
2060
+ if (call2.tool === "scroll") {
2061
+ await executeScroll(call2);
2062
+ return {
2063
+ call_id: callId,
2064
+ result: "Öğeye kaydırma başarılı."
2065
+ };
2314
2066
  }
2315
- return {
2316
- url: parsed.url,
2317
- x: parsed.x,
2318
- y: parsed.y,
2319
- visible: parsed.visible !== false
2320
- };
2321
- } catch {
2322
- return null;
2323
- }
2324
- };
2325
- const persistCursorState = (x2, y2, visible) => {
2326
- if (typeof localStorage === "undefined") {
2327
- return;
2328
- }
2329
- try {
2330
- const payload = {
2331
- url: window.location.href,
2332
- x: x2,
2333
- y: y2,
2334
- visible
2335
- };
2336
- localStorage.setItem(CURSOR_STORAGE_KEY, JSON.stringify(payload));
2337
- } catch {
2067
+ if (call2.tool === "getPageContext") {
2068
+ const context = getPageContext();
2069
+ return {
2070
+ call_id: callId,
2071
+ result: context.summary
2072
+ };
2073
+ }
2074
+ if (call2.tool === "navigate") {
2075
+ await executeNavigate(call2);
2076
+ await new Promise((resolve) => setTimeout(resolve, 1500));
2077
+ const context = getPageContext();
2078
+ return {
2079
+ call_id: callId,
2080
+ result: `Navigasyon tamamlandı. Şu anki sayfa: ${window.location.href}
2081
+ Sayfa bağlamı: ${context.summary}`
2082
+ };
2083
+ }
2084
+ return { call_id: callId, result: "Bilinmeyen araç." };
2085
+ } catch (error) {
2086
+ const msg = error instanceof Error ? error.message : String(error);
2087
+ console.warn(`[Autic] Tool execution error: ${call2.tool}`, error);
2088
+ return { call_id: callId, result: `Hata: ${msg}` };
2338
2089
  }
2339
2090
  };
2340
- const setCursorPosition = (cursor, x2, y2) => {
2341
- cursor.style.left = `${x2}px`;
2342
- cursor.style.top = `${y2}px`;
2343
- };
2344
- const getCursorPosition = (cursor) => ({
2345
- x: Number.parseFloat(cursor.style.left) || 0,
2346
- y: Number.parseFloat(cursor.style.top) || 0
2347
- });
2348
- const setCursorVisibility = (cursor, visible) => {
2349
- cursor.style.opacity = visible ? "1" : "0";
2350
- };
2351
- let cursorHoverTrackingInitialized = false;
2352
- const initializeCursorHoverTracking = () => {
2353
- if (cursorHoverTrackingInitialized) {
2091
+ const restoreCursorFromStorageForCurrentUrl = () => {
2092
+ if (typeof document === "undefined" || typeof window === "undefined") {
2354
2093
  return;
2355
2094
  }
2356
- cursorHoverTrackingInitialized = true;
2357
- document.addEventListener("mousemove", (event) => {
2358
- const cursor = document.getElementById(AGENT_CURSOR_ID);
2359
- if (!(cursor instanceof HTMLElement)) {
2360
- return;
2361
- }
2362
- if (cursor.style.opacity !== "1") {
2363
- return;
2364
- }
2365
- const { x: x2, y: y2 } = getCursorPosition(cursor);
2366
- const pointerX = event.pageX;
2367
- const pointerY = event.pageY;
2368
- const distance = Math.hypot(pointerX - x2, pointerY - y2);
2369
- if (distance <= CURSOR_HOVER_RADIUS_PX) {
2370
- setCursorVisibility(cursor, false);
2371
- persistCursorState(x2, y2, false);
2372
- }
2373
- });
2374
- };
2375
- const applyStoredCursorStateForCurrentUrl = (cursor) => {
2376
2095
  const stored = getPersistedCursorState();
2377
2096
  if (!stored || stored.url !== window.location.href) {
2378
2097
  return;
2379
2098
  }
2380
- setCursorPosition(cursor, stored.x, stored.y);
2381
- setCursorVisibility(cursor, stored.visible);
2099
+ ensureCursor();
2382
2100
  };
2383
- const ensureCursor = () => {
2384
- const existing = document.getElementById(AGENT_CURSOR_ID);
2385
- if (existing) {
2386
- initializeCursorHoverTracking();
2387
- return existing;
2101
+ if (typeof document !== "undefined") {
2102
+ if (document.readyState === "loading") {
2103
+ document.addEventListener("DOMContentLoaded", restoreCursorFromStorageForCurrentUrl, {
2104
+ once: true
2105
+ });
2106
+ } else {
2107
+ restoreCursorFromStorageForCurrentUrl();
2388
2108
  }
2389
- const cursor = document.createElement("div");
2390
- cursor.id = AGENT_CURSOR_ID;
2391
- cursor.style.position = "absolute";
2392
- cursor.style.left = "0px";
2393
- cursor.style.top = "0px";
2394
- cursor.style.opacity = "0";
2395
- const width = 25;
2396
- cursor.style.width = `${width}px`;
2397
- cursor.style.height = `${width}px`;
2398
- cursor.style.borderRadius = "50%";
2399
- const baseColor = COLORS.primary;
2400
- cursor.style.background = baseColor;
2401
- const border = 25 * 16 / 100;
2402
- cursor.style.border = `${border}px solid #ffffff`;
2403
- cursor.style.boxShadow = "0px 0px 10px rgba(0, 11, 26, 0.5)";
2404
- cursor.style.boxSizing = "border-box";
2405
- cursor.style.zIndex = "2147483647";
2406
- cursor.style.pointerEvents = "none";
2407
- cursor.style.transform = "translate(-50%, -50%)";
2408
- cursor.style.transition = `left ${CURSOR_MOVE_DURATION_MS}ms ${CURSOR_EASING}, top ${CURSOR_MOVE_DURATION_MS}ms ${CURSOR_EASING}, opacity 150ms ease-out`;
2409
- document.body.appendChild(cursor);
2410
- initializeCursorHoverTracking();
2411
- applyStoredCursorStateForCurrentUrl(cursor);
2412
- console.info(`[Autic] cursor created color=${baseColor} duration=${CURSOR_MOVE_DURATION_MS}ms`);
2413
- return cursor;
2109
+ }
2110
+ const TTS_WS_RETRY_DELAYS_MS = [250, 750, 1500];
2111
+ const FORCED_TTS_VOICE = "zeynep";
2112
+ const normalizeBaseUrl = (baseUrl) => {
2113
+ const trimmed = baseUrl.trim().replace(/\/+$/, "");
2114
+ if (/^https?:\/\//i.test(trimmed)) {
2115
+ return trimmed;
2116
+ }
2117
+ return `https://${trimmed}`;
2414
2118
  };
2415
- const moveCursor = async (x2, y2) => {
2416
- const cursor = ensureCursor();
2417
- setCursorPosition(cursor, x2, y2);
2418
- setCursorVisibility(cursor, true);
2419
- persistCursorState(x2, y2, true);
2420
- await new Promise((resolve) => setTimeout(resolve, CURSOR_MOVE_DURATION_MS));
2119
+ const toWebSocketUrl = (baseUrl, path2) => {
2120
+ const normalized = normalizeBaseUrl(baseUrl);
2121
+ const url = new URL(normalized);
2122
+ url.protocol = url.protocol === "https:" ? "wss:" : "ws:";
2123
+ url.pathname = `${url.pathname.replace(/\/$/, "")}${path2}`;
2124
+ url.search = "";
2125
+ url.hash = "";
2126
+ return url.toString();
2421
2127
  };
2422
- const getElementCenter = (element) => {
2423
- const rect = element.getBoundingClientRect();
2424
- return {
2425
- x: rect.left + window.scrollX + rect.width / 2,
2426
- y: rect.top + window.scrollY + rect.height / 2
2427
- };
2128
+ const createRequestId = () => {
2129
+ if (typeof crypto !== "undefined" && "randomUUID" in crypto) {
2130
+ return crypto.randomUUID();
2131
+ }
2132
+ return `tts-${Date.now()}-${Math.random().toString(16).slice(2)}`;
2428
2133
  };
2429
- const CONTAINS_SELECTOR_PATTERN = /^(.*?):contains\((['"])(.*?)\2\)\s*$/;
2430
- const findElementBySelector = (selector) => {
2431
- var _a, _b, _c;
2134
+ const parseTtsWsEventPayload = (value) => {
2432
2135
  try {
2433
- return document.querySelector(selector);
2434
- } catch (error) {
2435
- const containsMatch = selector.match(CONTAINS_SELECTOR_PATTERN);
2436
- if (!containsMatch) {
2437
- console.warn(`AuticBot selector invalid: ${selector}`, error);
2438
- return null;
2439
- }
2440
- const baseSelector = ((_a = containsMatch[1]) == null ? void 0 : _a.trim()) || "*";
2441
- const expectedText = ((_b = containsMatch[3]) == null ? void 0 : _b.trim()) || "";
2442
- if (!expectedText) {
2443
- console.warn(`AuticBot selector contains empty text: ${selector}`);
2444
- return null;
2445
- }
2446
- try {
2447
- const candidates = document.querySelectorAll(baseSelector);
2448
- for (const candidate of candidates) {
2449
- if ((_c = candidate.textContent) == null ? void 0 : _c.includes(expectedText)) {
2450
- return candidate;
2451
- }
2452
- }
2453
- return null;
2454
- } catch (fallbackError) {
2455
- console.warn(`AuticBot selector fallback invalid: ${selector}`, fallbackError);
2136
+ if (typeof value !== "string") {
2456
2137
  return null;
2457
2138
  }
2139
+ return JSON.parse(value);
2140
+ } catch {
2141
+ return null;
2458
2142
  }
2459
2143
  };
2460
- const resolveTarget = (call2) => {
2461
- if (call2.selector) {
2462
- const selected = findElementBySelector(call2.selector);
2463
- if (selected instanceof HTMLElement) {
2464
- const center = getElementCenter(selected);
2465
- return {
2466
- element: selected,
2467
- x: center.x,
2468
- y: center.y
2469
- };
2470
- }
2471
- console.warn(`AuticBot interact: selector not found: ${call2.selector}`);
2144
+ const shouldAcceptAudioSeq = (incomingSeq, highestSeqSeen) => incomingSeq > highestSeqSeen;
2145
+ const shouldFallbackToSse = (error) => {
2146
+ if (typeof error === "object" && error !== null && "retryable" in error) {
2147
+ return Boolean(error.retryable);
2472
2148
  }
2473
- if (typeof call2.x === "number" && typeof call2.y === "number") {
2474
- return {
2475
- x: call2.x,
2476
- y: call2.y
2477
- };
2149
+ return true;
2150
+ };
2151
+ const parseErrorBody = async (response) => {
2152
+ try {
2153
+ const data2 = await response.json();
2154
+ const detail = data2.detail;
2155
+ if (typeof detail === "string") return detail;
2156
+ if (detail && typeof detail === "object") return JSON.stringify(detail);
2157
+ return data2.error || data2.message || response.statusText;
2158
+ } catch {
2159
+ return response.statusText;
2478
2160
  }
2479
- console.warn("AuticBot interact: missing target selector or coordinates.", call2);
2480
- return null;
2481
2161
  };
2482
- const dispatchMouseEvent = (element, type, x2, y2) => {
2483
- element.dispatchEvent(
2484
- new MouseEvent(type, {
2485
- bubbles: true,
2486
- cancelable: true,
2487
- view: window,
2488
- clientX: x2 - window.scrollX,
2489
- clientY: y2 - window.scrollY
2490
- })
2491
- );
2162
+ const sleep = (ms) => new Promise((resolve) => {
2163
+ setTimeout(resolve, ms);
2164
+ });
2165
+ const base64ToUint8Array = (base64) => {
2166
+ const cleanBase64 = base64.replace(/^data:audio\/\w+;base64,/, "");
2167
+ const binaryString = atob(cleanBase64);
2168
+ const bytes = new Uint8Array(binaryString.length);
2169
+ for (let i = 0; i < binaryString.length; i += 1) {
2170
+ bytes[i] = binaryString.charCodeAt(i);
2171
+ }
2172
+ return bytes;
2492
2173
  };
2493
- const typeIntoElement = (element, text) => {
2494
- const tagName = element.tagName.toUpperCase();
2495
- if (tagName === "INPUT" || tagName === "TEXTAREA") {
2496
- element.focus();
2497
- element.value = text;
2498
- element.dispatchEvent(new Event("input", { bubbles: true }));
2499
- element.dispatchEvent(new Event("change", { bubbles: true }));
2174
+ const createWavHeader = (length, sampleRate = 16e3) => {
2175
+ const buffer = new ArrayBuffer(44);
2176
+ const view = new DataView(buffer);
2177
+ const channels = 1;
2178
+ view.setUint32(0, 1380533830, false);
2179
+ view.setUint32(4, 36 + length, true);
2180
+ view.setUint32(8, 1463899717, false);
2181
+ view.setUint32(12, 1718449184, false);
2182
+ view.setUint32(16, 16, true);
2183
+ view.setUint16(20, 1, true);
2184
+ view.setUint16(22, channels, true);
2185
+ view.setUint32(24, sampleRate, true);
2186
+ view.setUint32(28, sampleRate * channels * 2, true);
2187
+ view.setUint16(32, channels * 2, true);
2188
+ view.setUint16(34, 16, true);
2189
+ view.setUint32(36, 1684108385, false);
2190
+ view.setUint32(40, length, true);
2191
+ return new Uint8Array(buffer);
2192
+ };
2193
+ const waitForPlaybackEnd = async (audioElement) => {
2194
+ if (audioElement.ended) {
2500
2195
  return;
2501
2196
  }
2502
- if (element.isContentEditable) {
2503
- element.focus();
2504
- element.textContent = text;
2505
- element.dispatchEvent(new Event("input", { bubbles: true }));
2197
+ await new Promise((resolve, reject) => {
2198
+ const watchdog = window.setInterval(() => {
2199
+ if (!audioElement.ended) {
2200
+ console.info("[Bulut] playback watchdog: still playing...");
2201
+ }
2202
+ }, 3e4);
2203
+ const onEnded = () => {
2204
+ cleanup();
2205
+ resolve();
2206
+ };
2207
+ const onError = () => {
2208
+ cleanup();
2209
+ reject(new Error("Ses oynatma hatası oluştu."));
2210
+ };
2211
+ const cleanup = () => {
2212
+ window.clearInterval(watchdog);
2213
+ audioElement.removeEventListener("ended", onEnded);
2214
+ audioElement.removeEventListener("error", onError);
2215
+ };
2216
+ audioElement.addEventListener("ended", onEnded);
2217
+ audioElement.addEventListener("error", onError);
2218
+ });
2219
+ };
2220
+ const playBufferedAudio = async (chunks, mimeType, sampleRate = 16e3, onAudioStateChange) => {
2221
+ if (chunks.length === 0) {
2222
+ onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2506
2223
  return;
2507
2224
  }
2508
- console.warn(
2509
- "AuticBot interact: type action requires input, textarea, or contenteditable target."
2510
- );
2225
+ const totalBytes = chunks.reduce((acc, c2) => acc + c2.byteLength, 0);
2226
+ console.log(`[Bulut] Playing buffered audio: ${chunks.length} chunks, ${totalBytes} bytes, type=${mimeType}`);
2227
+ onAudioStateChange == null ? void 0 : onAudioStateChange("fallback");
2228
+ const blobParts = chunks.map((chunk) => {
2229
+ const copied = new Uint8Array(chunk.byteLength);
2230
+ copied.set(chunk);
2231
+ return copied.buffer;
2232
+ });
2233
+ let detectedMime = mimeType;
2234
+ if (chunks.length > 0 && chunks[0].length >= 4) {
2235
+ const header = Array.from(chunks[0].slice(0, 4)).map((b) => b.toString(16).padStart(2, "0").toUpperCase()).join(" ");
2236
+ console.log(`[Bulut] Audio header (hex): ${header}`);
2237
+ if (header.startsWith("49 44 33")) {
2238
+ detectedMime = "audio/mpeg";
2239
+ } else if (header.startsWith("FF F3") || header.startsWith("FF F2")) {
2240
+ detectedMime = "audio/mpeg";
2241
+ } else if (header.startsWith("52 49 46 46")) {
2242
+ detectedMime = "audio/wav";
2243
+ } else if (header.startsWith("1A 45 DF A3")) {
2244
+ detectedMime = "audio/webm";
2245
+ }
2246
+ }
2247
+ let safeMimeType = detectedMime && detectedMime.includes("/") ? detectedMime : "audio/mpeg";
2248
+ let finalBlobParts = blobParts;
2249
+ if (mimeType === "audio/pcm") {
2250
+ const totalLength = chunks.reduce((acc, c2) => acc + c2.byteLength, 0);
2251
+ const header = createWavHeader(totalLength, sampleRate);
2252
+ finalBlobParts = [header.buffer, ...blobParts];
2253
+ safeMimeType = "audio/wav";
2254
+ console.log(`[Bulut] Wrapped raw PCM in WAV (rate=${sampleRate})`);
2255
+ }
2256
+ console.log(`[Bulut] Creating blob with type: ${safeMimeType} (original: ${mimeType})`);
2257
+ const blob = new Blob(finalBlobParts, { type: safeMimeType });
2258
+ const audioElement = new Audio();
2259
+ const objectUrl = URL.createObjectURL(blob);
2260
+ try {
2261
+ audioElement.preload = "auto";
2262
+ audioElement.autoplay = true;
2263
+ audioElement.setAttribute("playsinline", "true");
2264
+ audioElement.src = objectUrl;
2265
+ await audioElement.play();
2266
+ onAudioStateChange == null ? void 0 : onAudioStateChange("playing");
2267
+ await waitForPlaybackEnd(audioElement);
2268
+ onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2269
+ } catch (err) {
2270
+ console.error(`[Bulut] Playback failed: ${err}`, { mimeType: safeMimeType, size: blob.size });
2271
+ onAudioStateChange == null ? void 0 : onAudioStateChange("done");
2272
+ throw err;
2273
+ } finally {
2274
+ audioElement.pause();
2275
+ audioElement.removeAttribute("src");
2276
+ audioElement.load();
2277
+ URL.revokeObjectURL(objectUrl);
2278
+ }
2511
2279
  };
2512
- const submitElement = (element) => {
2513
- var _a;
2514
- if (element.tagName === "FORM") {
2515
- element.requestSubmit();
2516
- return;
2517
- }
2518
- if (element.tagName === "BUTTON" && element.form) {
2519
- (_a = element.form) == null ? void 0 : _a.requestSubmit();
2520
- return;
2280
+ const parseSseEventPayload = (eventBlock) => {
2281
+ const dataLines = eventBlock.split(/\r?\n/).map((line) => line.trim()).filter((line) => line.startsWith("data:")).map((line) => line.slice(5).trimStart());
2282
+ if (dataLines.length === 0) {
2283
+ return null;
2521
2284
  }
2522
- const parentForm = element.closest("form");
2523
- if (parentForm) {
2524
- parentForm.requestSubmit();
2525
- return;
2285
+ const dataStr = dataLines.join("\n");
2286
+ if (dataStr === "[DONE]") {
2287
+ return { type: "done" };
2526
2288
  }
2527
- console.warn("AuticBot interact: submit action requires a form target.");
2528
- };
2529
- const slowScrollElementIntoView = async (element) => {
2530
- await slowScrollElementIntoViewWithMode(element, false);
2531
- };
2532
- const slowScrollElementIntoViewWithMode = async (element, forceCenter) => {
2533
- const rect = element.getBoundingClientRect();
2534
- const viewportHeight = window.innerHeight;
2535
- if (!forceCenter && !isRectOutsideViewport(rect, viewportHeight)) {
2536
- return;
2289
+ try {
2290
+ return JSON.parse(dataStr);
2291
+ } catch (error) {
2292
+ console.warn("Error parsing SSE chunk:", error);
2293
+ return null;
2537
2294
  }
2538
- const maxScrollTop = Math.max(
2539
- 0,
2540
- Math.max(document.body.scrollHeight, document.documentElement.scrollHeight) - viewportHeight
2541
- );
2542
- const targetY = computeCenteredScrollTop(
2543
- window.scrollY,
2544
- rect.top,
2545
- rect.height,
2546
- viewportHeight,
2547
- maxScrollTop
2548
- );
2549
- await animateWindowScrollTo(targetY, SCROLL_DURATION_MS);
2550
2295
  };
2551
- const executeScroll = async (call2) => {
2552
- const selected = findElementBySelector(call2.selector);
2553
- if (!(selected instanceof HTMLElement)) {
2554
- console.warn(`AuticBot scroll: selector not found: ${call2.selector}`);
2555
- return;
2296
+ const isAudioSsePayload = (payload) => typeof payload.audio === "string" && (payload.type === void 0 || payload.type === "audio");
2297
+ async function transcribeAudio(baseUrl, file, projectId, sessionId, language) {
2298
+ const url = `${normalizeBaseUrl(baseUrl)}/chat/stt`;
2299
+ const formData = new FormData();
2300
+ formData.append("file", file);
2301
+ formData.append("project_id", projectId);
2302
+ if (sessionId) formData.append("session_id", sessionId);
2303
+ formData.append("language", language);
2304
+ const response = await fetch(url, { method: "POST", body: formData });
2305
+ if (!response.ok) {
2306
+ throw new Error(await parseErrorBody(response));
2556
2307
  }
2557
- await slowScrollElementIntoViewWithMode(selected, true);
2558
- const center = getElementCenter(selected);
2559
- await moveCursor(center.x, center.y);
2308
+ return response.json();
2309
+ }
2310
+ const buildError = (message, retryable = true) => {
2311
+ const error = new Error(message);
2312
+ error.retryable = retryable;
2313
+ return error;
2560
2314
  };
2561
- const executeInteract = async (call2) => {
2562
- const target = resolveTarget(call2);
2563
- if (!target) {
2564
- return;
2565
- }
2566
- if (call2.action === "click" && target.element) {
2567
- await slowScrollElementIntoView(target.element);
2568
- const center = getElementCenter(target.element);
2569
- target.x = center.x;
2570
- target.y = center.y;
2571
- }
2572
- await moveCursor(target.x, target.y);
2573
- if (call2.action === "move") {
2574
- return;
2575
- }
2576
- if (!target.element) {
2577
- console.warn("AuticBot interact: target element not available for action.", call2.action);
2578
- return;
2579
- }
2580
- if (call2.action === "click") {
2581
- dispatchMouseEvent(target.element, "pointerdown", target.x, target.y);
2582
- dispatchMouseEvent(target.element, "mousedown", target.x, target.y);
2583
- dispatchMouseEvent(target.element, "pointerup", target.x, target.y);
2584
- dispatchMouseEvent(target.element, "mouseup", target.x, target.y);
2585
- target.element.click();
2586
- return;
2315
+ const collectTtsViaSse = async (baseUrl, assistantText, accessibilityMode, isStopped, setReader) => {
2316
+ var _a;
2317
+ const ttsFormData = new FormData();
2318
+ ttsFormData.append("text", assistantText);
2319
+ ttsFormData.append("voice", FORCED_TTS_VOICE);
2320
+ ttsFormData.append("accessibility_mode", String(accessibilityMode));
2321
+ const ttsResponse = await fetch(`${normalizeBaseUrl(baseUrl)}/chat/tts`, {
2322
+ method: "POST",
2323
+ body: ttsFormData
2324
+ });
2325
+ if (!ttsResponse.ok) {
2326
+ throw buildError(await parseErrorBody(ttsResponse), false);
2587
2327
  }
2588
- if (call2.action === "type") {
2589
- typeIntoElement(target.element, call2.text ?? "");
2590
- return;
2328
+ const reader = (_a = ttsResponse.body) == null ? void 0 : _a.getReader();
2329
+ if (!reader) {
2330
+ throw buildError("TTS response body is not readable", false);
2591
2331
  }
2592
- submitElement(target.element);
2593
- };
2594
- const isSamePageNavigation = (targetUrl) => {
2595
- try {
2596
- const current = new URL(window.location.href);
2597
- const target = new URL(targetUrl);
2598
- return current.origin === target.origin && current.pathname === target.pathname;
2599
- } catch {
2600
- return false;
2332
+ setReader(reader);
2333
+ const chunks = [];
2334
+ let mimeType = "audio/mpeg";
2335
+ let sampleRate = 16e3;
2336
+ const decoder = new TextDecoder();
2337
+ let buffer = "";
2338
+ while (true) {
2339
+ if (isStopped()) {
2340
+ break;
2341
+ }
2342
+ const { done, value } = await reader.read();
2343
+ if (done) {
2344
+ break;
2345
+ }
2346
+ buffer += decoder.decode(value, { stream: true });
2347
+ const blocks = buffer.split(/\r?\n\r?\n/);
2348
+ buffer = blocks.pop() || "";
2349
+ for (const block of blocks) {
2350
+ const payload = parseSseEventPayload(block);
2351
+ if (!payload) {
2352
+ continue;
2353
+ }
2354
+ if (isAudioSsePayload(payload)) {
2355
+ const format = payload.format || "mp3";
2356
+ mimeType = payload.mime_type || (format === "webm" ? "audio/webm" : "audio/mpeg");
2357
+ chunks.push(base64ToUint8Array(payload.audio));
2358
+ if (payload.sample_rate) {
2359
+ sampleRate = payload.sample_rate;
2360
+ }
2361
+ }
2362
+ }
2601
2363
  }
2364
+ reader.releaseLock();
2365
+ setReader(void 0);
2366
+ return { chunks, mimeType, sampleRate };
2602
2367
  };
2603
- const findMatchingLinkForTarget = (targetUrl) => {
2604
- let parsedTarget = null;
2605
- try {
2606
- parsedTarget = new URL(targetUrl, window.location.href);
2607
- } catch {
2608
- }
2609
- const allLinks = Array.from(
2610
- document.querySelectorAll('a[href], [role="link"][href], [data-href]')
2611
- );
2612
- for (const el of allLinks) {
2613
- if (el instanceof HTMLAnchorElement && el.href === (parsedTarget == null ? void 0 : parsedTarget.href)) {
2614
- return el;
2368
+ const collectTtsViaWebSocket = async (baseUrl, assistantText, accessibilityMode, isStopped, setSocket) => {
2369
+ const wsUrl = toWebSocketUrl(baseUrl, "/chat/tts/ws");
2370
+ const requestId = createRequestId();
2371
+ const chunks = [];
2372
+ let mimeType = "audio/mpeg";
2373
+ let sampleRate = 16e3;
2374
+ let highestSeqSeen = 0;
2375
+ const connectOnce = () => new Promise((resolve, reject) => {
2376
+ if (isStopped()) {
2377
+ reject(buildError("stream_stopped", false));
2378
+ return;
2379
+ }
2380
+ let done = false;
2381
+ let finalError = null;
2382
+ const socket = new WebSocket(wsUrl);
2383
+ setSocket(socket);
2384
+ const finalize = (mode, error) => {
2385
+ socket.onopen = null;
2386
+ socket.onmessage = null;
2387
+ socket.onerror = null;
2388
+ socket.onclose = null;
2389
+ setSocket(null);
2390
+ if (mode === "resolve") {
2391
+ resolve();
2392
+ return;
2393
+ }
2394
+ reject(error || buildError("tts_ws_closed", true));
2395
+ };
2396
+ socket.onopen = () => {
2397
+ console.info(
2398
+ `[Bulut] TTS WS connected request_id=${requestId} resume_seq=${highestSeqSeen}`
2399
+ );
2400
+ socket.send(
2401
+ JSON.stringify({
2402
+ type: "start",
2403
+ request_id: requestId,
2404
+ text: assistantText,
2405
+ voice: FORCED_TTS_VOICE,
2406
+ accessibility_mode: accessibilityMode,
2407
+ last_seq: highestSeqSeen
2408
+ })
2409
+ );
2410
+ };
2411
+ socket.onmessage = (event) => {
2412
+ const payload = parseTtsWsEventPayload(String(event.data));
2413
+ if (!payload) {
2414
+ console.warn("[Bulut] TTS WS invalid JSON payload");
2415
+ return;
2416
+ }
2417
+ if (payload.type === "audio" && typeof payload.audio === "string") {
2418
+ const seq = typeof payload.seq === "number" ? payload.seq : 0;
2419
+ if (shouldAcceptAudioSeq(seq, highestSeqSeen)) {
2420
+ chunks.push(base64ToUint8Array(payload.audio));
2421
+ highestSeqSeen = seq;
2422
+ if (payload.mime_type) {
2423
+ mimeType = payload.mime_type;
2424
+ }
2425
+ if (typeof payload.sample_rate === "number") {
2426
+ sampleRate = payload.sample_rate;
2427
+ }
2428
+ } else {
2429
+ console.info(
2430
+ `[Bulut] TTS WS duplicate chunk ignored request_id=${requestId} seq=${seq} seen=${highestSeqSeen}`
2431
+ );
2432
+ }
2433
+ if (socket.readyState === WebSocket.OPEN) {
2434
+ socket.send(
2435
+ JSON.stringify({
2436
+ type: "ack",
2437
+ request_id: requestId,
2438
+ last_seq: highestSeqSeen
2439
+ })
2440
+ );
2441
+ }
2442
+ return;
2443
+ }
2444
+ if (payload.type === "done") {
2445
+ const streamLastSeq = typeof payload.last_seq === "number" ? payload.last_seq : highestSeqSeen;
2446
+ if (streamLastSeq > highestSeqSeen) {
2447
+ finalError = buildError("tts_ws_sequence_gap", true);
2448
+ done = false;
2449
+ socket.close();
2450
+ return;
2451
+ }
2452
+ done = true;
2453
+ socket.close();
2454
+ return;
2455
+ }
2456
+ if (payload.type === "error") {
2457
+ finalError = buildError(payload.error || "tts_ws_error", payload.retryable !== false);
2458
+ done = false;
2459
+ socket.close();
2460
+ }
2461
+ };
2462
+ socket.onerror = () => {
2463
+ if (!finalError) {
2464
+ finalError = buildError("tts_ws_transport_error", true);
2465
+ }
2466
+ };
2467
+ socket.onclose = () => {
2468
+ if (isStopped()) {
2469
+ finalize("reject", buildError("stream_stopped", false));
2470
+ return;
2471
+ }
2472
+ if (done) {
2473
+ finalize("resolve");
2474
+ return;
2475
+ }
2476
+ finalize("reject", finalError || buildError("tts_ws_closed_before_done", true));
2477
+ };
2478
+ });
2479
+ for (let attempt = 0; attempt <= TTS_WS_RETRY_DELAYS_MS.length; attempt += 1) {
2480
+ if (attempt > 0) {
2481
+ const delay = TTS_WS_RETRY_DELAYS_MS[attempt - 1];
2482
+ console.warn(
2483
+ `[Bulut] TTS WS retry attempt=${attempt} delay_ms=${delay} last_seq=${highestSeqSeen}`
2484
+ );
2485
+ await sleep(delay);
2486
+ }
2487
+ try {
2488
+ await connectOnce();
2489
+ return { chunks, mimeType, sampleRate };
2490
+ } catch (error) {
2491
+ const retryable = shouldFallbackToSse(error);
2492
+ const message = error instanceof Error ? error.message : String(error);
2493
+ console.warn(
2494
+ `[Bulut] TTS WS attempt failed attempt=${attempt} retryable=${retryable} error=${message}`
2495
+ );
2496
+ if (!retryable || attempt === TTS_WS_RETRY_DELAYS_MS.length) {
2497
+ throw error;
2498
+ }
2615
2499
  }
2616
2500
  }
2617
- if (parsedTarget) {
2618
- for (const el of allLinks) {
2619
- if (!(el instanceof HTMLAnchorElement)) continue;
2501
+ throw buildError("tts_ws_exhausted", true);
2502
+ };
2503
+ const voiceChatStream = (baseUrl, audioFile, projectId, sessionId, config, events) => {
2504
+ let isStopped = false;
2505
+ let activeReader;
2506
+ let activeSocket = null;
2507
+ const donePromise = new Promise(async (resolve, reject) => {
2508
+ var _a, _b, _c, _d, _e, _f, _g, _h;
2509
+ try {
2510
+ if (isStopped) return resolve();
2511
+ const sttResult = await transcribeAudio(baseUrl, audioFile, projectId, sessionId, "tr");
2512
+ const currentSessionId = sttResult.session_id;
2513
+ const userText = sttResult.text;
2514
+ (_a = events.onTranscription) == null ? void 0 : _a.call(events, {
2515
+ session_id: currentSessionId,
2516
+ user_text: userText
2517
+ });
2518
+ if (isStopped) return resolve();
2519
+ const llmFormData = new FormData();
2520
+ llmFormData.append("project_id", projectId);
2521
+ llmFormData.append("session_id", currentSessionId);
2522
+ llmFormData.append("user_text", userText);
2523
+ llmFormData.append("model", config.model);
2524
+ if (config.pageContext) llmFormData.append("page_context", config.pageContext);
2525
+ llmFormData.append("accessibility_mode", String(Boolean(config.accessibilityMode)));
2526
+ const llmResponse = await fetch(`${normalizeBaseUrl(baseUrl)}/chat/llm`, {
2527
+ method: "POST",
2528
+ body: llmFormData
2529
+ });
2530
+ if (!llmResponse.ok) {
2531
+ throw new Error(await parseErrorBody(llmResponse));
2532
+ }
2533
+ activeReader = (_b = llmResponse.body) == null ? void 0 : _b.getReader();
2534
+ if (!activeReader) throw new Error("LLM response body is not readable");
2535
+ const decoder = new TextDecoder();
2536
+ let buffer = "";
2537
+ let assistantText = "";
2538
+ while (true) {
2539
+ if (isStopped) break;
2540
+ const { done, value } = await activeReader.read();
2541
+ if (done) break;
2542
+ buffer += decoder.decode(value, { stream: true });
2543
+ const chunks = buffer.split(/\r?\n\r?\n/);
2544
+ buffer = chunks.pop() || "";
2545
+ for (const chunk of chunks) {
2546
+ const data2 = parseSseEventPayload(chunk);
2547
+ if (!data2) continue;
2548
+ if (data2.type === "session" && data2.session_id) {
2549
+ (_c = events.onTranscription) == null ? void 0 : _c.call(events, {
2550
+ session_id: data2.session_id,
2551
+ user_text: sttResult.text
2552
+ });
2553
+ continue;
2554
+ }
2555
+ if (data2.type === "llm_delta" && typeof data2.delta === "string") {
2556
+ (_d = events.onAssistantDelta) == null ? void 0 : _d.call(events, data2.delta);
2557
+ continue;
2558
+ }
2559
+ if (data2.type === "llm_done") {
2560
+ assistantText = data2.assistant_text || "";
2561
+ (_e = events.onAssistantDone) == null ? void 0 : _e.call(events, assistantText);
2562
+ continue;
2563
+ }
2564
+ if (data2.type === "error") {
2565
+ throw new Error(data2.error || "LLM Error");
2566
+ }
2567
+ }
2568
+ }
2569
+ if (activeReader) {
2570
+ activeReader.releaseLock();
2571
+ activeReader = void 0;
2572
+ }
2573
+ if (isStopped || !assistantText) {
2574
+ return resolve();
2575
+ }
2576
+ console.info(
2577
+ `[Bulut] TTS start mode=voice requested_voice=${config.voice} forced_voice=${FORCED_TTS_VOICE} accessibility_mode=${Boolean(config.accessibilityMode)}`
2578
+ );
2579
+ (_f = events.onAudioStateChange) == null ? void 0 : _f.call(events, "rendering");
2580
+ let ttsResult;
2620
2581
  try {
2621
- const elUrl = new URL(el.href, window.location.href);
2622
- if (elUrl.pathname === parsedTarget.pathname && elUrl.search === parsedTarget.search && elUrl.hash === parsedTarget.hash) {
2623
- return el;
2582
+ ttsResult = await collectTtsViaWebSocket(
2583
+ baseUrl,
2584
+ assistantText,
2585
+ Boolean(config.accessibilityMode),
2586
+ () => isStopped,
2587
+ (socket) => {
2588
+ activeSocket = socket;
2589
+ }
2590
+ );
2591
+ } catch (wsError) {
2592
+ if (isStopped) {
2593
+ return resolve();
2624
2594
  }
2625
- } catch {
2626
- continue;
2595
+ console.warn(
2596
+ `[Bulut] TTS WS failed, falling back to SSE: ${wsError instanceof Error ? wsError.message : String(wsError)}`
2597
+ );
2598
+ ttsResult = await collectTtsViaSse(
2599
+ baseUrl,
2600
+ assistantText,
2601
+ Boolean(config.accessibilityMode),
2602
+ () => isStopped,
2603
+ (reader) => {
2604
+ activeReader = reader;
2605
+ }
2606
+ );
2607
+ }
2608
+ if (!isStopped && ttsResult.chunks.length > 0) {
2609
+ await playBufferedAudio(
2610
+ ttsResult.chunks,
2611
+ ttsResult.mimeType,
2612
+ ttsResult.sampleRate,
2613
+ events.onAudioStateChange
2614
+ );
2615
+ } else {
2616
+ (_g = events.onAudioStateChange) == null ? void 0 : _g.call(events, "done");
2617
+ }
2618
+ resolve();
2619
+ } catch (err) {
2620
+ const msg = err instanceof Error ? err.message : String(err);
2621
+ (_h = events.onError) == null ? void 0 : _h.call(events, msg);
2622
+ reject(err);
2623
+ } finally {
2624
+ activeReader == null ? void 0 : activeReader.cancel().catch(() => {
2625
+ });
2626
+ if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
2627
+ activeSocket.close();
2628
+ }
2629
+ activeSocket = null;
2630
+ }
2631
+ });
2632
+ return {
2633
+ stop: () => {
2634
+ isStopped = true;
2635
+ if (activeReader) {
2636
+ activeReader.cancel().catch(() => {
2637
+ });
2638
+ }
2639
+ if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
2640
+ activeSocket.close();
2641
+ }
2642
+ },
2643
+ done: donePromise
2644
+ };
2645
+ };
2646
+ const agentVoiceChatStream = (baseUrl, audioFile, projectId, sessionId, config, events, executeTool) => {
2647
+ let isStopped = false;
2648
+ let activeSocket = null;
2649
+ let activeReader;
2650
+ let errorEmitted = false;
2651
+ const donePromise = new Promise(async (resolve, reject) => {
2652
+ var _a, _b, _c, _d;
2653
+ try {
2654
+ if (isStopped) return resolve();
2655
+ const sttResult = await transcribeAudio(
2656
+ baseUrl,
2657
+ audioFile,
2658
+ projectId,
2659
+ sessionId,
2660
+ "tr"
2661
+ );
2662
+ const currentSessionId = sttResult.session_id;
2663
+ let effectiveSessionId = currentSessionId;
2664
+ const userText = sttResult.text;
2665
+ (_a = events.onTranscription) == null ? void 0 : _a.call(events, {
2666
+ session_id: currentSessionId,
2667
+ user_text: userText
2668
+ });
2669
+ if (isStopped) return resolve();
2670
+ const assistantText = await new Promise((agentResolve, agentReject) => {
2671
+ if (isStopped) {
2672
+ agentResolve("");
2673
+ return;
2674
+ }
2675
+ const wsUrl = toWebSocketUrl(baseUrl, "/chat/agent/ws");
2676
+ const socket = new WebSocket(wsUrl);
2677
+ activeSocket = socket;
2678
+ let finalReply = "";
2679
+ let resolved = false;
2680
+ const finish = (reply) => {
2681
+ if (resolved) return;
2682
+ resolved = true;
2683
+ agentResolve(reply);
2684
+ };
2685
+ const fail = (error) => {
2686
+ if (resolved) return;
2687
+ resolved = true;
2688
+ agentReject(error);
2689
+ };
2690
+ socket.onopen = () => {
2691
+ console.info("[Bulut] Agent WS connected");
2692
+ socket.send(JSON.stringify({
2693
+ type: "start",
2694
+ project_id: projectId,
2695
+ session_id: currentSessionId,
2696
+ user_text: userText,
2697
+ model: config.model,
2698
+ page_context: config.pageContext,
2699
+ accessibility_mode: config.accessibilityMode
2700
+ }));
2701
+ };
2702
+ socket.onmessage = async (event) => {
2703
+ var _a2, _b2, _c2, _d2, _e, _f, _g, _h;
2704
+ let data2;
2705
+ try {
2706
+ data2 = JSON.parse(String(event.data));
2707
+ } catch {
2708
+ console.warn("[Bulut] Agent WS invalid JSON");
2709
+ return;
2710
+ }
2711
+ const msgType = data2.type;
2712
+ if (msgType === "session" && typeof data2.session_id === "string") {
2713
+ effectiveSessionId = data2.session_id;
2714
+ (_a2 = events.onSessionId) == null ? void 0 : _a2.call(events, effectiveSessionId);
2715
+ return;
2716
+ }
2717
+ if (msgType === "iteration") {
2718
+ (_b2 = events.onIteration) == null ? void 0 : _b2.call(
2719
+ events,
2720
+ data2.iteration,
2721
+ data2.max_iterations
2722
+ );
2723
+ return;
2724
+ }
2725
+ if (msgType === "reply_delta" && typeof data2.delta === "string") {
2726
+ (_c2 = events.onAssistantDelta) == null ? void 0 : _c2.call(events, data2.delta);
2727
+ return;
2728
+ }
2729
+ if (msgType === "tool_calls" && Array.isArray(data2.calls)) {
2730
+ const calls = data2.calls;
2731
+ (_d2 = events.onToolCalls) == null ? void 0 : _d2.call(events, calls);
2732
+ const results = [];
2733
+ for (const call2 of calls) {
2734
+ const isNavigate = call2.tool === "navigate";
2735
+ if (isNavigate) {
2736
+ savePendingAgentResume({
2737
+ sessionId: effectiveSessionId,
2738
+ projectId,
2739
+ model: config.model,
2740
+ accessibilityMode: Boolean(config.accessibilityMode),
2741
+ pendingToolCalls: calls.map((c2) => ({
2742
+ call_id: c2.call_id,
2743
+ tool: c2.tool,
2744
+ args: c2.args
2745
+ })),
2746
+ completedResults: [...results]
2747
+ });
2748
+ }
2749
+ const result = await executeTool(call2);
2750
+ if (isNavigate) {
2751
+ clearPendingAgentResume();
2752
+ }
2753
+ (_e = events.onToolResult) == null ? void 0 : _e.call(events, call2.call_id, call2.tool, result.result);
2754
+ results.push(result);
2755
+ }
2756
+ if (socket.readyState === WebSocket.OPEN) {
2757
+ socket.send(JSON.stringify({
2758
+ type: "tool_results",
2759
+ results
2760
+ }));
2761
+ }
2762
+ return;
2763
+ }
2764
+ if (msgType === "agent_done") {
2765
+ finalReply = data2.final_reply || "";
2766
+ (_f = events.onAssistantDone) == null ? void 0 : _f.call(events, finalReply);
2767
+ if (typeof data2.session_id === "string") {
2768
+ (_g = events.onSessionId) == null ? void 0 : _g.call(events, data2.session_id);
2769
+ }
2770
+ finish(finalReply);
2771
+ return;
2772
+ }
2773
+ if (msgType === "error") {
2774
+ const errMsg = data2.error || "Agent error";
2775
+ errorEmitted = true;
2776
+ (_h = events.onError) == null ? void 0 : _h.call(events, errMsg);
2777
+ fail(new Error(errMsg));
2778
+ return;
2779
+ }
2780
+ };
2781
+ socket.onerror = () => {
2782
+ var _a2;
2783
+ console.error("[Bulut] Agent WS error");
2784
+ errorEmitted = true;
2785
+ (_a2 = events.onError) == null ? void 0 : _a2.call(events, "Agent WebSocket connection error");
2786
+ fail(new Error("Agent WebSocket connection error"));
2787
+ };
2788
+ socket.onclose = () => {
2789
+ console.info("[Bulut] Agent WS closed");
2790
+ finish(finalReply);
2791
+ };
2792
+ });
2793
+ activeSocket = null;
2794
+ if (isStopped || !assistantText) {
2795
+ return resolve();
2627
2796
  }
2628
- }
2629
- for (const el of allLinks) {
2630
- if (!(el instanceof HTMLAnchorElement)) continue;
2797
+ console.info(
2798
+ `[Bulut] TTS start mode=agent forced_voice=${FORCED_TTS_VOICE}`
2799
+ );
2800
+ (_b = events.onAudioStateChange) == null ? void 0 : _b.call(events, "rendering");
2801
+ let ttsResult;
2631
2802
  try {
2632
- const elUrl = new URL(el.href, window.location.href);
2633
- if (elUrl.pathname === parsedTarget.pathname) {
2634
- return el;
2635
- }
2636
- } catch {
2637
- continue;
2803
+ ttsResult = await collectTtsViaWebSocket(
2804
+ baseUrl,
2805
+ assistantText,
2806
+ Boolean(config.accessibilityMode),
2807
+ () => isStopped,
2808
+ (socket) => {
2809
+ activeSocket = socket;
2810
+ }
2811
+ );
2812
+ } catch (wsError) {
2813
+ if (isStopped) return resolve();
2814
+ console.warn(
2815
+ `[Bulut] TTS WS failed, falling back to SSE: ${wsError instanceof Error ? wsError.message : String(wsError)}`
2816
+ );
2817
+ ttsResult = await collectTtsViaSse(
2818
+ baseUrl,
2819
+ assistantText,
2820
+ Boolean(config.accessibilityMode),
2821
+ () => isStopped,
2822
+ (reader) => {
2823
+ activeReader = reader;
2824
+ }
2825
+ );
2826
+ }
2827
+ if (!isStopped && ttsResult.chunks.length > 0) {
2828
+ await playBufferedAudio(
2829
+ ttsResult.chunks,
2830
+ ttsResult.mimeType,
2831
+ ttsResult.sampleRate,
2832
+ events.onAudioStateChange
2833
+ );
2834
+ } else {
2835
+ (_c = events.onAudioStateChange) == null ? void 0 : _c.call(events, "done");
2836
+ }
2837
+ resolve();
2838
+ } catch (err) {
2839
+ if (!errorEmitted) {
2840
+ const msg = err instanceof Error ? err.message : String(err);
2841
+ (_d = events.onError) == null ? void 0 : _d.call(events, msg);
2842
+ }
2843
+ reject(err);
2844
+ } finally {
2845
+ activeReader == null ? void 0 : activeReader.cancel().catch(() => {
2846
+ });
2847
+ if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
2848
+ activeSocket.close();
2638
2849
  }
2850
+ activeSocket = null;
2639
2851
  }
2640
- const rawUrl = targetUrl.replace(/^\//, "");
2641
- for (const el of allLinks) {
2642
- const href = el.getAttribute("href") || el.getAttribute("data-href") || "";
2643
- if (href && (href === targetUrl || href === rawUrl || href === `/${rawUrl}`)) {
2644
- return el;
2852
+ });
2853
+ return {
2854
+ stop: () => {
2855
+ isStopped = true;
2856
+ if (activeReader) {
2857
+ activeReader.cancel().catch(() => {
2858
+ });
2859
+ }
2860
+ if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
2861
+ activeSocket.close();
2645
2862
  }
2863
+ },
2864
+ done: donePromise
2865
+ };
2866
+ };
2867
+ const agentResumeStream = (baseUrl, resumeState, pageContext, events, executeTool) => {
2868
+ let isStopped = false;
2869
+ let activeSocket = null;
2870
+ let activeReader;
2871
+ let errorEmitted = false;
2872
+ const allResults = [...resumeState.completedResults];
2873
+ for (const tc of resumeState.pendingToolCalls) {
2874
+ if (allResults.some((r2) => r2.call_id === tc.call_id)) continue;
2875
+ if (tc.tool === "navigate") {
2876
+ allResults.push({
2877
+ call_id: tc.call_id,
2878
+ result: `Navigasyon tamamlandı. Şu anki sayfa: ${typeof window !== "undefined" ? window.location.href : ""}
2879
+ Sayfa bağlamı: ${pageContext}`
2880
+ });
2881
+ } else {
2882
+ allResults.push({
2883
+ call_id: tc.call_id,
2884
+ result: "Sayfa yeniden yüklendi, bu araç çalıştırılamadı."
2885
+ });
2646
2886
  }
2647
2887
  }
2648
- const urlSegments = targetUrl.replace(/^https?:\/\/[^/]+/, "").replace(/[?#].*$/, "").split("/").filter(Boolean);
2649
- const lastSegment = urlSegments[urlSegments.length - 1] || "";
2650
- if (lastSegment) {
2651
- let searchTerms = [lastSegment];
2652
- if (parsedTarget) {
2653
- for (const [, value] of parsedTarget.searchParams.entries()) {
2654
- if (value) searchTerms.push(value);
2655
- }
2656
- if (parsedTarget.hash) {
2657
- searchTerms.push(parsedTarget.hash.replace(/^#/, ""));
2888
+ const donePromise = new Promise(async (resolve, reject) => {
2889
+ var _a, _b, _c;
2890
+ try {
2891
+ if (isStopped) return resolve();
2892
+ let effectiveSessionId = resumeState.sessionId;
2893
+ const assistantText = await new Promise((agentResolve, agentReject) => {
2894
+ if (isStopped) {
2895
+ agentResolve("");
2896
+ return;
2897
+ }
2898
+ const wsUrl = toWebSocketUrl(baseUrl, "/chat/agent/ws");
2899
+ const socket = new WebSocket(wsUrl);
2900
+ activeSocket = socket;
2901
+ let finalReply = "";
2902
+ let resolved = false;
2903
+ const finish = (reply) => {
2904
+ if (resolved) return;
2905
+ resolved = true;
2906
+ agentResolve(reply);
2907
+ };
2908
+ const fail = (error) => {
2909
+ if (resolved) return;
2910
+ resolved = true;
2911
+ agentReject(error);
2912
+ };
2913
+ socket.onopen = () => {
2914
+ console.info("[Bulut] Agent WS resume connected");
2915
+ socket.send(JSON.stringify({
2916
+ type: "resume",
2917
+ project_id: resumeState.projectId,
2918
+ session_id: resumeState.sessionId,
2919
+ model: resumeState.model,
2920
+ page_context: pageContext,
2921
+ accessibility_mode: resumeState.accessibilityMode,
2922
+ pending_tool_calls: resumeState.pendingToolCalls,
2923
+ tool_results: allResults
2924
+ }));
2925
+ };
2926
+ socket.onmessage = async (event) => {
2927
+ var _a2, _b2, _c2, _d, _e, _f, _g, _h;
2928
+ let data2;
2929
+ try {
2930
+ data2 = JSON.parse(String(event.data));
2931
+ } catch {
2932
+ return;
2933
+ }
2934
+ const msgType = data2.type;
2935
+ if (msgType === "session" && typeof data2.session_id === "string") {
2936
+ effectiveSessionId = data2.session_id;
2937
+ (_a2 = events.onSessionId) == null ? void 0 : _a2.call(events, effectiveSessionId);
2938
+ return;
2939
+ }
2940
+ if (msgType === "iteration") {
2941
+ (_b2 = events.onIteration) == null ? void 0 : _b2.call(
2942
+ events,
2943
+ data2.iteration,
2944
+ data2.max_iterations
2945
+ );
2946
+ return;
2947
+ }
2948
+ if (msgType === "reply_delta" && typeof data2.delta === "string") {
2949
+ (_c2 = events.onAssistantDelta) == null ? void 0 : _c2.call(events, data2.delta);
2950
+ return;
2951
+ }
2952
+ if (msgType === "tool_calls" && Array.isArray(data2.calls)) {
2953
+ const calls = data2.calls;
2954
+ (_d = events.onToolCalls) == null ? void 0 : _d.call(events, calls);
2955
+ const results = [];
2956
+ for (const call2 of calls) {
2957
+ const isNavigate = call2.tool === "navigate";
2958
+ if (isNavigate) {
2959
+ savePendingAgentResume({
2960
+ sessionId: effectiveSessionId,
2961
+ projectId: resumeState.projectId,
2962
+ model: resumeState.model,
2963
+ accessibilityMode: resumeState.accessibilityMode,
2964
+ pendingToolCalls: calls.map((c2) => ({
2965
+ call_id: c2.call_id,
2966
+ tool: c2.tool,
2967
+ args: c2.args
2968
+ })),
2969
+ completedResults: [...results]
2970
+ });
2971
+ }
2972
+ const result = await executeTool(call2);
2973
+ if (isNavigate) {
2974
+ clearPendingAgentResume();
2975
+ }
2976
+ (_e = events.onToolResult) == null ? void 0 : _e.call(events, call2.call_id, call2.tool, result.result);
2977
+ results.push(result);
2978
+ }
2979
+ if (socket.readyState === WebSocket.OPEN) {
2980
+ socket.send(JSON.stringify({ type: "tool_results", results }));
2981
+ }
2982
+ return;
2983
+ }
2984
+ if (msgType === "agent_done") {
2985
+ finalReply = data2.final_reply || "";
2986
+ (_f = events.onAssistantDone) == null ? void 0 : _f.call(events, finalReply);
2987
+ if (typeof data2.session_id === "string") {
2988
+ (_g = events.onSessionId) == null ? void 0 : _g.call(events, data2.session_id);
2989
+ }
2990
+ finish(finalReply);
2991
+ return;
2992
+ }
2993
+ if (msgType === "error") {
2994
+ const errMsg = data2.error || "Agent error";
2995
+ errorEmitted = true;
2996
+ (_h = events.onError) == null ? void 0 : _h.call(events, errMsg);
2997
+ fail(new Error(errMsg));
2998
+ return;
2999
+ }
3000
+ };
3001
+ socket.onerror = () => {
3002
+ var _a2;
3003
+ errorEmitted = true;
3004
+ (_a2 = events.onError) == null ? void 0 : _a2.call(events, "Agent WebSocket error");
3005
+ fail(new Error("Agent WebSocket error"));
3006
+ };
3007
+ socket.onclose = () => finish(finalReply);
3008
+ });
3009
+ activeSocket = null;
3010
+ if (isStopped || !assistantText) return resolve();
3011
+ console.info(`[Bulut] TTS start mode=resume forced_voice=${FORCED_TTS_VOICE}`);
3012
+ (_a = events.onAudioStateChange) == null ? void 0 : _a.call(events, "rendering");
3013
+ let ttsResult;
3014
+ try {
3015
+ ttsResult = await collectTtsViaWebSocket(
3016
+ baseUrl,
3017
+ assistantText,
3018
+ Boolean(resumeState.accessibilityMode),
3019
+ () => isStopped,
3020
+ (socket) => {
3021
+ activeSocket = socket;
3022
+ }
3023
+ );
3024
+ } catch (wsError) {
3025
+ if (isStopped) return resolve();
3026
+ console.warn(
3027
+ `[Bulut] TTS WS failed, falling back to SSE: ${wsError instanceof Error ? wsError.message : String(wsError)}`
3028
+ );
3029
+ ttsResult = await collectTtsViaSse(
3030
+ baseUrl,
3031
+ assistantText,
3032
+ Boolean(resumeState.accessibilityMode),
3033
+ () => isStopped,
3034
+ (reader) => {
3035
+ activeReader = reader;
3036
+ }
3037
+ );
2658
3038
  }
2659
- }
2660
- searchTerms = searchTerms.map((t2) => t2.toLowerCase());
2661
- const clickables = Array.from(
2662
- document.querySelectorAll(
2663
- 'a, button, [role="link"], [role="tab"], [role="button"], [data-tab], [onclick]'
2664
- )
2665
- );
2666
- for (const el of clickables) {
2667
- const text = (el.textContent || "").trim().toLowerCase();
2668
- const ariaLabel = (el.getAttribute("aria-label") || "").toLowerCase();
2669
- const dataTab = (el.getAttribute("data-tab") || "").toLowerCase();
2670
- for (const term of searchTerms) {
2671
- if (text === term || ariaLabel === term || dataTab === term || text.includes(term)) {
2672
- return el;
2673
- }
3039
+ if (!isStopped && ttsResult.chunks.length > 0) {
3040
+ await playBufferedAudio(
3041
+ ttsResult.chunks,
3042
+ ttsResult.mimeType,
3043
+ ttsResult.sampleRate,
3044
+ events.onAudioStateChange
3045
+ );
3046
+ } else {
3047
+ (_b = events.onAudioStateChange) == null ? void 0 : _b.call(events, "done");
2674
3048
  }
2675
- }
2676
- }
2677
- return null;
2678
- };
2679
- const executeNavigate = async (call2) => {
2680
- try {
2681
- const targetUrl = call2.url;
2682
- let resolvedUrl;
2683
- try {
2684
- resolvedUrl = new URL(targetUrl, window.location.href).href;
2685
- } catch {
2686
- resolvedUrl = targetUrl;
2687
- }
2688
- const matchingElement = findMatchingLinkForTarget(targetUrl);
2689
- if (matchingElement) {
2690
- console.log("AuticBot navigate: clicking element", resolvedUrl, matchingElement.tagName);
2691
- await slowScrollElementIntoView(matchingElement);
2692
- const center = getElementCenter(matchingElement);
2693
- await moveCursor(center.x, center.y);
2694
- matchingElement.dispatchEvent(new MouseEvent("pointerdown", { bubbles: true, view: window }));
2695
- matchingElement.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, view: window }));
2696
- matchingElement.dispatchEvent(new MouseEvent("pointerup", { bubbles: true, view: window }));
2697
- matchingElement.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, view: window }));
2698
- matchingElement.click();
2699
- return !isSamePageNavigation(resolvedUrl);
2700
- }
2701
- console.log("AuticBot navigate: no matching element found, using direct navigation", resolvedUrl);
2702
- try {
2703
- const parsed = new URL(resolvedUrl);
2704
- if (parsed.origin === window.location.origin && parsed.pathname === window.location.pathname && parsed.hash) {
2705
- window.location.hash = parsed.hash;
2706
- return false;
3049
+ resolve();
3050
+ } catch (err) {
3051
+ if (!errorEmitted) {
3052
+ const msg = err instanceof Error ? err.message : String(err);
3053
+ (_c = events.onError) == null ? void 0 : _c.call(events, msg);
2707
3054
  }
2708
- } catch {
2709
- }
2710
- try {
2711
- const parsed = new URL(resolvedUrl);
2712
- if (parsed.origin === window.location.origin) {
2713
- const newPath = parsed.pathname + parsed.search + parsed.hash;
2714
- window.history.pushState({}, "", newPath);
2715
- window.dispatchEvent(new PopStateEvent("popstate", { state: {} }));
2716
- return false;
3055
+ reject(err);
3056
+ } finally {
3057
+ activeReader == null ? void 0 : activeReader.cancel().catch(() => {
3058
+ });
3059
+ if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
3060
+ activeSocket.close();
2717
3061
  }
2718
- } catch {
2719
- }
2720
- window.location.href = resolvedUrl;
2721
- return true;
2722
- } catch (error) {
2723
- console.warn("AuticBot navigate: error", call2.url, error);
2724
- return false;
2725
- }
2726
- };
2727
- const executeGetPageContext = async () => {
2728
- const context = getPageContext();
2729
- console.info(
2730
- `[Autic] getPageContext tool executed links=${context.links.length} interactables=${context.interactables.length} summary_len=${context.summary.length}`
2731
- );
2732
- };
2733
- const executeToolCalls = async (toolCalls) => {
2734
- for (const toolCall of toolCalls) {
2735
- if (toolCall.tool === "interact") {
2736
- await executeInteract(toolCall);
2737
- continue;
2738
- }
2739
- if (toolCall.tool === "scroll") {
2740
- await executeScroll(toolCall);
2741
- continue;
2742
- }
2743
- if (toolCall.tool === "getPageContext") {
2744
- await executeGetPageContext();
2745
- continue;
3062
+ activeSocket = null;
2746
3063
  }
2747
- if (toolCall.tool === "navigate") {
2748
- const terminalNavigation = await executeNavigate(toolCall);
2749
- if (terminalNavigation) {
2750
- break;
3064
+ });
3065
+ return {
3066
+ stop: () => {
3067
+ isStopped = true;
3068
+ if (activeReader) activeReader.cancel().catch(() => {
3069
+ });
3070
+ if (activeSocket && activeSocket.readyState <= WebSocket.OPEN) {
3071
+ activeSocket.close();
2751
3072
  }
2752
- }
2753
- }
2754
- };
2755
- const executeSingleToolCall = async (call2) => {
2756
- const callId = call2.call_id;
2757
- try {
2758
- if (call2.tool === "interact") {
2759
- await executeInteract(call2);
2760
- return {
2761
- call_id: callId,
2762
- result: `Etkileşim başarılı: ${call2.action}`
2763
- };
2764
- }
2765
- if (call2.tool === "scroll") {
2766
- await executeScroll(call2);
2767
- return {
2768
- call_id: callId,
2769
- result: "Öğeye kaydırma başarılı."
2770
- };
2771
- }
2772
- if (call2.tool === "getPageContext") {
2773
- const context = getPageContext();
2774
- return {
2775
- call_id: callId,
2776
- result: context.summary
2777
- };
2778
- }
2779
- if (call2.tool === "navigate") {
2780
- await executeNavigate(call2);
2781
- await new Promise((resolve) => setTimeout(resolve, 1500));
2782
- const context = getPageContext();
2783
- return {
2784
- call_id: callId,
2785
- result: `Navigasyon tamamlandı. Şu anki sayfa: ${window.location.href}
2786
- Sayfa bağlamı: ${context.summary}`
2787
- };
2788
- }
2789
- return { call_id: callId, result: "Bilinmeyen araç." };
2790
- } catch (error) {
2791
- const msg = error instanceof Error ? error.message : String(error);
2792
- console.warn(`[Autic] Tool execution error: ${call2.tool}`, error);
2793
- return { call_id: callId, result: `Hata: ${msg}` };
2794
- }
2795
- };
2796
- const restoreCursorFromStorageForCurrentUrl = () => {
2797
- if (typeof document === "undefined" || typeof window === "undefined") {
2798
- return;
2799
- }
2800
- const stored = getPersistedCursorState();
2801
- if (!stored || stored.url !== window.location.href) {
2802
- return;
2803
- }
2804
- ensureCursor();
3073
+ },
3074
+ done: donePromise
3075
+ };
2805
3076
  };
2806
- if (typeof document !== "undefined") {
2807
- if (document.readyState === "loading") {
2808
- document.addEventListener("DOMContentLoaded", restoreCursorFromStorageForCurrentUrl, {
2809
- once: true
2810
- });
2811
- } else {
2812
- restoreCursorFromStorageForCurrentUrl();
2813
- }
2814
- }
2815
3077
  const STORAGE_KEY = "bulut_chat_history";
2816
3078
  const TIMESTAMP_KEY = "bulut_chat_timestamp";
2817
3079
  const SESSION_ID_KEY$1 = "bulut_session_id";
@@ -3087,6 +3349,132 @@ const ChatWindow = ({
3087
3349
  },
3088
3350
  []
3089
3351
  );
3352
+ y(() => {
3353
+ const resumeState = getPendingAgentResume();
3354
+ if (!resumeState) return;
3355
+ clearPendingAgentResume();
3356
+ console.info("[Bulut] Resuming agent after navigation");
3357
+ if (resumeState.sessionId) {
3358
+ sessionIdRef.current = resumeState.sessionId;
3359
+ if (typeof localStorage !== "undefined") {
3360
+ localStorage.setItem(SESSION_ID_KEY$1, resumeState.sessionId);
3361
+ }
3362
+ }
3363
+ setIsBusy(true);
3364
+ setIsRunningTools(true);
3365
+ setStatusOverride(STATUS_LABELS.thinking);
3366
+ const freshPageContext = getPageContext().summary;
3367
+ const resumeToolExec = async (call2) => {
3368
+ const toolCall = {
3369
+ tool: call2.tool,
3370
+ call_id: call2.call_id,
3371
+ ...call2.args
3372
+ };
3373
+ return executeSingleToolCall(toolCall);
3374
+ };
3375
+ const controller = agentResumeStream(
3376
+ config.backendBaseUrl,
3377
+ resumeState,
3378
+ freshPageContext,
3379
+ {
3380
+ onSessionId: (sid) => {
3381
+ if (sid && sid !== sessionIdRef.current) {
3382
+ sessionIdRef.current = sid;
3383
+ if (typeof localStorage !== "undefined") {
3384
+ localStorage.setItem(SESSION_ID_KEY$1, sid);
3385
+ }
3386
+ }
3387
+ },
3388
+ onAssistantDelta: (delta) => {
3389
+ setIsRunningTools(false);
3390
+ setIsThinking(true);
3391
+ setStatusOverride(null);
3392
+ pendingAssistantTextRef.current += delta;
3393
+ if (assistantMessageIdRef.current === null) {
3394
+ assistantMessageIdRef.current = appendMessage(
3395
+ pendingAssistantTextRef.current,
3396
+ false
3397
+ );
3398
+ } else {
3399
+ updateMessageText(
3400
+ assistantMessageIdRef.current,
3401
+ pendingAssistantTextRef.current
3402
+ );
3403
+ }
3404
+ },
3405
+ onAssistantDone: (assistantText) => {
3406
+ setStatusOverride(null);
3407
+ setIsThinking(false);
3408
+ setIsRenderingAudio(true);
3409
+ const finalDisplayText = assistantText || pendingAssistantTextRef.current;
3410
+ pendingAssistantTextRef.current = finalDisplayText;
3411
+ if (assistantMessageIdRef.current !== null) {
3412
+ updateMessageText(
3413
+ assistantMessageIdRef.current,
3414
+ finalDisplayText
3415
+ );
3416
+ } else {
3417
+ assistantMessageIdRef.current = appendMessage(
3418
+ finalDisplayText,
3419
+ false
3420
+ );
3421
+ }
3422
+ },
3423
+ onToolCalls: (calls) => {
3424
+ setIsRunningTools(true);
3425
+ setStatusOverride(STATUS_LABELS.runningTools);
3426
+ for (const call2 of calls) {
3427
+ const toolLabel = call2.tool === "navigate" ? `Sayfaya gidiliyor: ${call2.args.url ?? ""}` : call2.tool === "getPageContext" ? "Sayfa bağlamı alınıyor…" : call2.tool === "interact" ? `Etkileşim: ${call2.args.action ?? ""}` : call2.tool === "scroll" ? "Kaydırılıyor…" : call2.tool;
3428
+ appendMessage(`🔧 ${toolLabel}`, false);
3429
+ setMessages((prev) => {
3430
+ const last = prev[prev.length - 1];
3431
+ if (last && !last.isUser) {
3432
+ return [
3433
+ ...prev.slice(0, -1),
3434
+ { ...last, type: "tool" }
3435
+ ];
3436
+ }
3437
+ return prev;
3438
+ });
3439
+ }
3440
+ assistantMessageIdRef.current = null;
3441
+ pendingAssistantTextRef.current = "";
3442
+ },
3443
+ onToolResult: () => {
3444
+ },
3445
+ onIteration: () => {
3446
+ setIsThinking(true);
3447
+ setStatusOverride(STATUS_LABELS.thinking);
3448
+ },
3449
+ onAudioStateChange: handleAudioStateChange,
3450
+ onError: (err) => {
3451
+ setStatusOverride(null);
3452
+ appendMessage(`Hata: ${err}`, false);
3453
+ }
3454
+ },
3455
+ resumeToolExec
3456
+ );
3457
+ activeStreamControllerRef.current = controller;
3458
+ controller.done.catch(() => {
3459
+ }).finally(() => {
3460
+ setIsBusy(false);
3461
+ setIsRunningTools(false);
3462
+ setIsThinking(false);
3463
+ setIsRenderingAudio(false);
3464
+ setIsPlayingAudio(false);
3465
+ setStatusOverride(null);
3466
+ pendingAssistantTextRef.current = "";
3467
+ assistantMessageIdRef.current = null;
3468
+ activeStreamControllerRef.current = null;
3469
+ if (shouldAutoListenAfterAudio(
3470
+ accessibilityMode,
3471
+ isRecordingRef.current,
3472
+ isBusyRef.current
3473
+ )) {
3474
+ void startRecording("vad");
3475
+ }
3476
+ });
3477
+ }, []);
3090
3478
  const appendMessage = (text, isUser) => {
3091
3479
  const id2 = nextMessageIdRef.current++;
3092
3480
  setMessages((previous) => [