npm - cursor-buddy - Versions diffs - 0.0.9 → 0.0.10 - Mend

cursor-buddy 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

package/README.md +8 -10
package/dist/{client-CSVSY-KV.mjs → client-CliXcNch.mjs} +212 -297
package/dist/client-CliXcNch.mjs.map +1 -0
package/dist/{client-Ba6rv-du.d.mts → client-sjVVGYPU.d.mts} +6 -35
package/dist/client-sjVVGYPU.d.mts.map +1 -0
package/dist/index.d.mts +2 -2
package/dist/index.mjs +2 -2
package/dist/point-tool-DZJmhD8e.mjs +16 -0
package/dist/point-tool-DZJmhD8e.mjs.map +1 -0
package/dist/point-tool-l3FewgM9.d.mts +22 -0
package/dist/point-tool-l3FewgM9.d.mts.map +1 -0
package/dist/react/index.d.mts +1 -1
package/dist/react/index.mjs +1 -1
package/dist/server/index.d.mts +2 -2
package/dist/server/index.mjs +70 -71
package/dist/server/index.mjs.map +1 -1
package/package.json +1 -1
package/dist/client-Ba6rv-du.d.mts.map +0 -1
package/dist/client-CSVSY-KV.mjs.map +0 -1
package/dist/point-tool-Cv39qylv.mjs +0 -54
package/dist/point-tool-Cv39qylv.mjs.map +0 -1
package/dist/point-tool-kIviMn1q.d.mts +0 -46
package/dist/point-tool-kIviMn1q.d.mts.map +0 -1

package/README.md CHANGED Viewed

@@ -12,7 +12,7 @@ Customize its prompt, pass custom tools, choose between browser or server-side s
 - **Push-to-talk voice input** — Hold a hotkey to speak, release to send
 - **Browser-first live transcription** — Realtime transcript while speaking, with server fallback
-- **Annotated screenshot context** — AI sees your current viewport with numbered interactive elements
+- **DOM snapshot context** — AI sees a token-efficient representation of your visible page structure
 - **Voice responses** — Browser or server TTS, with optional streaming playback
 - **Cursor pointing** — AI can point at UI elements it references
 - **Voice interruption** — Start talking again to cut off current response
@@ -367,17 +367,15 @@ client.stopListening()
 1. User holds the hotkey
 2. Microphone captures audio, waveform shows audio level, and browser speech recognition starts when available
-3. User releases hotkey
-4. An annotated screenshot of the viewport is captured, with numbered markers on visible interactive elements, based on [agent-browser](https://github.com/vercel-labs/agent-browser) implementation.
+3. At the same time, a screenshot and token-efficient DOM snapshot of the viewport are captured in the background. This runs in parallel with speech capture to minimize latency.
+4. User releases hotkey
 5. The client prefers the browser transcript; if it is unavailable or empty in `auto` mode, the recorded audio is transcribed on the server
-6. Screenshot + marker context are sent to the AI model
-7. AI responds with text and can optionally call the `point` tool to indicate a location on screen:
-   - `type: "marker"` with `markerId` for numbered interactive elements (most accurate)
-   - `type: "coordinates"` with `x, y` pixel coordinates for anything without a marker
+6. The already-captured screenshot + DOM snapshot are sent to the AI model. Each element has an `@ID` (e.g., `@12`) that the AI can reference.
+7. AI responds with text and can optionally call the `point` tool to indicate an element on screen by its `@ID` from the DOM snapshot
 8. Response is spoken in the browser or on the server based on `speech.mode`,
-   and can either wait for the full response or stream sentence-by-sentence
-   based on `speech.allowStreaming`
-9. If the AI calls the point tool, the cursor animates to the target location — markers resolve to live DOM elements, coordinates map to viewport positions
+    and can either wait for the full response or stream sentence-by-sentence
+    based on `speech.allowStreaming`
+9. If the AI calls the point tool, the cursor animates to the target element's current position (it resolves the element from the snapshot registry and computes its center point)
 10. **If user presses hotkey again at any point, current response is interrupted**
 ## Security Best Practices

package/dist/{client-CSVSY-KV.mjs → client-CliXcNch.mjs} RENAMED Viewed

@@ -611,231 +611,199 @@ var PointerController = class {
 	}
 };
 //#endregion
-//#region src/core/utils/annotations.ts
-const DEFAULT_STYLE = {
-	borderColor: "rgba(255, 0, 0, 0.8)",
-	labelBackground: "rgba(255, 0, 0, 0.9)",
-	labelColor: "#ffffff",
-	borderWidth: 2,
-	fontSize: 15,
-	labelPadding: 4
-};
-/**
-* Draw annotation markers onto a canvas.
-* Modifies the canvas in place.
-*
-* @param ctx Canvas 2D context to draw on
-* @param markers Marker map from element discovery
-* @param style Optional style overrides
-*/
-function drawAnnotations(ctx, markers, style = {}) {
-	const s = {
-		...DEFAULT_STYLE,
-		...style
-	};
-	ctx.save();
-	for (const marker of markers.values()) {
-		const { rect, id } = marker;
-		ctx.strokeStyle = s.borderColor;
-		ctx.lineWidth = s.borderWidth;
-		ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);
-		const label = String(id);
-		ctx.font = `bold ${s.fontSize}px monospace`;
-		const textWidth = ctx.measureText(label).width;
-		const textHeight = s.fontSize;
-		const labelWidth = textWidth + s.labelPadding * 2;
-		const labelHeight = textHeight + s.labelPadding;
-		const labelX = rect.left - s.borderWidth;
-		const labelY = rect.top < labelHeight + 4 ? rect.top + 2 : rect.top - labelHeight;
-		ctx.fillStyle = s.labelBackground;
-		ctx.beginPath();
-		ctx.roundRect(labelX, labelY, labelWidth, labelHeight, 2);
-		ctx.fill();
-		ctx.fillStyle = s.labelColor;
-		ctx.textBaseline = "top";
-		ctx.fillText(label, labelX + s.labelPadding, labelY + s.labelPadding / 2);
-	}
-	ctx.restore();
-}
-/**
-* Create an annotated copy of a canvas.
-* Does not modify the original canvas.
-*
-* @param sourceCanvas Original screenshot canvas
-* @param markers Marker map from element discovery
-* @returns New canvas with annotations drawn
-*/
-function createAnnotatedCanvas(sourceCanvas, markers) {
-	const canvas = document.createElement("canvas");
-	canvas.width = sourceCanvas.width;
-	canvas.height = sourceCanvas.height;
-	const ctx = canvas.getContext("2d");
-	if (!ctx) throw new Error("Failed to get canvas 2D context");
-	ctx.drawImage(sourceCanvas, 0, 0);
-	drawAnnotations(ctx, markers);
-	return canvas;
-}
-/**
-* Generate marker context string for AI prompt.
-* Lists available markers with their descriptions.
-*
-* @param markers Marker map from element discovery
-* @returns Formatted string listing markers
-*/
-function generateMarkerContext(markers) {
-	if (markers.size === 0) return "No interactive elements detected.";
-	const lines = ["Interactive elements (use marker number to point):"];
-	for (const marker of markers.values()) lines.push(`  ${marker.id}: ${marker.description}`);
-	return lines.join("\n");
-}
-//#endregion
-//#region src/core/utils/elements.ts
-/**
-* Element discovery for annotated screenshots.
-* Finds visible interactive elements and assigns marker IDs.
-*/
-/** Max characters for element descriptions passed to the model. */
-const MAX_DESCRIPTION_LENGTH = 50;
-/** Pixels tolerance for grouping elements into the same visual row. */
-const ROW_TOLERANCE_PX = 20;
-/**
-* Interactive element selectors - elements users would want to click/interact with.
-* Mirrors accessibility roles from agent-browser but using CSS selectors.
-*/
-const INTERACTIVE_SELECTORS = [
-	"button",
-	"[role=\"button\"]",
-	"input[type=\"button\"]",
-	"input[type=\"submit\"]",
-	"input[type=\"reset\"]",
-	"a[href]",
-	"[role=\"link\"]",
-	"input:not([type=\"hidden\"])",
-	"textarea",
-	"select",
-	"[role=\"textbox\"]",
-	"[role=\"searchbox\"]",
-	"[role=\"combobox\"]",
-	"[role=\"listbox\"]",
-	"[role=\"slider\"]",
-	"[role=\"spinbutton\"]",
-	"[role=\"checkbox\"]",
-	"[role=\"radio\"]",
-	"[role=\"switch\"]",
-	"[role=\"menuitem\"]",
-	"[role=\"menuitemcheckbox\"]",
-	"[role=\"menuitemradio\"]",
-	"[role=\"option\"]",
-	"[role=\"tab\"]",
-	"[role=\"treeitem\"]",
-	"video",
-	"audio",
-	"[data-cursor-buddy-interactive]"
+//#region src/core/utils/dom-snapshot.ts
+const EXCLUDED_TAGS = new Set([
+	"script",
+	"link",
+	"style",
+	"noscript",
+	"head"
+]);
+const DEFAULT_INCLUDED_ATTRIBUTES = [
+	"id",
+	"name",
+	"type",
+	"placeholder",
+	"href",
+	"title",
+	"value",
+	"role"
 ];
-/**
-* Check if an element is visible in the viewport.
-*/
-function isElementVisible(element, rect = element.getBoundingClientRect()) {
-	if (rect.width <= 0 || rect.height <= 0) return false;
-	if (rect.bottom < 0 || rect.top > window.innerHeight || rect.right < 0 || rect.left > window.innerWidth) return false;
-	const style = window.getComputedStyle(element);
-	if (style.visibility === "hidden" || style.display === "none") return false;
-	if (Number.parseFloat(style.opacity) === 0) return false;
-	return true;
-}
-function truncateDescription(value) {
-	return value.slice(0, MAX_DESCRIPTION_LENGTH);
-}
-/**
-* Generate a brief description for an element.
-*/
-function describeElement(element) {
-	const tag = element.tagName.toLowerCase();
-	const ariaLabel = element.getAttribute("aria-label");
-	if (ariaLabel) return truncateDescription(ariaLabel);
-	if (tag === "button" || tag === "a") {
-		const text = element.textContent?.trim();
-		if (text) return truncateDescription(text);
-	}
-	if (tag === "input" || tag === "textarea") {
-		const placeholder = element.getAttribute("placeholder");
-		if (placeholder) return truncateDescription(placeholder);
-		return `${element.getAttribute("type") || "text"} input`;
-	}
-	if (tag === "img") {
-		const alt = element.getAttribute("alt");
-		if (alt) return truncateDescription(alt);
-		return "image";
-	}
-	const role = element.getAttribute("role");
-	if (role) return role;
-	return tag;
-}
-function collectVisibleInteractiveElements() {
-	const selector = INTERACTIVE_SELECTORS.join(",");
-	const allElements = document.querySelectorAll(selector);
-	const visible = [];
-	for (const element of allElements) {
-		const rect = element.getBoundingClientRect();
-		if (!isElementVisible(element, rect)) continue;
-		visible.push({
-			element,
-			rect
-		});
+function buildVisibleDomSnapshot(root, options = {}) {
+	const { maxTextLength = 80, maxNodes = 1500, includeRects = true, rootLabel = "viewport", includedAttributes = DEFAULT_INCLUDED_ATTRIBUTES } = options;
+	const doc = root instanceof Document ? root : root.ownerDocument || document;
+	const startRoot = root instanceof Document ? root.documentElement : root;
+	const win = doc.defaultView || window;
+	const viewportW = win.innerWidth || 0;
+	const viewportH = win.innerHeight || 0;
+	let nextId = 1;
+	let nodeCount = 0;
+	const idToElement = /* @__PURE__ */ new Map();
+	const lines = [`# ${rootLabel} ${viewportW}x${viewportH}`];
+	/**
+	* Returns true when the element is worth considering for the snapshot.
+	*
+	* This is intentionally simple:
+	* - skip excluded tags
+	* - skip hidden/display:none/visibility:hidden/etc
+	* - skip zero-size elements
+	* - skip elements fully outside the viewport
+	*/
+	function isElementVisible(el) {
+		const tag = el.tagName.toLowerCase();
+		if (EXCLUDED_TAGS.has(tag)) return false;
+		if (!(el instanceof HTMLElement)) return false;
+		if (el.hidden) return false;
+		if (el.closest("head")) return false;
+		if (typeof el.checkVisibility === "function") try {
+			if (!el.checkVisibility({
+				opacityProperty: true,
+				visibilityProperty: true,
+				contentVisibilityAuto: true
+			})) return false;
+		} catch {}
+		const style = win.getComputedStyle(el);
+		if (style.display === "none") return false;
+		if (style.visibility === "hidden" || style.visibility === "collapse") return false;
+		if (style.opacity === "0") return false;
+		if (style.contentVisibility === "hidden") return false;
+		const rect = el.getBoundingClientRect();
+		if (rect.width <= 0 || rect.height <= 0) return false;
+		if (rect.bottom <= 0 || rect.right <= 0) return false;
+		if (rect.top >= viewportH || rect.left >= viewportW) return false;
+		return true;
 	}
-	visible.sort((a, b) => {
-		const rowDiff = Math.floor(a.rect.top / ROW_TOLERANCE_PX) - Math.floor(b.rect.top / ROW_TOLERANCE_PX);
-		if (rowDiff !== 0) return rowDiff;
-		return a.rect.left - b.rect.left;
-	});
-	return visible;
-}
-/**
-* Create marker map from visible interactive elements.
-* Assigns sequential IDs starting from 1.
-*/
-function createMarkerMap() {
-	const elements = collectVisibleInteractiveElements();
-	const map = /* @__PURE__ */ new Map();
-	elements.forEach(({ element, rect }, index) => {
-		const id = index + 1;
-		map.set(id, {
+	/**
+	* Extracts a compact text representation from the element itself.
+	*
+	* No semantic guessing:
+	* - prefer innerText when available
+	* - otherwise fall back to textContent
+	* - normalize whitespace
+	* - truncate aggressively
+	*/
+	function getElementText(el) {
+		const text = normalizeWhitespace(el.innerText || el.textContent || "");
+		if (!text) return "";
+		return truncate(text, maxTextLength);
+	}
+	/**
+	* Keeps only a small allowlist of raw DOM attributes.
+	*
+	* This avoids dumping the full attribute bag, which is usually noisy
+	* and expensive in tokens.
+	*/
+	function getIncludedAttributes(el) {
+		const attrs = {};
+		for (const name of includedAttributes) {
+			const value = el.getAttribute(name);
+			if (value == null) continue;
+			const clean = truncate(normalizeWhitespace(value), maxTextLength);
+			if (!clean) continue;
+			attrs[name] = clean;
+		}
+		return attrs;
+	}
+	/**
+	* Rounds the client rect so the output is smaller and more stable.
+	*/
+	function quantizeRect(el) {
+		const r = el.getBoundingClientRect();
+		return {
+			x: Math.max(0, Math.round(r.left)),
+			y: Math.max(0, Math.round(r.top)),
+			w: Math.round(r.width),
+			h: Math.round(r.height)
+		};
+	}
+	/**
+	* Decides whether this node should be emitted.
+	*
+	* Simple rule:
+	* - keep it if it has visible kept children
+	* - or keep it if it has some text
+	* - or keep it if it has at least one included attribute
+	*
+	* This allows non-semantic div-heavy UIs to survive without trying
+	* to guess intent.
+	*/
+	function shouldKeepNode(text, attrs, children) {
+		if (children.length > 0) return true;
+		if (text.length > 0) return true;
+		if (Object.keys(attrs).length > 0) return true;
+		return false;
+	}
+	/**
+	* Single DFS traversal over the DOM.
+	*
+	* Complexity target:
+	* - O(N) DOM walk
+	* - O(1) work per element, aside from browser layout/style calls
+	*/
+	function walk(el) {
+		if (nodeCount >= maxNodes) return null;
+		if (!(el instanceof HTMLElement)) return null;
+		if (!isElementVisible(el)) return null;
+		const children = [];
+		for (const child of Array.from(el.children)) {
+			const childNode = walk(child);
+			if (childNode) children.push(childNode);
+			if (nodeCount >= maxNodes) break;
+		}
+		const text = getElementText(el);
+		const attrs = getIncludedAttributes(el);
+		if (!shouldKeepNode(text, attrs, children)) return null;
+		const id = nextId++;
+		nodeCount++;
+		idToElement.set(id, el);
+		return {
 			id,
-			element,
-			rect,
-			description: describeElement(element)
-		});
-	});
-	return map;
-}
-/**
-* Get the center point of an element in viewport coordinates.
-*/
-function getElementCenter(element) {
-	const rect = element.getBoundingClientRect();
+			tag: el.tagName.toLowerCase(),
+			text,
+			attrs,
+			rect: includeRects ? quantizeRect(el) : void 0,
+			children
+		};
+	}
+	/**
+	* Emits the final compact line-based format.
+	*
+	* Example:
+	* @12 div "Settings" [id="settings"] [x=10 y=20 w=200 h=40]
+	*/
+	function emit(node, depth) {
+		const parts = [`${"  ".repeat(depth)}@${node.id} ${node.tag}`];
+		if (node.text) parts.push(`"${escapeQuotes(node.text)}"`);
+		for (const [key, value] of Object.entries(node.attrs)) parts.push(`[${key}="${escapeQuotes(value)}"]`);
+		if (node.rect) parts.push(`[x=${node.rect.x} y=${node.rect.y} w=${node.rect.w} h=${node.rect.h}]`);
+		lines.push(parts.join(" "));
+		for (const child of node.children) emit(child, depth + 1);
+	}
+	const tree = walk(startRoot);
+	if (tree) emit(tree, 0);
 	return {
-		x: Math.round(rect.left + rect.width / 2),
-		y: Math.round(rect.top + rect.height / 2)
+		text: lines.join("\n"),
+		idToElement,
+		nodeCount
 	};
 }
-/**
-* Resolve a marker ID to viewport coordinates.
-* Returns null if marker not found or element no longer visible.
-*/
-function resolveMarkerToCoordinates(markerMap, markerId) {
-	const marker = markerMap.get(markerId);
-	if (!marker) return null;
-	if (!document.contains(marker.element)) return null;
-	if (!isElementVisible(marker.element)) return null;
-	return getElementCenter(marker.element);
+function normalizeWhitespace(text) {
+	return text.replace(/\s+/g, " ").trim();
+}
+function truncate(text, maxLength) {
+	if (text.length <= maxLength) return text;
+	return text.slice(0, maxLength - 1).trimEnd() + "…";
+}
+function escapeQuotes(text) {
+	return text.replace(/"/g, "\\\"");
 }
 //#endregion
 //#region src/core/utils/screenshot.ts
 const CLONE_RESOURCE_TIMEOUT_MS = 3e3;
 /** Maximum width for compressed screenshots (maintains aspect ratio) */
-const MAX_SCREENSHOT_WIDTH = 1280;
-/** JPEG quality for compressed screenshots (0-1) */
-const JPEG_QUALITY = .8;
+const MAX_SCREENSHOT_WIDTH = 1920;
+/** JPEG quality for compressed screenshots (0-1) - higher quality for clearer details */
+const JPEG_QUALITY = .95;
 /**
 * Compress a canvas image by downscaling and converting to JPEG.
 * Maintains aspect ratio and falls back to original if compression fails.
@@ -971,9 +939,10 @@ function createFallbackCanvas() {
 	return canvas;
 }
 /**
-* Capture a screenshot of the current viewport.
-* Uses html2canvas to render the DOM to a canvas, then compresses to JPEG.
-* Falls back to a placeholder if capture fails (e.g., due to unsupported CSS).
+* Capture a screenshot and DOM snapshot of the current viewport.
+* Uses html2canvas to render the DOM to a canvas, compresses to high-quality JPEG,
+* and builds a token-efficient DOM snapshot for AI context.
+* Falls back to a placeholder if capture fails.
 */
 async function captureViewport() {
 	const captureMetrics = getCaptureMetrics();
@@ -993,48 +962,19 @@ async function captureViewport() {
 			height: canvas.height
 		};
 	}
-	return {
-		imageData: compressed.imageData,
-		width: compressed.width,
-		height: compressed.height,
-		viewportWidth: captureMetrics.viewportWidth,
-		viewportHeight: captureMetrics.viewportHeight
-	};
-}
-/**
-* Capture an annotated screenshot of the current viewport.
-* Interactive elements are marked with numbered labels.
-* Returns both the annotated image and a marker map for resolving IDs.
-*/
-async function captureAnnotatedViewport() {
-	const captureMetrics = getCaptureMetrics();
-	const markerMap = createMarkerMap();
-	let sourceCanvas;
-	try {
-		sourceCanvas = await html2canvas(document.body, getHtml2CanvasOptions(captureMetrics));
-	} catch {
-		sourceCanvas = createFallbackCanvas();
-	}
-	const canvas = markerMap.size > 0 ? createAnnotatedCanvas(sourceCanvas, markerMap) : sourceCanvas;
-	const markerContext = generateMarkerContext(markerMap);
-	let compressed;
-	try {
-		compressed = compressImage(canvas);
-	} catch {
-		compressed = {
-			imageData: canvas.toDataURL("image/png"),
-			width: canvas.width,
-			height: canvas.height
-		};
-	}
+	const snapshot = buildVisibleDomSnapshot(document.body, {
+		maxNodes: 1500,
+		maxTextLength: 80,
+		includeRects: true
+	});
 	return {
 		imageData: compressed.imageData,
 		width: compressed.width,
 		height: compressed.height,
 		viewportWidth: captureMetrics.viewportWidth,
 		viewportHeight: captureMetrics.viewportHeight,
-		markerMap,
-		markerContext
+		domSnapshot: snapshot.text,
+		elementRegistry: snapshot.idToElement
 	};
 }
 //#endregion
@@ -1044,20 +984,12 @@ async function captureAnnotatedViewport() {
 */
 var ScreenCaptureService = class {
 	/**
-	* Capture a screenshot of the current viewport.
-	* @returns Screenshot result with image data and dimensions
+	* Capture a screenshot and DOM snapshot of the current viewport.
+	* @returns Screenshot result with image data, dimensions, and DOM snapshot
 	*/
 	async capture() {
 		return captureViewport();
 	}
-	/**
-	* Capture an annotated screenshot with marker overlays.
-	* Interactive elements are marked with numbered labels.
-	* @returns Annotated screenshot result with marker map
-	*/
-	async captureAnnotated() {
-		return captureAnnotatedViewport();
-	}
 };
 //#endregion
 //#region src/core/services/tts-playback-queue.ts
@@ -1300,12 +1232,12 @@ const AUDIO_LEVEL_NOISE_GATE = 5e-4;
 const AUDIO_LEVEL_INPUT_GAIN = 600;
 const AUDIO_LEVEL_ATTACK = .7;
 const AUDIO_LEVEL_RELEASE = .25;
-function clamp$1(value, min, max) {
+function clamp(value, min, max) {
 	return Math.min(Math.max(value, min), max);
 }
 function normalizeAudioLevel(rms) {
 	const gatedRms = Math.max(0, rms - AUDIO_LEVEL_NOISE_GATE);
-	return clamp$1(Math.log1p(gatedRms * AUDIO_LEVEL_INPUT_GAIN) / Math.log1p(AUDIO_LEVEL_INPUT_GAIN), 0, 1);
+	return clamp(Math.log1p(gatedRms * AUDIO_LEVEL_INPUT_GAIN) / Math.log1p(AUDIO_LEVEL_INPUT_GAIN), 0, 1);
 }
 function smoothAudioLevel(current, target) {
 	const smoothing = target > current ? AUDIO_LEVEL_ATTACK : AUDIO_LEVEL_RELEASE;
@@ -1548,7 +1480,7 @@ function parseUIStreamLine(line) {
 * Check if a tool call is a point tool call with valid input.
 */
 function isPointToolCall(chunk) {
-	return chunk.type === "tool-input-available" && chunk.toolName === "point" && chunk.input != null && typeof chunk.input === "object" && "type" in chunk.input && "label" in chunk.input;
+	return chunk.type === "tool-input-available" && chunk.toolName === "point" && chunk.input != null && typeof chunk.input === "object" && "elementId" in chunk.input && "label" in chunk.input;
 }
 //#endregion
 //#region src/core/utils/response-processor.ts
@@ -1691,9 +1623,6 @@ var ProgressiveResponseProcessor = class {
 };
 //#endregion
 //#region src/core/client.ts
-function clamp(value, min, max) {
-	return Math.min(Math.max(value, min), max);
-}
 async function readErrorMessage(response, fallbackMessage) {
 	try {
 		if ((response.headers.get("Content-Type") ?? "").includes("application/json")) {
@@ -1706,21 +1635,6 @@ async function readErrorMessage(response, fallbackMessage) {
 	return fallbackMessage;
 }
 /**
-* Map coordinate-based pointing from screenshot space to viewport space.
-*/
-function mapCoordinatesToViewport(x, y, screenshot) {
-	if (screenshot.width <= 0 || screenshot.height <= 0) return {
-		x,
-		y
-	};
-	const scaleX = screenshot.viewportWidth / screenshot.width;
-	const scaleY = screenshot.viewportHeight / screenshot.height;
-	return {
-		x: clamp(Math.round(x * scaleX), 0, Math.max(screenshot.viewportWidth - 1, 0)),
-		y: clamp(Math.round(y * scaleY), 0, Math.max(screenshot.viewportHeight - 1, 0))
-	};
-}
-/**
 * Framework-agnostic client for cursor buddy voice interactions.
 *
 * Manages the complete voice interaction flow:
@@ -1789,7 +1703,7 @@ var CursorBuddyClient = class {
 		this.notify();
 		this.abortController = new AbortController();
 		const signal = this.abortController.signal;
-		this.screenshotPromise = this.screenCapture.captureAnnotated();
+		this.screenshotPromise = this.screenCapture.capture();
 		this.beginListeningSession(signal).catch((error) => {
 			if (signal.aborted) return;
 			this.voiceCapture.dispose();
@@ -1842,16 +1756,17 @@ var CursorBuddyClient = class {
 			if (signal?.aborted) return;
 			this.options.onResponse?.(cleanResponse);
 			let pointTarget = null;
-			if (pointToolCall) if (pointToolCall.type === "marker") {
-				const coords = resolveMarkerToCoordinates(screenshot.markerMap, pointToolCall.markerId);
-				if (coords) pointTarget = {
-					...coords,
-					label: pointToolCall.label
-				};
-			} else pointTarget = {
-				...mapCoordinatesToViewport(pointToolCall.x, pointToolCall.y, screenshot),
-				label: pointToolCall.label
-			};
+			if (pointToolCall) {
+				const element = screenshot.elementRegistry.get(pointToolCall.elementId);
+				if (element) {
+					const rect = element.getBoundingClientRect();
+					pointTarget = {
+						x: Math.round(rect.left + rect.width / 2),
+						y: Math.round(rect.top + rect.height / 2),
+						label: pointToolCall.label
+					};
+				}
+			}
 			if (pointTarget) {
 				this.options.onPoint?.(pointTarget);
 				this.pointerController.pointAt(pointTarget);
@@ -2017,7 +1932,7 @@ var CursorBuddyClient = class {
 				},
 				transcript,
 				history,
-				markerContext: screenshot.markerContext
+				domSnapshot: screenshot.domSnapshot
 			}),
 			signal
 		});
@@ -2251,4 +2166,4 @@ var CursorBuddyClient = class {
 //#endregion
 export { $buddyScale as a, $buddyRotation as i, $audioLevel as n, $cursorPosition as o, $buddyPosition as r, $pointingTarget as s, CursorBuddyClient as t };
-//# sourceMappingURL=client-CSVSY-KV.mjs.map
+//# sourceMappingURL=client-CliXcNch.mjs.map