sonance-brand-mcp 1.3.94 → 1.3.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -113,6 +113,8 @@ export function ChatInterface({
113
113
  const [isAnnotating, setIsAnnotating] = useState(false);
114
114
  const [annotatedScreenshot, setAnnotatedScreenshot] = useState<string | null>(null);
115
115
  const [manualFocusBounds, setManualFocusBounds] = useState<Rectangle | null>(null);
116
+ // Discovered elements from annotation tool (for targeting when no element was clicked)
117
+ const [annotationDiscoveredElements, setAnnotationDiscoveredElements] = useState<VisionFocusedElement[]>([]);
116
118
 
117
119
  // Auto-dismiss toast after 5 seconds
118
120
  useEffect(() => {
@@ -177,10 +179,20 @@ export function ChatInterface({
177
179
  }, []);
178
180
 
179
181
  // Handle annotation confirmation - screenshot is already captured and annotated
180
- const handleAnnotationConfirm = useCallback((annotated: string, bounds: Rectangle) => {
181
- console.log("[Vision Mode] Annotation confirmed:", { bounds });
182
+ // Now also receives discovered elements from within the drawn rectangle
183
+ const handleAnnotationConfirm = useCallback((annotated: string, bounds: Rectangle, discoveredElements: VisionFocusedElement[]) => {
184
+ console.log("[Vision Mode] Annotation confirmed:", {
185
+ bounds,
186
+ discoveredElementsCount: discoveredElements.length,
187
+ discoveredElements: discoveredElements.map(e => ({
188
+ name: e.name,
189
+ text: e.textContent?.substring(0, 30),
190
+ id: e.elementId,
191
+ })),
192
+ });
182
193
  setAnnotatedScreenshot(annotated);
183
194
  setManualFocusBounds(bounds);
195
+ setAnnotationDiscoveredElements(discoveredElements);
184
196
  setIsAnnotating(false);
185
197
  // Focus the input so user can type their prompt
186
198
  setTimeout(() => inputRef.current?.focus(), 100);
@@ -191,10 +203,11 @@ export function ChatInterface({
191
203
  setIsAnnotating(false);
192
204
  }, []);
193
205
 
194
- // Clear the current annotation
206
+ // Clear the current annotation and discovered elements
195
207
  const clearAnnotation = useCallback(() => {
196
208
  setAnnotatedScreenshot(null);
197
209
  setManualFocusBounds(null);
210
+ setAnnotationDiscoveredElements([]);
198
211
  }, []);
199
212
 
200
213
  // Handle vision mode edit request
@@ -202,9 +215,18 @@ export function ChatInterface({
202
215
  // Use Apply-First mode if callback is provided (new Cursor-style workflow)
203
216
  const useApplyFirst = !!onApplyFirstComplete;
204
217
 
218
+ // Determine which focused elements to use:
219
+ // - If user clicked an element, use visionFocusedElements (passed from parent)
220
+ // - If user used annotation tool without clicking, use annotationDiscoveredElements
221
+ const effectiveFocusedElements = visionFocusedElements.length > 0
222
+ ? visionFocusedElements
223
+ : annotationDiscoveredElements;
224
+
205
225
  console.log("[Vision Mode] Starting edit request:", {
206
226
  prompt,
207
- focusedElements: visionFocusedElements.length,
227
+ focusedElementsFromClick: visionFocusedElements.length,
228
+ focusedElementsFromAnnotation: annotationDiscoveredElements.length,
229
+ effectiveFocusedElements: effectiveFocusedElements.length,
208
230
  mode: useApplyFirst ? "apply-first" : "preview-first"
209
231
  });
210
232
 
@@ -226,9 +248,16 @@ export function ChatInterface({
226
248
  // PRIORITY 1: Use manually annotated screenshot if available
227
249
  // This is when user drew a focus area using the annotation tool
228
250
  if (annotatedScreenshot) {
229
- console.log("[Vision Mode] Using manually annotated screenshot");
251
+ console.log("[Vision Mode] Using manually annotated screenshot with discovered elements:", {
252
+ discoveredCount: annotationDiscoveredElements.length,
253
+ elements: annotationDiscoveredElements.slice(0, 3).map(e => ({
254
+ name: e.name,
255
+ text: e.textContent?.substring(0, 20),
256
+ id: e.elementId,
257
+ })),
258
+ });
230
259
  screenshot = annotatedScreenshot;
231
- // Clear the annotation after use
260
+ // Clear the annotation after use (but keep discovered elements for the API call)
232
261
  setAnnotatedScreenshot(null);
233
262
  setManualFocusBounds(null);
234
263
  } else {
@@ -240,8 +269,8 @@ export function ChatInterface({
240
269
  // Annotate screenshot with section highlight if parent section exists
241
270
  // This helps the LLM visually identify the target area for modifications
242
271
  screenshot = rawScreenshot;
243
- if (rawScreenshot && visionFocusedElements.length > 0) {
244
- const parentSection = visionFocusedElements[0].parentSection;
272
+ if (rawScreenshot && effectiveFocusedElements.length > 0) {
273
+ const parentSection = effectiveFocusedElements[0].parentSection;
245
274
  if (parentSection?.coordinates) {
246
275
  screenshot = await drawSectionHighlight(rawScreenshot, parentSection.coordinates);
247
276
  console.log("[Vision Mode] Added section highlight to screenshot:", {
@@ -254,7 +283,9 @@ export function ChatInterface({
254
283
 
255
284
  // Choose API endpoint based on mode
256
285
  const endpoint = useApplyFirst ? "/api/sonance-vision-apply" : "/api/sonance-vision-edit";
257
- console.log("[Vision Mode] Sending to API:", endpoint);
286
+ console.log("[Vision Mode] Sending to API:", endpoint, {
287
+ effectiveFocusedElements: effectiveFocusedElements.length,
288
+ });
258
289
 
259
290
  const response = await fetch(endpoint, {
260
291
  method: "POST",
@@ -265,9 +296,13 @@ export function ChatInterface({
265
296
  screenshot,
266
297
  pageRoute: window.location.pathname,
267
298
  userPrompt: prompt,
268
- focusedElements: visionFocusedElements,
299
+ // Use effective focused elements (from click OR from annotation discovery)
300
+ focusedElements: effectiveFocusedElements,
269
301
  }),
270
302
  });
303
+
304
+ // Clear annotation discovered elements after API call
305
+ setAnnotationDiscoveredElements([]);
271
306
 
272
307
  const data = await response.json();
273
308
  console.log("[Vision Mode] API response:", {
@@ -4,6 +4,7 @@ import React, { useEffect, useState, useCallback } from "react";
4
4
  import { createPortal } from "react-dom";
5
5
  import { Check, X, RotateCcw, Crop } from "lucide-react";
6
6
  import html2canvas from "html2canvas-pro";
7
+ import { VisionFocusedElement } from "../types";
7
8
 
8
9
  export interface Rectangle {
9
10
  x: number;
@@ -12,9 +13,157 @@ export interface Rectangle {
12
13
  height: number;
13
14
  }
14
15
 
16
/**
 * Intermediate record kept for each unique DOM element found while sampling
 * the annotated rectangle. Records are ranked by `score` and then converted
 * to `VisionFocusedElement`s.
 */
interface DiscoveredElement {
  /** The DOM element itself; also the key used to de-duplicate hits. */
  element: Element;
  /** Lower-cased tag name, e.g. `button`. */
  tagName: string;
  /** The element's `id`, or an empty string when it has none. */
  elementId: string;
  /** The element's class attribute when it is a plain string, otherwise an empty string. */
  className: string;
  /** Short text used to identify the element (at most 100 characters); may be empty. */
  textContent: string;
  /** Bounding client rect captured when the element was discovered. */
  rect: DOMRect;
  /** Heuristic ranking score; higher means a more likely target. */
  score: number;
}
26
+
27
/**
 * Discover DOM elements within the given rectangle bounds.
 *
 * Samples a 5x5 grid of interior points, collects every element stacked under
 * each point with `document.elementsFromPoint`, scores each unique element
 * once, and returns the ten highest-scoring elements.
 *
 * Elements that are part of the DevTools/annotator UI are ignored. The check
 * covers the element AND its ancestors, so children of the overlay or toolbar
 * (selection box, toolbar buttons, icons) are never reported as page targets.
 *
 * @param rect - Area to sample. Its x/y are passed straight to
 *   `document.elementsFromPoint`, which works in viewport coordinates.
 *   NOTE(review): confirm callers pass viewport-relative bounds.
 * @returns Up to 10 elements, highest score first. `coordinates` are the
 *   element's bounding client rect offset by the current window scroll.
 */
function discoverElementsInBounds(rect: Rectangle): VisionFocusedElement[] {
  const discoveredMap = new Map<Element, DiscoveredElement>();

  // Matches any node that is, or lives inside, a DevTools/annotator element.
  const devtoolsSelector =
    '[data-sonance-devtools], [data-annotator-overlay], [data-annotator-toolbar], [data-vision-mode-border]';

  // Tags to skip - generic containers and non-content elements
  const skipTags = new Set(['html', 'body', 'head', 'script', 'style', 'meta', 'link', 'noscript']);

  // Semantic elements get bonus points
  const semanticElements = new Set(['section', 'article', 'form', 'header', 'footer', 'main', 'nav', 'aside', 'dialog']);

  // Interactive elements and headings get their own bonuses (built once, not per element)
  const interactiveTags = new Set(['button', 'a', 'input', 'select', 'textarea']);
  const headingTags = new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']);

  // Sample points in a grid pattern within the rectangle.
  // Dividing by gridSize + 1 keeps every sample strictly inside the edges.
  const gridSize = 5; // 5x5 grid = 25 sample points
  const stepX = rect.width / (gridSize + 1);
  const stepY = rect.height / (gridSize + 1);

  for (let i = 1; i <= gridSize; i++) {
    for (let j = 1; j <= gridSize; j++) {
      const x = rect.x + stepX * i;
      const y = rect.y + stepY * j;

      // All elements stacked at this point, topmost first
      for (const el of document.elementsFromPoint(x, y)) {
        // Each element is scored only once, however many sample points hit it
        if (discoveredMap.has(el)) continue;

        // Skip DevTools elements and anything nested inside them
        if (el.closest(devtoolsSelector)) continue;

        const tagName = el.tagName.toLowerCase();

        // Skip generic/non-content elements
        if (skipTags.has(tagName)) continue;

        const elRect = el.getBoundingClientRect();
        const id = el.id || '';
        // className is not a plain string on every element type (e.g. SVG); ignore those
        const className = typeof el.className === 'string' ? el.className : '';

        // Use only the element's own text nodes, so a container does not
        // inherit the text of every descendant.
        let textContent = '';
        for (const node of el.childNodes) {
          if (node.nodeType === Node.TEXT_NODE) {
            const text = node.textContent?.trim();
            if (text) textContent += text + ' ';
          }
        }
        textContent = textContent.trim().substring(0, 100);

        // No direct text: fall back to something still visible to the user
        if (!textContent && el instanceof HTMLElement) {
          if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
            // For inputs, use placeholder or value
            textContent = el.placeholder || el.value || '';
          } else if (el instanceof HTMLButtonElement || tagName === 'a') {
            textContent = el.textContent?.trim().substring(0, 100) || '';
          }
        }

        // Calculate score for ranking
        let score = 0;

        // ID is most valuable for targeting
        if (id) score += 100;

        // Text content helps identify the element
        if (textContent) score += 50;

        // Semantic elements are better targets
        if (semanticElements.has(tagName)) score += 30;

        // Interactive elements are often targets
        if (interactiveTags.has(tagName)) score += 25;

        // Component-like classNames.
        // NOTE(review): the second pattern also matches multi-segment Tailwind
        // utilities such as `text-gray-500`, so it is broader than "BEM only".
        if (className) {
          const hasComponentClass = className.split(/\s+/).some(c =>
            /^[A-Z][a-zA-Z0-9]+/.test(c) || // PascalCase
            /^[a-z]+-[a-z]+-/.test(c)       // kebab-case with multiple segments
          );
          if (hasComponentClass) score += 20;
        }

        // Heading elements are important
        if (headingTags.has(tagName)) score += 20;

        // Penalize very large elements (probably containers)
        if (elRect.width > window.innerWidth * 0.8 && elRect.height > window.innerHeight * 0.8) {
          score -= 30;
        }

        discoveredMap.set(el, {
          element: el,
          score,
          textContent,
          className,
          elementId: id,
          tagName,
          rect: elRect,
        });
      }
    }
  }

  // Convert to array, sort by score descending, take top 10
  const sorted = Array.from(discoveredMap.values())
    .sort((a, b) => b.score - a.score)
    .slice(0, 10);

  // Convert to VisionFocusedElement format
  return sorted.map((item): VisionFocusedElement => ({
    name: item.elementId || item.tagName,
    type: 'component', // Generic type since we're discovering
    coordinates: {
      x: item.rect.left + window.scrollX,
      y: item.rect.top + window.scrollY,
      width: item.rect.width,
      height: item.rect.height,
    },
    textContent: item.textContent || undefined,
    className: item.className || undefined,
    elementId: item.elementId || undefined,
    description: `${item.tagName}${item.elementId ? '#' + item.elementId : ''}${item.textContent ? ': "' + item.textContent.substring(0, 30) + '"' : ''}`,
  }));
}
163
+
15
164
  interface ScreenshotAnnotatorProps {
16
- /** Called when user confirms their selection with captured screenshot */
17
- onConfirm: (annotatedScreenshot: string, bounds: Rectangle) => void;
165
+ /** Called when user confirms their selection with captured screenshot and discovered elements */
166
+ onConfirm: (annotatedScreenshot: string, bounds: Rectangle, discoveredElements: VisionFocusedElement[]) => void;
18
167
  /** Called when user cancels */
19
168
  onCancel: () => void;
20
169
  }
@@ -88,13 +237,26 @@ export function ScreenshotAnnotator({
88
237
  setStartPos(null);
89
238
  }, []);
90
239
 
91
- // Confirm: capture screenshot and annotate it
240
+ // Confirm: discover elements in bounds, capture screenshot and annotate it
92
241
  const handleConfirm = useCallback(async () => {
93
242
  if (!currentRect || isCapturing) return;
94
243
 
95
244
  setIsCapturing(true);
96
245
 
97
246
  try {
247
+ // FIRST: Discover DOM elements within the drawn rectangle
248
+ // This must happen BEFORE the overlay is removed to get accurate results
249
+ console.log("[ScreenshotAnnotator] Discovering elements in bounds:", currentRect);
250
+ const discoveredElements = discoverElementsInBounds(currentRect);
251
+ console.log("[ScreenshotAnnotator] Discovered elements:", {
252
+ count: discoveredElements.length,
253
+ elements: discoveredElements.map(e => ({
254
+ name: e.name,
255
+ text: e.textContent?.substring(0, 30),
256
+ id: e.elementId,
257
+ })),
258
+ });
259
+
98
260
  // Capture the full page screenshot (excluding DevTools elements)
99
261
  const canvas = await html2canvas(document.body, {
100
262
  ignoreElements: (element) => {
@@ -140,7 +302,9 @@ export function ScreenshotAnnotator({
140
302
  ctx.fillText(labelText, labelX + labelPadding, labelY + 16);
141
303
 
142
304
  const annotatedScreenshot = canvas.toDataURL("image/png", 0.9);
143
- onConfirm(annotatedScreenshot, currentRect);
305
+
306
+ // Pass screenshot, bounds, AND discovered elements to callback
307
+ onConfirm(annotatedScreenshot, currentRect, discoveredElements);
144
308
  } catch (error) {
145
309
  console.error("Failed to capture screenshot:", error);
146
310
  setIsCapturing(false);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sonance-brand-mcp",
3
- "version": "1.3.94",
3
+ "version": "1.3.95",
4
4
  "description": "MCP Server for Sonance Brand Guidelines and Component Library - gives Claude instant access to brand colors, typography, and UI components.",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",