sonance-brand-mcp 1.3.94 → 1.3.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -113,6 +113,8 @@ export function ChatInterface({
|
|
|
113
113
|
const [isAnnotating, setIsAnnotating] = useState(false);
|
|
114
114
|
const [annotatedScreenshot, setAnnotatedScreenshot] = useState<string | null>(null);
|
|
115
115
|
const [manualFocusBounds, setManualFocusBounds] = useState<Rectangle | null>(null);
|
|
116
|
+
// Discovered elements from annotation tool (for targeting when no element was clicked)
|
|
117
|
+
const [annotationDiscoveredElements, setAnnotationDiscoveredElements] = useState<VisionFocusedElement[]>([]);
|
|
116
118
|
|
|
117
119
|
// Auto-dismiss toast after 5 seconds
|
|
118
120
|
useEffect(() => {
|
|
@@ -177,10 +179,20 @@ export function ChatInterface({
|
|
|
177
179
|
}, []);
|
|
178
180
|
|
|
179
181
|
// Handle annotation confirmation - screenshot is already captured and annotated
|
|
180
|
-
|
|
181
|
-
|
|
182
|
+
// Now also receives discovered elements from within the drawn rectangle
|
|
183
|
+
const handleAnnotationConfirm = useCallback((annotated: string, bounds: Rectangle, discoveredElements: VisionFocusedElement[]) => {
|
|
184
|
+
console.log("[Vision Mode] Annotation confirmed:", {
|
|
185
|
+
bounds,
|
|
186
|
+
discoveredElementsCount: discoveredElements.length,
|
|
187
|
+
discoveredElements: discoveredElements.map(e => ({
|
|
188
|
+
name: e.name,
|
|
189
|
+
text: e.textContent?.substring(0, 30),
|
|
190
|
+
id: e.elementId,
|
|
191
|
+
})),
|
|
192
|
+
});
|
|
182
193
|
setAnnotatedScreenshot(annotated);
|
|
183
194
|
setManualFocusBounds(bounds);
|
|
195
|
+
setAnnotationDiscoveredElements(discoveredElements);
|
|
184
196
|
setIsAnnotating(false);
|
|
185
197
|
// Focus the input so user can type their prompt
|
|
186
198
|
setTimeout(() => inputRef.current?.focus(), 100);
|
|
@@ -191,10 +203,11 @@ export function ChatInterface({
|
|
|
191
203
|
setIsAnnotating(false);
|
|
192
204
|
}, []);
|
|
193
205
|
|
|
194
|
-
// Clear the current annotation
|
|
206
|
+
// Clear the current annotation and discovered elements
|
|
195
207
|
const clearAnnotation = useCallback(() => {
|
|
196
208
|
setAnnotatedScreenshot(null);
|
|
197
209
|
setManualFocusBounds(null);
|
|
210
|
+
setAnnotationDiscoveredElements([]);
|
|
198
211
|
}, []);
|
|
199
212
|
|
|
200
213
|
// Handle vision mode edit request
|
|
@@ -202,9 +215,18 @@ export function ChatInterface({
|
|
|
202
215
|
// Use Apply-First mode if callback is provided (new Cursor-style workflow)
|
|
203
216
|
const useApplyFirst = !!onApplyFirstComplete;
|
|
204
217
|
|
|
218
|
+
// Determine which focused elements to use:
|
|
219
|
+
// - If user clicked an element, use visionFocusedElements (passed from parent)
|
|
220
|
+
// - If user used annotation tool without clicking, use annotationDiscoveredElements
|
|
221
|
+
const effectiveFocusedElements = visionFocusedElements.length > 0
|
|
222
|
+
? visionFocusedElements
|
|
223
|
+
: annotationDiscoveredElements;
|
|
224
|
+
|
|
205
225
|
console.log("[Vision Mode] Starting edit request:", {
|
|
206
226
|
prompt,
|
|
207
|
-
|
|
227
|
+
focusedElementsFromClick: visionFocusedElements.length,
|
|
228
|
+
focusedElementsFromAnnotation: annotationDiscoveredElements.length,
|
|
229
|
+
effectiveFocusedElements: effectiveFocusedElements.length,
|
|
208
230
|
mode: useApplyFirst ? "apply-first" : "preview-first"
|
|
209
231
|
});
|
|
210
232
|
|
|
@@ -226,9 +248,16 @@ export function ChatInterface({
|
|
|
226
248
|
// PRIORITY 1: Use manually annotated screenshot if available
|
|
227
249
|
// This is when user drew a focus area using the annotation tool
|
|
228
250
|
if (annotatedScreenshot) {
|
|
229
|
-
console.log("[Vision Mode] Using manually annotated screenshot");
|
|
251
|
+
console.log("[Vision Mode] Using manually annotated screenshot with discovered elements:", {
|
|
252
|
+
discoveredCount: annotationDiscoveredElements.length,
|
|
253
|
+
elements: annotationDiscoveredElements.slice(0, 3).map(e => ({
|
|
254
|
+
name: e.name,
|
|
255
|
+
text: e.textContent?.substring(0, 20),
|
|
256
|
+
id: e.elementId,
|
|
257
|
+
})),
|
|
258
|
+
});
|
|
230
259
|
screenshot = annotatedScreenshot;
|
|
231
|
-
// Clear the annotation after use
|
|
260
|
+
// Clear the annotation after use (but keep discovered elements for the API call)
|
|
232
261
|
setAnnotatedScreenshot(null);
|
|
233
262
|
setManualFocusBounds(null);
|
|
234
263
|
} else {
|
|
@@ -240,8 +269,8 @@ export function ChatInterface({
|
|
|
240
269
|
// Annotate screenshot with section highlight if parent section exists
|
|
241
270
|
// This helps the LLM visually identify the target area for modifications
|
|
242
271
|
screenshot = rawScreenshot;
|
|
243
|
-
if (rawScreenshot &&
|
|
244
|
-
const parentSection =
|
|
272
|
+
if (rawScreenshot && effectiveFocusedElements.length > 0) {
|
|
273
|
+
const parentSection = effectiveFocusedElements[0].parentSection;
|
|
245
274
|
if (parentSection?.coordinates) {
|
|
246
275
|
screenshot = await drawSectionHighlight(rawScreenshot, parentSection.coordinates);
|
|
247
276
|
console.log("[Vision Mode] Added section highlight to screenshot:", {
|
|
@@ -254,7 +283,9 @@ export function ChatInterface({
|
|
|
254
283
|
|
|
255
284
|
// Choose API endpoint based on mode
|
|
256
285
|
const endpoint = useApplyFirst ? "/api/sonance-vision-apply" : "/api/sonance-vision-edit";
|
|
257
|
-
console.log("[Vision Mode] Sending to API:", endpoint);
|
|
286
|
+
console.log("[Vision Mode] Sending to API:", endpoint, {
|
|
287
|
+
effectiveFocusedElements: effectiveFocusedElements.length,
|
|
288
|
+
});
|
|
258
289
|
|
|
259
290
|
const response = await fetch(endpoint, {
|
|
260
291
|
method: "POST",
|
|
@@ -265,9 +296,13 @@ export function ChatInterface({
|
|
|
265
296
|
screenshot,
|
|
266
297
|
pageRoute: window.location.pathname,
|
|
267
298
|
userPrompt: prompt,
|
|
268
|
-
|
|
299
|
+
// Use effective focused elements (from click OR from annotation discovery)
|
|
300
|
+
focusedElements: effectiveFocusedElements,
|
|
269
301
|
}),
|
|
270
302
|
});
|
|
303
|
+
|
|
304
|
+
// Clear annotation discovered elements after API call
|
|
305
|
+
setAnnotationDiscoveredElements([]);
|
|
271
306
|
|
|
272
307
|
const data = await response.json();
|
|
273
308
|
console.log("[Vision Mode] API response:", {
|
|
@@ -4,6 +4,7 @@ import React, { useEffect, useState, useCallback } from "react";
|
|
|
4
4
|
import { createPortal } from "react-dom";
|
|
5
5
|
import { Check, X, RotateCcw, Crop } from "lucide-react";
|
|
6
6
|
import html2canvas from "html2canvas-pro";
|
|
7
|
+
import { VisionFocusedElement } from "../types";
|
|
7
8
|
|
|
8
9
|
export interface Rectangle {
|
|
9
10
|
x: number;
|
|
@@ -12,9 +13,157 @@ export interface Rectangle {
|
|
|
12
13
|
height: number;
|
|
13
14
|
}
|
|
14
15
|
|
|
16
|
+
/**
 * Intermediate record kept for every DOM element found while sampling the
 * annotated rectangle. Records are ranked by `score` before being converted
 * to the public `VisionFocusedElement` shape.
 */
interface DiscoveredElement {
  /** The DOM element this record describes. */
  element: Element;
  /** Lower-cased tag name of `element`. */
  tagName: string;
  /** The element's `id`, or an empty string when it has none. */
  elementId: string;
  /** The element's class string, or an empty string when absent or not a plain string. */
  className: string;
  /** Text extracted for the element; empty string when nothing usable was found. */
  textContent: string;
  /** Bounding client rect captured when the element was discovered. */
  rect: DOMRect;
  /** Ranking score; higher means a better targeting candidate. */
  score: number;
}
|
|
26
|
+
|
|
27
|
+
/** Number of sample points per axis (5 x 5 = 25 probes inside the rectangle). */
const DISCOVERY_GRID_SIZE = 5;

/** Upper bound on the amount of text captured per element. */
const DISCOVERY_MAX_TEXT_LENGTH = 100;

/** Maximum number of ranked elements returned to the caller. */
const DISCOVERY_MAX_RESULTS = 10;

/**
 * Matches the DevTools / annotator UI. Used with `Element.closest`, so an
 * element is excluded when it OR any of its ancestors carries one of these
 * attributes (children of the overlay rarely carry the attribute themselves).
 */
const DISCOVERY_TOOL_UI_SELECTOR =
  '[data-sonance-devtools], [data-annotator-overlay], [data-annotator-toolbar], [data-vision-mode-border]';

/** Tags to skip - generic containers and non-content elements. */
const DISCOVERY_SKIP_TAGS = new Set(['html', 'body', 'head', 'script', 'style', 'meta', 'link', 'noscript']);

/** Semantic elements get bonus points. */
const DISCOVERY_SEMANTIC_TAGS = new Set(['section', 'article', 'form', 'header', 'footer', 'main', 'nav', 'aside', 'dialog']);

/** Interactive elements are often the intended target. */
const DISCOVERY_INTERACTIVE_TAGS = new Set(['button', 'a', 'input', 'select', 'textarea']);

/** Heading elements are important landmarks. */
const DISCOVERY_HEADING_TAGS = new Set(['h1', 'h2', 'h3', 'h4', 'h5', 'h6']);

/** Class name starting with a PascalCase-style identifier. */
const DISCOVERY_PASCAL_CASE_CLASS = /^[A-Z][a-zA-Z0-9]+/;

/**
 * Class name with at least three kebab segments (intended to catch BEM-like names).
 * NOTE(review): this also matches multi-segment utility classes such as
 * `text-gray-500`; the heuristic is kept unchanged so existing rankings hold.
 */
const DISCOVERY_KEBAB_CLASS = /^[a-z]+-[a-z]+-/;

/** Extracts a short human-readable label for an element (never more than DISCOVERY_MAX_TEXT_LENGTH chars). */
function readDiscoveredElementText(el: Element, tagName: string): string {
  // Only the element's own text nodes are used, so a container does not
  // inherit the text of every descendant.
  const ownText = Array.from(el.childNodes)
    .filter((node) => node.nodeType === Node.TEXT_NODE)
    .map((node) => node.textContent?.trim() ?? '')
    .filter((text) => text.length > 0)
    .join(' ')
    .substring(0, DISCOVERY_MAX_TEXT_LENGTH);
  if (ownText) return ownText;

  if (!(el instanceof HTMLElement)) return '';

  if (el instanceof HTMLInputElement || el instanceof HTMLTextAreaElement) {
    // The result is logged and sent to the vision API, so the value of a
    // password field must never be read; its placeholder is still safe.
    const isPassword = el instanceof HTMLInputElement && el.type === 'password';
    const fallback = el.placeholder || (isPassword ? '' : el.value) || '';
    return fallback.substring(0, DISCOVERY_MAX_TEXT_LENGTH);
  }

  if (el instanceof HTMLButtonElement || tagName === 'a') {
    return el.textContent?.trim().substring(0, DISCOVERY_MAX_TEXT_LENGTH) || '';
  }

  return '';
}

/** Computes the ranking score for a discovered element; higher is a better target. */
function scoreDiscoveredElement(
  tagName: string,
  elementId: string,
  textContent: string,
  className: string,
  elRect: DOMRect,
): number {
  let score = 0;

  // ID is most valuable for targeting
  if (elementId) score += 100;

  // Text content helps identify the element
  if (textContent) score += 50;

  if (DISCOVERY_SEMANTIC_TAGS.has(tagName)) score += 30;
  if (DISCOVERY_INTERACTIVE_TAGS.has(tagName)) score += 25;

  // Component-like class names
  if (className) {
    const hasComponentClass = className
      .split(/\s+/)
      .some((c) => DISCOVERY_PASCAL_CASE_CLASS.test(c) || DISCOVERY_KEBAB_CLASS.test(c));
    if (hasComponentClass) score += 20;
  }

  if (DISCOVERY_HEADING_TAGS.has(tagName)) score += 20;

  // Penalize very large elements (probably page-level containers)
  if (elRect.width > window.innerWidth * 0.8 && elRect.height > window.innerHeight * 0.8) {
    score -= 30;
  }

  return score;
}

/**
 * Discover DOM elements within the given rectangle bounds.
 *
 * Probes a DISCOVERY_GRID_SIZE x DISCOVERY_GRID_SIZE grid of interior points
 * with `document.elementsFromPoint`, scores every distinct element found and
 * returns the best DISCOVERY_MAX_RESULTS candidates, highest score first.
 *
 * Elements belonging to the DevTools / annotator UI (including descendants of
 * those containers) and non-content tags are ignored.
 *
 * @param rect Area to probe. Its x/y are passed unchanged to
 *   `document.elementsFromPoint`, which interprets them as viewport coordinates.
 * @returns Ranked elements in `VisionFocusedElement` form; `coordinates` are
 *   the element's bounding rect offset by the current window scroll position.
 */
function discoverElementsInBounds(rect: Rectangle): VisionFocusedElement[] {
  const discoveredMap = new Map<Element, DiscoveredElement>();

  // Dividing by gridSize + 1 keeps every sample strictly inside the rectangle.
  const stepX = rect.width / (DISCOVERY_GRID_SIZE + 1);
  const stepY = rect.height / (DISCOVERY_GRID_SIZE + 1);

  for (let i = 1; i <= DISCOVERY_GRID_SIZE; i++) {
    for (let j = 1; j <= DISCOVERY_GRID_SIZE; j++) {
      const x = rect.x + stepX * i;
      const y = rect.y + stepY * j;

      // All elements stacked at this point, top-most first
      for (const el of document.elementsFromPoint(x, y)) {
        if (discoveredMap.has(el)) continue;

        // Skip the tool's own UI, including anything nested inside it
        if (el.closest(DISCOVERY_TOOL_UI_SELECTOR)) continue;

        const tagName = el.tagName.toLowerCase();
        if (DISCOVERY_SKIP_TAGS.has(tagName)) continue;

        const elRect = el.getBoundingClientRect();
        const elementId = el.id || '';
        // SVG elements expose className as an object, not a string
        const className = typeof el.className === 'string' ? el.className : '';
        const textContent = readDiscoveredElementText(el, tagName);

        discoveredMap.set(el, {
          element: el,
          score: scoreDiscoveredElement(tagName, elementId, textContent, className, elRect),
          textContent,
          className,
          elementId,
          tagName,
          rect: elRect,
        });
      }
    }
  }

  // Sort by score descending (ties keep discovery order) and keep the best few
  const ranked = Array.from(discoveredMap.values())
    .sort((a, b) => b.score - a.score)
    .slice(0, DISCOVERY_MAX_RESULTS);

  // Convert to VisionFocusedElement format
  return ranked.map((item): VisionFocusedElement => ({
    name: item.elementId || item.tagName,
    type: 'component', // Generic type since we're discovering
    coordinates: {
      x: item.rect.left + window.scrollX,
      y: item.rect.top + window.scrollY,
      width: item.rect.width,
      height: item.rect.height,
    },
    textContent: item.textContent || undefined,
    className: item.className || undefined,
    elementId: item.elementId || undefined,
    description: `${item.tagName}${item.elementId ? '#' + item.elementId : ''}${item.textContent ? ': "' + item.textContent.substring(0, 30) + '"' : ''}`,
  }));
}
|
|
163
|
+
|
|
15
164
|
interface ScreenshotAnnotatorProps {
|
|
16
|
-
/** Called when user confirms their selection with captured screenshot */
|
|
17
|
-
onConfirm: (annotatedScreenshot: string, bounds: Rectangle) => void;
|
|
165
|
+
/** Called when user confirms their selection with captured screenshot and discovered elements */
|
|
166
|
+
onConfirm: (annotatedScreenshot: string, bounds: Rectangle, discoveredElements: VisionFocusedElement[]) => void;
|
|
18
167
|
/** Called when user cancels */
|
|
19
168
|
onCancel: () => void;
|
|
20
169
|
}
|
|
@@ -88,13 +237,26 @@ export function ScreenshotAnnotator({
|
|
|
88
237
|
setStartPos(null);
|
|
89
238
|
}, []);
|
|
90
239
|
|
|
91
|
-
// Confirm: capture screenshot and annotate it
|
|
240
|
+
// Confirm: discover elements in bounds, capture screenshot and annotate it
|
|
92
241
|
const handleConfirm = useCallback(async () => {
|
|
93
242
|
if (!currentRect || isCapturing) return;
|
|
94
243
|
|
|
95
244
|
setIsCapturing(true);
|
|
96
245
|
|
|
97
246
|
try {
|
|
247
|
+
// FIRST: Discover DOM elements within the drawn rectangle
|
|
248
|
+
// This must happen BEFORE the overlay is removed to get accurate results
|
|
249
|
+
console.log("[ScreenshotAnnotator] Discovering elements in bounds:", currentRect);
|
|
250
|
+
const discoveredElements = discoverElementsInBounds(currentRect);
|
|
251
|
+
console.log("[ScreenshotAnnotator] Discovered elements:", {
|
|
252
|
+
count: discoveredElements.length,
|
|
253
|
+
elements: discoveredElements.map(e => ({
|
|
254
|
+
name: e.name,
|
|
255
|
+
text: e.textContent?.substring(0, 30),
|
|
256
|
+
id: e.elementId,
|
|
257
|
+
})),
|
|
258
|
+
});
|
|
259
|
+
|
|
98
260
|
// Capture the full page screenshot (excluding DevTools elements)
|
|
99
261
|
const canvas = await html2canvas(document.body, {
|
|
100
262
|
ignoreElements: (element) => {
|
|
@@ -140,7 +302,9 @@ export function ScreenshotAnnotator({
|
|
|
140
302
|
ctx.fillText(labelText, labelX + labelPadding, labelY + 16);
|
|
141
303
|
|
|
142
304
|
const annotatedScreenshot = canvas.toDataURL("image/png", 0.9);
|
|
143
|
-
|
|
305
|
+
|
|
306
|
+
// Pass screenshot, bounds, AND discovered elements to callback
|
|
307
|
+
onConfirm(annotatedScreenshot, currentRect, discoveredElements);
|
|
144
308
|
} catch (error) {
|
|
145
309
|
console.error("Failed to capture screenshot:", error);
|
|
146
310
|
setIsCapturing(false);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "sonance-brand-mcp",
|
|
3
|
-
"version": "1.3.94",
|
|
3
|
+
"version": "1.3.95",
|
|
4
4
|
"description": "MCP Server for Sonance Brand Guidelines and Component Library - gives Claude instant access to brand colors, typography, and UI components.",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"type": "module",
|