tanuki-telemetry 1.4.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/install.sh +65 -33
- package/package.json +1 -1
- package/skills/commands/compare-image.md +414 -0
- package/skills/commands/coordinate.md +283 -0
- package/skills/commands/live-browser.md +45 -0
- package/skills/commands/marathon.md +111 -0
- package/skills/commands/record.md +55 -0
- package/skills/commands/revive.md +144 -0
- package/skills/commands/speak.md +49 -0
- package/skills/scripts/record-browser.sh +100 -0
- package/skills/scripts/revive-watcher.sh +149 -0
package/install.sh
CHANGED
|
@@ -7,7 +7,7 @@ set -e
|
|
|
7
7
|
|
|
8
8
|
TANUKI_DIR="${TANUKI_DIR:-$HOME/.claude/mcp-servers/telemetry}"
|
|
9
9
|
DATA_DIR="${DATA_DIR:-$HOME/.tanuki/data}"
|
|
10
|
-
VERSION="1.
|
|
10
|
+
VERSION="1.4.0"
|
|
11
11
|
|
|
12
12
|
echo ""
|
|
13
13
|
echo " ┌─────────────────────────┐"
|
|
@@ -15,8 +15,8 @@ echo " │ TANUKI // v${VERSION} │"
|
|
|
15
15
|
echo " └─────────────────────────┘"
|
|
16
16
|
echo ""
|
|
17
17
|
|
|
18
|
-
# Check prerequisites
|
|
19
|
-
command -v
|
|
18
|
+
# Check prerequisites (Node only — no Docker required)
|
|
19
|
+
command -v node >/dev/null 2>&1 || { echo "Error: node is required. Install Node.js 18+ first."; exit 1; }
|
|
20
20
|
command -v git >/dev/null 2>&1 || { echo "Error: git is required."; exit 1; }
|
|
21
21
|
|
|
22
22
|
# Clone or update
|
|
@@ -27,58 +27,90 @@ if [ -d "$TANUKI_DIR/.git" ]; then
|
|
|
27
27
|
else
|
|
28
28
|
echo "[1/4] Cloning tanuki..."
|
|
29
29
|
mkdir -p "$(dirname "$TANUKI_DIR")"
|
|
30
|
-
git clone https://github.com/
|
|
30
|
+
git clone https://github.com/ykim-24/tanuki-telemetry.git "$TANUKI_DIR" 2>/dev/null || {
|
|
31
31
|
echo " Repo not accessible — using local copy"
|
|
32
32
|
}
|
|
33
33
|
fi
|
|
34
34
|
|
|
35
35
|
cd "$TANUKI_DIR"
|
|
36
36
|
|
|
37
|
-
# Create data
|
|
38
|
-
mkdir -p "$DATA_DIR"
|
|
37
|
+
# Create data directories
|
|
38
|
+
mkdir -p "$DATA_DIR/screenshots"
|
|
39
|
+
mkdir -p "$DATA_DIR/artifacts"
|
|
40
|
+
mkdir -p "$DATA_DIR/walkthrough-screenshots"
|
|
39
41
|
|
|
40
|
-
#
|
|
41
|
-
echo "[2/4] Building
|
|
42
|
-
|
|
42
|
+
# Install and build
|
|
43
|
+
echo "[2/4] Building..."
|
|
44
|
+
npm ci --silent 2>/dev/null || npm install --silent
|
|
45
|
+
npm run build
|
|
43
46
|
|
|
44
|
-
#
|
|
47
|
+
# Start dashboard (native Node, no Docker)
|
|
45
48
|
echo "[3/4] Starting dashboard..."
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
-p 3333:3333 \
|
|
50
|
-
-v "$DATA_DIR:/data" \
|
|
51
|
-
telemetry-dashboard:latest
|
|
49
|
+
chmod +x start-dashboard.sh
|
|
50
|
+
DATA_DIR="$DATA_DIR" ./start-dashboard.sh stop 2>/dev/null || true
|
|
51
|
+
DATA_DIR="$DATA_DIR" ./start-dashboard.sh start
|
|
52
52
|
|
|
53
|
-
#
|
|
54
|
-
echo "[4/
|
|
53
|
+
# Install skills
|
|
54
|
+
echo "[4/5] Installing skills..."
|
|
55
|
+
mkdir -p "$HOME/.claude/commands" "$HOME/.claude/scripts"
|
|
56
|
+
for cmd in skills/commands/*.md; do
|
|
57
|
+
[ -f "$cmd" ] && cp "$cmd" "$HOME/.claude/commands/" && echo " ✓ $(basename "$cmd")"
|
|
58
|
+
done
|
|
59
|
+
for script in skills/scripts/*; do
|
|
60
|
+
[ -f "$script" ] && cp "$script" "$HOME/.claude/scripts/" && chmod +x "$HOME/.claude/scripts/$(basename "$script")" && echo " ✓ $(basename "$script")"
|
|
61
|
+
done
|
|
62
|
+
|
|
63
|
+
# Configure Claude Code MCP (native Node, no Docker)
|
|
64
|
+
echo "[5/5] Configuring Claude Code..."
|
|
55
65
|
CLAUDE_CONFIG="$HOME/.claude.json"
|
|
56
66
|
if [ -f "$CLAUDE_CONFIG" ]; then
|
|
57
|
-
# Check if telemetry MCP is already configured
|
|
58
67
|
if grep -q '"telemetry"' "$CLAUDE_CONFIG" 2>/dev/null; then
|
|
59
|
-
|
|
68
|
+
# Update to native mode
|
|
69
|
+
python3 -c "
|
|
70
|
+
import json
|
|
71
|
+
with open('$CLAUDE_CONFIG', 'r') as f:
|
|
72
|
+
config = json.load(f)
|
|
73
|
+
config['mcpServers']['telemetry'] = {
|
|
74
|
+
'type': 'stdio',
|
|
75
|
+
'command': 'node',
|
|
76
|
+
'args': ['$TANUKI_DIR/dist/index.js'],
|
|
77
|
+
'env': {'DATA_DIR': '$DATA_DIR'}
|
|
78
|
+
}
|
|
79
|
+
with open('$CLAUDE_CONFIG', 'w') as f:
|
|
80
|
+
json.dump(config, f, indent=2)
|
|
81
|
+
"
|
|
82
|
+
echo " ✓ Updated MCP config to native mode"
|
|
60
83
|
else
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
84
|
+
python3 -c "
|
|
85
|
+
import json
|
|
86
|
+
with open('$CLAUDE_CONFIG', 'r') as f:
|
|
87
|
+
config = json.load(f)
|
|
88
|
+
if 'mcpServers' not in config:
|
|
89
|
+
config['mcpServers'] = {}
|
|
90
|
+
config['mcpServers']['telemetry'] = {
|
|
91
|
+
'type': 'stdio',
|
|
92
|
+
'command': 'node',
|
|
93
|
+
'args': ['$TANUKI_DIR/dist/index.js'],
|
|
94
|
+
'env': {'DATA_DIR': '$DATA_DIR'}
|
|
95
|
+
}
|
|
96
|
+
with open('$CLAUDE_CONFIG', 'w') as f:
|
|
97
|
+
json.dump(config, f, indent=2)
|
|
98
|
+
"
|
|
99
|
+
echo " ✓ Added telemetry MCP to .claude.json"
|
|
69
100
|
fi
|
|
70
101
|
else
|
|
71
|
-
echo "
|
|
102
|
+
echo " Add this to your .claude.json:"
|
|
72
103
|
echo ""
|
|
73
|
-
echo
|
|
104
|
+
echo " \"mcpServers\": { \"telemetry\": { \"type\": \"stdio\", \"command\": \"node\", \"args\": [\"$TANUKI_DIR/dist/index.js\"], \"env\": { \"DATA_DIR\": \"$DATA_DIR\" } } }"
|
|
74
105
|
echo ""
|
|
75
106
|
fi
|
|
76
107
|
|
|
77
|
-
#
|
|
108
|
+
# Verify
|
|
78
109
|
sleep 2
|
|
79
|
-
if curl -s http://localhost:3333/health | grep -q '"ok":true'; then
|
|
110
|
+
if curl -s http://localhost:3333/health 2>/dev/null | grep -q '"ok":true'; then
|
|
80
111
|
echo ""
|
|
81
|
-
echo " Tanuki is running at http://localhost:3333"
|
|
112
|
+
echo " ✓ Tanuki is running at http://localhost:3333"
|
|
113
|
+
echo " ✓ Restart Claude Code to connect the MCP server"
|
|
82
114
|
echo ""
|
|
83
115
|
else
|
|
84
116
|
echo ""
|
package/package.json
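CHANGED
[Note: the package/package.json hunk (+1 -1) is missing from this capture. The hunk that follows (@@ -0,0 +1,414 @@) is the newly added file package/skills/commands/compare-image.md.]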
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
# Compare Image — Visual Diff with Qualitative Annotations
|
|
2
|
+
|
|
3
|
+
Compare two sets of images (reference vs actual) with pixel-diff heatmaps and qualitative callouts. Works for any before/after image comparison: UI screenshots, design mockups, rendered templates, chart output, etc.
|
|
4
|
+
|
|
5
|
+
## Usage
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
/compare-image <ref-dir> <actual-dir>
|
|
9
|
+
/compare-image ./mockups ./screenshots
|
|
10
|
+
/compare-image --ref ./expected --actual ./output --session-id=<existing-session>
|
|
11
|
+
/compare-image --ref ./v1-screenshots --actual ./v2-screenshots --output-dir=./diffs
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
**Arguments:**
|
|
15
|
+
- `ref-dir`: Directory containing reference (expected) images — PNGs, numbered or named.
|
|
16
|
+
- `actual-dir`: Directory containing actual (generated/current) images to compare against.
|
|
17
|
+
- `--session-id=<id>`: Attach to an existing telemetry session instead of creating a new one.
|
|
18
|
+
- `--output-dir=<path>`: Override output directory (default: `$TANUKI_OUTPUTS/comparisons/`).
|
|
19
|
+
- `--label=<name>`: Label for this comparison set (default: derived from directory names).
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
## Prerequisites
|
|
24
|
+
|
|
25
|
+
- **Python packages:** `PyMuPDF` (imported as `fitz` — only if comparing PDFs), `Pillow` (imported as `PIL`), `numpy`
|
|
26
|
+
- **agent-browser** via `npx agent-browser` (only if capturing live screenshots)
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Workflow
|
|
31
|
+
|
|
32
|
+
### Phase 1: Setup & Discovery
|
|
33
|
+
|
|
34
|
+
1. **Parse arguments** — extract directories and flags.
|
|
35
|
+
2. **Create telemetry session** (unless `--session-id` provided):
|
|
36
|
+
```
|
|
37
|
+
mcp__telemetry__log_session_start({ worktree_name: "image-comparison-<date>" })
|
|
38
|
+
```
|
|
39
|
+
3. **Discover image pairs** — match reference and actual images by filename or index:
|
|
40
|
+
- Sort both directories by filename
|
|
41
|
+
- Pair them 1:1 (ref-01.png ↔ actual-01.png, or by matching name stems)
|
|
42
|
+
- Report any unmatched images
|
|
43
|
+
4. **Log event** with pair count and any mismatches.
|
|
44
|
+
|
|
45
|
+
### Phase 2: Prepare Reference Images
|
|
46
|
+
|
|
47
|
+
Depending on your source format, prepare reference PNGs:
|
|
48
|
+
|
|
49
|
+
- **Already PNGs:** Use directly — no conversion needed.
|
|
50
|
+
- **From PDF:** Render pages to PNGs via PyMuPDF:
|
|
51
|
+
```python
|
|
52
|
+
import fitz
|
|
53
|
+
doc = fitz.open(pdf_path)
|
|
54
|
+
for i, page in enumerate(doc):
|
|
55
|
+
zoom = 1920 / page.rect.width
|
|
56
|
+
mat = fitz.Matrix(zoom, zoom)
|
|
57
|
+
pix = page.get_pixmap(matrix=mat)
|
|
58
|
+
pix.save(f'ref/image-{i+1:02d}.png')
|
|
59
|
+
```
|
|
60
|
+
- **From live URL:** Capture with agent-browser:
|
|
61
|
+
```bash
|
|
62
|
+
npx agent-browser --url "http://localhost:3000/page" --width 1920 --height 1080 --output ref/page.png
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### Phase 3: Prepare Actual Images
|
|
66
|
+
|
|
67
|
+
Same as Phase 2 — get actual/generated images as PNGs by whatever method fits your use case (screenshots, renders, exports, etc.).
|
|
68
|
+
|
|
69
|
+
### Phase 4: Qualitative Analysis (visual review)
|
|
70
|
+
|
|
71
|
+
For each image pair, **read both images** and identify every meaningful difference. Think of this as a visual code review — call out specifics, not just "things changed."
|
|
72
|
+
|
|
73
|
+
| Category | What to look for |
|
|
74
|
+
|----------|-----------------|
|
|
75
|
+
| **Layout** | Element positioning, spacing, alignment, column/grid structure |
|
|
76
|
+
| **Text** | Content differences, missing text, placeholder values, truncation |
|
|
77
|
+
| **Images/icons** | Missing assets, wrong variants, broken renders, placeholder boxes |
|
|
78
|
+
| **Color/style** | Background, accent colors, borders, gradients, opacity |
|
|
79
|
+
| **Typography** | Font size, weight, color, line height changes |
|
|
80
|
+
| **Data** | Missing values, wrong numbers, empty states |
|
|
81
|
+
| **Chrome/UI** | Headers, footers, navigation, page numbers, timestamps |
|
|
82
|
+
|
|
83
|
+
**Severity classification:**
|
|
84
|
+
- **CRITICAL** (red): Missing content, broken layout, data that should exist but doesn't
|
|
85
|
+
- **NOTABLE** (yellow): Important differences — content changes, removed elements, placeholder values
|
|
86
|
+
- **MINOR** (blue): Rendering differences — font antialiasing, sub-pixel spacing, minor color shifts
|
|
87
|
+
- **GOOD** (green): Things that match correctly — always include at least one positive finding per pair
|
|
88
|
+
|
|
89
|
+
Build a `callouts` list for each pair: `[{severity, title, details}]` (max 4 per image — the renderer in Phase 5 draws only the first four).
|
|
90
|
+
|
|
91
|
+
### Phase 5: Generate Comparison Images
|
|
92
|
+
|
|
93
|
+
Each comparison image has three columns plus qualitative callout boxes.
|
|
94
|
+
|
|
95
|
+
**Layout:**
|
|
96
|
+
```
|
|
97
|
+
┌──────────────────────────────────────────────────────────────────────────┐
|
|
98
|
+
│ Title: "Page 3 — Dashboard" [DIFF 18.2%] │
|
|
99
|
+
├────────────────────────┬──────────────────────┬──────────────────────────┤
|
|
100
|
+
│ REFERENCE │ PIXEL DIFF HEATMAP │ ACTUAL │
|
|
101
|
+
│ (Expected) │ (red = changes) │ (Current) │
|
|
102
|
+
│ [green border] │ [red border] │ [blue border] │
|
|
103
|
+
│ [533x450] │ [533x450] │ [533x450] │
|
|
104
|
+
├────────────────────────┴──────────────────────┴──────────────────────────┤
|
|
105
|
+
│ DIFFERENCES: │
|
|
106
|
+
│ ┌─CRITICAL────────┐ ┌─NOTABLE─────────┐ ┌─MINOR──────────┐ ┌─GOOD───┐│
|
|
107
|
+
│ │ Title │ │ Title │ │ Title │ │ Title ││
|
|
108
|
+
│ │ Details... │ │ Details... │ │ Details... │ │ Details ││
|
|
109
|
+
│ └─────────────────┘ └─────────────────┘ └────────────────┘ └─────────┘│
|
|
110
|
+
└──────────────────────────────────────────────────────────────────────────┘
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Python implementation** (Pillow + numpy):
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
from PIL import Image, ImageDraw, ImageFont, ImageFilter
|
|
117
|
+
import numpy as np
|
|
118
|
+
|
|
119
|
+
# --- Constants ---
|
|
120
|
+
COL_W, COL_H = 533, 450 # 3 equal columns
|
|
121
|
+
CALLOUT_H = 170
|
|
122
|
+
PAD = 20
|
|
123
|
+
|
|
124
|
+
COLORS = {
|
|
125
|
+
"critical": ((200, 40, 40), (255, 80, 80)),
|
|
126
|
+
"notable": ((180, 130, 0), (255, 200, 50)),
|
|
127
|
+
"minor": ((60, 130, 180), (100, 180, 240)),
|
|
128
|
+
"good": ((40, 150, 40), (80, 200, 80)),
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
try:
|
|
132
|
+
FONT_TITLE = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 28)
|
|
133
|
+
FONT_LABEL = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 18)
|
|
134
|
+
FONT_CALLOUT = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 14)
|
|
135
|
+
FONT_SMALL = ImageFont.truetype("/System/Library/Fonts/Helvetica.ttc", 12)
|
|
136
|
+
except Exception:
|
|
137
|
+
FONT_TITLE = FONT_LABEL = FONT_CALLOUT = FONT_SMALL = ImageFont.load_default()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def normalize_to_size(img, target_w, target_h, bg_color=(255, 255, 255)):
|
|
141
|
+
"""
|
|
142
|
+
Scale image to fit within target_w x target_h preserving aspect ratio,
|
|
143
|
+
then center it on a background. No stretching, no black bars.
|
|
144
|
+
"""
|
|
145
|
+
img_w, img_h = img.size
|
|
146
|
+
scale = min(target_w / img_w, target_h / img_h)
|
|
147
|
+
new_w = int(img_w * scale)
|
|
148
|
+
new_h = int(img_h * scale)
|
|
149
|
+
scaled = img.resize((new_w, new_h), Image.LANCZOS)
|
|
150
|
+
|
|
151
|
+
canvas = Image.new("RGB", (target_w, target_h), bg_color)
|
|
152
|
+
offset_x = (target_w - new_w) // 2
|
|
153
|
+
offset_y = (target_h - new_h) // 2
|
|
154
|
+
canvas.paste(scaled, (offset_x, offset_y))
|
|
155
|
+
return canvas
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def compute_diff_heatmap(ref, gen, threshold=25):
|
|
159
|
+
"""
|
|
160
|
+
Compute a red pixel-diff heatmap overlaid on the actual image.
|
|
161
|
+
Returns (overlay_image, diff_percentage).
|
|
162
|
+
"""
|
|
163
|
+
ref_arr = np.array(ref.convert("RGB"), dtype=np.float32)
|
|
164
|
+
gen_arr = np.array(gen.convert("RGB"), dtype=np.float32)
|
|
165
|
+
diff = np.abs(ref_arr - gen_arr).mean(axis=2) # grayscale diff per pixel
|
|
166
|
+
|
|
167
|
+
diff_pct = (diff > threshold).sum() / diff.size * 100
|
|
168
|
+
|
|
169
|
+
# Normalize diff to 0-255 intensity
|
|
170
|
+
diff_norm = np.clip(diff * (255 / max(diff.max(), 1)), 0, 255).astype(np.uint8)
|
|
171
|
+
|
|
172
|
+
# Create red overlay: high-diff pixels get bright red, low-diff transparent
|
|
173
|
+
overlay_rgba = np.zeros((*diff_norm.shape, 4), dtype=np.uint8)
|
|
174
|
+
mask = diff_norm > threshold
|
|
175
|
+
overlay_rgba[mask, 0] = 255 # R
|
|
176
|
+
overlay_rgba[mask, 1] = 0 # G
|
|
177
|
+
overlay_rgba[mask, 2] = 0 # B
|
|
178
|
+
overlay_rgba[mask, 3] = np.clip(diff_norm[mask], 60, 180) # A (semi-transparent)
|
|
179
|
+
|
|
180
|
+
red_overlay = Image.fromarray(overlay_rgba, mode="RGBA")
|
|
181
|
+
heatmap = Image.alpha_composite(gen.convert("RGBA"), red_overlay).convert("RGB")
|
|
182
|
+
|
|
183
|
+
return heatmap, diff_pct
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def create_comparison(ref_path, gen_path, out_path, label, callouts):
|
|
187
|
+
"""
|
|
188
|
+
Generate a full comparison image with 3 columns side by side:
|
|
189
|
+
REFERENCE | HEATMAP | ACTUAL — all same height, equal width.
|
|
190
|
+
Plus qualitative callout boxes below.
|
|
191
|
+
"""
|
|
192
|
+
ref = normalize_to_size(Image.open(ref_path), COL_W, COL_H)
|
|
193
|
+
gen = normalize_to_size(Image.open(gen_path), COL_W, COL_H)
|
|
194
|
+
|
|
195
|
+
heatmap, diff_pct = compute_diff_heatmap(ref, gen)
|
|
196
|
+
heatmap_col = heatmap
|
|
197
|
+
|
|
198
|
+
content_w = COL_W * 3 + PAD * 4
|
|
199
|
+
total_h = PAD + 40 + 24 + COL_H + PAD + 24 + CALLOUT_H + PAD
|
|
200
|
+
canvas = Image.new("RGB", (content_w, total_h), (25, 25, 25))
|
|
201
|
+
draw = ImageDraw.Draw(canvas)
|
|
202
|
+
|
|
203
|
+
y = PAD
|
|
204
|
+
|
|
205
|
+
# --- Title bar + diff badge ---
|
|
206
|
+
draw.text((PAD, y), label, fill=(255, 255, 255), font=FONT_TITLE)
|
|
207
|
+
badge_text = f"DIFF {diff_pct:.1f}%"
|
|
208
|
+
if diff_pct < 5:
|
|
209
|
+
badge_color = (40, 150, 40)
|
|
210
|
+
elif diff_pct < 20:
|
|
211
|
+
badge_color = (180, 130, 0)
|
|
212
|
+
else:
|
|
213
|
+
badge_color = (200, 40, 40)
|
|
214
|
+
badge_x = content_w - PAD - 140
|
|
215
|
+
draw.rectangle([badge_x, y, badge_x + 130, y + 30], fill=badge_color)
|
|
216
|
+
draw.text((badge_x + 8, y + 6), badge_text, fill=(255, 255, 255), font=FONT_LABEL)
|
|
217
|
+
y += 44
|
|
218
|
+
|
|
219
|
+
# --- Column labels ---
|
|
220
|
+
col1_x = PAD
|
|
221
|
+
col2_x = PAD * 2 + COL_W
|
|
222
|
+
col3_x = PAD * 3 + COL_W * 2
|
|
223
|
+
draw.text((col1_x, y), "REFERENCE (Expected)", fill=(140, 200, 140), font=FONT_LABEL)
|
|
224
|
+
draw.text((col2_x, y), "PIXEL DIFF HEATMAP", fill=(200, 120, 120), font=FONT_LABEL)
|
|
225
|
+
draw.text((col3_x, y), "ACTUAL (Current)", fill=(140, 160, 240), font=FONT_LABEL)
|
|
226
|
+
y += 24
|
|
227
|
+
|
|
228
|
+
# --- 3 images side by side ---
|
|
229
|
+
canvas.paste(ref, (col1_x, y))
|
|
230
|
+
canvas.paste(heatmap_col, (col2_x, y))
|
|
231
|
+
canvas.paste(gen, (col3_x, y))
|
|
232
|
+
draw.rectangle([col1_x - 1, y - 1, col1_x + COL_W, y + COL_H], outline=(100, 200, 100), width=2)
|
|
233
|
+
draw.rectangle([col2_x - 1, y - 1, col2_x + COL_W, y + COL_H], outline=(200, 60, 60), width=2)
|
|
234
|
+
draw.rectangle([col3_x - 1, y - 1, col3_x + COL_W, y + COL_H], outline=(100, 120, 240), width=2)
|
|
235
|
+
y += COL_H + PAD
|
|
236
|
+
|
|
237
|
+
# --- Callout boxes ---
|
|
238
|
+
draw.text((PAD, y), "DIFFERENCES:", fill=(220, 220, 220), font=FONT_LABEL)
|
|
239
|
+
y += 24
|
|
240
|
+
num = min(len(callouts), 4)
|
|
241
|
+
if num > 0:
|
|
242
|
+
box_w = (content_w - PAD * (num + 1)) // num
|
|
243
|
+
box_h = CALLOUT_H - 30
|
|
244
|
+
for i, c in enumerate(callouts[:4]):
|
|
245
|
+
bx = PAD + i * (box_w + PAD)
|
|
246
|
+
bg, accent = COLORS.get(c["severity"], COLORS["minor"])
|
|
247
|
+
draw.rectangle([bx, y, bx + box_w, y + box_h], fill=(40, 40, 40), outline=accent, width=2)
|
|
248
|
+
draw.rectangle([bx + 2, y + 2, bx + 80, y + 20], fill=bg)
|
|
249
|
+
draw.text((bx + 6, y + 3), c["severity"].upper(), fill=(255, 255, 255), font=FONT_SMALL)
|
|
250
|
+
draw.text((bx + 8, y + 24), c["title"], fill=accent, font=FONT_CALLOUT)
|
|
251
|
+
# Wrap details text
|
|
252
|
+
words = c["details"].split()
|
|
253
|
+
lines, current = [], ""
|
|
254
|
+
for w in words:
|
|
255
|
+
test = f"{current} {w}".strip()
|
|
256
|
+
bbox = draw.textbbox((0, 0), test, font=FONT_SMALL)
|
|
257
|
+
if bbox[2] - bbox[0] > box_w - 16 and current:
|
|
258
|
+
lines.append(current)
|
|
259
|
+
current = w
|
|
260
|
+
else:
|
|
261
|
+
current = test
|
|
262
|
+
if current:
|
|
263
|
+
lines.append(current)
|
|
264
|
+
for j, line in enumerate(lines[:4]):
|
|
265
|
+
draw.text((bx + 8, y + 42 + j * 16), line, fill=(200, 200, 200), font=FONT_SMALL)
|
|
266
|
+
|
|
267
|
+
canvas.save(out_path, quality=95)
|
|
268
|
+
return diff_pct
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
### Phase 6: Upload to Telemetry (structured findings)
|
|
272
|
+
|
|
273
|
+
Each image comparison produces telemetry records: a screenshot and a downloadable artifact for the comparison image, a structured finding event per callout, and an image-level summary event.
|
|
274
|
+
|
|
275
|
+
#### 6a. Screenshots per image pair
|
|
276
|
+
|
|
277
|
+
**The comparison image is always the primary output:**
|
|
278
|
+
```
|
|
279
|
+
mcp__telemetry__log_screenshot({
|
|
280
|
+
session_id,
|
|
281
|
+
phase: "verification",
|
|
282
|
+
description: "[COMPARISON] <label> <N> — <highest severity>: <key finding>",
|
|
283
|
+
file_path: "<absolute path to comparison PNG>"
|
|
284
|
+
})
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
Also log as an artifact for download/browsing on the dashboard:
|
|
288
|
+
```
|
|
289
|
+
mcp__telemetry__log_artifact({
|
|
290
|
+
session_id,
|
|
291
|
+
file_path: "<absolute path to comparison PNG>",
|
|
292
|
+
artifact_type: "comparison",
|
|
293
|
+
description: "<label> image <N> comparison",
|
|
294
|
+
metadata: { label: "<label>", image_number: <N>, diff_pct: <X.X> }
|
|
295
|
+
})
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
#### 6b. Structured finding event per callout
|
|
299
|
+
|
|
300
|
+
For **each individual finding**, log a `comparison_finding` event with queryable metadata:
|
|
301
|
+
|
|
302
|
+
```
|
|
303
|
+
mcp__telemetry__log_event({
|
|
304
|
+
session_id,
|
|
305
|
+
phase: "verification",
|
|
306
|
+
event_type: "info",
|
|
307
|
+
message: "<severity>: <title> — <label> image <N>",
|
|
308
|
+
metadata: {
|
|
309
|
+
type: "comparison_finding",
|
|
310
|
+
label: "<label>",
|
|
311
|
+
image_number: <N>,
|
|
312
|
+
image_name: "<filename>",
|
|
313
|
+
severity: "<critical|notable|minor|good>",
|
|
314
|
+
finding_title: "<short title>",
|
|
315
|
+
finding_details: "<full description>",
|
|
316
|
+
diff_pct: <X.X>,
|
|
317
|
+
comparison_image: "<absolute path>",
|
|
318
|
+
ref_image: "<absolute path>",
|
|
319
|
+
actual_image: "<absolute path>"
|
|
320
|
+
}
|
|
321
|
+
})
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
#### 6c. Image-level summary event
|
|
325
|
+
|
|
326
|
+
After logging all findings for an image pair:
|
|
327
|
+
|
|
328
|
+
```
|
|
329
|
+
mcp__telemetry__log_event({
|
|
330
|
+
session_id,
|
|
331
|
+
phase: "verification",
|
|
332
|
+
event_type: "info",
|
|
333
|
+
message: "Compared <label> image <N> (<name>) — <highest severity>, diff <X.X>%",
|
|
334
|
+
metadata: {
|
|
335
|
+
type: "comparison_image_summary",
|
|
336
|
+
label: "<label>",
|
|
337
|
+
image_number: <N>,
|
|
338
|
+
image_name: "<filename>",
|
|
339
|
+
diff_pct: <X.X>,
|
|
340
|
+
highest_severity: "<critical|notable|minor|good>",
|
|
341
|
+
finding_count: { critical: <N>, notable: <N>, minor: <N>, good: <N> },
|
|
342
|
+
comparison_image: "<absolute path>"
|
|
343
|
+
}
|
|
344
|
+
})
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
#### 6d. Final rollup event
|
|
348
|
+
|
|
349
|
+
After all image pairs:
|
|
350
|
+
|
|
351
|
+
```
|
|
352
|
+
mcp__telemetry__log_event({
|
|
353
|
+
session_id,
|
|
354
|
+
phase: "deliverables",
|
|
355
|
+
event_type: "info",
|
|
356
|
+
message: "Image comparison complete — <N> images, <C> critical, <N> notable findings",
|
|
357
|
+
metadata: {
|
|
358
|
+
type: "comparison_rollup",
|
|
359
|
+
label: "<label>",
|
|
360
|
+
total_images: <N>,
|
|
361
|
+
total_findings: <N>,
|
|
362
|
+
by_severity: { critical: <N>, notable: <N>, minor: <N>, good: <N> },
|
|
363
|
+
avg_diff_pct: <X.X>
|
|
364
|
+
}
|
|
365
|
+
})
|
|
366
|
+
```
|
|
367
|
+
|
|
368
|
+
#### Querying findings programmatically
|
|
369
|
+
|
|
370
|
+
```sql
|
|
371
|
+
-- All critical findings across sessions
|
|
372
|
+
SELECT * FROM events
|
|
373
|
+
WHERE metadata->>'type' = 'comparison_finding'
|
|
374
|
+
AND metadata->>'severity' = 'critical';
|
|
375
|
+
|
|
376
|
+
-- All findings for a specific comparison
|
|
377
|
+
SELECT * FROM events
|
|
378
|
+
WHERE metadata->>'type' = 'comparison_finding'
|
|
379
|
+
AND metadata->>'label' = 'homepage-redesign';
|
|
380
|
+
|
|
381
|
+
-- Image summaries sorted by diff percentage
|
|
382
|
+
SELECT * FROM events
|
|
383
|
+
WHERE metadata->>'type' = 'comparison_image_summary'
|
|
384
|
+
ORDER BY (metadata->>'diff_pct')::float DESC;
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
### Phase 7: Summary Output
|
|
388
|
+
|
|
389
|
+
```markdown
|
|
390
|
+
## Image Comparison Results
|
|
391
|
+
|
|
392
|
+
| Image | Diff % | Severity | Key Finding |
|
|
393
|
+
|-------|:------:|----------|-------------|
|
|
394
|
+
| 01 — Homepage | 12.3% | NOTABLE | Header layout shifted, CTA button color changed |
|
|
395
|
+
| 02 — Dashboard | 18.2% | CRITICAL | Chart data missing, sidebar collapsed |
|
|
396
|
+
| ... | ... | ... | ... |
|
|
397
|
+
|
|
398
|
+
**Critical:** <count> images
|
|
399
|
+
**Notable:** <count> images
|
|
400
|
+
**Good:** <count> images
|
|
401
|
+
|
|
402
|
+
**Output:** <output-dir>
|
|
403
|
+
**Telemetry:** Session <id>
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
---
|
|
407
|
+
|
|
408
|
+
## Common Use Cases
|
|
409
|
+
|
|
410
|
+
- **UI regression testing:** Compare screenshots before/after a code change
|
|
411
|
+
- **Design fidelity:** Compare design mockup PNGs against implemented page screenshots
|
|
412
|
+
- **Generated content:** Compare expected output against LLM/AI-generated output
|
|
413
|
+
- **Email templates:** Compare HTML email reference renders against actual sends
|
|
414
|
+
- **Chart/data viz:** Compare expected chart renders against actual output
|