@triscope/mcp 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +31 -0
- package/bin/triscope-mcp-supervised.mjs +114 -0
- package/bin/triscope-mcp.mjs +11 -0
- package/dist/browser.mjs +348 -0
- package/dist/browser.mjs.map +1 -0
- package/dist/logger.mjs +51 -0
- package/dist/logger.mjs.map +1 -0
- package/dist/refs.mjs +396 -0
- package/dist/refs.mjs.map +1 -0
- package/dist/server.mjs +3125 -0
- package/dist/server.mjs.map +1 -0
- package/package.json +49 -0
- package/src/browser.ts +461 -0
- package/src/logger.ts +94 -0
- package/src/optimize.ts +142 -0
- package/src/refs.ts +468 -0
- package/src/server.ts +2678 -0
- package/src/targets.ts +163 -0
package/src/optimize.ts
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
// Derivative-free optimization for knob tuning. Pure (the objective is an
|
|
2
|
+
// injected async fn), so the search logic is unit-tested without a browser;
|
|
3
|
+
// the server wires `evalAt` to set_knob → wait → capture → SSIM.
|
|
4
|
+
|
|
5
|
+
/** Outcome of a one-dimensional golden-section search. */
export interface GoldenResult {
  /** Argument of the best sample that was actually evaluated. */
  x: number;
  /** Objective value at `x`. */
  fx: number;
  /** Every real (non-cached) evaluation, in call order. */
  history: Array<{ x: number; fx: number }>;
}

const PHI = (1 + Math.sqrt(5)) / 2;
const INV_PHI = 1 / PHI;

/**
 * Golden-section search for the MAXIMUM of a unimodal `f` over [a, b].
 *
 * @param f       Async objective; called at most `maxIter` times.
 * @param a       One end of the search interval.
 * @param b       The other end. `a > b` is accepted; the interval is treated
 *                symmetrically.
 * @param maxIter Evaluation budget (counts real calls to `f`, not cache hits).
 * @param tol     Stop once the bracket is narrower than `tol * |b - a|`.
 * @returns The best evaluated sample plus the full evaluation history. When no
 *          evaluation happened (`maxIter <= 0`) the midpoint is returned with
 *          `fx = -Infinity`.
 */
export async function goldenSectionMax(
  f: (x: number) => Promise<number>,
  a: number,
  b: number,
  maxIter = 12,
  tol = 1e-4,
): Promise<GoldenResult> {
  const history: Array<{ x: number; fx: number }> = [];
  const cache = new Map<string, number>();
  let budget = maxIter;

  // Memoised, budget-limited wrapper around `f`. Probes that round to the same
  // 6-decimal key reuse the earlier score and do not consume budget.
  const evalAt = async (x: number): Promise<number> => {
    const k = x.toFixed(6);
    const hit = cache.get(k);
    if (hit !== undefined) return hit;
    if (budget <= 0) return Number.NEGATIVE_INFINITY;
    budget--;
    const fx = await f(x);
    cache.set(k, fx);
    history.push({ x, fx });
    return fx;
  };

  let lo = a;
  let hi = b;
  let c = hi - (hi - lo) * INV_PHI;
  let d = lo + (hi - lo) * INV_PHI;
  let fc = await evalAt(c);
  let fd = await evalAt(d);

  // Use the magnitude of the initial span: with reversed bounds `b - a` is
  // negative and the width test could never become false.
  const span = Math.abs(b - a);
  let width = Math.abs(hi - lo);
  while (budget > 0 && width > tol * span) {
    if (fc > fd) {
      hi = d;
      d = c;
      fd = fc;
      c = hi - (hi - lo) * INV_PHI;
      fc = await evalAt(c);
    } else {
      lo = c;
      c = d;
      fc = fd;
      d = lo + (hi - lo) * INV_PHI;
      fd = await evalAt(d);
    }
    // Cache hits do not spend budget, so also stop when the bracket can no
    // longer shrink (floating-point resolution reached); otherwise the loop
    // could spin forever.
    const next = Math.abs(hi - lo);
    if (!(next < width)) break;
    width = next;
  }

  // Pick the best real sample. NaN scores are skipped so that a NaN first
  // sample cannot mask finite ones (every comparison against NaN is false).
  let best: { x: number; fx: number } | undefined;
  for (const h of history) {
    if (Number.isNaN(h.fx)) continue;
    if (best === undefined || h.fx > best.fx) best = h;
  }
  if (best === undefined) {
    best = history[0] ?? { x: (a + b) / 2, fx: Number.NEGATIVE_INFINITY };
  }
  return { x: best.x, fx: best.fx, history };
}
|
|
61
|
+
|
|
62
|
+
/** One tunable scalar knob and the interval it is searched over. */
export interface KnobSpec {
  /** Name under which the knob's value appears in the values record. */
  key: string;
  /** Lower end of the search interval. */
  min: number;
  /** Upper end of the search interval. */
  max: number;
  /** Value the knob holds before any search has run. */
  start: number;
}
|
|
68
|
+
|
|
69
|
+
/** Inputs to `coordinateDescent`. */
export interface DescentOptions {
  /** Knobs to tune, visited in array order within each cycle. */
  knobs: KnobSpec[];
  /** Objective: scores one full assignment of knob values (higher is better). */
  evalAt: (values: Record<string, number>) => Promise<number>;
  /** Maximum number of passes over all knobs. */
  maxCycles?: number;
  /** Evaluation budget handed to each per-knob golden-section search. */
  perKnobIters?: number;
  /** Hard cap on total objective evaluations — bounds worst-case wall time. */
  maxEvaluations?: number;
  /** Stop early when a full cycle improves the score by less than this. */
  tol?: number;
}
|
|
79
|
+
|
|
80
|
+
/** Outcome of a `coordinateDescent` run. */
export interface DescentResult {
  /** Knob values of the best assignment found, keyed by knob name. */
  best: Record<string, number>;
  /** Objective score associated with `best`. */
  bestScore: number;
  /** Number of cycles that were started. */
  cycles: number;
  /** Total objective evaluations performed. */
  evaluations: number;
  /** One entry per probe made during the per-knob searches. */
  history: { cycle: number; knob: string; value: number; score: number }[];
}
|
|
87
|
+
|
|
88
|
+
/**
 * Coordinate descent over a set of knobs.
 *
 * Scores the starting assignment once, then repeatedly sweeps the knobs in
 * order. For each knob a golden-section search is run over `[min, max]` with
 * every other knob held at its current value; the search result replaces the
 * knob's value when its score is at least as good as the best score so far.
 *
 * The sweep ends when a whole cycle gains less than `tol`, when `maxCycles`
 * cycles have run, or as soon as `maxEvaluations` objective calls have been
 * made (checked before each knob and inside each probe).
 *
 * Defaults: `maxCycles` 3, `perKnobIters` 12, `maxEvaluations` unbounded,
 * `tol` 1e-3.
 */
export async function coordinateDescent(opts: DescentOptions): Promise<DescentResult> {
  const { knobs, evalAt } = opts;
  const cycleLimit = opts.maxCycles ?? 3;
  const itersPerKnob = opts.perKnobIters ?? 12;
  const evalCap = opts.maxEvaluations ?? Number.POSITIVE_INFINITY;
  const minGain = opts.tol ?? 1e-3;

  // Working assignment; mutated in place and returned as `best`.
  const current: Record<string, number> = {};
  knobs.forEach((knob) => {
    current[knob.key] = knob.start;
  });

  const history: DescentResult['history'] = [];
  let evaluations = 0;
  let bestScore = await evalAt({ ...current });
  evaluations += 1;
  let cycles = 0;

  const capReached = (): boolean => evaluations >= evalCap;
  const snapshot = (): DescentResult => ({ best: current, bestScore, cycles, evaluations, history });

  for (let cycle = 0; cycle < cycleLimit; cycle += 1) {
    cycles = cycle + 1;
    const scoreAtCycleStart = bestScore;

    for (const knob of knobs) {
      if (capReached()) return snapshot();

      // Objective restricted to this knob; other knobs stay at `current`.
      const probe = async (x: number): Promise<number> => {
        // Past the cap, report the worst possible score instead of evaluating.
        if (capReached()) return Number.NEGATIVE_INFINITY;
        evaluations += 1;
        const score = await evalAt({ ...current, [knob.key]: x });
        history.push({ cycle, knob: knob.key, value: x, score });
        return score;
      };

      const found = await goldenSectionMax(probe, knob.min, knob.max, itersPerKnob);
      if (found.fx >= bestScore) {
        current[knob.key] = found.x;
        bestScore = found.fx;
      }
    }

    // Plateau: the whole cycle gained less than the tolerance.
    if (bestScore - scoreAtCycleStart < minGain) break;
  }
  return snapshot();
}
|
|
132
|
+
|
|
133
|
+
/**
 * True when an applied knob value matches the requested one.
 *
 * - two numbers match when they differ by at most `tol`;
 * - two strings match case-insensitively (so e.g. hex colours that differ only
 *   in letter case are considered equal);
 * - anything else must be strictly equal (`===`).
 */
export function knobMatches(requested: unknown, applied: unknown, tol = 1e-4): boolean {
  if (typeof requested === 'number') {
    if (typeof applied === 'number') {
      const delta = Math.abs(requested - applied);
      return delta <= tol;
    }
  } else if (typeof requested === 'string') {
    if (typeof applied === 'string') {
      return requested.toLowerCase() === applied.toLowerCase();
    }
  }
  // Mixed or non-primitive kinds fall back to identity comparison.
  return requested === applied;
}
|
package/src/refs.ts
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
// Reference photo primitives: save a reference PNG per (element, camera),
|
|
2
|
+
// then compose a side-by-side diff against the current view + a scalar
|
|
3
|
+
// mean-absolute-difference. Designed so the user can paste an image in chat
|
|
4
|
+
// and have Claude pipe it straight into set_reference without an intermediate
|
|
5
|
+
// file system dance — both `path` and `base64` inputs are accepted.
|
|
6
|
+
|
|
7
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
|
|
8
|
+
import { dirname, join, resolve } from 'node:path';
|
|
9
|
+
import { PNG } from 'pngjs';
|
|
10
|
+
|
|
11
|
+
// Drop a leading PNG data-URL header, leaving only the base64 payload.
// Strings without that exact header are returned unchanged.
const stripPrefix = (s) => {
  const dataUrlHeader = /^data:image\/png;base64,/;
  return s.replace(dataUrlHeader, '');
};
|
|
12
|
+
|
|
13
|
+
/** Absolute path of the `refs` directory under `cwd`. */
function refsRoot(cwd) {
  return resolve(cwd, 'refs');
}

/** Reduce a camera name to a single filename-safe path segment. */
function safeCameraSegment(camera) {
  return String(camera).replace(/[^A-Za-z0-9._-]/g, '_');
}

/**
 * Directory holding the references of `element`, verified to lie inside the
 * refs root. Unlike the camera name, the element name is joined unsanitized
 * (nested names such as `ui/button` keep working), so `..` segments could
 * otherwise point reads and writes outside the refs directory.
 *
 * @throws Error when the resolved directory is not the refs root or below it.
 */
function elementRefsDir(cwd, element) {
  const root = refsRoot(cwd);
  const dir = join(root, element);
  // Walk upwards from `dir`; it is contained only if we meet `root` before
  // running out of parents (dirname of a filesystem root is itself).
  for (let probe = dir; probe !== root; ) {
    const parent = dirname(probe);
    if (parent === probe) {
      throw new Error(`element escapes the refs directory: ${element}`);
    }
    probe = parent;
  }
  return dir;
}

/**
 * Path of the still reference PNG for `(element, camera)`:
 * `<cwd>/refs/<element>/<camera>.png`, with the camera name sanitized to one
 * path segment.
 *
 * @throws Error when `element` would resolve outside the refs directory.
 */
export function refsPath(cwd, element, camera) {
  return join(elementRefsDir(cwd, element), `${safeCameraSegment(camera)}.png`);
}

/**
 * Paths of the motion reference for `(element, camera)`: the filmstrip PNG
 * and its JSON metadata, both next to the still reference.
 *
 * @throws Error when `element` would resolve outside the refs directory.
 */
export function refsMotionPaths(cwd, element, camera) {
  const safeCam = safeCameraSegment(camera);
  const base = elementRefsDir(cwd, element);
  return {
    filmstrip: join(base, `${safeCam}.motion.png`),
    meta: join(base, `${safeCam}.motion.json`),
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Store the reference image for `(element, camera)`.
 *
 * The image comes from `path` (a file on disk) or, when no path is given,
 * from `base64` (optionally carrying a `data:image/png;base64,` header). If
 * both are supplied, `path` wins.
 *
 * The bytes must start with the PNG file signature. References are decoded as
 * PNG when diffed, so anything else (a JPEG data URL, a truncated paste) is
 * rejected here instead of being stored under a `.png` name and failing later.
 *
 * @returns The destination path and the number of bytes written.
 * @throws Error when `element`/`camera` is missing, the file at `path` does
 *         not exist, neither input is provided, or the data is not a PNG.
 */
export function setReference({
  cwd,
  element,
  camera,
  path,
  base64,
}: {
  cwd: string;
  element: string;
  camera: string;
  path?: string;
  base64?: string;
}) {
  if (!element || !camera) throw new Error('element and camera are required');
  let bytes: Buffer;
  if (path) {
    if (!existsSync(path)) throw new Error(`reference file not found: ${path}`);
    bytes = readFileSync(path);
  } else if (base64) {
    bytes = Buffer.from(stripPrefix(base64), 'base64');
  } else {
    throw new Error('provide either path or base64');
  }

  // Every PNG stream begins with these eight bytes.
  const pngSignature = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);
  if (bytes.length < pngSignature.length || !bytes.subarray(0, pngSignature.length).equals(pngSignature)) {
    throw new Error('reference data is not a PNG image');
  }

  const dest = refsPath(cwd, element, camera);
  mkdirSync(dirname(dest), { recursive: true });
  writeFileSync(dest, bytes);
  return { path: dest, bytes: bytes.length };
}
|
|
61
|
+
|
|
62
|
+
/** Decode PNG bytes synchronously via `PNG.sync.read` and return the result. */
export function decodePng(buffer) {
  const image = PNG.sync.read(buffer);
  return image;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Nearest-neighbour resample of `src` to `targetW × targetH`.
 *
 * When `src` already has the requested size it is returned as-is (same
 * object, alpha untouched). Otherwise a new image is produced whose RGB comes
 * from the nearest source pixel and whose alpha is forced to 255.
 */
export function nearestNeighborResize(src, targetW, targetH) {
  const alreadySized = src.width === targetW && src.height === targetH;
  if (alreadySized) return src;

  const out = new PNG({ width: targetW, height: targetH });
  const lastRow = src.height - 1;
  const lastCol = src.width - 1;
  for (let y = 0; y < targetH; y++) {
    // Clamp guards against rounding past the last source row/column.
    const srcY = Math.min(lastRow, Math.floor((y * src.height) / targetH));
    for (let x = 0; x < targetW; x++) {
      const srcX = Math.min(lastCol, Math.floor((x * src.width) / targetW));
      const from = (srcY * src.width + srcX) * 4;
      const to = (y * targetW + x) * 4;
      for (let ch = 0; ch < 3; ch++) {
        out.data[to + ch] = src.data[from + ch];
      }
      out.data[to + 3] = 255;
    }
  }
  return out;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Place `left` and `right` next to each other in one image.
 *
 * Both inputs are scaled (aspect preserved, nearest-neighbour) to the smaller
 * of the two heights so the payload never grows, then joined horizontally
 * with a 4-px opaque black gap between them.
 */
export function composeSideBySide(left, right) {
  const GAP = 4;
  const height = Math.min(left.height, right.height);
  const scaledWidth = (img) => Math.round((img.width * height) / img.height);
  const leftW = scaledWidth(left);
  const rightW = scaledWidth(right);
  const totalW = leftW + GAP + rightW;

  const leftImg = nearestNeighborResize(left, leftW, height);
  const rightImg = nearestNeighborResize(right, rightW, height);

  const out = new PNG({ width: totalW, height });
  // A fresh image is all zeros, i.e. transparent black; make it opaque black.
  for (let alpha = 3; alpha < out.data.length; alpha += 4) out.data[alpha] = 255;

  const outRowBytes = totalW * 4;
  const leftRowBytes = leftW * 4;
  const rightRowBytes = rightW * 4;
  const rightOffset = (leftW + GAP) * 4;
  for (let y = 0; y < height; y++) {
    const outRow = y * outRowBytes;
    leftImg.data.copy(out.data, outRow, y * leftRowBytes, (y + 1) * leftRowBytes);
    rightImg.data.copy(out.data, outRow + rightOffset, y * rightRowBytes, (y + 1) * rightRowBytes);
  }
  return out;
}
|
|
106
|
+
|
|
107
|
+
/**
 * Mean absolute per-channel difference between two images.
 *
 * Both inputs are resampled to a common 256×256 grid so the metric is cheap
 * and independent of resolution. Only R, G and B are compared. The result is
 * rounded to two decimals: 0 means identical, 255 is the maximum.
 */
export function meanAbsDiff(a, b) {
  const SIDE = 256;
  const first = nearestNeighborResize(a, SIDE, SIDE);
  const second = nearestNeighborResize(b, SIDE, SIDE);

  const byteCount = SIDE * SIDE * 4;
  const channelSamples = SIDE * SIDE * 3;
  let total = 0;
  for (let j = 0; j < byteCount; j += 4) {
    // Offsets 0..2 are R, G, B; offset 3 (alpha) is skipped.
    for (let ch = 0; ch < 3; ch++) {
      total += Math.abs(first.data[j + ch] - second.data[j + ch]);
    }
  }
  return Number((total / channelSamples).toFixed(2));
}
|
|
124
|
+
|
|
125
|
+
/**
 * Mean Structural Similarity Index (SSIM) between two images.
 *
 * A perceptual metric that tolerates mild brightness/contrast shifts and
 * reacts to structural change. The value lies in [-1, 1]: 1 = identical,
 * 0 = no correlation, negative = inverse. Most natural-image diffs land in
 * [0.5, 1.0]; below ~0.9 starts to look visibly different.
 *
 * Computed on Rec.709 luminance of a 256×256 downsample (the same grid
 * meanAbsDiff uses, for comparability), over non-overlapping 8×8 windows,
 * with the standard stability constants K1=0.01, K2=0.03 and dynamic range
 * L=255. The per-window scores are averaged and rounded to four decimals.
 *
 * This is the metric auto_tune optimizes against — meanAbsDiff is too noisy
 * for an optimizer (chases pixel-level glitter), SSIM tracks the actual
 * structure of what the human sees.
 */
export function ssim(a, b): number {
  const SIDE = 256;
  const WIN = 8;
  const DYNAMIC_RANGE = 255;
  const c1 = (0.01 * DYNAMIC_RANGE) ** 2;
  const c2 = (0.03 * DYNAMIC_RANGE) ** 2;
  const samples = WIN * WIN;

  // Rec.709 luminance plane of an already-resampled image.
  const luminance = (img): Float32Array => {
    const plane = new Float32Array(SIDE * SIDE);
    for (let p = 0, j = 0; p < plane.length; p++, j += 4) {
      plane[p] = 0.2126 * img.data[j] + 0.7152 * img.data[j + 1] + 0.0722 * img.data[j + 2];
    }
    return plane;
  };
  const lumA = luminance(nearestNeighborResize(a, SIDE, SIDE));
  const lumB = luminance(nearestNeighborResize(b, SIDE, SIDE));

  let scoreSum = 0;
  let windows = 0;
  for (let top = 0; top < SIDE; top += WIN) {
    for (let left = 0; left < SIDE; left += WIN) {
      // Pass 1: window means.
      let meanA = 0;
      let meanB = 0;
      for (let dy = 0; dy < WIN; dy++) {
        const rowStart = (top + dy) * SIDE + left;
        for (let i = rowStart; i < rowStart + WIN; i++) {
          meanA += lumA[i];
          meanB += lumB[i];
        }
      }
      meanA /= samples;
      meanB /= samples;

      // Pass 2: unbiased variances and covariance around those means.
      let varA = 0;
      let varB = 0;
      let cov = 0;
      for (let dy = 0; dy < WIN; dy++) {
        const rowStart = (top + dy) * SIDE + left;
        for (let i = rowStart; i < rowStart + WIN; i++) {
          const devA = lumA[i] - meanA;
          const devB = lumB[i] - meanB;
          varA += devA * devA;
          varB += devB * devB;
          cov += devA * devB;
        }
      }
      const dof = samples - 1;
      varA /= dof;
      varB /= dof;
      cov /= dof;

      const numerator = (2 * meanA * meanB + c1) * (2 * cov + c2);
      const denominator = (meanA * meanA + meanB * meanB + c1) * (varA + varB + c2);
      scoreSum += numerator / denominator;
      windows += 1;
    }
  }
  return Number((scoreSum / windows).toFixed(4));
}
|
|
194
|
+
|
|
195
|
+
/** Coarse spatial SSIM map as produced by `ssimTileGrid`. */
export interface TileGrid {
  /** SSIM per tile, grid[row][col], in [-1, 1] (1 = identical region). */
  grid: number[][];
  /** The single most-divergent tile — tells the agent WHERE frames differ. */
  worst: { row: number; col: number; ssim: number };
  /** Lowest tile score. */
  min: number;
  /** Highest tile score. */
  max: number;
  /** Average of all tile scores. */
  mean: number;
  /** Number of tiles along each side of the grid. */
  gridSize: number;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Per-tile SSIM map. The scalar `ssim()` averages away the spatial detail it
 * computes per window; this surfaces a coarse `gridSize × gridSize` map
 * (default 8×8 over the 256×256 downsample, i.e. 32-px tiles) plus the worst
 * tile, so an agent can localize "bow specular clipped, top-right" instead of
 * re-reading the composite image. Same luminance + SSIM formula as `ssim()`,
 * applied to one window per tile.
 *
 * Tiles are `floor(256 / gridSize)` pixels wide; when `gridSize` does not
 * divide 256 the right and bottom remainder is left out of the map.
 *
 * @param gridSize Tiles per side. Must be an integer in [1, 128] so every
 *                 tile lies inside the image and has at least two samples
 *                 per side (the unbiased variance divides by `n - 1`).
 * @throws RangeError when `gridSize` is outside that range or not an integer.
 */
export function ssimTileGrid(a, b, gridSize = 8): TileGrid {
  const W = 256;
  const H = 256;
  // Without this guard a zero/negative size yields Infinity/NaN statistics, a
  // fractional size reads past the luminance plane, and a size >= 256 makes
  // every tile score 0/0.
  if (!Number.isInteger(gridSize) || gridSize < 1 || gridSize > W / 2) {
    throw new RangeError(`gridSize must be an integer in [1, ${W / 2}], got ${gridSize}`);
  }

  const A = nearestNeighborResize(a, W, H);
  const B = nearestNeighborResize(b, W, H);
  const lumA = new Float32Array(W * H);
  const lumB = new Float32Array(W * H);
  for (let i = 0; i < W * H; i++) {
    const j = i * 4;
    lumA[i] = 0.2126 * A.data[j] + 0.7152 * A.data[j + 1] + 0.0722 * A.data[j + 2];
    lumB[i] = 0.2126 * B.data[j] + 0.7152 * B.data[j + 1] + 0.0722 * B.data[j + 2];
  }

  const L = 255;
  const C1 = (0.01 * L) ** 2;
  const C2 = (0.03 * L) ** 2;
  const tile = Math.floor(W / gridSize);
  const n = tile * tile;

  const grid: number[][] = [];
  let worst = { row: 0, col: 0, ssim: Number.POSITIVE_INFINITY };
  let min = Number.POSITIVE_INFINITY;
  let max = Number.NEGATIVE_INFINITY;
  let sum = 0;
  let count = 0;

  for (let r = 0; r < gridSize; r++) {
    const rowArr: number[] = [];
    const y0 = r * tile;
    for (let c = 0; c < gridSize; c++) {
      const x0 = c * tile;

      // Pass 1: tile means.
      let muA = 0;
      let muB = 0;
      for (let dy = 0; dy < tile; dy++) {
        for (let dx = 0; dx < tile; dx++) {
          const i = (y0 + dy) * W + (x0 + dx);
          muA += lumA[i];
          muB += lumB[i];
        }
      }
      muA /= n;
      muB /= n;

      // Pass 2: unbiased variances and covariance.
      let vA = 0;
      let vB = 0;
      let cov = 0;
      for (let dy = 0; dy < tile; dy++) {
        for (let dx = 0; dx < tile; dx++) {
          const i = (y0 + dy) * W + (x0 + dx);
          const da = lumA[i] - muA;
          const db = lumB[i] - muB;
          vA += da * da;
          vB += db * db;
          cov += da * db;
        }
      }
      vA /= n - 1;
      vB /= n - 1;
      cov /= n - 1;

      const s =
        ((2 * muA * muB + C1) * (2 * cov + C2)) / ((muA * muA + muB * muB + C1) * (vA + vB + C2));
      const sr = +s.toFixed(4);
      rowArr.push(sr);
      sum += sr;
      count += 1;
      if (sr < min) min = sr;
      if (sr > max) max = sr;
      // Strict `<` keeps the first (top-left-most) tile on ties.
      if (sr < worst.ssim) worst = { row: r, col: c, ssim: sr };
    }
    grid.push(rowArr);
  }

  return {
    grid,
    worst,
    min: +min.toFixed(4),
    max: +max.toFixed(4),
    mean: +(sum / count).toFixed(4),
    gridSize,
  };
}
|
|
287
|
+
|
|
288
|
+
/**
 * Map `t` (clamped to [0, 1]) onto a black → blue → yellow → red ramp and
 * return the colour as an `[r, g, b]` triple of 0–255 integers. Colours are
 * interpolated linearly between the ramp stops at 0, 0.33, 0.66 and 1.
 */
function heatColor(t: number): [number, number, number] {
  const ramp: Array<[number, [number, number, number]]> = [
    [0, [0, 0, 0]],
    [0.33, [0, 0, 255]],
    [0.66, [255, 255, 0]],
    [1, [255, 0, 0]],
  ];
  const x = Math.max(0, Math.min(1, t));

  // First stop (after the initial one) that is not below x.
  const upper = ramp.findIndex(([pos], i) => i > 0 && x <= pos);
  // No stop matched (x is NaN): fall back to the hot end of the ramp.
  if (upper === -1) return [255, 0, 0];

  const [t0, from] = ramp[upper - 1];
  const [t1, to] = ramp[upper];
  const f = (x - t0) / (t1 - t0 || 1);
  const mix = (ch: number): number => Math.round(from[ch] + f * (to[ch] - from[ch]));
  return [mix(0), mix(1), mix(2)];
}
|
|
311
|
+
|
|
312
|
+
/**
 * Per-pixel difference heatmap (black=identical → blue → yellow → red=max
 * difference). Both inputs are resampled to `size × size` (256 by default);
 * each output pixel encodes the mean absolute RGB difference, normalized to
 * [0, 1], through `heatColor`. Returns an encoded PNG Buffer that the diff
 * tool ships as a second image block so the agent SEES where frames diverge.
 */
export function composeDiffHeatmap(a, b, size = 256): Buffer {
  const first = nearestNeighborResize(a, size, size);
  const second = nearestNeighborResize(b, size, size);
  const heat = new PNG({ width: size, height: size });

  const MAX_CHANNEL_SUM = 3 * 255;
  const pixelCount = size * size;
  for (let p = 0, j = 0; p < pixelCount; p++, j += 4) {
    const channelSum =
      Math.abs(first.data[j] - second.data[j]) +
      Math.abs(first.data[j + 1] - second.data[j + 1]) +
      Math.abs(first.data[j + 2] - second.data[j + 2]);
    const rgb = heatColor(channelSum / MAX_CHANNEL_SUM);
    heat.data[j] = rgb[0];
    heat.data[j + 1] = rgb[1];
    heat.data[j + 2] = rgb[2];
    heat.data[j + 3] = 255; // always opaque
  }
  return PNG.sync.write(heat);
}
|
|
336
|
+
|
|
337
|
+
/**
 * Tile N frames horizontally into a single filmstrip PNG.
 *
 * Every frame (base64 PNG, optionally with a PNG data-URL header) is scaled,
 * aspect preserved, to the smallest source height so the payload stays
 * bounded even for large frames. Frames are separated by `opts.sep` pixels
 * (2 by default) of opaque black.
 *
 * @returns The encoded filmstrip as a PNG Buffer.
 * @throws Error when `frameBase64s` is not a non-empty array.
 */
export function composeFilmstrip(frameBase64s: string[], opts: { sep?: number } = {}): Buffer {
  if (!Array.isArray(frameBase64s) || frameBase64s.length === 0) {
    throw new Error('composeFilmstrip: no frames');
  }
  const gap = opts.sep ?? 2;
  const decoded = frameBase64s.map((encoded) =>
    decodePng(Buffer.from(stripPrefix(encoded), 'base64')),
  );

  let stripH = Number.POSITIVE_INFINITY;
  for (const frame of decoded) stripH = Math.min(stripH, frame.height);

  const tiles = decoded.map((frame) =>
    nearestNeighborResize(frame, Math.round((frame.width * stripH) / frame.height), stripH),
  );

  // Total width: every tile, plus one gap before each tile except the first.
  let stripW = 0;
  tiles.forEach((tile, i) => {
    stripW = stripW + tile.width + (i > 0 ? gap : 0);
  });

  const out = new PNG({ width: stripW, height: stripH });
  // Opaque black background (a fresh image has alpha 0).
  for (let alpha = 3; alpha < out.data.length; alpha += 4) out.data[alpha] = 255;

  let offsetX = 0;
  for (const tile of tiles) {
    const rowBytes = tile.width * 4;
    for (let y = 0; y < stripH; y++) {
      tile.data.copy(out.data, (y * stripW + offsetX) * 4, y * rowBytes, (y + 1) * rowBytes);
    }
    offsetX += tile.width + gap;
  }
  return PNG.sync.write(out);
}
|
|
365
|
+
|
|
366
|
+
/**
 * Scalar amount of motion in a frame sequence: the average of `meanAbsDiff`
 * over each pair of consecutive frames, rounded to two decimals.
 *
 * Rough scale: 0 = no motion; >5 = visible; >20 = vigorous.
 * Returns 0 when fewer than two frames (or a non-array) are supplied.
 */
export function motionMagnitudeFromFrames(frameBase64s) {
  if (!Array.isArray(frameBase64s) || frameBase64s.length < 2) return 0;

  const frames = frameBase64s.map((encoded) =>
    decodePng(Buffer.from(stripPrefix(encoded), 'base64')),
  );
  const pairCount = frames.length - 1;
  let sum = 0;
  frames.forEach((frame, i) => {
    if (i > 0) sum += meanAbsDiff(frames[i - 1], frame);
  });
  return Number((sum / pairCount).toFixed(2));
}
|
|
377
|
+
|
|
378
|
+
/**
 * Store a motion reference for `(element, camera)`: the frames composed into
 * one filmstrip PNG, plus a JSON sidecar holding the frame count, the
 * caller's `meta` fields and a `savedAt` ISO timestamp. Fields in `meta` are
 * spread after `frames` and before `savedAt`.
 *
 * @returns Paths of both files and the number of frames stored.
 * @throws Error when fewer than two frames are supplied.
 */
export function setReferenceMotion({ cwd, element, camera, frameBase64s, meta }) {
  const frameCount = Array.isArray(frameBase64s) ? frameBase64s.length : 0;
  if (frameCount < 2) {
    throw new Error('setReferenceMotion: need at least 2 frames');
  }

  const strip = composeFilmstrip(frameBase64s);
  const targets = refsMotionPaths(cwd, element, camera);
  const filmstripPath = targets.filmstrip;
  const metaPath = targets.meta;

  mkdirSync(dirname(filmstripPath), { recursive: true });
  writeFileSync(filmstripPath, strip);

  const sidecar = {
    frames: frameBase64s.length,
    ...meta,
    savedAt: new Date().toISOString(),
  };
  writeFileSync(metaPath, JSON.stringify(sidecar, null, 2));

  return { filmstripPath, metaPath, frames: frameBase64s.length };
}
|
|
400
|
+
|
|
401
|
+
/**
 * Compare the current frames against the stored motion reference.
 *
 * Builds a filmstrip from `currentFrames`, stacks it under the stored
 * reference filmstrip (both stretched to a common width, 4-px black band in
 * between) and scores the two strips with a single whole-strip `meanAbsDiff`.
 * The saved metadata, when present and parseable, is returned alongside but
 * is not used in the comparison.
 *
 * @returns Reference path, saved metadata (or null), the scalar diff, and the
 *          stacked composite as base64 PNG.
 * @throws Error when no motion reference exists or `currentFrames` is not a
 *         non-empty array.
 */
export function diffReferenceMotion({ cwd, element, camera, currentFrames }) {
  const stored = refsMotionPaths(cwd, element, camera);
  const fpath = stored.filmstrip;
  const mpath = stored.meta;
  if (!existsSync(fpath)) {
    throw new Error(`no motion reference at ${fpath} — call set_reference_motion first`);
  }
  if (!Array.isArray(currentFrames) || currentFrames.length === 0) {
    throw new Error('currentFrames must be a non-empty array of base64 PNGs');
  }

  const refStrip = decodePng(readFileSync(fpath));
  const curStrip = decodePng(composeFilmstrip(currentFrames));

  // Layout: reference on top, current below. Both bands share the smaller
  // height and the wider of the two aspect-scaled widths.
  const GAP = 4;
  const bandH = Math.min(refStrip.height, curStrip.height);
  const refW = Math.round((refStrip.width * bandH) / refStrip.height);
  const curW = Math.round((curStrip.width * bandH) / curStrip.height);
  const width = Math.max(refW, curW);

  const composite = new PNG({ width, height: bandH * 2 + GAP });
  for (let alpha = 3; alpha < composite.data.length; alpha += 4) composite.data[alpha] = 255;

  const topBand = nearestNeighborResize(refStrip, width, bandH);
  const bottomBand = nearestNeighborResize(curStrip, width, bandH);
  // Bands are as wide as the composite, so their rows are contiguous in both
  // buffers and each band can be copied in one go.
  const bandBytes = bandH * width * 4;
  topBand.data.copy(composite.data, 0, 0, bandBytes);
  bottomBand.data.copy(composite.data, (bandH + GAP) * width * 4, 0, bandBytes);

  // The sidecar is informational only; a missing or corrupt file yields null.
  let meta = null;
  try {
    if (existsSync(mpath)) meta = JSON.parse(readFileSync(mpath, 'utf8'));
  } catch {
    /* tolerate corrupt meta */
  }

  const motionDiff = meanAbsDiff(refStrip, curStrip);
  return {
    refFilmstripPath: fpath,
    refMeta: meta,
    motionDiff,
    compositeBase64: PNG.sync.write(composite).toString('base64'),
  };
}
|
|
440
|
+
|
|
441
|
+
/**
 * Compare the current capture against the stored still reference for
 * `(element, camera)`.
 *
 * @returns The scalar metrics (`meanAbsDiff`: 0 = identical, 255 = max
 *          difference; `ssim`: 1 = identical, lower = more different), the
 *          8×8 SSIM tile map with its worst tile and min/max/mean, a
 *          side-by-side composite and a difference heatmap (both base64 PNG).
 * @throws Error when no reference exists or `currentBase64` is missing.
 */
export function diffReference({ cwd, element, camera, currentBase64 }) {
  const refPath = refsPath(cwd, element, camera);
  if (!existsSync(refPath)) {
    throw new Error(`no reference at ${refPath} — call set_reference first`);
  }
  if (!currentBase64) throw new Error('currentBase64 is required');

  const reference = decodePng(readFileSync(refPath));
  const current = decodePng(Buffer.from(stripPrefix(currentBase64), 'base64'));

  const compositeBase64 = PNG.sync.write(composeSideBySide(reference, current)).toString('base64');
  const meanAbs = meanAbsDiff(reference, current);
  const ssimScore = ssim(reference, current);
  // Spatial localization: tile map + single worst tile, so the agent knows
  // WHERE the divergence is without re-reading the composite image.
  const { grid, worst, min, max, mean } = ssimTileGrid(reference, current);
  const heatmapBase64 = composeDiffHeatmap(reference, current).toString('base64');

  return {
    camera,
    refPath,
    meanAbsDiff: meanAbs,
    ssim: ssimScore,
    tileGrid: grid,
    worstTile: worst,
    tileStats: { min, max, mean },
    compositeBase64,
    heatmapBase64,
  };
}
|