npm - @writepanda/mcp - Versions diffs - 1.44.0 → 1.49.0 - Mend

@writepanda/mcp 1.44.0 → 1.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/bin/server.mjs +107 -17
package/package.json +1 -1

package/bin/server.mjs CHANGED Viewed

@@ -275,6 +275,52 @@ const TOOLS = [
 		},
 		command: "workspace.contents",
 	},
+	{
+		name: "workspace_get_brand",
+		description:
+			"Return the active workspace's brand kit (name, tagline, colors, typography, voice, logoPath). Use BEFORE authoring custom motion graphics, lower-thirds, outros, or thumbnails — the v1.32 motion-philosophy contract requires reading the brand and using its values verbatim instead of inventing a default look. Returns { brand: null } if no brand is configured (in which case ASK the user for at least the primary color + light/dark mood before authoring).",
+		inputSchema: { type: "object", properties: {} },
+		command: "workspace.get-brand",
+	},
+	{
+		name: "workspace_set_brand",
+		description:
+			"Write/replace the active workspace's brand kit. Pass any subset of fields under `brand`. Hex colors are normalized to #rrggbb on the way in; empty strings are dropped; unknown voice values are ignored. Pass brand=null (or omit it) to clear the brand entirely. Don't call this on every turn — only when the user explicitly tells you their brand is changing.",
+		inputSchema: {
+			type: "object",
+			properties: {
+				brand: {
+					type: ["object", "null"],
+					properties: {
+						name: { type: "string" },
+						tagline: { type: "string" },
+						colors: {
+							type: "object",
+							properties: {
+								primary: { type: "string" },
+								accent: { type: "string" },
+								ink: { type: "string" },
+								background: { type: "string" },
+							},
+						},
+						typography: {
+							type: "object",
+							properties: {
+								display: { type: "string" },
+								body: { type: "string" },
+							},
+						},
+						voice: {
+							type: "string",
+							enum: ["minimal", "bold", "editorial", "casual", "corporate", "playful"],
+						},
+						logoPath: { type: "string" },
+					},
+				},
+			},
+		},
+		command: "workspace.set-brand",
+	},
 	// ── YouTube publishing (v1.19) ──────────────────────────────────
 	// Connect Google accounts per workspace, publish exports, edit
@@ -1712,7 +1758,8 @@ const TOOLS = [
 			properties: {
 				templateId: {
 					type: "string",
-					description: "Template id from motion_list (e.g. creator-card, stat-reveal, split-panel).",
+					description:
+						"Template id from motion_list (e.g. creator-card, stat-reveal, split-panel).",
 				},
 				slots: {
 					type: "object",
@@ -1789,7 +1836,7 @@ const TOOLS = [
 	{
 		name: "motion_screenshot",
 		description:
-			"Capture a single PNG frame of an HTML composition at a given timestamp — no FFmpeg, sub-second. Use this to validate layout, font sizes, and element positions BEFORE committing to a full motion_render_html render. Returns { outputPath } directly (no jobId polling needed).",
+			"Capture a single PNG frame of an HTML composition at a given timestamp — no FFmpeg, sub-second. Use BEFORE committing to a full motion_render_html render to validate layout, fonts, brand colors, element positions. Returns { outputPath, previewPath, width, height }. `outputPath` is the full-res 1920×1080 PNG (user-facing artifact, keep on disk). `previewPath` is a 1280-wide downscaled sidecar — `read` THIS for vision-based verification (~600KB base64, processes in seconds). The full-res file is too big for most vision-model context to chew on quickly (a 1920×1080 PNG base64-encodes to ~2.1MB and stalls vision models for minutes). If `previewPath` is absent (source already ≤1280px wide), `read` outputPath directly. If your model isn't vision-capable, skip the `read` and surface outputPath to the user instead.",
 		inputSchema: {
 			type: "object",
 			properties: {
@@ -1845,7 +1892,7 @@ const TOOLS = [
 	{
 		name: "motion_verify_frames",
 		description:
-			"Extract PNG frames at given timestamps from a rendered video so the agent can VISUALLY verify the motion graphics landed. This operationalises the 'lint passing ≠ design working — VIEW THE FRAMES' rule in reference/motion-philosophy.md §4. Call this after motion_render_html or export.start and BEFORE declaring a motion-graphics deliverable done. Pass either entryId (export-library entry) or videoPath (arbitrary MP4). Timestamps are in seconds, typically 8-15 spread across hero moments. Returns { frames: [{timestampSeconds, path}...] } — the agent must then Read each path as an image (multimodal) and confirm no cropped faces / text overflow / blank frames / forbidden-zone occlusion / flat-white headlines.",
+			"Extract PNG frames at given timestamps from a rendered video for visual verification. Operationalises the 'lint passing ≠ design working — VIEW THE FRAMES' rule in reference/motion-philosophy.md. Call after motion_render_html or export.start before declaring a motion-graphics deliverable done. Pass either entryId (export-library entry) or videoPath (arbitrary MP4). Timestamps in seconds, typically 8-15 across hero moments. Returns { frames: [{ timestampSeconds, path, previewPath, previewWidth, previewHeight }, ...] }. For vision-capable models: `read` the `previewPath` of each frame (1280-wide, ~600KB base64, fast). The full-res `path` would be ~2MB base64 and stall the model for minutes. For non-vision models: skip the `read` and surface outDir to the user. Confirm: no cropped faces / text overflow / blank frames / forbidden-zone occlusion / flat-white headlines.",
 		inputSchema: {
 			type: "object",
 			required: ["timestamps"],
@@ -2113,14 +2160,15 @@ const TOOLS = [
 	{
 		name: "job_wait",
 		description:
-			"Block server-side until an async job (transcribe, audio_clean, motion_render_html, export_start) reaches a terminal state. Default timeout 60s, max 5 min. Always call this after kicking off async work.",
+			"Block server-side until an async job (transcribe, audio_clean, motion_render_html, export_start) reaches a terminal state. Default timeout 5 minutes, hard cap 30 minutes. Returns `{ job, timedOut: true }` when the deadline elapses with the job still running — this is NOT a failure; re-call job_wait with the same id to keep polling. The job continues regardless of whether anyone is waiting on it. Always call this after kicking off async work.",
 		inputSchema: {
 			type: "object",
 			properties: {
 				id: { type: "string", description: "Job id from the async tool's response" },
 				timeoutMs: {
 					type: "number",
-					description: "Max wait. Default 60_000, hard-capped at 300_000",
+					description:
+						"Max wait in ms. Default 300_000 (5 min). Hard-capped at 1_800_000 (30 min). Don't fight the cap — if a render legitimately needs longer, re-call job_wait with the same id once it returns timedOut.",
 				},
 			},
 			required: ["id"],
@@ -2228,19 +2276,43 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 	try {
 		const result = await callPandastudio(command, dispatchArgs);
-		// Format the response for the MCP client. Every tool returns
-		// JSON; we wrap it in a text block so the agent can read it
-		// in their context window. For tools where a structured
-		// content block matters (e.g. preview_show returning an
-		// image), we'd add a richer content array — for now text is
-		// sufficient and matches what `pandastudio --json` returns.
+		// Format the response for the MCP client. Default is a text
+		// block carrying the JSON result. For `motion.screenshot` and
+		// `motion.verify-frames` we ALSO inline the downscaled preview
+		// PNGs as MCP image content blocks — vision-capable models see
+		// them directly in the tool result, so the agent doesn't have
+		// to issue a separate `read` call and we don't depend on
+		// opencode's read tool routing binary correctly. Inlining a
+		// ~160KB preview adds ~210KB base64 to the response, which is
+		// trivial vs the seconds-vs-minutes vision-processing gap we
+		// were trying to close.
+		const content = [
+			{
+				type: "text",
+				text: JSON.stringify(result, null, 2),
+			},
+		];
+		try {
+			const data = result?.data ?? result;
+			const previews = collectInlinePreviewPaths(command, data);
+			for (const previewPath of previews) {
+				const fs = await import("node:fs/promises");
+				const buf = await fs.readFile(previewPath);
+				content.push({
+					type: "image",
+					data: buf.toString("base64"),
+					mimeType: "image/png",
+				});
+			}
+		} catch (imgErr) {
+			// Non-fatal: model still gets the JSON path; vision check
+			// just won't fire this turn. Surface to stderr for debugging.
+			console.error(
+				`[pandastudio-mcp] could not inline preview image for ${command}: ${imgErr?.message ?? imgErr}`,
+			);
+		}
 		return {
-			content: [
-				{
-					type: "text",
-					text: JSON.stringify(result, null, 2),
-				},
-			],
+			content,
 			isError: result?.ok === false,
 		};
 	} catch (err) {
@@ -2251,6 +2323,24 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
 	}
 });
+/** Decide which (if any) preview PNGs to inline as image content
+ *  blocks for a given tool result. Only motion.screenshot (one
+ *  preview) and motion.verify-frames (one per frame) qualify today.
+ *
+ *  @param {string} command - Dispatched command name; compared
+ *    against "motion.screenshot" and "motion.verify-frames". Any
+ *    other value yields an empty list.
+ *  @param {unknown} data - Tool result payload. A falsy or
+ *    non-object value yields an empty list.
+ *  @returns {string[]} For motion.screenshot: `[data.previewPath]`
+ *    when that field is a string, otherwise `[]`. For
+ *    motion.verify-frames: the string `previewPath` of every entry
+ *    in `data.frames`, in array order; entries that are falsy or
+ *    lack a string `previewPath` are skipped, and a non-array
+ *    `frames` is treated as empty. Paths are passed through
+ *    unmodified — presumably absolute, but that is not checked
+ *    here (NOTE(review): confirm against the producer). */
+function collectInlinePreviewPaths(command, data) {
+	if (!data || typeof data !== "object") return [];
+	if (command === "motion.screenshot") {
+		return typeof data.previewPath === "string" ? [data.previewPath] : [];
+	}
+	if (command === "motion.verify-frames") {
+		const frames = Array.isArray(data.frames) ? data.frames : [];
+		return frames
+			.map((f) => (f && typeof f.previewPath === "string" ? f.previewPath : null))
+			.filter((p) => p !== null);
+	}
+	return [];
+}
 // Start stdio transport (the MCP standard for local subprocess
 // servers). For HTTP/SSE transports there's @modelcontextprotocol/
 // sdk/server/sse — overkill for our use case since this server is

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@writepanda/mcp",
-	"version": "1.44.0",
+	"version": "1.49.0",
 	"description": "Model Context Protocol server for PandaStudio. Exposes the desktop video editor's automation surface to Cursor, Continue, Cline, Claude Desktop, and any MCP-compliant client.",
 	"keywords": [
 		"pandastudio",