@writepanda/mcp 1.45.0 → 1.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/bin/server.mjs +59 -16
  2. package/package.json +1 -1
package/bin/server.mjs CHANGED
@@ -1836,7 +1836,7 @@ const TOOLS = [
1836
1836
  {
1837
1837
  name: "motion_screenshot",
1838
1838
  description:
1839
- "Capture a single PNG frame of an HTML composition at a given timestamp — no FFmpeg, sub-second. Use this to validate layout, font sizes, and element positions BEFORE committing to a full motion_render_html render. Returns { outputPath } directly (no jobId polling needed).",
1839
+ "Capture a single PNG frame of an HTML composition at a given timestamp — no FFmpeg, sub-second. Use BEFORE committing to a full motion_render_html render to validate layout, fonts, brand colors, element positions. Returns { outputPath, previewPath, width, height }. `outputPath` is the full-res 1920×1080 PNG (user-facing artifact, keep on disk). `previewPath` is a 1280-wide downscaled sidecar — `read` THIS for vision-based verification (~600KB base64, processes in seconds). The full-res file is too big for most vision-model context to chew on quickly (a 1920×1080 PNG base64-encodes to ~2.1MB and stalls vision models for minutes). If `previewPath` is absent (source already ≤1280px wide), `read` outputPath directly. If your model isn't vision-capable, skip the `read` and surface outputPath to the user instead.",
1840
1840
  inputSchema: {
1841
1841
  type: "object",
1842
1842
  properties: {
@@ -1892,7 +1892,7 @@ const TOOLS = [
1892
1892
  {
1893
1893
  name: "motion_verify_frames",
1894
1894
  description:
1895
- "Extract PNG frames at given timestamps from a rendered video so the agent can VISUALLY verify the motion graphics landed. This operationalises the 'lint passing ≠ design working — VIEW THE FRAMES' rule in reference/motion-philosophy.md §4. Call this after motion_render_html or export.start and BEFORE declaring a motion-graphics deliverable done. Pass either entryId (export-library entry) or videoPath (arbitrary MP4). Timestamps are in seconds, typically 8-15 spread across hero moments. Returns { frames: [{timestampSeconds, path}...] } the agent must then Read each path as an image (multimodal) and confirm no cropped faces / text overflow / blank frames / forbidden-zone occlusion / flat-white headlines.",
1895
+ "Extract PNG frames at given timestamps from a rendered video for visual verification. Operationalises the 'lint passing ≠ design working — VIEW THE FRAMES' rule in reference/motion-philosophy.md. Call after motion_render_html or export.start before declaring a motion-graphics deliverable done. Pass either entryId (export-library entry) or videoPath (arbitrary MP4). Timestamps in seconds, typically 8-15 across hero moments. Returns { frames: [{ timestampSeconds, path, previewPath, previewWidth, previewHeight }, ...] }. For vision-capable models: `read` the `previewPath` of each frame (1280-wide, ~600KB base64, fast). The full-res `path` would be ~2MB base64 and stall the model for minutes. For non-vision models: skip the `read` and surface outDir to the user. Confirm: no cropped faces / text overflow / blank frames / forbidden-zone occlusion / flat-white headlines.",
1896
1896
  inputSchema: {
1897
1897
  type: "object",
1898
1898
  required: ["timestamps"],
@@ -2160,14 +2160,15 @@ const TOOLS = [
2160
2160
  {
2161
2161
  name: "job_wait",
2162
2162
  description:
2163
- "Block server-side until an async job (transcribe, audio_clean, motion_render_html, export_start) reaches a terminal state. Default timeout 60s, max 5 min. Always call this after kicking off async work.",
2163
+ "Block server-side until an async job (transcribe, audio_clean, motion_render_html, export_start) reaches a terminal state. Default timeout 5 minutes, hard cap 30 minutes. Returns `{ job, timedOut: true }` when the deadline elapses with the job still running — this is NOT a failure; re-call job_wait with the same id to keep polling. The job continues regardless of whether anyone is waiting on it. Always call this after kicking off async work.",
2164
2164
  inputSchema: {
2165
2165
  type: "object",
2166
2166
  properties: {
2167
2167
  id: { type: "string", description: "Job id from the async tool's response" },
2168
2168
  timeoutMs: {
2169
2169
  type: "number",
2170
- description: "Max wait. Default 60_000, hard-capped at 300_000",
2170
+ description:
2171
+ "Max wait in ms. Default 300_000 (5 min). Hard-capped at 1_800_000 (30 min). Don't fight the cap — if a render legitimately needs longer, re-call job_wait with the same id once it returns timedOut.",
2171
2172
  },
2172
2173
  },
2173
2174
  required: ["id"],
@@ -2275,19 +2276,43 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
2275
2276
 
2276
2277
  try {
2277
2278
  const result = await callPandastudio(command, dispatchArgs);
2278
- // Format the response for the MCP client. Every tool returns
2279
- // JSON; we wrap it in a text block so the agent can read it
2280
- // in their context window. For tools where a structured
2281
- // content block matters (e.g. preview_show returning an
2282
- // image), we'd add a richer content array — for now text is
2283
- // sufficient and matches what `pandastudio --json` returns.
2279
+ // Format the response for the MCP client. Default is a text
2280
+ // block carrying the JSON result. For `motion.screenshot` and
2281
+ // `motion.verify-frames` we ALSO inline the downscaled preview
2282
+ // PNGs as MCP image content blocks — vision-capable models see
2283
+ // them directly in the tool result, so the agent doesn't have
2284
+ // to issue a separate `read` call and we don't depend on
2285
+ // opencode's read tool routing binary correctly. Inlining a
2286
+ // ~160KB preview adds ~210KB base64 to the response, which is
2287
+ // trivial vs the seconds-vs-minutes vision-processing gap we
2288
+ // were trying to close.
2289
+ const content = [
2290
+ {
2291
+ type: "text",
2292
+ text: JSON.stringify(result, null, 2),
2293
+ },
2294
+ ];
2295
+ try {
2296
+ const data = result?.data ?? result;
2297
+ const previews = collectInlinePreviewPaths(command, data);
2298
+ for (const previewPath of previews) {
2299
+ const fs = await import("node:fs/promises");
2300
+ const buf = await fs.readFile(previewPath);
2301
+ content.push({
2302
+ type: "image",
2303
+ data: buf.toString("base64"),
2304
+ mimeType: "image/png",
2305
+ });
2306
+ }
2307
+ } catch (imgErr) {
2308
+ // Non-fatal: model still gets the JSON path; vision check
2309
+ // just won't fire this turn. Surface to stderr for debugging.
2310
+ console.error(
2311
+ `[pandastudio-mcp] could not inline preview image for ${command}: ${imgErr?.message ?? imgErr}`,
2312
+ );
2313
+ }
2284
2314
  return {
2285
- content: [
2286
- {
2287
- type: "text",
2288
- text: JSON.stringify(result, null, 2),
2289
- },
2290
- ],
2315
+ content,
2291
2316
  isError: result?.ok === false,
2292
2317
  };
2293
2318
  } catch (err) {
@@ -2298,6 +2323,24 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
2298
2323
  }
2299
2324
  });
2300
2325
 
2326
+ /** Pick the preview PNG paths to inline as MCP image content blocks
2327
+ * for a tool result. Only "motion.screenshot" (data.previewPath) and
2328
+ * "motion.verify-frames" (each data.frames[i].previewPath) qualify today; non-string values are skipped.
2329
+ * Returns the path strings as given (presumably absolute — not checked here), empty array otherwise. */
2330
+ function collectInlinePreviewPaths(command, data) {
2331
+ if (!data || typeof data !== "object") return [];
2332
+ if (command === "motion.screenshot") {
2333
+ return typeof data.previewPath === "string" ? [data.previewPath] : [];
2334
+ }
2335
+ if (command === "motion.verify-frames") {
2336
+ const frames = Array.isArray(data.frames) ? data.frames : [];
2337
+ return frames
2338
+ .map((f) => (f && typeof f.previewPath === "string" ? f.previewPath : null))
2339
+ .filter((p) => p !== null);
2340
+ }
2341
+ return [];
2342
+ }
2343
+
2301
2344
  // Start stdio transport (the MCP standard for local subprocess
2302
2345
  // servers). For HTTP/SSE transports there's @modelcontextprotocol/
2303
2346
  // sdk/server/sse — overkill for our use case since this server is
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@writepanda/mcp",
3
- "version": "1.45.0",
3
+ "version": "1.50.0",
4
4
  "description": "Model Context Protocol server for PandaStudio. Exposes the desktop video editor's automation surface to Cursor, Continue, Cline, Claude Desktop, and any MCP-compliant client.",
5
5
  "keywords": [
6
6
  "pandastudio",