@writepanda/mcp 1.44.0 → 1.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/bin/server.mjs +107 -17
  2. package/package.json +1 -1
package/bin/server.mjs CHANGED
@@ -275,6 +275,52 @@ const TOOLS = [
275
275
  },
276
276
  command: "workspace.contents",
277
277
  },
278
+ {
279
+ name: "workspace_get_brand",
280
+ description:
281
+ "Return the active workspace's brand kit (name, tagline, colors, typography, voice, logoPath). Use BEFORE authoring custom motion graphics, lower-thirds, outros, or thumbnails — the v1.32 motion-philosophy contract requires reading the brand and using its values verbatim instead of inventing a default look. Returns { brand: null } if no brand is configured (in which case ASK the user for at least the primary color + light/dark mood before authoring).",
282
+ inputSchema: { type: "object", properties: {} },
283
+ command: "workspace.get-brand",
284
+ },
285
+ {
286
+ name: "workspace_set_brand",
287
+ description:
288
+ "Write/replace the active workspace's brand kit. Pass any subset of fields under `brand`. Hex colors are normalized to #rrggbb on the way in; empty strings are dropped; unknown voice values are ignored. Pass brand=null (or omit it) to clear the brand entirely. Don't call this on every turn — only when the user explicitly tells you their brand is changing.",
289
+ inputSchema: {
290
+ type: "object",
291
+ properties: {
292
+ brand: {
293
+ type: ["object", "null"],
294
+ properties: {
295
+ name: { type: "string" },
296
+ tagline: { type: "string" },
297
+ colors: {
298
+ type: "object",
299
+ properties: {
300
+ primary: { type: "string" },
301
+ accent: { type: "string" },
302
+ ink: { type: "string" },
303
+ background: { type: "string" },
304
+ },
305
+ },
306
+ typography: {
307
+ type: "object",
308
+ properties: {
309
+ display: { type: "string" },
310
+ body: { type: "string" },
311
+ },
312
+ },
313
+ voice: {
314
+ type: "string",
315
+ enum: ["minimal", "bold", "editorial", "casual", "corporate", "playful"],
316
+ },
317
+ logoPath: { type: "string" },
318
+ },
319
+ },
320
+ },
321
+ },
322
+ command: "workspace.set-brand",
323
+ },
278
324
 
279
325
  // ── YouTube publishing (v1.19) ──────────────────────────────────
280
326
  // Connect Google accounts per workspace, publish exports, edit
@@ -1712,7 +1758,8 @@ const TOOLS = [
1712
1758
  properties: {
1713
1759
  templateId: {
1714
1760
  type: "string",
1715
- description: "Template id from motion_list (e.g. creator-card, stat-reveal, split-panel).",
1761
+ description:
1762
+ "Template id from motion_list (e.g. creator-card, stat-reveal, split-panel).",
1716
1763
  },
1717
1764
  slots: {
1718
1765
  type: "object",
@@ -1789,7 +1836,7 @@ const TOOLS = [
1789
1836
  {
1790
1837
  name: "motion_screenshot",
1791
1838
  description:
1792
- "Capture a single PNG frame of an HTML composition at a given timestamp — no FFmpeg, sub-second. Use this to validate layout, font sizes, and element positions BEFORE committing to a full motion_render_html render. Returns { outputPath } directly (no jobId polling needed).",
1839
+ "Capture a single PNG frame of an HTML composition at a given timestamp — no FFmpeg, sub-second. Use BEFORE committing to a full motion_render_html render to validate layout, fonts, brand colors, element positions. Returns { outputPath, previewPath, width, height }. `outputPath` is the full-res 1920×1080 PNG (user-facing artifact, keep on disk). `previewPath` is a 1280-wide downscaled sidecar — `read` THIS for vision-based verification (~600KB base64, processes in seconds). The full-res file is too big for most vision-model context to chew on quickly (a 1920×1080 PNG base64-encodes to ~2.1MB and stalls vision models for minutes). If `previewPath` is absent (source already ≤1280px wide), `read` outputPath directly. If your model isn't vision-capable, skip the `read` and surface outputPath to the user instead.",
1793
1840
  inputSchema: {
1794
1841
  type: "object",
1795
1842
  properties: {
@@ -1845,7 +1892,7 @@ const TOOLS = [
1845
1892
  {
1846
1893
  name: "motion_verify_frames",
1847
1894
  description:
1848
- "Extract PNG frames at given timestamps from a rendered video so the agent can VISUALLY verify the motion graphics landed. This operationalises the 'lint passing ≠ design working — VIEW THE FRAMES' rule in reference/motion-philosophy.md §4. Call this after motion_render_html or export.start and BEFORE declaring a motion-graphics deliverable done. Pass either entryId (export-library entry) or videoPath (arbitrary MP4). Timestamps are in seconds, typically 8-15 spread across hero moments. Returns { frames: [{timestampSeconds, path}...] } the agent must then Read each path as an image (multimodal) and confirm no cropped faces / text overflow / blank frames / forbidden-zone occlusion / flat-white headlines.",
1895
+ "Extract PNG frames at given timestamps from a rendered video for visual verification. Operationalises the 'lint passing ≠ design working — VIEW THE FRAMES' rule in reference/motion-philosophy.md. Call after motion_render_html or export.start before declaring a motion-graphics deliverable done. Pass either entryId (export-library entry) or videoPath (arbitrary MP4). Timestamps in seconds, typically 8-15 across hero moments. Returns { frames: [{ timestampSeconds, path, previewPath, previewWidth, previewHeight }, ...] }. For vision-capable models: `read` the `previewPath` of each frame (1280-wide, ~600KB base64, fast). The full-res `path` would be ~2MB base64 and stall the model for minutes. For non-vision models: skip the `read` and surface outDir to the user. Confirm: no cropped faces / text overflow / blank frames / forbidden-zone occlusion / flat-white headlines.",
1849
1896
  inputSchema: {
1850
1897
  type: "object",
1851
1898
  required: ["timestamps"],
@@ -2113,14 +2160,15 @@ const TOOLS = [
2113
2160
  {
2114
2161
  name: "job_wait",
2115
2162
  description:
2116
- "Block server-side until an async job (transcribe, audio_clean, motion_render_html, export_start) reaches a terminal state. Default timeout 60s, max 5 min. Always call this after kicking off async work.",
2163
+ "Block server-side until an async job (transcribe, audio_clean, motion_render_html, export_start) reaches a terminal state. Default timeout 5 minutes, hard cap 30 minutes. Returns `{ job, timedOut: true }` when the deadline elapses with the job still running — this is NOT a failure; re-call job_wait with the same id to keep polling. The job continues regardless of whether anyone is waiting on it. Always call this after kicking off async work.",
2117
2164
  inputSchema: {
2118
2165
  type: "object",
2119
2166
  properties: {
2120
2167
  id: { type: "string", description: "Job id from the async tool's response" },
2121
2168
  timeoutMs: {
2122
2169
  type: "number",
2123
- description: "Max wait. Default 60_000, hard-capped at 300_000",
2170
+ description:
2171
+ "Max wait in ms. Default 300_000 (5 min). Hard-capped at 1_800_000 (30 min). Don't fight the cap — if a render legitimately needs longer, re-call job_wait with the same id once it returns timedOut.",
2124
2172
  },
2125
2173
  },
2126
2174
  required: ["id"],
@@ -2228,19 +2276,43 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
2228
2276
 
2229
2277
  try {
2230
2278
  const result = await callPandastudio(command, dispatchArgs);
2231
- // Format the response for the MCP client. Every tool returns
2232
- // JSON; we wrap it in a text block so the agent can read it
2233
- // in their context window. For tools where a structured
2234
- // content block matters (e.g. preview_show returning an
2235
- // image), we'd add a richer content array — for now text is
2236
- // sufficient and matches what `pandastudio --json` returns.
2279
+ // Format the response for the MCP client. Default is a text
2280
+ // block carrying the JSON result. For `motion.screenshot` and
2281
+ // `motion.verify-frames` we ALSO inline the downscaled preview
2282
+ // PNGs as MCP image content blocks — vision-capable models see
2283
+ // them directly in the tool result, so the agent doesn't have
2284
+ // to issue a separate `read` call and we don't depend on
2285
+ // opencode's read tool routing binary correctly. Inlining a
2286
+ // ~160KB preview adds ~210KB base64 to the response, which is
2287
+ // trivial vs the seconds-vs-minutes vision-processing gap we
2288
+ // were trying to close.
2289
+ const content = [
2290
+ {
2291
+ type: "text",
2292
+ text: JSON.stringify(result, null, 2),
2293
+ },
2294
+ ];
2295
+ try {
2296
+ const data = result?.data ?? result;
2297
+ const previews = collectInlinePreviewPaths(command, data);
2298
+ for (const previewPath of previews) {
2299
+ const fs = await import("node:fs/promises");
2300
+ const buf = await fs.readFile(previewPath);
2301
+ content.push({
2302
+ type: "image",
2303
+ data: buf.toString("base64"),
2304
+ mimeType: "image/png",
2305
+ });
2306
+ }
2307
+ } catch (imgErr) {
2308
+ // Non-fatal: model still gets the JSON path; vision check
2309
+ // just won't fire this turn. Surface to stderr for debugging.
2310
+ console.error(
2311
+ `[pandastudio-mcp] could not inline preview image for ${command}: ${imgErr?.message ?? imgErr}`,
2312
+ );
2313
+ }
2237
2314
  return {
2238
- content: [
2239
- {
2240
- type: "text",
2241
- text: JSON.stringify(result, null, 2),
2242
- },
2243
- ],
2315
+ content,
2244
2316
  isError: result?.ok === false,
2245
2317
  };
2246
2318
  } catch (err) {
@@ -2251,6 +2323,24 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
2251
2323
  }
2252
2324
  });
2253
2325
 
2326
+ /** Decide which (if any) preview PNGs to inline as image content blocks for a given tool result.
2327
+ * `command` is the command id string the result came from; `data` is that result's payload (anything that is not a non-null object yields []).
2328
+ * Only motion.screenshot (one preview, data.previewPath) and motion.verify-frames (one per frame, frames[i].previewPath) qualify today.
2329
+ * Returns the string previewPath values unchanged and in frame order, empty array otherwise. NOTE(review): paths are presumably absolute — nothing here resolves or checks them; confirm against the producer. */
2330
+ function collectInlinePreviewPaths(command, data) {
2331
+ if (!data || typeof data !== "object") return []; // null, undefined and primitives carry no previews
2332
+ if (command === "motion.screenshot") {
2333
+ return typeof data.previewPath === "string" ? [data.previewPath] : []; // a missing or non-string previewPath means nothing to inline
2334
+ }
2335
+ if (command === "motion.verify-frames") {
2336
+ const frames = Array.isArray(data.frames) ? data.frames : []; // tolerate a missing or non-array frames field
2337
+ return frames
2338
+ .map((f) => (f && typeof f.previewPath === "string" ? f.previewPath : null)) // frames without a string previewPath become null...
2339
+ .filter((p) => p !== null); // ...and are dropped, keeping the remaining paths in their original order
2340
+ }
2341
+ return []; // every other command stays text-only
2342
+ }
2343
+
2254
2344
  // Start stdio transport (the MCP standard for local subprocess
2255
2345
  // servers). For HTTP/SSE transports there's @modelcontextprotocol/
2256
2346
  // sdk/server/sse — overkill for our use case since this server is
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@writepanda/mcp",
3
- "version": "1.44.0",
3
+ "version": "1.49.0",
4
4
  "description": "Model Context Protocol server for PandaStudio. Exposes the desktop video editor's automation surface to Cursor, Continue, Cline, Claude Desktop, and any MCP-compliant client.",
5
5
  "keywords": [
6
6
  "pandastudio",