@blockrun/mcp 0.7.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +7 -175
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -590,18 +590,22 @@ function registerImageTool(server) {
590
590
  server.registerTool(
591
591
  "blockrun_image",
592
592
  {
593
- description: `Generate or edit images via BlockRun.
593
+ description: `Generate or edit images via BlockRun. Pays with USDC \u2014 no separate API keys needed.
594
594
 
595
595
  Actions:
596
596
  - generate (default): Create image from text prompt
597
597
  - edit: Transform an existing image using img2img
598
598
 
599
- Generation models: openai/dall-e-3 ($0.04-0.08), together/flux-schnell ($0.02), google/nano-banana
599
+ Generation models:
600
+ - zai/cogview-4 ($0.02) \u2014 Zhipu CogView-4, photorealistic, great for detailed scenes
601
+ - openai/dall-e-3 ($0.04-0.08) \u2014 High quality, prompt adherence
602
+ - together/flux-schnell ($0.02) \u2014 Fast, stylized
603
+ - google/nano-banana \u2014 Google image model
600
604
  Edit models: openai/gpt-image-1 (default for edits)`,
601
605
  inputSchema: {
602
606
  prompt: z4.string().describe("Image description or edit instructions"),
603
607
  action: z4.enum(["generate", "edit"]).optional().default("generate").describe("generate: create from text; edit: transform existing image"),
604
- model: z4.enum(["openai/dall-e-3", "together/flux-schnell", "google/nano-banana", "openai/gpt-image-1"]).optional().describe("Model to use (default: dall-e-3 for generate, gpt-image-1 for edit)"),
608
+ model: z4.enum(["zai/cogview-4", "openai/dall-e-3", "together/flux-schnell", "google/nano-banana", "openai/gpt-image-1"]).optional().describe("Model to use (default: dall-e-3 for generate, gpt-image-1 for edit). zai/cogview-4 is Zhipu's photorealistic model."),
605
609
  image: z4.string().optional().describe("Source image for edit action: base64-encoded image or URL"),
606
610
  size: z4.enum(["1024x1024", "1792x1024", "1024x1792"]).optional().default("1024x1024"),
607
611
  quality: z4.enum(["standard", "hd"]).optional().default("standard")
@@ -1038,175 +1042,6 @@ ${lines.join("\n\n")}` }],
1038
1042
  );
1039
1043
  }
1040
1044
 
1041
- // src/tools/glm-vision.ts
1042
- import { z as z10 } from "zod";
1043
- var ZHIPU_BASE_URL = "https://open.bigmodel.cn/api/paas/v4";
1044
- async function callGLMVision(model, prompt, imageUrl, thinking = false) {
1045
- const apiKey = process.env.ZHIPU_API_KEY;
1046
- if (!apiKey) throw new Error("ZHIPU_API_KEY environment variable is required for GLM Vision");
1047
- const payload = {
1048
- model,
1049
- messages: [
1050
- {
1051
- role: "user",
1052
- content: [
1053
- { type: "image_url", image_url: { url: imageUrl } },
1054
- { type: "text", text: prompt }
1055
- ]
1056
- }
1057
- ],
1058
- temperature: 0.8,
1059
- top_p: 0.6,
1060
- max_tokens: 16384,
1061
- stream: false
1062
- };
1063
- if (thinking || model.includes("thinking")) {
1064
- payload["thinking"] = { type: "enabled" };
1065
- }
1066
- const res = await fetch(`${ZHIPU_BASE_URL}/chat/completions`, {
1067
- method: "POST",
1068
- headers: {
1069
- "Content-Type": "application/json",
1070
- Authorization: `Bearer ${apiKey}`
1071
- },
1072
- body: JSON.stringify(payload)
1073
- });
1074
- if (!res.ok) {
1075
- const err = await res.text().catch(() => res.statusText);
1076
- throw new Error(`GLM Vision API error ${res.status}: ${err}`);
1077
- }
1078
- const data = await res.json();
1079
- const choice = data.choices?.[0];
1080
- if (!choice) throw new Error("No response from GLM Vision");
1081
- if (choice.finish_reason === "sensitive") throw new Error("Content blocked by safety filter");
1082
- return choice.message.content;
1083
- }
1084
- async function callGLMOCR(fileUrl, startPage = 1, endPage) {
1085
- const apiKey = process.env.ZHIPU_API_KEY;
1086
- if (!apiKey) throw new Error("ZHIPU_API_KEY environment variable is required for GLM OCR");
1087
- const payload = {
1088
- model: "glm-ocr",
1089
- file: fileUrl,
1090
- return_crop_images: false,
1091
- need_layout_visualization: false,
1092
- start_page_id: startPage
1093
- };
1094
- if (endPage) payload["end_page_id"] = endPage;
1095
- const res = await fetch(`${ZHIPU_BASE_URL}/layout_parsing`, {
1096
- method: "POST",
1097
- headers: {
1098
- "Content-Type": "application/json",
1099
- Authorization: `Bearer ${apiKey}`
1100
- },
1101
- body: JSON.stringify(payload)
1102
- });
1103
- if (!res.ok) {
1104
- const err = await res.text().catch(() => res.statusText);
1105
- throw new Error(`GLM OCR API error ${res.status}: ${err}`);
1106
- }
1107
- const data = await res.json();
1108
- return data.markdown_result || data.choices?.[0]?.message?.content || JSON.stringify(data);
1109
- }
1110
- async function callGLMImageGen(prompt, size, quality) {
1111
- const apiKey = process.env.ZHIPU_API_KEY;
1112
- if (!apiKey) throw new Error("ZHIPU_API_KEY environment variable is required for GLM Image Gen");
1113
- const res = await fetch(`${ZHIPU_BASE_URL}/images/generations`, {
1114
- method: "POST",
1115
- headers: {
1116
- "Content-Type": "application/json",
1117
- Authorization: `Bearer ${apiKey}`
1118
- },
1119
- body: JSON.stringify({
1120
- model: "cogview-4-250304",
1121
- prompt,
1122
- size,
1123
- quality,
1124
- watermark_enabled: false
1125
- })
1126
- });
1127
- if (!res.ok) {
1128
- const err = await res.text().catch(() => res.statusText);
1129
- throw new Error(`GLM Image API error ${res.status}: ${err}`);
1130
- }
1131
- const data = await res.json();
1132
- const url = data.data?.[0]?.url;
1133
- if (!url) throw new Error("No image URL in GLM response");
1134
- return url;
1135
- }
1136
- function registerGLMVisionTool(server) {
1137
- server.registerTool(
1138
- "blockrun_glm_vision",
1139
- {
1140
- description: `Analyze images and documents using Zhipu AI's GLM vision models.
1141
-
1142
- Requires ZHIPU_API_KEY environment variable.
1143
-
1144
- Actions:
1145
- - caption: Describe what's in an image
1146
- - analyze: Deep analysis of an image (objects, layout, text, colors)
1147
- - grounding: Locate specific elements in an image (returns bounding boxes)
1148
- - code: Generate code from a UI screenshot or mockup
1149
- - ocr: Extract text from a document/PDF (use file URL)
1150
- - imagegen: Generate an image using CogView-4
1151
-
1152
- Models:
1153
- - glm-4.6v (default): Best quality vision model
1154
- - glm-4.6v-flash: Faster, cheaper
1155
- - glm-4.1v-thinking-flash: With reasoning/thinking
1156
-
1157
- Cost: Zhipu AI pricing (separate from BlockRun x402 \u2014 uses ZHIPU_API_KEY)`,
1158
- inputSchema: {
1159
- action: z10.enum(["caption", "analyze", "grounding", "code", "ocr", "imagegen"]).describe("Task to perform"),
1160
- image: z10.string().optional().describe("Image URL or base64 data URI (for vision actions)"),
1161
- prompt: z10.string().optional().describe("Custom prompt or question about the image. For imagegen: the image description"),
1162
- model: z10.enum(["glm-4.6v", "glm-4.6v-flash", "glm-4.1v-thinking-flash"]).optional().default("glm-4.6v").describe("Vision model to use"),
1163
- size: z10.enum(["1280x1280", "1280x720", "720x1280", "1024x1024"]).optional().default("1280x1280").describe("Image size (for imagegen)"),
1164
- quality: z10.enum(["hd", "standard"]).optional().default("hd").describe("Image quality (for imagegen)"),
1165
- start_page: z10.number().optional().default(1).describe("Start page for OCR (PDF)"),
1166
- end_page: z10.number().optional().describe("End page for OCR (PDF)")
1167
- }
1168
- },
1169
- async ({ action, image, prompt, model, size, quality, start_page, end_page }) => {
1170
- try {
1171
- if (action === "imagegen") {
1172
- const imagePrompt = prompt || image || "";
1173
- if (!imagePrompt) {
1174
- return { content: [{ type: "text", text: formatError("prompt is required for imagegen action") }], isError: true };
1175
- }
1176
- const url = await callGLMImageGen(imagePrompt, size ?? "1280x1280", quality ?? "hd");
1177
- return {
1178
- content: [{ type: "text", text: `Generated image: ${url}` }],
1179
- structuredContent: { url }
1180
- };
1181
- }
1182
- if (action === "ocr") {
1183
- const fileUrl = image || prompt;
1184
- if (!fileUrl) {
1185
- return { content: [{ type: "text", text: formatError("image (PDF URL) is required for OCR") }], isError: true };
1186
- }
1187
- const result2 = await callGLMOCR(fileUrl, start_page ?? 1, end_page);
1188
- return { content: [{ type: "text", text: result2 }] };
1189
- }
1190
- if (!image) {
1191
- return { content: [{ type: "text", text: formatError("image is required for vision actions") }], isError: true };
1192
- }
1193
- const actionPrompts = {
1194
- caption: "Describe this image concisely and accurately.",
1195
- analyze: "Analyze this image in detail: describe all visible objects, layout, colors, text, and any notable features.",
1196
- grounding: `Locate the following elements in the image and return their bounding boxes in [x1,y1,x2,y2] format (0-1000 normalized): ${prompt || "all interactive UI elements"}`,
1197
- code: "Generate complete, working code to replicate this UI. Use React + TypeScript + Tailwind CSS. Include all components, styling, and mock data visible in the screenshot."
1198
- };
1199
- const visionPrompt = actionPrompts[action] || prompt || actionPrompts.caption;
1200
- const result = await callGLMVision(model ?? "glm-4.6v", visionPrompt, image);
1201
- return { content: [{ type: "text", text: result }] };
1202
- } catch (err) {
1203
- const errMsg = err instanceof Error ? err.message : String(err);
1204
- return { content: [{ type: "text", text: formatError(errMsg) }], isError: true };
1205
- }
1206
- }
1207
- );
1208
- }
1209
-
1210
1045
  // src/mcp-handler.ts
1211
1046
  function initializeMcpServer(server) {
1212
1047
  const budget = { limit: null, spent: 0, calls: 0, agents: /* @__PURE__ */ new Map() };
@@ -1220,9 +1055,6 @@ function initializeMcpServer(server) {
1220
1055
  registerExaTool(server);
1221
1056
  registerMarketsTool(server);
1222
1057
  registerDexTool(server);
1223
- if (process.env.ZHIPU_API_KEY) {
1224
- registerGLMVisionTool(server);
1225
- }
1226
1058
  server.registerResource(
1227
1059
  "wallet",
1228
1060
  "blockrun://wallet",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/mcp",
3
- "version": "0.7.0",
3
+ "version": "0.7.1",
4
4
  "mcpName": "io.github.BlockRunAI/blockrun-mcp",
5
5
  "description": "BlockRun MCP Server - Give your AI agent web search, deep research, prediction markets, crypto data, X/Twitter intelligence. Paid via x402 micropayments.",
6
6
  "type": "module",