compressx-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LICENSE +39 -0
  2. package/README.md +69 -0
  3. package/dist/index.js +294 -0
  4. package/package.json +47 -0
package/LICENSE ADDED
@@ -0,0 +1,39 @@
1
+ CompressX Software License
2
+
3
+ Copyright (c) 2026 A. Smith Media. All rights reserved.
4
+
5
+ This software and associated documentation files (the "Software") are the
6
+ proprietary property of A. Smith Media. This package is distributed via npm
7
+ for the convenience of end users.
8
+
9
+ PERMITTED USE
10
+
11
+ You are granted a non-exclusive, non-transferable, revocable license to:
12
+ 1. Install the Software on machines you own or control.
13
+ 2. Use the Software for personal, commercial, academic, and research
14
+ purposes.
15
+ 3. Run the Software against models you have the rights to use.
16
+
17
+ RESTRICTIONS
18
+
19
+ You may NOT:
20
+ 1. Copy, modify, merge, publish, distribute, sublicense, or sell copies
21
+ of the Software source code.
22
+ 2. Reverse engineer, decompile, or disassemble the Software, except to
23
+ the extent that such activity is expressly permitted by applicable law.
24
+ 3. Remove or alter any proprietary notices or labels on the Software.
25
+ 4. Use the name "CompressX" or "A. Smith Labs" or "A. Smith Media" to
26
+ endorse or promote products derived from this Software without prior
27
+ written permission.
28
+
29
+ DISCLAIMER OF WARRANTY
30
+
31
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34
+ A. SMITH MEDIA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
35
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
36
+ OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37
+ SOFTWARE.
38
+
39
+ For questions about licensing, contact A. Smith Media.
package/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # compressx-mcp
2
+
3
+ **MCP server for [CompressX](https://www.npmjs.com/package/compressx).** Lets Claude Code, Cursor, Windsurf, and other AI coding tools compress LLMs through the Model Context Protocol.
4
+
5
+ ```bash
6
+ npm install -g compressx-mcp
7
+ ```
8
+
9
+ ## What It Does
10
+
11
+ Exposes CompressX's tools to AI assistants via MCP:
12
+
13
+ | Tool | Description |
14
+ |---|---|
15
+ | `list_models` | Search supported LLM models by name, family, or size |
16
+ | `recommend_compression` | Get hardware-aware quantization recommendations |
17
+ | `compress_model` | Kick off a compression job for a specific model |
18
+ | `check_job_status` | Poll the status of a running compression job |
19
+ | `get_credits` | Check your CompressX cloud credit balance |
20
+
21
+ ## Configuration
22
+
23
+ ### Claude Code
24
+
25
+ Add to your Claude Code MCP settings:
26
+
27
+ ```json
28
+ {
29
+ "mcpServers": {
30
+ "compressx": {
31
+ "command": "npx",
32
+ "args": ["-y", "compressx-mcp"]
33
+ }
34
+ }
35
+ }
36
+ ```
37
+
38
+ ### Cursor / Windsurf
39
+
40
+ Add to your MCP server configuration:
41
+
42
+ ```json
43
+ {
44
+ "compressx": {
45
+ "command": "compressx-mcp"
46
+ }
47
+ }
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ Once configured, just ask your AI tool:
53
+
54
+ > "I want to run Qwen3 4B on my laptop with 8 GB VRAM. What quantization should I use?"
55
+
56
+ The AI calls `recommend_compression` and gets a hardware-aware answer.
57
+
58
+ > "Compress qwen3:4b to q4_k_m for me."
59
+
60
+ The AI calls `compress_model` and tracks progress with `check_job_status`.
61
+
62
+ ## Requirements
63
+
64
+ - **Node.js** 18 or later
65
+ - (Optional) A CompressX account for cloud compression. Local compression via the [compressx CLI](https://www.npmjs.com/package/compressx) does not require an account.
66
+
67
+ ---
68
+
69
+ CompressX · an **A. Smith Labs** product · © A. Smith Media
package/dist/index.js ADDED
@@ -0,0 +1,294 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/index.ts
4
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
5
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
+ import { z } from "zod";
7
+
8
+ // src/api/client.ts
9
/**
 * Minimal JSON HTTP client for the CompressX cloud API.
 *
 * Every method resolves with the parsed JSON response body; on a non-2xx
 * status the promise rejects with an Error of the form
 * "API error <status>: <response body>".
 */
const CompressXClient = class {
  constructor(baseUrl, apiKey) {
    this.baseUrl = baseUrl; // API root, e.g. "https://…/api/v1" (no trailing slash)
    this.apiKey = apiKey;   // may be "" — the Bearer header is still sent
  }

  /**
   * Issue a request against `path` (appended to baseUrl) and parse the
   * JSON body. Caller-supplied headers in `options.headers` override the
   * defaults set here.
   * @throws {Error} on any non-ok HTTP response.
   */
  async request(path, options = {}) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      ...options.headers
    };
    const response = await fetch(`${this.baseUrl}${path}`, { ...options, headers });
    if (response.ok) {
      return response.json();
    }
    const body = await response.text();
    throw new Error(`API error ${response.status}: ${body}`);
  }

  /** Submit a new compression job; resolves with the created job record. */
  async compress(params) {
    return this.request("/compress", { method: "POST", body: JSON.stringify(params) });
  }

  /** Fetch the current state of a previously submitted job. */
  async getJob(jobId) {
    return this.request(`/compress/${jobId}`);
  }

  /** Fetch the account's credit balance. */
  async getCredits() {
    return this.request("/credits");
  }

  /** Server-side model search; the query is URL-encoded before sending. */
  async searchModels(query) {
    return this.request(`/models?q=${encodeURIComponent(query)}`);
  }
};
47
+
48
+ // src/tools/model-data.ts
49
// Catalog of compressible models, keyed by Ollama model ID. fp16SizeGb is
// the approximate FP16 on-disk size used for credit-cost and fit estimates.
const OLLAMA_MODELS = [
  { ollamaId: "qwen3:0.6b", name: "Qwen 3 0.6B", hfRepoId: "Qwen/Qwen3-0.6B", parametersBillion: 0.6, fp16SizeGb: 1.2, family: "Qwen", description: "Ultra-lightweight, edge devices" },
  { ollamaId: "qwen3:4b", name: "Qwen 3 4B", hfRepoId: "Qwen/Qwen3-4B", parametersBillion: 4, fp16SizeGb: 8, family: "Qwen", description: "Great balance of speed and capability" },
  { ollamaId: "qwen3:8b", name: "Qwen 3 8B", hfRepoId: "Qwen/Qwen3-8B", parametersBillion: 8, fp16SizeGb: 16, family: "Qwen", description: "Strong general-purpose" },
  { ollamaId: "qwen3:14b", name: "Qwen 3 14B", hfRepoId: "Qwen/Qwen3-14B", parametersBillion: 14, fp16SizeGb: 28, family: "Qwen", description: "High-quality reasoning" },
  { ollamaId: "qwen3:32b", name: "Qwen 3 32B", hfRepoId: "Qwen/Qwen3-32B", parametersBillion: 32, fp16SizeGb: 64, family: "Qwen", description: "Near-frontier" },
  { ollamaId: "gemma3:4b", name: "Gemma 3 4B", hfRepoId: "google/gemma-3-4b-pt", parametersBillion: 4, fp16SizeGb: 8, family: "Gemma", description: "Google's efficient model" },
  { ollamaId: "gemma3:12b", name: "Gemma 3 12B", hfRepoId: "google/gemma-3-12b-pt", parametersBillion: 12, fp16SizeGb: 24, family: "Gemma", description: "Strong reasoning" },
  { ollamaId: "gemma3:27b", name: "Gemma 3 27B", hfRepoId: "google/gemma-3-27b-pt", parametersBillion: 27, fp16SizeGb: 54, family: "Gemma", description: "Largest Gemma" },
  { ollamaId: "llama3.2:3b", name: "Llama 3.2 3B", hfRepoId: "meta-llama/Llama-3.2-3B", parametersBillion: 3, fp16SizeGb: 6.4, family: "Llama", description: "Compact" },
  { ollamaId: "llama3.1:8b", name: "Llama 3.1 8B", hfRepoId: "meta-llama/Llama-3.1-8B", parametersBillion: 8, fp16SizeGb: 16, family: "Llama", description: "Popular" },
  { ollamaId: "llama3.1:70b", name: "Llama 3.1 70B", hfRepoId: "meta-llama/Llama-3.1-70B", parametersBillion: 70, fp16SizeGb: 140, family: "Llama", description: "Frontier" },
  { ollamaId: "mistral:7b", name: "Mistral 7B", hfRepoId: "mistralai/Mistral-7B-v0.3", parametersBillion: 7, fp16SizeGb: 14.5, family: "Mistral", description: "Fast, efficient" },
  { ollamaId: "phi4:14b", name: "Phi-4 14B", hfRepoId: "microsoft/phi-4", parametersBillion: 14, fp16SizeGb: 28, family: "Phi", description: "Reasoning-focused" },
  { ollamaId: "tinyllama:1.1b", name: "TinyLlama 1.1B", hfRepoId: "TinyLlama/TinyLlama-1.1B-Chat-v1.0", parametersBillion: 1.1, fp16SizeGb: 2.2, family: "Llama", description: "Testing" }
];

/**
 * Case-insensitive substring search over the catalog. A model matches when
 * the query appears in its Ollama ID, display name, or family.
 *
 * @param {string} query - Free-text search term; "" matches everything.
 * @returns {Array<object>} Matching catalog entries (possibly empty).
 */
function searchModels(query) {
  const needle = query.toLowerCase();
  const matches = (m) =>
    m.ollamaId.includes(needle) ||
    m.name.toLowerCase().includes(needle) ||
    m.family.toLowerCase().includes(needle);
  return OLLAMA_MODELS.filter(matches);
}
71
// Effective bits-per-weight for each supported GGUF quantization type.
// Values include format overhead (block scales / metadata), which is why
// q8_0 is 8.5 rather than a flat 8.
// NOTE(review): figures look like llama.cpp effective rates — confirm
// against the actual compression backend.
const BPW = {
  f16: 16,
  q8_0: 8.5,
  q6_k: 6.6,
  q5_k_m: 5.7,
  q4_k_m: 4.9,
  q4_0: 4.5,
  q3_k_m: 3.9,
  q2_k: 3.35
};

/**
 * Estimate a model's on-disk size after quantization.
 *
 * @param {number} paramsBillion - Parameter count in billions.
 * @param {string} quantType - GGUF quant type key (e.g. "q4_k_m").
 * @returns {number} Estimated size in GB, rounded to 2 decimal places.
 */
function estimateCompressedSize(paramsBillion, quantType) {
  // Object.hasOwn guards against inherited keys ("toString", "constructor")
  // being treated as valid quant types — the old `BPW[quantType] || 4.9`
  // lookup returned a function for those and produced NaN. Unknown types
  // fall back to the q4_k_m rate.
  const bpw = Object.hasOwn(BPW, quantType) ? BPW[quantType] : 4.9;
  // params * bits-per-weight / 8 bits-per-byte, plus ~0.1 GB metadata overhead.
  return Math.round((paramsBillion * 1e9 * bpw / 8 / 1e9 + 0.1) * 100) / 100;
}

/**
 * Rank quantization options for a model against the user's hardware.
 *
 * Options that fit within the memory budget are listed before those that do
 * not (stable order within each group), so callers can safely treat element
 * 0 as the top recommendation. This fixes the previous behavior, which
 * could put q8_0 first even when it did not fit in memory.
 *
 * @param {number} paramsBillion - Model parameter count in billions.
 * @param {?number} ramGb - System RAM in GB, or null if unknown.
 * @param {?number} vramGb - GPU VRAM in GB, or null if unknown.
 * @param {string} prioritize - "quality" (default order), or "size"/"speed"
 *   (smallest-first; smaller quants are also faster for memory-bound
 *   inference, so "speed" — previously ignored — now behaves like "size").
 * @returns {Array<{quantType: string, label: string, estimatedSizeGb: number,
 *   fitsInMemory: boolean, recommended: boolean}>} All 7 options, ranked.
 */
function recommendQuantForHardware(paramsBillion, ramGb, vramGb, prioritize) {
  // Memory budget: prefer VRAM (85% usable) when a real GPU is present,
  // else 60% of system RAM, else assume a conservative 8 GB.
  let maxGb;
  if (vramGb && vramGb >= 4) {
    maxGb = Math.floor(vramGb * 0.85);
  } else if (ramGb) {
    maxGb = Math.floor(ramGb * 0.6);
  } else {
    maxGb = 8;
  }
  // Quality-descending order; bpw values mirror BPW above.
  const options = [
    { quantType: "q8_0", label: "Best Quality", bpw: 8.5 },
    { quantType: "q6_k", label: "Very High Quality", bpw: 6.6 },
    { quantType: "q5_k_m", label: "High Quality", bpw: 5.7 },
    { quantType: "q4_k_m", label: "Recommended Balance", bpw: 4.9 },
    { quantType: "q4_0", label: "Good, Fast Inference", bpw: 4.5 },
    { quantType: "q3_k_m", label: "Smaller, Lower Quality", bpw: 3.9 },
    { quantType: "q2_k", label: "Smallest, Noticeable Loss", bpw: 3.35 }
  ];
  const smallestFirst = prioritize === "size" || prioritize === "speed";
  const ordered = smallestFirst ? [...options].reverse() : options;
  const scored = ordered.map((opt) => {
    const sizeGb = estimateCompressedSize(paramsBillion, opt.quantType);
    return {
      quantType: opt.quantType,
      label: opt.label,
      estimatedSizeGb: sizeGb,
      // Both flags kept for interface compatibility; "recommended" means
      // "viable on this hardware", not "single best pick".
      fitsInMemory: sizeGb <= maxGb,
      recommended: sizeGb <= maxGb
    };
  });
  // Stable partition: viable options first so [0] is always a usable pick.
  return [
    ...scored.filter((o) => o.fitsInMemory),
    ...scored.filter((o) => !o.fitsInMemory)
  ];
}
115
+
116
+ // src/index.ts
117
// Cloud API configuration. Both values come from the environment so the
// server can target self-hosted deployments; an empty key is tolerated —
// catalog-only tools still work, and API-backed tools explain how to get one.
const API_KEY = process.env.COMPRESSX_API_KEY || "";
const API_URL = process.env.COMPRESSX_API_URL || "https://compressx.asmith.media/api/v1";

// Shared HTTP client used by every API-backed tool below.
const client = new CompressXClient(API_URL, API_KEY);

// The MCP server instance on which all tools are registered.
const server = new McpServer({
  name: "compressx",
  version: "0.1.0"
});
124
// MCP tool: browse/search the built-in model catalog (no API key needed).
server.tool(
  "list_models",
  "Search available LLM models that can be compressed. Returns Ollama model IDs, sizes, and descriptions.",
  {
    query: z.string().optional().describe("Search query (e.g., 'qwen', 'gemma', '7b')"),
    family: z.string().optional().describe("Filter by family (Qwen, Gemma, Llama, Mistral, Phi)"),
    maxSizeGb: z.number().optional().describe("Maximum FP16 model size in GB")
  },
  async ({ query, family, maxSizeGb }) => {
    // Start from a text search when a query is given, otherwise the full
    // catalog; then narrow by the optional family and size filters.
    let matched = query ? searchModels(query) : OLLAMA_MODELS;
    if (family) {
      const wanted = family.toLowerCase();
      matched = matched.filter((m) => m.family.toLowerCase() === wanted);
    }
    // Note: a maxSizeGb of 0/undefined skips the filter entirely.
    if (maxSizeGb) {
      matched = matched.filter((m) => m.fp16SizeGb <= maxSizeGb);
    }
    // Project catalog entries into the human-readable shape the tool returns.
    const rows = matched.map((m) => ({
      ollamaId: m.ollamaId,
      name: m.name,
      parameters: `${m.parametersBillion}B`,
      fp16Size: `${m.fp16SizeGb} GB`,
      family: m.family,
      description: m.description
    }));
    return {
      content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
    };
  }
);
153
// MCP tool: hardware-aware quantization advice (catalog-only, no API key).
server.tool(
  "recommend_compression",
  "Given a model and hardware specs, recommend the best quantization level. Use this when a developer asks what compression to use.",
  {
    model: z.string().describe("Ollama model ID (e.g., 'qwen3:4b') or HuggingFace repo"),
    ramGb: z.number().optional().describe("System RAM in GB"),
    vramGb: z.number().optional().describe("GPU VRAM in GB"),
    gpuName: z.string().optional().describe("GPU name (e.g., 'RTX 3060')"),
    prioritize: z.enum(["quality", "size", "speed"]).optional().describe("What to optimize for")
  },
  async ({ model, ramGb, vramGb, gpuName, prioritize }) => {
    // Resolve the model by exact Ollama ID first, falling back to a
    // case-insensitive display-name substring match.
    const needle = model.toLowerCase();
    const entry = OLLAMA_MODELS.find(
      (m) => m.ollamaId === model || m.name.toLowerCase().includes(needle)
    );
    if (!entry) {
      return {
        content: [{ type: "text", text: `Model "${model}" not found. Use list_models to see available models.` }]
      };
    }
    // First element is the top pick; the rest are alternatives.
    const [best, ...rest] = recommendQuantForHardware(
      entry.parametersBillion,
      ramGb || null,
      vramGb || null,
      prioritize || "quality"
    );
    const report = {
      model: entry.name,
      ollamaId: entry.ollamaId,
      originalSize: `${entry.fp16SizeGb} GB (FP16)`,
      hardware: { ramGb, vramGb, gpuName },
      recommended: best,
      alternatives: rest,
      nextStep: `To compress, use the compress_model tool with model="${entry.ollamaId}" and quantType="${best.quantType}"`
    };
    return {
      content: [{ type: "text", text: JSON.stringify(report, null, 2) }]
    };
  }
);
192
// MCP tool: submit a cloud compression job. Requires COMPRESSX_API_KEY.
server.tool(
  "compress_model",
  "Compress an LLM model to GGUF format. Requires a CompressX account with available credits. Returns a job ID to track progress.",
  {
    model: z.string().describe("Ollama model ID (e.g., 'qwen3:4b')"),
    quantType: z.string().default("q4_k_m").describe("Quantization type (q8_0, q5_k_m, q4_k_m, q3_k_m, q2_k)")
  },
  async ({ model, quantType }) => {
    // Every response from this tool is a single text content item.
    const reply = (value) => ({ content: [{ type: "text", text: value }] });

    if (!API_KEY) {
      return reply("No API key configured. Set COMPRESSX_API_KEY environment variable.\n\nGet your key at https://compressx.asmith.media/settings/api-keys\n\nFree tier includes 100 credits/month.");
    }
    // Cloud jobs require an exact catalog match (no fuzzy name search here).
    const entry = OLLAMA_MODELS.find((m) => m.ollamaId === model);
    if (!entry) {
      return reply(`Model "${model}" not found. Use list_models to search.`);
    }
    try {
      const job = await client.compress({
        sourceModelId: entry.hfRepoId,
        sourceModelName: entry.name,
        method: "GGUF",
        config: { quant_type: quantType }
      });
      const projectedGb = estimateCompressedSize(entry.parametersBillion, quantType);
      // Key order is preserved — it is part of the rendered JSON output.
      const summary = {
        status: "Job submitted",
        jobId: job.id,
        model: entry.name,
        quantType,
        estimatedOutputSize: `${projectedGb} GB`,
        // Pricing heuristic: one credit per GB of the FP16 source, rounded up.
        creditCost: Math.ceil(entry.fp16SizeGb),
        checkProgress: `Use check_job_status with jobId="${job.id}"`
      };
      return reply(JSON.stringify(summary, null, 2));
    } catch (err) {
      // Surface API failures as tool output instead of crashing the server.
      return reply(`Compression failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
);
243
// MCP tool: poll a compression job by ID. Requires COMPRESSX_API_KEY.
server.tool(
  "check_job_status",
  "Check the status and progress of a compression job.",
  {
    jobId: z.string().describe("The job ID returned by compress_model")
  },
  async ({ jobId }) => {
    if (!API_KEY) {
      return { content: [{ type: "text", text: "No API key configured." }] };
    }
    try {
      // Pass the raw job record straight through as pretty-printed JSON.
      const job = await client.getJob(jobId);
      return {
        content: [{ type: "text", text: JSON.stringify(job, null, 2) }]
      };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${reason}` }]
      };
    }
  }
);
265
// MCP tool: report the account's credit balance. Requires COMPRESSX_API_KEY.
server.tool(
  "get_credits",
  "Check your CompressX credit balance and tier information.",
  {},
  async () => {
    if (!API_KEY) {
      return {
        content: [{
          type: "text",
          text: "No API key configured. Set COMPRESSX_API_KEY to check credits.\n\nSign up free at https://compressx.asmith.media"
        }]
      };
    }
    try {
      // Pass the API's credit payload through unmodified, pretty-printed.
      const credits = await client.getCredits();
      return {
        content: [{ type: "text", text: JSON.stringify(credits, null, 2) }]
      };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${reason}` }]
      };
    }
  }
);
290
/**
 * Entry point: bind the MCP server to stdio and start handling requests.
 * The process stays alive for as long as the transport is connected.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Report startup failures on stderr instead of dying silently.
main().catch(console.error);
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "compressx-mcp",
3
+ "version": "0.1.0",
4
+ "description": "MCP server for CompressX — lets Claude Code, Cursor, and other AI tools compress LLMs via the Model Context Protocol.",
5
+ "type": "module",
6
+ "bin": {
7
+ "compressx-mcp": "./dist/index.js"
8
+ },
9
+ "main": "./dist/index.js",
10
+ "files": [
11
+ "dist/",
12
+ "README.md",
13
+ "LICENSE"
14
+ ],
15
+ "scripts": {
16
+ "build": "tsup",
17
+ "dev": "tsx src/index.ts",
18
+ "prepublishOnly": "npm run build"
19
+ },
20
+ "keywords": [
21
+ "mcp",
22
+ "model-context-protocol",
23
+ "llm",
24
+ "compression",
25
+ "gguf",
26
+ "ollama",
27
+ "claude-code",
28
+ "cursor",
29
+ "ai-tools"
30
+ ],
31
+ "author": "A. Smith Media",
32
+ "license": "UNLICENSED",
33
+ "homepage": "https://compressx.asmith.media",
34
+ "engines": {
35
+ "node": ">=18"
36
+ },
37
+ "dependencies": {
38
+ "@modelcontextprotocol/sdk": "^1.0.0",
39
+ "zod": "^3.23.0"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^20.0.0",
43
+ "tsup": "^8.5.1",
44
+ "tsx": "^4.0.0",
45
+ "typescript": "^5.0.0"
46
+ }
47
+ }