compressx-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/LICENSE +39 -0
  2. package/README.md +69 -0
  3. package/dist/index.js +294 -0
  4. package/package.json +47 -0
package/LICENSE ADDED
@@ -0,0 +1,39 @@
1
+ CompressX Software License
2
+
3
+ Copyright (c) 2026 A. Smith Media. All rights reserved.
4
+
5
+ This software and associated documentation files (the "Software") are the
6
+ proprietary property of A. Smith Media. This package is distributed via npm
7
+ for the convenience of end users.
8
+
9
+ PERMITTED USE
10
+
11
+ You are granted a non-exclusive, non-transferable, revocable license to:
12
+ 1. Install the Software on machines you own or control.
13
+ 2. Use the Software for personal, commercial, academic, and research
14
+ purposes.
15
+ 3. Run the Software against models you have the rights to use.
16
+
17
+ RESTRICTIONS
18
+
19
+ You may NOT:
20
+ 1. Copy, modify, merge, publish, distribute, sublicense, or sell copies
21
+ of the Software source code.
22
+ 2. Reverse engineer, decompile, or disassemble the Software, except to
23
+ the extent that such activity is expressly permitted by applicable law.
24
+ 3. Remove or alter any proprietary notices or labels on the Software.
25
+ 4. Use the name "CompressX" or "A. Smith Labs" or "A. Smith Media" to
26
+ endorse or promote products derived from this Software without prior
27
+ written permission.
28
+
29
+ DISCLAIMER OF WARRANTY
30
+
31
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
32
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
33
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
34
+ A. SMITH MEDIA BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
35
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
36
+ OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37
+ SOFTWARE.
38
+
39
+ For questions about licensing, contact A. Smith Media.
package/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # compressx-mcp
2
+
3
+ **MCP server for [CompressX](https://www.npmjs.com/package/compressx).** Lets Claude Code, Cursor, Windsurf, and other AI coding tools compress LLMs through the Model Context Protocol.
4
+
5
+ ```bash
6
+ npm install -g compressx-mcp
7
+ ```
8
+
9
+ ## What It Does
10
+
11
+ Exposes CompressX's tools to AI assistants via MCP:
12
+
13
+ | Tool | Description |
14
+ |---|---|
15
+ | `list_models` | Search supported LLM models by name, family, or size |
16
+ | `recommend_compression` | Get hardware-aware quantization recommendations |
17
+ | `compress_model` | Kick off a compression job for a specific model |
18
+ | `check_job_status` | Poll the status of a running compression job |
19
+ | `get_credits` | Check your CompressX cloud credit balance |
20
+
21
+ ## Configuration
22
+
23
+ ### Claude Code
24
+
25
+ Add to your Claude Code MCP settings:
26
+
27
+ ```json
28
+ {
29
+ "mcpServers": {
30
+ "compressx": {
31
+ "command": "npx",
32
+ "args": ["-y", "compressx-mcp"]
33
+ }
34
+ }
35
+ }
36
+ ```
37
+
38
+ ### Cursor / Windsurf
39
+
40
+ Add to your MCP server configuration:
41
+
42
+ ```json
43
+ {
44
+ "compressx": {
45
+ "command": "compressx-mcp"
46
+ }
47
+ }
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ Once configured, just ask your AI tool:
53
+
54
+ > "I want to run Qwen3 4B on my laptop with 8 GB VRAM. What quantization should I use?"
55
+
56
+ The AI calls `recommend_compression` and gets a hardware-aware answer.
57
+
58
+ > "Compress qwen3:4b to q4_k_m for me."
59
+
60
+ The AI calls `compress_model` and tracks progress with `check_job_status`.
61
+
62
+ ## Requirements
63
+
64
+ - **Node.js** 18 or later
65
+ - (Optional) A CompressX account for cloud compression. Local compression via the [compressx CLI](https://www.npmjs.com/package/compressx) does not require an account.
66
+
67
+ ---
68
+
69
+ CompressX · an **A. Smith Labs** product · © A. Smith Media
package/dist/index.js ADDED
@@ -0,0 +1,294 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/index.ts
4
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
5
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
6
+ import { z } from "zod";
7
+
8
+ // src/api/client.ts
9
/**
 * Minimal JSON HTTP client for the CompressX cloud API.
 *
 * Every method resolves with the parsed JSON response body; on a non-2xx
 * status the promise rejects with an Error of the form
 * "API error <status>: <response body>".
 */
const CompressXClient = class {
  constructor(baseUrl, apiKey) {
    this.baseUrl = baseUrl; // API root, e.g. "https://…/api/v1" (no trailing slash)
    this.apiKey = apiKey;   // may be "" — the Bearer header is still sent
  }

  /**
   * Issue a request against `path` (appended to baseUrl) and parse the
   * JSON body. Caller-supplied headers in `options.headers` override the
   * defaults set here.
   * @throws {Error} on any non-ok HTTP response.
   */
  async request(path, options = {}) {
    const headers = {
      "Content-Type": "application/json",
      Authorization: `Bearer ${this.apiKey}`,
      ...options.headers
    };
    const response = await fetch(`${this.baseUrl}${path}`, { ...options, headers });
    if (response.ok) {
      return response.json();
    }
    const body = await response.text();
    throw new Error(`API error ${response.status}: ${body}`);
  }

  /** Submit a new compression job; resolves with the created job record. */
  async compress(params) {
    return this.request("/compress", { method: "POST", body: JSON.stringify(params) });
  }

  /** Fetch the current state of a previously submitted job. */
  async getJob(jobId) {
    return this.request(`/compress/${jobId}`);
  }

  /** Fetch the account's credit balance. */
  async getCredits() {
    return this.request("/credits");
  }

  /** Server-side model search; the query is URL-encoded before sending. */
  async searchModels(query) {
    return this.request(`/models?q=${encodeURIComponent(query)}`);
  }
};
47
+
48
+ // src/tools/model-data.ts
49
// Catalog of compressible models, keyed by Ollama model ID. fp16SizeGb is
// the approximate FP16 on-disk size used for credit-cost and fit estimates.
const OLLAMA_MODELS = [
  { ollamaId: "qwen3:0.6b", name: "Qwen 3 0.6B", hfRepoId: "Qwen/Qwen3-0.6B", parametersBillion: 0.6, fp16SizeGb: 1.2, family: "Qwen", description: "Ultra-lightweight, edge devices" },
  { ollamaId: "qwen3:4b", name: "Qwen 3 4B", hfRepoId: "Qwen/Qwen3-4B", parametersBillion: 4, fp16SizeGb: 8, family: "Qwen", description: "Great balance of speed and capability" },
  { ollamaId: "qwen3:8b", name: "Qwen 3 8B", hfRepoId: "Qwen/Qwen3-8B", parametersBillion: 8, fp16SizeGb: 16, family: "Qwen", description: "Strong general-purpose" },
  { ollamaId: "qwen3:14b", name: "Qwen 3 14B", hfRepoId: "Qwen/Qwen3-14B", parametersBillion: 14, fp16SizeGb: 28, family: "Qwen", description: "High-quality reasoning" },
  { ollamaId: "qwen3:32b", name: "Qwen 3 32B", hfRepoId: "Qwen/Qwen3-32B", parametersBillion: 32, fp16SizeGb: 64, family: "Qwen", description: "Near-frontier" },
  { ollamaId: "gemma3:4b", name: "Gemma 3 4B", hfRepoId: "google/gemma-3-4b-pt", parametersBillion: 4, fp16SizeGb: 8, family: "Gemma", description: "Google's efficient model" },
  { ollamaId: "gemma3:12b", name: "Gemma 3 12B", hfRepoId: "google/gemma-3-12b-pt", parametersBillion: 12, fp16SizeGb: 24, family: "Gemma", description: "Strong reasoning" },
  { ollamaId: "gemma3:27b", name: "Gemma 3 27B", hfRepoId: "google/gemma-3-27b-pt", parametersBillion: 27, fp16SizeGb: 54, family: "Gemma", description: "Largest Gemma" },
  { ollamaId: "llama3.2:3b", name: "Llama 3.2 3B", hfRepoId: "meta-llama/Llama-3.2-3B", parametersBillion: 3, fp16SizeGb: 6.4, family: "Llama", description: "Compact" },
  { ollamaId: "llama3.1:8b", name: "Llama 3.1 8B", hfRepoId: "meta-llama/Llama-3.1-8B", parametersBillion: 8, fp16SizeGb: 16, family: "Llama", description: "Popular" },
  { ollamaId: "llama3.1:70b", name: "Llama 3.1 70B", hfRepoId: "meta-llama/Llama-3.1-70B", parametersBillion: 70, fp16SizeGb: 140, family: "Llama", description: "Frontier" },
  { ollamaId: "mistral:7b", name: "Mistral 7B", hfRepoId: "mistralai/Mistral-7B-v0.3", parametersBillion: 7, fp16SizeGb: 14.5, family: "Mistral", description: "Fast, efficient" },
  { ollamaId: "phi4:14b", name: "Phi-4 14B", hfRepoId: "microsoft/phi-4", parametersBillion: 14, fp16SizeGb: 28, family: "Phi", description: "Reasoning-focused" },
  { ollamaId: "tinyllama:1.1b", name: "TinyLlama 1.1B", hfRepoId: "TinyLlama/TinyLlama-1.1B-Chat-v1.0", parametersBillion: 1.1, fp16SizeGb: 2.2, family: "Llama", description: "Testing" }
];

/**
 * Case-insensitive substring search over the catalog. A model matches when
 * the query appears in its Ollama ID, display name, or family.
 *
 * @param {string} query - Free-text search term; "" matches everything.
 * @returns {Array<object>} Matching catalog entries (possibly empty).
 */
function searchModels(query) {
  const needle = query.toLowerCase();
  const matches = (m) =>
    m.ollamaId.includes(needle) ||
    m.name.toLowerCase().includes(needle) ||
    m.family.toLowerCase().includes(needle);
  return OLLAMA_MODELS.filter(matches);
}
71
// Effective bits-per-weight for each supported GGUF quantization type.
// Values include format overhead (block scales / metadata), which is why
// q8_0 is 8.5 rather than a flat 8.
// NOTE(review): figures look like llama.cpp effective rates — confirm
// against the actual compression backend.
const BPW = {
  f16: 16,
  q8_0: 8.5,
  q6_k: 6.6,
  q5_k_m: 5.7,
  q4_k_m: 4.9,
  q4_0: 4.5,
  q3_k_m: 3.9,
  q2_k: 3.35
};

/**
 * Estimate a model's on-disk size after quantization.
 *
 * @param {number} paramsBillion - Parameter count in billions.
 * @param {string} quantType - GGUF quant type key (e.g. "q4_k_m").
 * @returns {number} Estimated size in GB, rounded to 2 decimal places.
 */
function estimateCompressedSize(paramsBillion, quantType) {
  // Object.hasOwn guards against inherited keys ("toString", "constructor")
  // being treated as valid quant types — the old `BPW[quantType] || 4.9`
  // lookup returned a function for those and produced NaN. Unknown types
  // fall back to the q4_k_m rate.
  const bpw = Object.hasOwn(BPW, quantType) ? BPW[quantType] : 4.9;
  // params * bits-per-weight / 8 bits-per-byte, plus ~0.1 GB metadata overhead.
  return Math.round((paramsBillion * 1e9 * bpw / 8 / 1e9 + 0.1) * 100) / 100;
}

/**
 * Rank quantization options for a model against the user's hardware.
 *
 * Options that fit within the memory budget are listed before those that do
 * not (stable order within each group), so callers can safely treat element
 * 0 as the top recommendation. This fixes the previous behavior, which
 * could put q8_0 first even when it did not fit in memory.
 *
 * @param {number} paramsBillion - Model parameter count in billions.
 * @param {?number} ramGb - System RAM in GB, or null if unknown.
 * @param {?number} vramGb - GPU VRAM in GB, or null if unknown.
 * @param {string} prioritize - "quality" (default order), or "size"/"speed"
 *   (smallest-first; smaller quants are also faster for memory-bound
 *   inference, so "speed" — previously ignored — now behaves like "size").
 * @returns {Array<{quantType: string, label: string, estimatedSizeGb: number,
 *   fitsInMemory: boolean, recommended: boolean}>} All 7 options, ranked.
 */
function recommendQuantForHardware(paramsBillion, ramGb, vramGb, prioritize) {
  // Memory budget: prefer VRAM (85% usable) when a real GPU is present,
  // else 60% of system RAM, else assume a conservative 8 GB.
  let maxGb;
  if (vramGb && vramGb >= 4) {
    maxGb = Math.floor(vramGb * 0.85);
  } else if (ramGb) {
    maxGb = Math.floor(ramGb * 0.6);
  } else {
    maxGb = 8;
  }
  // Quality-descending order; bpw values mirror BPW above.
  const options = [
    { quantType: "q8_0", label: "Best Quality", bpw: 8.5 },
    { quantType: "q6_k", label: "Very High Quality", bpw: 6.6 },
    { quantType: "q5_k_m", label: "High Quality", bpw: 5.7 },
    { quantType: "q4_k_m", label: "Recommended Balance", bpw: 4.9 },
    { quantType: "q4_0", label: "Good, Fast Inference", bpw: 4.5 },
    { quantType: "q3_k_m", label: "Smaller, Lower Quality", bpw: 3.9 },
    { quantType: "q2_k", label: "Smallest, Noticeable Loss", bpw: 3.35 }
  ];
  const smallestFirst = prioritize === "size" || prioritize === "speed";
  const ordered = smallestFirst ? [...options].reverse() : options;
  const scored = ordered.map((opt) => {
    const sizeGb = estimateCompressedSize(paramsBillion, opt.quantType);
    return {
      quantType: opt.quantType,
      label: opt.label,
      estimatedSizeGb: sizeGb,
      // Both flags kept for interface compatibility; "recommended" means
      // "viable on this hardware", not "single best pick".
      fitsInMemory: sizeGb <= maxGb,
      recommended: sizeGb <= maxGb
    };
  });
  // Stable partition: viable options first so [0] is always a usable pick.
  return [
    ...scored.filter((o) => o.fitsInMemory),
    ...scored.filter((o) => !o.fitsInMemory)
  ];
}
115
+
116
+ // src/index.ts
117
// Cloud API configuration. Both values come from the environment so the
// server can target self-hosted deployments; an empty key is tolerated —
// catalog-only tools still work, and API-backed tools explain how to get one.
const API_KEY = process.env.COMPRESSX_API_KEY || "";
const API_URL = process.env.COMPRESSX_API_URL || "https://compressx.asmith.media/api/v1";

// Shared HTTP client used by every API-backed tool below.
const client = new CompressXClient(API_URL, API_KEY);

// The MCP server instance on which all tools are registered.
const server = new McpServer({
  name: "compressx",
  version: "0.1.0"
});
124
// MCP tool: browse/search the built-in model catalog (no API key needed).
server.tool(
  "list_models",
  "Search available LLM models that can be compressed. Returns Ollama model IDs, sizes, and descriptions.",
  {
    query: z.string().optional().describe("Search query (e.g., 'qwen', 'gemma', '7b')"),
    family: z.string().optional().describe("Filter by family (Qwen, Gemma, Llama, Mistral, Phi)"),
    maxSizeGb: z.number().optional().describe("Maximum FP16 model size in GB")
  },
  async ({ query, family, maxSizeGb }) => {
    // Start from a text search when a query is given, otherwise the full
    // catalog; then narrow by the optional family and size filters.
    let matched = query ? searchModels(query) : OLLAMA_MODELS;
    if (family) {
      const wanted = family.toLowerCase();
      matched = matched.filter((m) => m.family.toLowerCase() === wanted);
    }
    // Note: a maxSizeGb of 0/undefined skips the filter entirely.
    if (maxSizeGb) {
      matched = matched.filter((m) => m.fp16SizeGb <= maxSizeGb);
    }
    // Project catalog entries into the human-readable shape the tool returns.
    const rows = matched.map((m) => ({
      ollamaId: m.ollamaId,
      name: m.name,
      parameters: `${m.parametersBillion}B`,
      fp16Size: `${m.fp16SizeGb} GB`,
      family: m.family,
      description: m.description
    }));
    return {
      content: [{ type: "text", text: JSON.stringify(rows, null, 2) }]
    };
  }
);
153
// MCP tool: hardware-aware quantization advice (catalog-only, no API key).
server.tool(
  "recommend_compression",
  "Given a model and hardware specs, recommend the best quantization level. Use this when a developer asks what compression to use.",
  {
    model: z.string().describe("Ollama model ID (e.g., 'qwen3:4b') or HuggingFace repo"),
    ramGb: z.number().optional().describe("System RAM in GB"),
    vramGb: z.number().optional().describe("GPU VRAM in GB"),
    gpuName: z.string().optional().describe("GPU name (e.g., 'RTX 3060')"),
    prioritize: z.enum(["quality", "size", "speed"]).optional().describe("What to optimize for")
  },
  async ({ model, ramGb, vramGb, gpuName, prioritize }) => {
    // Resolve the model by exact Ollama ID first, falling back to a
    // case-insensitive display-name substring match.
    const needle = model.toLowerCase();
    const entry = OLLAMA_MODELS.find(
      (m) => m.ollamaId === model || m.name.toLowerCase().includes(needle)
    );
    if (!entry) {
      return {
        content: [{ type: "text", text: `Model "${model}" not found. Use list_models to see available models.` }]
      };
    }
    // First element is the top pick; the rest are alternatives.
    const [best, ...rest] = recommendQuantForHardware(
      entry.parametersBillion,
      ramGb || null,
      vramGb || null,
      prioritize || "quality"
    );
    const report = {
      model: entry.name,
      ollamaId: entry.ollamaId,
      originalSize: `${entry.fp16SizeGb} GB (FP16)`,
      hardware: { ramGb, vramGb, gpuName },
      recommended: best,
      alternatives: rest,
      nextStep: `To compress, use the compress_model tool with model="${entry.ollamaId}" and quantType="${best.quantType}"`
    };
    return {
      content: [{ type: "text", text: JSON.stringify(report, null, 2) }]
    };
  }
);
192
// MCP tool: submit a cloud compression job. Requires COMPRESSX_API_KEY.
server.tool(
  "compress_model",
  "Compress an LLM model to GGUF format. Requires a CompressX account with available credits. Returns a job ID to track progress.",
  {
    model: z.string().describe("Ollama model ID (e.g., 'qwen3:4b')"),
    quantType: z.string().default("q4_k_m").describe("Quantization type (q8_0, q5_k_m, q4_k_m, q3_k_m, q2_k)")
  },
  async ({ model, quantType }) => {
    // Every response from this tool is a single text content item.
    const reply = (value) => ({ content: [{ type: "text", text: value }] });

    if (!API_KEY) {
      return reply("No API key configured. Set COMPRESSX_API_KEY environment variable.\n\nGet your key at https://compressx.asmith.media/settings/api-keys\n\nFree tier includes 100 credits/month.");
    }
    // Cloud jobs require an exact catalog match (no fuzzy name search here).
    const entry = OLLAMA_MODELS.find((m) => m.ollamaId === model);
    if (!entry) {
      return reply(`Model "${model}" not found. Use list_models to search.`);
    }
    try {
      const job = await client.compress({
        sourceModelId: entry.hfRepoId,
        sourceModelName: entry.name,
        method: "GGUF",
        config: { quant_type: quantType }
      });
      const projectedGb = estimateCompressedSize(entry.parametersBillion, quantType);
      // Key order is preserved — it is part of the rendered JSON output.
      const summary = {
        status: "Job submitted",
        jobId: job.id,
        model: entry.name,
        quantType,
        estimatedOutputSize: `${projectedGb} GB`,
        // Pricing heuristic: one credit per GB of the FP16 source, rounded up.
        creditCost: Math.ceil(entry.fp16SizeGb),
        checkProgress: `Use check_job_status with jobId="${job.id}"`
      };
      return reply(JSON.stringify(summary, null, 2));
    } catch (err) {
      // Surface API failures as tool output instead of crashing the server.
      return reply(`Compression failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  }
);
243
// MCP tool: poll a compression job by ID. Requires COMPRESSX_API_KEY.
server.tool(
  "check_job_status",
  "Check the status and progress of a compression job.",
  {
    jobId: z.string().describe("The job ID returned by compress_model")
  },
  async ({ jobId }) => {
    if (!API_KEY) {
      return { content: [{ type: "text", text: "No API key configured." }] };
    }
    try {
      // Pass the raw job record straight through as pretty-printed JSON.
      const job = await client.getJob(jobId);
      return {
        content: [{ type: "text", text: JSON.stringify(job, null, 2) }]
      };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${reason}` }]
      };
    }
  }
);
265
// MCP tool: report the account's credit balance. Requires COMPRESSX_API_KEY.
server.tool(
  "get_credits",
  "Check your CompressX credit balance and tier information.",
  {},
  async () => {
    if (!API_KEY) {
      return {
        content: [{
          type: "text",
          text: "No API key configured. Set COMPRESSX_API_KEY to check credits.\n\nSign up free at https://compressx.asmith.media"
        }]
      };
    }
    try {
      // Pass the API's credit payload through unmodified, pretty-printed.
      const credits = await client.getCredits();
      return {
        content: [{ type: "text", text: JSON.stringify(credits, null, 2) }]
      };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: `Error: ${reason}` }]
      };
    }
  }
);
290
/**
 * Entry point: bind the MCP server to stdio and start handling requests.
 * The process stays alive for as long as the transport is connected.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Report startup failures on stderr instead of dying silently.
main().catch(console.error);
package/package.json ADDED
@@ -0,0 +1,47 @@
1
+ {
2
+ "name": "compressx-mcp",
3
+ "version": "0.1.0",
4
+ "description": "MCP server for CompressX — lets Claude Code, Cursor, and other AI tools compress LLMs via the Model Context Protocol.",
5
+ "type": "module",
6
+ "bin": {
7
+ "compressx-mcp": "./dist/index.js"
8
+ },
9
+ "main": "./dist/index.js",
10
+ "files": [
11
+ "dist/",
12
+ "README.md",
13
+ "LICENSE"
14
+ ],
15
+ "scripts": {
16
+ "build": "tsup",
17
+ "dev": "tsx src/index.ts",
18
+ "prepublishOnly": "npm run build"
19
+ },
20
+ "keywords": [
21
+ "mcp",
22
+ "model-context-protocol",
23
+ "llm",
24
+ "compression",
25
+ "gguf",
26
+ "ollama",
27
+ "claude-code",
28
+ "cursor",
29
+ "ai-tools"
30
+ ],
31
+ "author": "A. Smith Media",
32
+ "license": "UNLICENSED",
33
+ "homepage": "https://compressx.asmith.media",
34
+ "engines": {
35
+ "node": ">=18"
36
+ },
37
+ "dependencies": {
38
+ "@modelcontextprotocol/sdk": "^1.0.0",
39
+ "zod": "^3.23.0"
40
+ },
41
+ "devDependencies": {
42
+ "@types/node": "^20.0.0",
43
+ "tsup": "^8.5.1",
44
+ "tsx": "^4.0.0",
45
+ "typescript": "^5.0.0"
46
+ }
47
+ }