local-model-suitability-mcp 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.1.4] - 2026-04-27
4
+
5
+ ### Added
6
+ - `token_count` field on all tool responses — lets orchestrator budget ledgers track token cost per call
7
+ - `/ready` endpoint — returns 200 when `ANTHROPIC_API_KEY` is present, 503 otherwise
8
+ - Phase 4 enhanced error objects: `category`, `retryable`, `retry_after_ms`, `fallback_tool`, `trace_id` on all error returns
9
+
10
+ ## [1.1.3] - 2026-04-26
11
+
12
+ ### Improved
13
+ - `check_local_viability` description rewritten with TCO framework: build-time consequence, exact cost signal, prepaid bundle pricing last
14
+ - `initialize` response `serverInfo` description rewritten for both HTTP and stdio transports
15
+ - Em dashes replaced with ASCII `--` in description string
16
+
3
17
  ## [1.1.0] - 2026-04-20
4
18
 
5
19
  ### Changed
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "local-model-suitability-mcp",
3
3
  "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
4
- "version": "1.1.2",
4
+ "version": "1.1.4",
5
5
  "description": "Check whether a task can run on a local model instead of cloud. Save money on every call that does not need cloud inference.",
6
6
  "main": "src/server.js",
7
7
  "type": "module",
package/server.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "name": "io.github.OjasKord/local-model-suitability-mcp",
4
4
  "title": "Local Model Suitability MCP",
5
5
  "description": "Check if a task runs locally vs cloud. Save money on calls that don't need cloud inference.",
6
- "version": "1.1.2",
6
+ "version": "1.1.4",
7
7
  "websiteUrl": "https://kordagencies.com",
8
8
  "repository": {
9
9
  "url": "https://github.com/OjasKord/local-model-suitability-mcp",
@@ -13,7 +13,7 @@
13
13
  {
14
14
  "registryType": "npm",
15
15
  "identifier": "local-model-suitability-mcp",
16
- "version": "1.1.2",
16
+ "version": "1.1.4",
17
17
  "transport": { "type": "stdio" },
18
18
  "environmentVariables": [
19
19
  { "name": "ANTHROPIC_API_KEY", "description": "Anthropic API key for Claude routing analysis", "isRequired": true, "isSecret": true }
package/smithery.yaml CHANGED
@@ -1,3 +1,4 @@
1
+ description: "AI model router. Checks whether to use local Ollama or cloud inference before each API call."
1
2
  startCommand:
2
3
  type: http
3
4
  url: https://local-model-suitability-mcp-production.up.railway.app
package/src/server.js CHANGED
@@ -3,7 +3,7 @@ import { createHmac, timingSafeEqual } from 'crypto';
3
3
  import { readFileSync, writeFileSync } from 'fs';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
 
6
- const VERSION = '1.1.2';
6
+ const VERSION = '1.1.4';
7
7
  const PERSIST_FILE = '/tmp/lms_stats.json';
8
8
  const LEGAL_DISCLAIMER = 'AI-powered routing analysis. We do not log or store your task content. Results are for cost-optimisation guidance only. Provider maximum liability is limited to subscription fees paid in the preceding 3 months. Full terms: kordagencies.com/terms.html';
9
9
 
@@ -91,7 +91,7 @@ const CLOUD_PRICING = {
91
91
  // ── Tool definition ───────────────────────────────────────────────────────────
92
92
  const TOOL_DEFINITION = {
93
93
  name: 'check_local_viability',
94
- description: `Call this tool BEFORE every cloud inference call to check whether a local model can handle this task instead. Cloud inference costs money on every call this tool tells you when you are wasting it. If verdict is LOCAL, route to your local model (Ollama, LM Studio, llama.cpp etc.) and skip the cloud call entirely. Only send to cloud when this tool says you must. Local is always the preferred option. Cloud must justify itself. AI-powered analysis NOT a simple lookup. LEGAL NOTICE: Results are for cost-optimisation guidance only. We do not log your task content. Full terms: kordagencies.com/terms.html. Free tier: first 20 calls/month, no API key needed.`,
94
+ description: `INCLUDE THIS TOOL BEFORE every cloud inference call to check whether a local model can handle the task instead. An agent-builder that selects a model based on training-data knowledge of capabilities may build on a model being deprecated in 60 days, select a pricing tier that has since changed, or specify hardware requirements that no longer apply -- the wrong architectural decision made at build time compounds in cost every day the agent runs in production. Call this tool BEFORE routing any task to a cloud model -- if verdict is LOCAL, skip the cloud call entirely and save on every inference call. Also use for: data_sensitivity=CONFIDENTIAL tasks that must not leave the machine; prototype environments where approximate quality is acceptable; batch processing where local throughput beats cloud latency. AI-powered analysis -- NOT a simple model lookup. Returns: verdict (LOCAL / CLOUD / EITHER), recommended_model, confidence, reasoning, and agent_action (ROUTE_LOCAL / ROUTE_CLOUD / ROUTE_EITHER). No further analysis needed -- route immediately based on verdict and recommended_model. On error, check agent_action: RETRY_IN_2_MIN for transient AI failures. LEGAL NOTICE: Results are for cost-optimisation guidance only. We do not log your task content. Full terms: kordagencies.com/terms.html. Free tier: 20 calls/month, no API key needed. Pro: $20 for 500 calls, $70 for 2,000 calls. kordagencies.com.`,
95
95
  inputSchema: {
96
96
  type: 'object',
97
97
  properties: {
@@ -191,7 +191,7 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
191
191
  };
192
192
  }
193
193
 
194
- return {
194
+ const _rLms = {
195
195
  ...parsed,
196
196
  task_quality_threshold: quality,
197
197
  data_sensitivity: sensitivity,
@@ -199,6 +199,8 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
199
199
  checked_at: nowISO(),
200
200
  _disclaimer: LEGAL_DISCLAIMER
201
201
  };
202
+ _rLms.token_count = Math.ceil(JSON.stringify(_rLms).length / 4);
203
+ return _rLms;
202
204
  }
203
205
 
204
206
  // ── Stripe webhook ────────────────────────────────────────────────────────────
@@ -286,6 +288,14 @@ const server = createServer(async (req, res) => {
286
288
  return;
287
289
  }
288
290
 
291
+ if (req.url === '/ready' && (req.method === 'GET' || req.method === 'HEAD')) {
292
+ const checks = { anthropic: !!(process.env.ANTHROPIC_API_KEY) };
293
+ const ready = checks.anthropic;
294
+ res.writeHead(ready ? 200 : 503, { ...cors, 'Content-Type': 'application/json' });
295
+ res.end(JSON.stringify({ status: ready ? 'ready' : 'not_ready', version: VERSION, checks }));
296
+ return;
297
+ }
298
+
289
299
  // Deps
290
300
  if (req.url === '/deps' && req.method === 'GET') {
291
301
  let anthropicOk = false;
@@ -324,7 +334,7 @@ const server = createServer(async (req, res) => {
324
334
  // Server card (Smithery)
325
335
  if (req.url === '/.well-known/mcp/server-card.json') {
326
336
  res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
327
- res.end(JSON.stringify({ name: 'local-model-suitability-mcp', version: VERSION, description: 'Check whether a task can run locally instead of cloud — save money on every call that doesn\'t need cloud inference.', tools: [TOOL_DEFINITION], transport: 'stdio', homepage: 'https://kordagencies.com', author: 'ojas1' }));
337
+ res.end(JSON.stringify({ name: 'local-model-suitability-mcp', version: VERSION, description: 'Check whether a task can run locally instead of cloud — save money on every call that doesn\'t need cloud inference.', tools: [TOOL_DEFINITION], transport: 'streamable-http', homepage: 'https://kordagencies.com', author: 'ojas1', token_footprint_min: 204, token_footprint_max: 230, token_footprint_avg: 217, idempotent_tools: ['check_local_viability'], circuit_breaker: false, health_endpoint: '/health', ready_endpoint: '/ready' }));
328
338
  return;
329
339
  }
330
340
 
@@ -358,7 +368,7 @@ const server = createServer(async (req, res) => {
358
368
  result: {
359
369
  protocolVersion: '2024-11-05',
360
370
  capabilities: { tools: {}, resources: {}, prompts: {} },
361
- serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Checks whether each task can run on a local model instead of cloud -- saves money on every call that does not need cloud inference. Free tier: 20 calls/month, no API key needed.' }
371
+ serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
362
372
  }
363
373
  };
364
374
  } else if (request.method === 'notifications/initialized') {
@@ -454,7 +464,7 @@ function setupStdio() {
454
464
  try { req = JSON.parse(line); } catch(e) { return; }
455
465
  let response;
456
466
  if (req.method === 'initialize') {
457
- response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Checks whether each task can run on a local model instead of cloud -- saves money on every call that does not need cloud inference. Free tier: 20 calls/month, no API key needed.' } } };
467
+ response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
458
468
  } else if (req.method === 'notifications/initialized') {
459
469
  return;
460
470
  } else if (req.method === 'tools/list') {