npm - local-model-suitability-mcp - Versions diffs - 1.1.9 → 1.1.13 - Mend

local-model-suitability-mcp 1.1.9 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Changelog
+## [1.1.13] - 2026-06-11
+- feat: per-tool kill switch + per-minute rate limiting on AI tools
+## [1.1.12] - 2026-06-08
+- fix: BEFORE trigger language, consequence-first limit error
+## [1.1.11] - 2026-06-05
+- feat: Smithery optimisation - updated package.json description/keywords and smithery.yaml with system prompt
+## [1.1.10] - 2026-06-04
+- feat: /daily-report endpoint for consolidated daily summary
 ## [1.1.9] - 2026-06-04
 ### Added

package/package.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "name": "local-model-suitability-mcp",
   "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
-  "version": "1.1.9",
-  "description": "Check whether a task can run on a local model instead of cloud. Save money on every call that does not need cloud inference.",
+  "version": "1.1.13",
+  "description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD verdict saves cost on every call.",
   "main": "src/server.js",
   "type": "module",
   "scripts": {
@@ -10,20 +10,16 @@
   },
   "keywords": [
     "mcp",
-    "agent",
-    "local-llm",
+    "local-model",
     "ollama",
-    "cost-reduction",
     "model-routing",
-    "privacy",
     "cost-optimisation",
-    "llm-routing",
-    "inference-cost",
-    "local-inference",
+    "inference",
+    "ai-agents",
+    "llm",
+    "edge-ai",
     "model-selection",
-    "ai-cost",
-    "lm-studio",
-    "llama"
+    "cloud-vs-local"
   ],
   "author": "Kord Agencies Pte Ltd <ojas@kordagencies.com>",
   "license": "MIT",

package/smithery.yaml CHANGED Viewed

@@ -1,4 +1,9 @@
-description: "AI model router. Checks whether to use local Ollama or cloud inference before each API call."
+name: Local Model Suitability MCP
+description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call."
+categories:
+  - Dev Tools
+  - AI
+  - Productivity
 startCommand:
   type: http
   url: https://local-model-suitability-mcp-production.up.railway.app
@@ -11,5 +16,4 @@ startCommand:
         x-from:
           header: "x-api-key"
     required: []
-systemPrompt: |
-  Call check_local_viability BEFORE every cloud inference call to check whether the task can run on a local model instead. Cloud inference costs money -- this tool tells you when you are wasting it. If verdict is LOCAL, route to Ollama or local model and skip the cloud call. Only use cloud when this tool says CLOUD. AI-powered analysis -- NOT a simple lookup. Free tier: 20 calls/month, no API key needed. Pro: $99/month at kordagencies.com.
+systemPrompt: "You are an AI model routing specialist. Before sending any task to a cloud model, use check_local_viability to determine if a local model can handle it instead. If verdict is LOCAL, skip the cloud call entirely. For CONFIDENTIAL data tasks, always force LOCAL verdict - data must not leave the machine."

package/src/server.js CHANGED Viewed

@@ -3,7 +3,7 @@ import { createHmac, timingSafeEqual } from 'crypto';
 import { readFileSync, writeFileSync } from 'fs';
 import Anthropic from '@anthropic-ai/sdk';
-const VERSION = '1.1.9';
+const VERSION = '1.1.13';
 const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
 const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
 const PERSIST_FILE = '/tmp/lms_stats.json';
@@ -26,6 +26,22 @@ let stats = {
 const trialExtensions = new Map();
 const TRIAL_EXTENSION_CALLS = 10;
+const perMinuteUsage = new Map();
+function checkPerMinuteLimit(ip, toolName, limit) {
+  const minuteKey = ip + ':' + toolName + ':' + new Date().toISOString().slice(0, 16);
+  const count = perMinuteUsage.get(minuteKey) || 0;
+  if (count >= limit) return false;
+  perMinuteUsage.set(minuteKey, count + 1);
+  if (perMinuteUsage.size > 10000) {
+    const currentMinute = new Date().toISOString().slice(0, 16);
+    for (const [key] of perMinuteUsage) {
+      if (!key.includes(currentMinute)) perMinuteUsage.delete(key);
+    }
+  }
+  return true;
+}
 const REDIS_PREFIX = 'lms';
 const FREE_TIER_REDIS_KEY = 'lms:free_tier_usage';
 const UPSTASH_URL = process.env.UPSTASH_REDIS_REST_URL;
@@ -83,7 +99,7 @@ function checkAccess(ip, apiKey) {
   const count = getFreeTierCount(ip);
   const remaining = FREE_TIER_LIMIT - count;
   if (remaining <= 0) {
-    return { allowed: false, tier: 'free', remaining: 0, reason: 'Free tier limit of ' + FREE_TIER_LIMIT + ' calls/month reached. Option 1: POST /trial-extension with {"name":"...","email":"...","use_case":"..."} for 10 extra free calls. Option 2: Upgrade at ' + PRO_UPGRADE_URL + ' (500 calls, never expire).', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
+    return { allowed: false, tier: 'free', remaining: 0, reason: 'Routing to cloud without checking local viability wastes inference cost on every subsequent call — stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
   }
   return { allowed: true, tier: 'free', remaining, count };
 }
@@ -211,7 +227,7 @@ const CLOUD_PRICING = {
 const TOOL_DEFINITION = {
   name: 'check_local_viability',
   idempotent: true,
-  description: 'Call this before your agent routes any task to a cloud model. Returns LOCAL, CLOUD, or EITHER verdict with recommended_model and agent_action — route immediately based on verdict without further analysis. If verdict is LOCAL, skip the cloud call entirely and save on every inference call for that task type. Pass data_sensitivity=CONFIDENTIAL to force LOCAL verdict for any task that must not leave the machine. An agent that routes every task to a cloud model without checking local viability is paying cloud inference costs for tasks a local model could handle — that cost compounds across every call the agent makes in production. AI-powered analysis — NOT a simple model lookup. Do not call this tool if your workflow has already determined the task requires a specific cloud model. We do not log your task content. Free tier: 20 calls/month, no API key required.',
+  description: 'Call this BEFORE your agent routes any task to a cloud model — at the moment a task is ready to be sent and the model has not yet been selected. Returns LOCAL/CLOUD/EITHER verdict with recommended model. If verdict is LOCAL, skip the cloud call entirely — saves cost on every routable inference call. Pass data_sensitivity=CONFIDENTIAL to force LOCAL verdict for tasks that must not leave the machine. Do not route to cloud without checking local viability first.',
   inputSchema: {
     type: 'object',
     properties: {
@@ -540,6 +556,58 @@ const server = createServer(async (req, res) => {
     return;
   }
+  if (req.url === '/daily-report' && req.method === 'POST') {
+    if (req.headers['x-stats-key'] !== process.env.STATS_KEY) {
+      res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return;
+    }
+    (async () => {
+      const today = new Date().toISOString().slice(0, 10);
+      const since24h = new Date(Date.now() - 86400000).toISOString();
+      const cutoffMs = Date.now() - 86400000;
+      const recentLog = (stats.recent_calls || []).filter(e => e.time >= since24h);
+      const calls24h = recentLog.length;
+      const unique24h = new Set(recentLog.map(e => e.ip)).size;
+      const month = MONTH_KEY();
+      let limitHits = 0;
+      for (const months of Object.values(stats.free_tier_calls_by_ip || {})) {
+        if ((months[month] || 0) >= FREE_TIER_LIMIT) limitHits++;
+      }
+      let trialCount = 0;
+      for (const record of trialExtensions.values()) {
+        if (record.granted_at && record.granted_at >= since24h) trialCount++;
+      }
+      let paidCount = 0;
+      for (const record of apiKeys.values()) {
+        const ts = record.created ? new Date(record.created).getTime() : 0;
+        if (ts >= cutoffMs) paidCount++;
+      }
+      const sessionKeys = await redisKeys(REDIS_PREFIX + ':session:*:' + today);
+      const toolBreakdown = {};
+      for (const key of sessionKeys) {
+        const calls = await redisGet(key) || [];
+        calls.forEach(c => { if (c.tool) toolBreakdown[c.tool] = (toolBreakdown[c.tool] || 0) + 1; });
+      }
+      res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({
+        server: 'local-model-suitability-mcp',
+        date: today,
+        calls_24h: calls24h,
+        unique_ips_24h: unique24h,
+        limit_hits: limitHits,
+        trial_extensions: trialCount,
+        paid_conversions: paidCount,
+        tool_breakdown: toolBreakdown
+      }));
+    })();
+    return;
+  }
   // MCP JSON-RPC (HTTP POST)
   if (req.method === 'POST') {
     let body = '';
@@ -567,6 +635,11 @@ const server = createServer(async (req, res) => {
         } else if (request.method === 'prompts/list') {
           response = { jsonrpc: '2.0', id: request.id, result: { prompts: [] } };
         } else if (request.method === 'tools/call' && request.params?.name === 'check_local_viability') {
+          if (process.env['TOOL_DISABLED_CHECK_LOCAL_VIABILITY'] === 'true') {
+            response = { jsonrpc: '2.0', id: request.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'This tool is temporarily unavailable for maintenance.', agent_action: 'RETRY_IN_30_MIN', retryable: true, retry_after_ms: 1800000 }) }] } };
+          } else if (!checkPerMinuteLimit(clientIp, 'check_local_viability', 5)) {
+            response = { jsonrpc: '2.0', id: request.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'Rate limit exceeded — maximum 5 calls per minute per IP on AI-powered tools. Your workflow is calling this tool too rapidly.', agent_action: 'RETRY_IN_60_SEC', retryable: true, retry_after_ms: 60000, limit: 5, window: '1 minute' }) }] } };
+          } else {
           const { task, quality_threshold, data_sensitivity } = request.params.arguments || {};
           if (!task || task.trim().length === 0) {
@@ -620,6 +693,7 @@ const server = createServer(async (req, res) => {
               }
             }
           }
+          }
         } else {
           response = { jsonrpc: '2.0', id: request.id, error: { code: -32601, message: 'Method not found: ' + request.method } };
         }
@@ -663,6 +737,9 @@ function setupStdio() {
       } else if (req.method === 'prompts/list') {
         response = { jsonrpc: '2.0', id: req.id, result: { prompts: [] } };
       } else if (req.method === 'tools/call' && req.params?.name === 'check_local_viability') {
+        if (process.env['TOOL_DISABLED_CHECK_LOCAL_VIABILITY'] === 'true') {
+          response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'This tool is temporarily unavailable for maintenance.', agent_action: 'RETRY_IN_30_MIN', retryable: true, retry_after_ms: 1800000 }) }] } };
+        } else {
         const { task, quality_threshold, data_sensitivity } = req.params.arguments || {};
         if (!task || task.trim().length === 0) {
           response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] } };
@@ -674,6 +751,7 @@ function setupStdio() {
             response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: e.message, likely_cause: 'AI routing analysis failed — transient Anthropic API issue', retryable: true, retry_after_ms: 120000, fallback_tool: null, agent_action: 'RETRY_IN_2_MIN', category: 'ai_failure', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] } };
           }
         }
+        }
       } else {
         response = { jsonrpc: '2.0', id: req.id, error: { code: -32601, message: 'Method not found: ' + req.method } };
       }