npm - local-model-suitability-mcp - Versions diffs - 1.1.20 → 1.1.22 - Mend

local-model-suitability-mcp 1.1.20 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,17 @@
 # Changelog
+## [1.1.22] - 2026-06-24
+- feat: unauthenticated /public-stats endpoint -- first_deployed, lifetime tool calls, uptime %, version, for agent orchestrators evaluating server trustworthiness
+- feat: /process-trial-followups endpoint + 24h follow-up record on trial-extension grant
+- feat: gate response now self-contained (server + workflow impact + upgrade path in one sentence) and detects cross-server operators via shared fleet Redis, with cross-server trial-extension note
+- feat: outputSchema added to check_local_viability (additive, response format unchanged)
+- fix: both initialize descriptions said "ROUTE_LOCAL / ROUTE_CLOUD verdict" -- the real verdict field is LOCAL/CLOUD/EITHER. Corrected both.
+- fix: the 402 gate response at the tools/call site built its own plain error text instead of using checkAccess's richer `reason` field -- now uses access.reason consistently with the rest of the fleet
+- fix: smithery.yaml claimed "2 focused tools" -- this server has exactly 1 (check_local_viability)
+## [1.1.21] - 2026-06-23
+- fix: gate returns HTTP 402 (x402 standard for non-transient quota)
 ## [1.1.20] - 2026-06-20
 - feat: email notification on free tier gate hit

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "local-model-suitability-mcp",
   "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
-  "version": "1.1.20",
+  "version": "1.1.22",
   "description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD verdict saves cost on every call.",
   "main": "src/server.js",
   "type": "module",

package/smithery.yaml CHANGED Viewed

@@ -1,5 +1,6 @@
 name: Local Model Suitability MCP
-description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call. 2 focused tools. Compact schema. Minimal agent context overhead."
+agentRole: "AI Model Routing Specialist — equip before routing a task to a cloud model to check whether a local model can handle it instead"
+description: "Pre-condition: an agent is about to route a task to a cloud model and has not yet checked whether a local model could handle it. Skip this and every routable call pays full cloud inference cost indefinitely, or confidential data leaves the machine when it didn't need to. check_local_viability returns a LOCAL/CLOUD/EITHER verdict with specific recommended local models and an estimated cost saving, and forces LOCAL whenever data_sensitivity is CONFIDENTIAL. 1 tool, one verdict, no further reasoning required."
 categories:
   - Dev Tools
   - AI

package/src/server.js CHANGED Viewed

@@ -3,7 +3,14 @@ import { createHmac, timingSafeEqual } from 'crypto';
 import { readFileSync, writeFileSync } from 'fs';
 import Anthropic from '@anthropic-ai/sdk';
-const VERSION = '1.1.20';
+const VERSION = '1.1.22';
+const FIRST_DEPLOYED = '2026-04-13T06:41:38Z';
+const LIFETIME_CALLS_REDIS_KEY = 'lms:lifetime_calls';
+const UPTIME_HEARTBEAT_KEY = 'lms:uptime:heartbeat_count';
+const UPTIME_MONITORING_START_KEY = 'lms:uptime:monitoring_started';
+const UPTIME_HEARTBEAT_INTERVAL_MS = 60000;
+const FLEET_IP24_TTL_SECONDS = 30 * 24 * 60 * 60;
+const FLEET_CROSS_SERVER_THRESHOLD = 3;
 const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
 const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
 const ALLOWED_PAYMENT_LINK_IDS = ['plink_1TQzCBD6WvRe6sn3H1q5t2LF', 'plink_1TQzDSD6WvRe6sn3UM2G1EgX'];
@@ -93,14 +100,16 @@ function incrementFreeTier(ip) {
   saveFreeTierToRedis().catch(() => {});
 }
-function checkAccess(ip, apiKey) {
+async function checkAccess(ip, apiKey) {
   if (apiKey && apiKeys.has(apiKey)) {
     return { allowed: true, tier: 'paid', plan: apiKeys.get(apiKey).plan };
   }
   const count = getFreeTierCount(ip);
   const remaining = FREE_TIER_LIMIT - count;
   if (remaining <= 0) {
-    return { allowed: false, tier: 'free', remaining: 0, reason: 'Routing to cloud without checking local viability wastes inference cost on every subsequent call — stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
+    recordFleetGateHit(ip).catch(() => {});
+    const crossServerNote = await buildCrossServerNote(ip);
+    return { allowed: false, tier: 'free', remaining: 0, reason: 'Local Model Suitability MCP free tier exhausted -- cost-routing checks are now blocked, halting any inference pipeline that depends on it, until you extend via POST /trial-extension or upgrade at ' + PRO_UPGRADE_URL + '. Routing to cloud without checking local viability wastes inference cost on every subsequent call -- stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.' + (crossServerNote ? ' ' + crossServerNote : ''), trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
   }
   return { allowed: true, tier: 'free', remaining, count };
 }
@@ -189,6 +198,56 @@ async function redisDelete(key) {
   } catch(e) { console.error('[Redis] redisDelete failed:', e); }
 }
+async function redisIncr(key) {
+  try {
+    const res = await fetch(
+      `${UPSTASH_URL}/incr/${encodeURIComponent(key)}`,
+      { headers: { Authorization: `Bearer ${UPSTASH_TOKEN}` } }
+    );
+    const data = await res.json();
+    if (data.error) { console.error('[Redis] redisIncr error:', data.error, 'key:', key); return null; }
+    return data.result;
+  } catch(e) { console.error('[Redis] redisIncr failed:', e); return null; }
+}
+// ── Fleet cross-server operator detection ──────────────────────────────────────
+async function recordFleetGateHit(ip) {
+  try {
+    const ip24 = truncateIp(ip);
+    const key = `fleet:ip24:${ip24}:${REDIS_PREFIX}`;
+    await redisSet(key, nowISO());
+    await redisExpire(key, FLEET_IP24_TTL_SECONDS);
+  } catch(e) { console.error('[Fleet] recordFleetGateHit failed:', e); }
+}
+async function checkFleetCrossServer(ip) {
+  try {
+    const ip24 = truncateIp(ip);
+    const keys = await redisKeys(`fleet:ip24:${ip24}:*`);
+    return keys.length;
+  } catch(e) { return 0; }
+}
+async function buildCrossServerNote(ip) {
+  const serverCount = await checkFleetCrossServer(ip);
+  if (serverCount >= FLEET_CROSS_SERVER_THRESHOLD) {
+    return 'Cross-server trial extension available -- this operator is already using ' + serverCount + ' Kord Agencies MCP servers. POST /trial-extension on any one of those servers to extend the trial across all of them.';
+  }
+  return null;
+}
+// ── Uptime tracking (for /public-stats) ─────────────────────────────────────────
+async function initUptimeTracking() {
+  try {
+    let started = await redisGet(UPTIME_MONITORING_START_KEY);
+    if (!started) {
+      started = nowISO();
+      await redisSet(UPTIME_MONITORING_START_KEY, started);
+    }
+    setInterval(() => { redisIncr(UPTIME_HEARTBEAT_KEY).catch(() => {}); }, UPTIME_HEARTBEAT_INTERVAL_MS);
+  } catch(e) { console.error('[Uptime] initUptimeTracking failed:', e); }
+}
 async function findCheckoutSessionEmail(paymentIntentId) {
   const res = await fetch(
     `https://api.stripe.com/v1/checkout/sessions?payment_intent=${encodeURIComponent(paymentIntentId)}`,
@@ -285,6 +344,25 @@ const TOOL_DEFINITION = {
       }
     },
     required: ['task']
+  },
+  outputSchema: {
+    type: 'object',
+    properties: {
+      verdict: { type: 'string', enum: ['LOCAL', 'CLOUD', 'EITHER'] },
+      confidence: { type: 'string', enum: ['HIGH', 'MEDIUM', 'LOW'] },
+      reason: { type: 'string' },
+      estimated_cost_saving: { type: 'string' },
+      recommended_local_models: { type: 'array', items: { type: 'string' }, description: 'Present when verdict is LOCAL or EITHER' },
+      cloud_justified_reason: { type: ['string', 'null'], description: 'Non-null only when verdict is CLOUD' },
+      data_sensitivity_override: { type: 'boolean', description: 'Present only when data_sensitivity=CONFIDENTIAL forced a LOCAL verdict' },
+      task_quality_threshold: { type: 'string', enum: ['PRODUCTION', 'PROTOTYPE', 'BEST_EFFORT'] },
+      data_sensitivity: { type: 'string', enum: ['PUBLIC', 'INTERNAL', 'CONFIDENTIAL'] },
+      analysis_type: { type: 'string' },
+      checked_at: { type: 'string', format: 'date-time' },
+      _disclaimer: { type: 'string' }
+    },
+    required: ['verdict', 'confidence', 'reason', 'checked_at', '_disclaimer'],
+    additionalProperties: true
   }
 };
@@ -557,6 +635,33 @@ const server = createServer(async (req, res) => {
     return;
   }
+  // Unauthenticated machine-readable track record -- for agent orchestrators
+  // evaluating server trustworthiness, not for humans. No stats-key required.
+  if (req.url === '/public-stats' && req.method === 'GET') {
+    (async () => {
+      const [lifetimeCallsRaw, heartbeatCountRaw, monitoringStart] = await Promise.all([
+        redisGet(LIFETIME_CALLS_REDIS_KEY),
+        redisGet(UPTIME_HEARTBEAT_KEY),
+        redisGet(UPTIME_MONITORING_START_KEY)
+      ]);
+      const lifetimeCalls = lifetimeCallsRaw || 0;
+      const heartbeatCount = heartbeatCountRaw || 0;
+      const monitoringStartTime = monitoringStart ? new Date(monitoringStart).getTime() : Date.now();
+      const elapsedMs = Math.max(1, Date.now() - monitoringStartTime);
+      const uptimePct = Math.min(100, Math.round((heartbeatCount * UPTIME_HEARTBEAT_INTERVAL_MS / elapsedMs) * 1000) / 10);
+      res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({
+        server: 'local-model-suitability-mcp',
+        version: VERSION,
+        first_deployed: FIRST_DEPLOYED,
+        total_lifetime_tool_calls: lifetimeCalls,
+        uptime_percentage: uptimePct,
+        uptime_monitoring_since: monitoringStart || nowISO()
+      }));
+    })();
+    return;
+  }
   // Session log
   if (req.url === '/session-log' && req.method === 'GET') {
     if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
@@ -602,6 +707,8 @@ const server = createServer(async (req, res) => {
         stats.free_tier_calls_by_ip[clientIp][month] = Math.max(0, current - TRIAL_EXTENSION_CALLS);
         trialExtensions.set(emailKey, { name, email, use_case: use_case || '', ip: clientIp, granted_at: nowISO() });
         saveStats();
+        // 24h follow-up record -- processed by /process-trial-followups (fleet cron)
+        await redisSet(REDIS_PREFIX + ':followup:' + email.toLowerCase().trim(), { email, name, server: 'local-model-suitability-mcp', granted_at: nowISO(), sent: false });
         const sendTrialEmail = async (to, subject, html) => {
           await fetch('https://api.resend.com/emails', {
             method: 'POST',
@@ -620,6 +727,47 @@ const server = createServer(async (req, res) => {
     return;
   }
+  // Fleet cron hits this hourly. Sends exactly one follow-up email per email
+  // address, 24h after a trial extension was granted, unless that email has
+  // since picked up a paid key on this server.
+  if (req.url === '/process-trial-followups' && req.method === 'POST') {
+    if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
+    (async () => {
+      const keys = await redisKeys(REDIS_PREFIX + ':followup:*');
+      const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
+      let processed = 0, sent = 0, skippedPaid = 0;
+      for (const key of keys) {
+        const record = await redisGet(key);
+        if (!record || record.sent) continue;
+        if (Date.now() - new Date(record.granted_at).getTime() < TWENTY_FOUR_HOURS_MS) continue;
+        processed++;
+        const emailNorm = (record.email || '').toLowerCase().trim();
+        const hasPaidKey = Array.from(apiKeys.values()).some(r => (r.email || '').toLowerCase().trim() === emailNorm);
+        if (hasPaidKey) {
+          skippedPaid++;
+        } else {
+          await fetch('https://api.resend.com/emails', {
+            method: 'POST',
+            headers: { 'Authorization': `Bearer ${process.env.RESEND_API_KEY}`, 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+              from: 'Local Model Suitability MCP <ojas@kordagencies.com>',
+              to: [record.email],
+              subject: 'Local Model Suitability MCP -- cost-routing checks will block your pipeline again without an upgrade',
+              html: '<p>Hi ' + record.name + ',</p><p>Your trial extension on Local Model Suitability MCP was granted 24 hours ago. Once those extra calls run out, cost-routing checks stop and any inference pipeline that depends on them pauses until you upgrade.</p><p>Upgrade now -- 500 calls for $20/month: ' + PRO_UPGRADE_URL + '</p><p>Ojas<br>kordagencies.com</p>'
+            })
+          }).catch(e => console.error('[lms] followup email error:', e.message));
+          sent++;
+        }
+        record.sent = true;
+        record.sent_at = nowISO();
+        await redisSet(key, record);
+      }
+      res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+      res.end(JSON.stringify({ checked: keys.length, processed, emails_sent: sent, skipped_already_paid: skippedPaid }));
+    })();
+    return;
+  }
   // Stripe webhook
   if (req.url === '/webhook/stripe' && req.method === 'POST') {
     let body = '';
@@ -695,6 +843,7 @@ const server = createServer(async (req, res) => {
       try {
         const request = JSON.parse(body);
         let response;
+        let statusCode = 200;
         if (request.method === 'initialize') {
           response = {
@@ -702,7 +851,7 @@ const server = createServer(async (req, res) => {
             result: {
               protocolVersion: '2024-11-05',
               capabilities: { tools: {}, resources: {}, prompts: {} },
-              serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
+              serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
             }
           };
         } else if (request.method === 'notifications/initialized') {
@@ -727,16 +876,18 @@ const server = createServer(async (req, res) => {
               result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required — describe what you are about to send to the cloud model', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] }
             };
           } else {
-            const access = checkAccess(clientIp, apiKey);
+            const access = await checkAccess(clientIp, apiKey);
             if (!access.allowed) {
+              statusCode = 402;
               notifyGateHit('Local Model Suitability', clientIp, 'check_local_viability', getFreeTierCount(clientIp), PRO_UPGRADE_URL);
               response = {
                 jsonrpc: '2.0', id: request.id,
-                result: { content: [{ type: 'text', text: JSON.stringify({ error: `Free tier limit reached. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: `Inform user free tier quota is exhausted. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL }) }] }
+                result: { content: [{ type: 'text', text: JSON.stringify({ error: access.reason, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'Inform user free tier quota is exhausted. Get 500 calls for $20 at ' + PRO_UPGRADE_URL + ' -- calls never expire.', category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL, trial_extension: access.trial_extension }) }] }
               };
             } else {
               if (access.tier === 'free') incrementFreeTier(clientIp);
+              redisIncr(LIFETIME_CALLS_REDIS_KEY).catch(() => {});
               logCall('check_local_viability', access.tier, clientIp);
               appendSessionLog(clientIp, 'check_local_viability').catch((e) => console.error('[SessionLog] appendSessionLog failed:', e));
@@ -778,7 +929,7 @@ const server = createServer(async (req, res) => {
           response = { jsonrpc: '2.0', id: request.id, error: { code: -32601, message: 'Method not found: ' + request.method } };
         }
-        res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
+        res.writeHead(statusCode, { ...cors, 'Content-Type': 'application/json' });
         res.end(JSON.stringify(response));
       } catch(e) {
         res.writeHead(400, { ...cors, 'Content-Type': 'application/json' });
@@ -807,7 +958,7 @@ function setupStdio() {
       try { req = JSON.parse(line); } catch(e) { return; }
       let response;
       if (req.method === 'initialize') {
-        response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
+        response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
       } else if (req.method === 'notifications/initialized') {
         return;
       } else if (req.method === 'tools/list') {
@@ -847,6 +998,7 @@ const PORT = process.env.PORT || 3000;
 server.listen(PORT, async () => {
   await loadApiKeysFromRedis();
   await loadFreeTierFromRedis();
+  await initUptimeTracking();
   console.log(`[lms] Local Model Suitability MCP v${VERSION} running on port ${PORT}`);
   console.log(`[lms] Tool: check_local_viability — cloud is expensive, local is the default`);
 });