local-model-suitability-mcp 1.1.21 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.1.22] - 2026-06-24
4
+ - feat: unauthenticated /public-stats endpoint -- first_deployed, lifetime tool calls, uptime %, version, for agent orchestrators evaluating server trustworthiness
5
+ - feat: /process-trial-followups endpoint + 24h follow-up record on trial-extension grant
6
+ - feat: gate response now self-contained (server + workflow impact + upgrade path in one sentence) and detects cross-server operators via shared fleet Redis, with cross-server trial-extension note
7
+ - feat: outputSchema added to check_local_viability (additive, response format unchanged)
8
+ - fix: both initialize descriptions said "ROUTE_LOCAL / ROUTE_CLOUD verdict" -- the real verdict field is LOCAL/CLOUD/EITHER. Corrected both.
9
+ - fix: the 402 gate response at the tools/call site built its own plain error text instead of using checkAccess's richer `reason` field -- now uses access.reason consistently with the rest of the fleet
10
+ - fix: smithery.yaml claimed "2 focused tools" -- this server has exactly 1 (check_local_viability)
11
+
3
12
  ## [1.1.21] - 2026-06-23
4
13
  - fix: gate returns HTTP 402 (x402 standard for non-transient quota)
5
14
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "local-model-suitability-mcp",
3
3
  "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
4
- "version": "1.1.21",
4
+ "version": "1.1.22",
5
5
  "description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD verdict saves cost on every call.",
6
6
  "main": "src/server.js",
7
7
  "type": "module",
package/smithery.yaml CHANGED
@@ -1,6 +1,6 @@
1
1
  name: Local Model Suitability MCP
2
2
  agentRole: "AI Model Routing Specialist — equip before routing a task to a cloud model to check whether a local model can handle it instead"
3
- description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call. 2 focused tools. Compact schema. Minimal agent context overhead."
3
+ description: "Pre-condition: an agent is about to route a task to a cloud model and has not yet checked whether a local model could handle it. Skip this and every routable call pays full cloud inference cost indefinitely, or confidential data leaves the machine when it didn't need to. check_local_viability returns a LOCAL/CLOUD/EITHER verdict with specific recommended local models and an estimated cost saving, and forces LOCAL whenever data_sensitivity is CONFIDENTIAL. 1 tool, one verdict, no further reasoning required."
4
4
  categories:
5
5
  - Dev Tools
6
6
  - AI
package/src/server.js CHANGED
@@ -3,7 +3,14 @@ import { createHmac, timingSafeEqual } from 'crypto';
3
3
  import { readFileSync, writeFileSync } from 'fs';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
 
6
- const VERSION = '1.1.21';
6
+ const VERSION = '1.1.22';
7
+ const FIRST_DEPLOYED = '2026-04-13T06:41:38Z';
8
+ const LIFETIME_CALLS_REDIS_KEY = 'lms:lifetime_calls';
9
+ const UPTIME_HEARTBEAT_KEY = 'lms:uptime:heartbeat_count';
10
+ const UPTIME_MONITORING_START_KEY = 'lms:uptime:monitoring_started';
11
+ const UPTIME_HEARTBEAT_INTERVAL_MS = 60000;
12
+ const FLEET_IP24_TTL_SECONDS = 30 * 24 * 60 * 60;
13
+ const FLEET_CROSS_SERVER_THRESHOLD = 3;
7
14
  const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
8
15
  const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
9
16
  const ALLOWED_PAYMENT_LINK_IDS = ['plink_1TQzCBD6WvRe6sn3H1q5t2LF', 'plink_1TQzDSD6WvRe6sn3UM2G1EgX'];
@@ -93,14 +100,16 @@ function incrementFreeTier(ip) {
93
100
  saveFreeTierToRedis().catch(() => {});
94
101
  }
95
102
 
96
/**
 * Decide whether a caller may run a tool call.
 *
 * A known API key is always allowed (paid tier). Otherwise the caller's
 * free-tier usage is compared against FREE_TIER_LIMIT. When the quota is
 * exhausted the gate hit is recorded in the shared fleet store (fire and
 * forget) and the denial carries a self-contained `reason`, optionally
 * suffixed with the cross-server trial-extension note.
 *
 * @param {string} ip - Client IP used for free-tier accounting.
 * @param {string} [apiKey] - API key presented by the caller, if any.
 * @returns {Promise<object>} `{ allowed, tier, ... }` access decision.
 */
async function checkAccess(ip, apiKey) {
  const isPaidCaller = apiKey && apiKeys.has(apiKey);
  if (isPaidCaller) {
    const { plan } = apiKeys.get(apiKey);
    return { allowed: true, tier: 'paid', plan };
  }

  const used = getFreeTierCount(ip);
  const left = FREE_TIER_LIMIT - used;

  if (left <= 0) {
    // Best-effort bookkeeping: a failed fleet write must never block the gate response.
    recordFleetGateHit(ip).catch(() => {});
    const fleetNote = await buildCrossServerNote(ip);
    const noteSuffix = fleetNote ? ' ' + fleetNote : '';
    const gateReason = [
      'Local Model Suitability MCP free tier exhausted -- cost-routing checks are now blocked, halting any inference pipeline that depends on it, until you extend via POST /trial-extension or upgrade at ',
      PRO_UPGRADE_URL,
      '. Routing to cloud without checking local viability wastes inference cost on every subsequent call -- stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ',
      PRO_UPGRADE_URL,
      '. (3) Pay-as-you-go: ',
      ENTERPRISE_UPGRADE_URL,
      '.',
      noteSuffix
    ].join('');
    return {
      allowed: false,
      tier: 'free',
      remaining: 0,
      reason: gateReason,
      trial_extension: {
        endpoint: '/trial-extension',
        method: 'POST',
        body: { name: 'string', email: 'string', use_case: 'string' }
      },
      upgrade_url: PRO_UPGRADE_URL
    };
  }

  return { allowed: true, tier: 'free', remaining: left, count: used };
}
@@ -189,6 +198,56 @@ async function redisDelete(key) {
189
198
  } catch(e) { console.error('[Redis] redisDelete failed:', e); }
190
199
  }
191
200
 
201
/**
 * Increment a counter through the Upstash REST endpoint.
 *
 * Never rejects: an error reported by Upstash, a network failure or an
 * unparseable response is logged and reported to the caller as `null`.
 *
 * @param {string} key - Redis key to increment (URL-encoded before sending).
 * @returns {Promise<*>} The `result` field of the response, or `null` on failure.
 */
async function redisIncr(key) {
  try {
    const endpoint = `${UPSTASH_URL}/incr/${encodeURIComponent(key)}`;
    const requestInit = { headers: { Authorization: `Bearer ${UPSTASH_TOKEN}` } };
    const reply = await fetch(endpoint, requestInit);
    const payload = await reply.json();
    if (!payload.error) {
      return payload.result;
    }
    console.error('[Redis] redisIncr error:', payload.error, 'key:', key);
    return null;
  } catch (err) {
    console.error('[Redis] redisIncr failed:', err);
    return null;
  }
}
212
+
213
+ // ── Fleet cross-server operator detection ──────────────────────────────────────
214
/**
 * Record that this server gated a caller, keyed by the truncated IP and this
 * server's REDIS_PREFIX, so other servers sharing the store can see it.
 * The key is written with the current timestamp and then given a TTL of
 * FLEET_IP24_TTL_SECONDS. Failures are logged, never thrown.
 *
 * @param {string} ip - Client IP that hit the free-tier gate.
 * @returns {Promise<void>}
 */
async function recordFleetGateHit(ip) {
  try {
    const fleetKey = `fleet:ip24:${truncateIp(ip)}:${REDIS_PREFIX}`;
    const seenAt = nowISO();
    await redisSet(fleetKey, seenAt);
    await redisExpire(fleetKey, FLEET_IP24_TTL_SECONDS);
  } catch (err) {
    console.error('[Fleet] recordFleetGateHit failed:', err);
  }
}
222
+
223
/**
 * Count how many fleet servers have recorded a gate hit for the caller's
 * truncated IP, by listing keys matching `fleet:ip24:<ip24>:*`.
 *
 * Never rejects. A lookup failure is logged (matching the other Fleet/Redis
 * helpers instead of being swallowed silently) and reported as 0, and any
 * non-array result from the key listing is also treated as 0 rather than
 * being measured by an unrelated `.length`.
 *
 * @param {string} ip - Client IP to look up.
 * @returns {Promise<number>} Number of matching fleet keys, 0 on failure.
 */
async function checkFleetCrossServer(ip) {
  try {
    const ip24 = truncateIp(ip);
    const keys = await redisKeys(`fleet:ip24:${ip24}:*`);
    return Array.isArray(keys) ? keys.length : 0;
  } catch (e) {
    console.error('[Fleet] checkFleetCrossServer failed:', e);
    return 0;
  }
}
230
+
231
/**
 * Build the optional cross-server trial-extension sentence for a gated caller.
 *
 * @param {string} ip - Client IP that hit the gate.
 * @returns {Promise<string|null>} The note when the fleet count reaches
 *   FLEET_CROSS_SERVER_THRESHOLD, otherwise `null`.
 */
async function buildCrossServerNote(ip) {
  const fleetHits = await checkFleetCrossServer(ip);
  const qualifies = fleetHits >= FLEET_CROSS_SERVER_THRESHOLD;
  if (!qualifies) {
    return null;
  }
  const lead = 'Cross-server trial extension available -- this operator is already using ';
  const tail = ' Kord Agencies MCP servers. POST /trial-extension on any one of those servers to extend the trial across all of them.';
  return lead + fleetHits + tail;
}
238
+
239
+ // ── Uptime tracking (for /public-stats) ─────────────────────────────────────────
240
/**
 * Start uptime bookkeeping used by /public-stats.
 *
 * Stores the monitoring start timestamp once (only when no value is stored
 * yet) and then schedules a heartbeat that increments UPTIME_HEARTBEAT_KEY
 * every UPTIME_HEARTBEAT_INTERVAL_MS. Failures are logged, never thrown.
 *
 * @returns {Promise<void>}
 */
async function initUptimeTracking() {
  try {
    const existingStart = await redisGet(UPTIME_MONITORING_START_KEY);
    if (!existingStart) {
      await redisSet(UPTIME_MONITORING_START_KEY, nowISO());
    }
    const beat = () => {
      // Heartbeat is best-effort; a missed increment only lowers the reported uptime.
      redisIncr(UPTIME_HEARTBEAT_KEY).catch(() => {});
    };
    setInterval(beat, UPTIME_HEARTBEAT_INTERVAL_MS);
  } catch (err) {
    console.error('[Uptime] initUptimeTracking failed:', err);
  }
}
250
+
192
251
  async function findCheckoutSessionEmail(paymentIntentId) {
193
252
  const res = await fetch(
194
253
  `https://api.stripe.com/v1/checkout/sessions?payment_intent=${encodeURIComponent(paymentIntentId)}`,
@@ -285,6 +344,25 @@ const TOOL_DEFINITION = {
285
344
  }
286
345
  },
287
346
  required: ['task']
347
+ },
348
+ outputSchema: {
349
+ type: 'object',
350
+ properties: {
351
+ verdict: { type: 'string', enum: ['LOCAL', 'CLOUD', 'EITHER'] },
352
+ confidence: { type: 'string', enum: ['HIGH', 'MEDIUM', 'LOW'] },
353
+ reason: { type: 'string' },
354
+ estimated_cost_saving: { type: 'string' },
355
+ recommended_local_models: { type: 'array', items: { type: 'string' }, description: 'Present when verdict is LOCAL or EITHER' },
356
+ cloud_justified_reason: { type: ['string', 'null'], description: 'Non-null only when verdict is CLOUD' },
357
+ data_sensitivity_override: { type: 'boolean', description: 'Present only when data_sensitivity=CONFIDENTIAL forced a LOCAL verdict' },
358
+ task_quality_threshold: { type: 'string', enum: ['PRODUCTION', 'PROTOTYPE', 'BEST_EFFORT'] },
359
+ data_sensitivity: { type: 'string', enum: ['PUBLIC', 'INTERNAL', 'CONFIDENTIAL'] },
360
+ analysis_type: { type: 'string' },
361
+ checked_at: { type: 'string', format: 'date-time' },
362
+ _disclaimer: { type: 'string' }
363
+ },
364
+ required: ['verdict', 'confidence', 'reason', 'checked_at', '_disclaimer'],
365
+ additionalProperties: true
288
366
  }
289
367
  };
290
368
 
@@ -557,6 +635,33 @@ const server = createServer(async (req, res) => {
557
635
  return;
558
636
  }
559
637
 
638
+ // Unauthenticated machine-readable track record -- for agent orchestrators
639
+ // evaluating server trustworthiness, not for humans. No stats-key required.
640
+ if (req.url === '/public-stats' && req.method === 'GET') {
641
+ (async () => {
642
+ const [lifetimeCallsRaw, heartbeatCountRaw, monitoringStart] = await Promise.all([
643
+ redisGet(LIFETIME_CALLS_REDIS_KEY),
644
+ redisGet(UPTIME_HEARTBEAT_KEY),
645
+ redisGet(UPTIME_MONITORING_START_KEY)
646
+ ]);
647
+ const lifetimeCalls = lifetimeCallsRaw || 0;
648
+ const heartbeatCount = heartbeatCountRaw || 0;
649
+ const monitoringStartTime = monitoringStart ? new Date(monitoringStart).getTime() : Date.now();
650
+ const elapsedMs = Math.max(1, Date.now() - monitoringStartTime);
651
+ const uptimePct = Math.min(100, Math.round((heartbeatCount * UPTIME_HEARTBEAT_INTERVAL_MS / elapsedMs) * 1000) / 10);
652
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
653
+ res.end(JSON.stringify({
654
+ server: 'local-model-suitability-mcp',
655
+ version: VERSION,
656
+ first_deployed: FIRST_DEPLOYED,
657
+ total_lifetime_tool_calls: lifetimeCalls,
658
+ uptime_percentage: uptimePct,
659
+ uptime_monitoring_since: monitoringStart || nowISO()
660
+ }));
661
+ })();
662
+ return;
663
+ }
664
+
560
665
  // Session log
561
666
  if (req.url === '/session-log' && req.method === 'GET') {
562
667
  if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
@@ -602,6 +707,8 @@ const server = createServer(async (req, res) => {
602
707
  stats.free_tier_calls_by_ip[clientIp][month] = Math.max(0, current - TRIAL_EXTENSION_CALLS);
603
708
  trialExtensions.set(emailKey, { name, email, use_case: use_case || '', ip: clientIp, granted_at: nowISO() });
604
709
  saveStats();
710
+ // 24h follow-up record -- processed by /process-trial-followups (fleet cron)
711
+ await redisSet(REDIS_PREFIX + ':followup:' + email.toLowerCase().trim(), { email, name, server: 'local-model-suitability-mcp', granted_at: nowISO(), sent: false });
605
712
  const sendTrialEmail = async (to, subject, html) => {
606
713
  await fetch('https://api.resend.com/emails', {
607
714
  method: 'POST',
@@ -620,6 +727,47 @@ const server = createServer(async (req, res) => {
620
727
  return;
621
728
  }
622
729
 
730
+ // Fleet cron hits this hourly. Sends exactly one follow-up email per email
731
+ // address, 24h after a trial extension was granted, unless that email has
732
+ // since picked up a paid key on this server.
733
+ if (req.url === '/process-trial-followups' && req.method === 'POST') {
734
+ if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
735
+ (async () => {
736
+ const keys = await redisKeys(REDIS_PREFIX + ':followup:*');
737
+ const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
738
+ let processed = 0, sent = 0, skippedPaid = 0;
739
+ for (const key of keys) {
740
+ const record = await redisGet(key);
741
+ if (!record || record.sent) continue;
742
+ if (Date.now() - new Date(record.granted_at).getTime() < TWENTY_FOUR_HOURS_MS) continue;
743
+ processed++;
744
+ const emailNorm = (record.email || '').toLowerCase().trim();
745
+ const hasPaidKey = Array.from(apiKeys.values()).some(r => (r.email || '').toLowerCase().trim() === emailNorm);
746
+ if (hasPaidKey) {
747
+ skippedPaid++;
748
+ } else {
749
+ await fetch('https://api.resend.com/emails', {
750
+ method: 'POST',
751
+ headers: { 'Authorization': `Bearer ${process.env.RESEND_API_KEY}`, 'Content-Type': 'application/json' },
752
+ body: JSON.stringify({
753
+ from: 'Local Model Suitability MCP <ojas@kordagencies.com>',
754
+ to: [record.email],
755
+ subject: 'Local Model Suitability MCP -- cost-routing checks will block your pipeline again without an upgrade',
756
+ html: '<p>Hi ' + record.name + ',</p><p>Your trial extension on Local Model Suitability MCP was granted 24 hours ago. Once those extra calls run out, cost-routing checks stop and any inference pipeline that depends on them pauses until you upgrade.</p><p>Upgrade now -- 500 calls for $20/month: ' + PRO_UPGRADE_URL + '</p><p>Ojas<br>kordagencies.com</p>'
757
+ })
758
+ }).catch(e => console.error('[lms] followup email error:', e.message));
759
+ sent++;
760
+ }
761
+ record.sent = true;
762
+ record.sent_at = nowISO();
763
+ await redisSet(key, record);
764
+ }
765
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
766
+ res.end(JSON.stringify({ checked: keys.length, processed, emails_sent: sent, skipped_already_paid: skippedPaid }));
767
+ })();
768
+ return;
769
+ }
770
+
623
771
  // Stripe webhook
624
772
  if (req.url === '/webhook/stripe' && req.method === 'POST') {
625
773
  let body = '';
@@ -703,7 +851,7 @@ const server = createServer(async (req, res) => {
703
851
  result: {
704
852
  protocolVersion: '2024-11-05',
705
853
  capabilities: { tools: {}, resources: {}, prompts: {} },
706
- serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
854
+ serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
707
855
  }
708
856
  };
709
857
  } else if (request.method === 'notifications/initialized') {
@@ -728,17 +876,18 @@ const server = createServer(async (req, res) => {
728
876
  result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required — describe what you are about to send to the cloud model', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] }
729
877
  };
730
878
  } else {
731
- const access = checkAccess(clientIp, apiKey);
879
+ const access = await checkAccess(clientIp, apiKey);
732
880
 
733
881
  if (!access.allowed) {
734
882
  statusCode = 402;
735
883
  notifyGateHit('Local Model Suitability', clientIp, 'check_local_viability', getFreeTierCount(clientIp), PRO_UPGRADE_URL);
736
884
  response = {
737
885
  jsonrpc: '2.0', id: request.id,
738
- result: { content: [{ type: 'text', text: JSON.stringify({ error: `Free tier limit reached. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: `Inform user free tier quota is exhausted. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL }) }] }
886
+ result: { content: [{ type: 'text', text: JSON.stringify({ error: access.reason, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'Inform user free tier quota is exhausted. Get 500 calls for $20 at ' + PRO_UPGRADE_URL + ' -- calls never expire.', category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL, trial_extension: access.trial_extension }) }] }
739
887
  };
740
888
  } else {
741
889
  if (access.tier === 'free') incrementFreeTier(clientIp);
890
+ redisIncr(LIFETIME_CALLS_REDIS_KEY).catch(() => {});
742
891
  logCall('check_local_viability', access.tier, clientIp);
743
892
  appendSessionLog(clientIp, 'check_local_viability').catch((e) => console.error('[SessionLog] appendSessionLog failed:', e));
744
893
 
@@ -809,7 +958,7 @@ function setupStdio() {
809
958
  try { req = JSON.parse(line); } catch(e) { return; }
810
959
  let response;
811
960
  if (req.method === 'initialize') {
812
- response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
961
+ response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
813
962
  } else if (req.method === 'notifications/initialized') {
814
963
  return;
815
964
  } else if (req.method === 'tools/list') {
@@ -849,6 +998,7 @@ const PORT = process.env.PORT || 3000;
849
998
// Start the HTTP listener. Once it is bound, warm state strictly in order
// (API keys, free-tier counters, uptime tracking) and then log the banner.
server.listen(PORT, async () => {
  const startupSteps = [loadApiKeysFromRedis, loadFreeTierFromRedis, initUptimeTracking];
  for (const runStep of startupSteps) {
    // Sequential on purpose: each step finishes before the next begins.
    await runStep();
  }
  const banner = [
    `[lms] Local Model Suitability MCP v${VERSION} running on port ${PORT}`,
    `[lms] Tool: check_local_viability — cloud is expensive, local is the default`
  ];
  for (const line of banner) {
    console.log(line);
  }
});