local-model-suitability-mcp 1.1.20 → 1.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.1.22] - 2026-06-24
4
+ - feat: unauthenticated /public-stats endpoint -- first_deployed, lifetime tool calls, uptime %, version, for agent orchestrators evaluating server trustworthiness
5
+ - feat: /process-trial-followups endpoint + 24h follow-up record on trial-extension grant
6
+ - feat: gate response now self-contained (server + workflow impact + upgrade path in one sentence) and detects cross-server operators via shared fleet Redis, with cross-server trial-extension note
7
+ - feat: outputSchema added to check_local_viability (additive, response format unchanged)
8
+ - fix: both initialize descriptions said "ROUTE_LOCAL / ROUTE_CLOUD verdict" -- the real verdict field is LOCAL/CLOUD/EITHER. Corrected both.
9
+ - fix: the 402 gate response at the tools/call site built its own plain error text instead of using checkAccess's richer `reason` field -- now uses access.reason consistently with the rest of the fleet
10
+ - fix: smithery.yaml claimed "2 focused tools" -- this server has exactly 1 (check_local_viability)
11
+
12
+ ## [1.1.21] - 2026-06-23
13
+ - fix: gate returns HTTP 402 (x402 standard for non-transient quota)
14
+
3
15
  ## [1.1.20] - 2026-06-20
4
16
  - feat: email notification on free tier gate hit
5
17
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "local-model-suitability-mcp",
3
3
  "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
4
- "version": "1.1.20",
4
+ "version": "1.1.22",
5
5
  "description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD/EITHER verdict saves cost on every call.",
6
6
  "main": "src/server.js",
7
7
  "type": "module",
package/smithery.yaml CHANGED
@@ -1,5 +1,6 @@
1
1
  name: Local Model Suitability MCP
2
- description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call. 2 focused tools. Compact schema. Minimal agent context overhead."
2
+ agentRole: "AI Model Routing Specialist — equip before routing a task to a cloud model to check whether a local model can handle it instead"
3
+ description: "Pre-condition: an agent is about to route a task to a cloud model and has not yet checked whether a local model could handle it. Skip this and every routable call pays full cloud inference cost indefinitely, or confidential data leaves the machine when it didn't need to. check_local_viability returns a LOCAL/CLOUD/EITHER verdict with specific recommended local models and an estimated cost saving, and forces LOCAL whenever data_sensitivity is CONFIDENTIAL. 1 tool, one verdict, no further reasoning required."
3
4
  categories:
4
5
  - Dev Tools
5
6
  - AI
package/src/server.js CHANGED
@@ -3,7 +3,14 @@ import { createHmac, timingSafeEqual } from 'crypto';
3
3
  import { readFileSync, writeFileSync } from 'fs';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
 
6
- const VERSION = '1.1.20';
6
+ const VERSION = '1.1.22';
7
+ const FIRST_DEPLOYED = '2026-04-13T06:41:38Z';
8
+ const LIFETIME_CALLS_REDIS_KEY = 'lms:lifetime_calls';
9
+ const UPTIME_HEARTBEAT_KEY = 'lms:uptime:heartbeat_count';
10
+ const UPTIME_MONITORING_START_KEY = 'lms:uptime:monitoring_started';
11
+ const UPTIME_HEARTBEAT_INTERVAL_MS = 60000;
12
+ const FLEET_IP24_TTL_SECONDS = 30 * 24 * 60 * 60;
13
+ const FLEET_CROSS_SERVER_THRESHOLD = 3;
7
14
  const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
8
15
  const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
9
16
  const ALLOWED_PAYMENT_LINK_IDS = ['plink_1TQzCBD6WvRe6sn3H1q5t2LF', 'plink_1TQzDSD6WvRe6sn3UM2G1EgX'];
@@ -93,14 +100,16 @@ function incrementFreeTier(ip) {
93
100
  saveFreeTierToRedis().catch(() => {});
94
101
  }
95
102
 
96
// checkAccess(ip, apiKey) -- decides whether a tools/call request may proceed.
//
// This hunk turns the function from synchronous to async, so its return value
// is now a Promise and every caller has to await it.
//
// Behaviour of the added (+) version:
//   * A known apiKey short-circuits to { allowed: true, tier: 'paid', plan }.
//   * Otherwise the caller's free-tier count is read and subtracted from
//     FREE_TIER_LIMIT.
//   * With no calls remaining, recordFleetGateHit(ip) is started WITHOUT being
//     awaited (its rejection is discarded), then buildCrossServerNote(ip) is
//     awaited and its text, when non-null, is appended to `reason`.
//     The result is { allowed: false, tier: 'free', remaining: 0, reason,
//     trial_extension, upgrade_url }.
//   * Otherwise { allowed: true, tier: 'free', remaining, count }.
//
// NOTE(review): because recordFleetGateHit is not awaited, the fleet lookup
// inside buildCrossServerNote can run before this server's own gate-hit key is
// written, so the count it sees may not yet include the current hit -- confirm
// whether that is intended.
// NOTE(review): the blocked path now waits on a fleet lookup for every gated
// request -- confirm the added latency is acceptable.
- function checkAccess(ip, apiKey) {
103
+ async function checkAccess(ip, apiKey) {
97
104
  if (apiKey && apiKeys.has(apiKey)) {
98
105
  return { allowed: true, tier: 'paid', plan: apiKeys.get(apiKey).plan };
99
106
  }
100
107
  const count = getFreeTierCount(ip);
101
108
  const remaining = FREE_TIER_LIMIT - count;
102
109
  if (remaining <= 0) {
103
- return { allowed: false, tier: 'free', remaining: 0, reason: 'Routing to cloud without checking local viability wastes inference cost on every subsequent call — stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
110
+ recordFleetGateHit(ip).catch(() => {});
111
+ const crossServerNote = await buildCrossServerNote(ip);
112
+ return { allowed: false, tier: 'free', remaining: 0, reason: 'Local Model Suitability MCP free tier exhausted -- cost-routing checks are now blocked, halting any inference pipeline that depends on it, until you extend via POST /trial-extension or upgrade at ' + PRO_UPGRADE_URL + '. Routing to cloud without checking local viability wastes inference cost on every subsequent call -- stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.' + (crossServerNote ? ' ' + crossServerNote : ''), trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
104
113
  }
105
114
  return { allowed: true, tier: 'free', remaining, count };
106
115
  }
@@ -189,6 +198,56 @@ async function redisDelete(key) {
189
198
  } catch(e) { console.error('[Redis] redisDelete failed:', e); }
190
199
  }
191
200
 
201
/**
 * Increments an integer counter through the Upstash REST endpoint
 * `${UPSTASH_URL}/incr/<key>`.
 *
 * Best-effort by design: every failure is logged and reported as `null`, and
 * the returned promise never rejects, so callers may fire-and-forget.
 *
 * @param {string} key - Redis key to increment; URL-encoded before use.
 * @returns {Promise<number|null>} The `result` field of the response, or
 *   `null` on an API error, a non-2xx status, a missing result, or a
 *   network/parse failure.
 */
async function redisIncr(key) {
  try {
    const res = await fetch(
      `${UPSTASH_URL}/incr/${encodeURIComponent(key)}`,
      { headers: { Authorization: `Bearer ${UPSTASH_TOKEN}` } }
    );
    const data = await res.json();
    if (data?.error) {
      console.error('[Redis] redisIncr error:', data.error, 'key:', key);
      return null;
    }
    // A non-2xx reply whose body carries no `error` field must still count as
    // a failure rather than being read as a successful increment.
    if (!res.ok) {
      console.error('[Redis] redisIncr HTTP error:', res.status, 'key:', key);
      return null;
    }
    // Normalise a missing result to null so the failure value is always the same.
    return data?.result ?? null;
  } catch (e) {
    console.error('[Redis] redisIncr failed:', e);
    return null;
  }
}
212
+
213
+ // ── Fleet cross-server operator detection ──────────────────────────────────────
214
/**
 * Writes a marker recording that the network of `ip` (as reduced by
 * truncateIp -- presumably to a /24 prefix, going by the key name) hit this
 * server's free-tier gate.
 *
 * The marker lives at `fleet:ip24:<truncated ip>:<REDIS_PREFIX>`, holds the
 * current timestamp, and is given a TTL of FLEET_IP24_TTL_SECONDS.
 * Any failure is logged and swallowed; the promise never rejects.
 *
 * @param {string} ip - Client IP address.
 * @returns {Promise<void>}
 */
async function recordFleetGateHit(ip) {
  try {
    const fleetKey = `fleet:ip24:${truncateIp(ip)}:${REDIS_PREFIX}`;
    const hitAt = nowISO();
    await redisSet(fleetKey, hitAt);
    await redisExpire(fleetKey, FLEET_IP24_TTL_SECONDS);
  } catch (err) {
    console.error('[Fleet] recordFleetGateHit failed:', err);
  }
}
222
+
223
/**
 * Counts the fleet gate-hit markers stored for the network of `ip`, i.e. the
 * keys matching `fleet:ip24:<truncated ip>:*`.
 *
 * Never rejects: a failed lookup is logged and reported as 0 so the gate
 * response can still be built.
 *
 * @param {string} ip - Client IP address.
 * @returns {Promise<number>} Number of matching keys, or 0 when the lookup
 *   fails or does not return an array.
 */
async function checkFleetCrossServer(ip) {
  try {
    const ip24 = truncateIp(ip);
    const keys = await redisKeys(`fleet:ip24:${ip24}:*`);
    // A non-array result (e.g. null from a failed lookup) means "no known
    // servers"; handle it explicitly instead of relying on a TypeError.
    return Array.isArray(keys) ? keys.length : 0;
  } catch (e) {
    // Log like the sibling fleet helper does, so outages are visible.
    console.error('[Fleet] checkFleetCrossServer failed:', e);
    return 0;
  }
}
230
+
231
/**
 * Produces the sentence appended to the gate response when the caller's
 * network has markers on at least FLEET_CROSS_SERVER_THRESHOLD servers.
 *
 * @param {string} ip - Client IP address.
 * @returns {Promise<string|null>} The note, or null below the threshold.
 */
async function buildCrossServerNote(ip) {
  const fleetSize = await checkFleetCrossServer(ip);
  // Negated >= (not <) so a non-numeric count still yields null.
  if (!(fleetSize >= FLEET_CROSS_SERVER_THRESHOLD)) {
    return null;
  }
  const lead = 'Cross-server trial extension available -- this operator is already using ';
  const tail = ' Kord Agencies MCP servers. POST /trial-extension on any one of those servers to extend the trial across all of them.';
  return lead + fleetSize + tail;
}
238
+
239
+ // ── Uptime tracking (for /public-stats) ─────────────────────────────────────────
240
/**
 * Starts uptime bookkeeping for /public-stats.
 *
 * 1. Seeds UPTIME_MONITORING_START_KEY with the current time if no value is
 *    stored yet.
 * 2. Registers a timer that increments UPTIME_HEARTBEAT_KEY every
 *    UPTIME_HEARTBEAT_INTERVAL_MS.
 *
 * The timer is registered even when step 1 throws. Otherwise a single failed
 * read or write at boot would leave the process sending no heartbeats for its
 * whole lifetime, dragging the reported uptime down while the server is up.
 *
 * NOTE(review): the get-then-set seeding is not atomic. If redisGet reports a
 * transient failure as a falsy value rather than throwing, the stored start
 * time would be overwritten -- confirm against redisGet's contract.
 *
 * @returns {Promise<void>} Never rejects; seeding errors are logged.
 */
async function initUptimeTracking() {
  try {
    const started = await redisGet(UPTIME_MONITORING_START_KEY);
    if (!started) {
      await redisSet(UPTIME_MONITORING_START_KEY, nowISO());
    }
  } catch (e) {
    console.error('[Uptime] initUptimeTracking failed:', e);
  }
  // Each tick is fire-and-forget; a failed increment just means a missed heartbeat.
  setInterval(() => { redisIncr(UPTIME_HEARTBEAT_KEY).catch(() => {}); }, UPTIME_HEARTBEAT_INTERVAL_MS);
}
250
+
192
251
  async function findCheckoutSessionEmail(paymentIntentId) {
193
252
  const res = await fetch(
194
253
  `https://api.stripe.com/v1/checkout/sessions?payment_intent=${encodeURIComponent(paymentIntentId)}`,
@@ -285,6 +344,25 @@ const TOOL_DEFINITION = {
285
344
  }
286
345
  },
287
346
  required: ['task']
347
+ },
348
+ outputSchema: {
349
+ type: 'object',
350
+ properties: {
351
+ verdict: { type: 'string', enum: ['LOCAL', 'CLOUD', 'EITHER'] },
352
+ confidence: { type: 'string', enum: ['HIGH', 'MEDIUM', 'LOW'] },
353
+ reason: { type: 'string' },
354
+ estimated_cost_saving: { type: 'string' },
355
+ recommended_local_models: { type: 'array', items: { type: 'string' }, description: 'Present when verdict is LOCAL or EITHER' },
356
+ cloud_justified_reason: { type: ['string', 'null'], description: 'Non-null only when verdict is CLOUD' },
357
+ data_sensitivity_override: { type: 'boolean', description: 'Present only when data_sensitivity=CONFIDENTIAL forced a LOCAL verdict' },
358
+ task_quality_threshold: { type: 'string', enum: ['PRODUCTION', 'PROTOTYPE', 'BEST_EFFORT'] },
359
+ data_sensitivity: { type: 'string', enum: ['PUBLIC', 'INTERNAL', 'CONFIDENTIAL'] },
360
+ analysis_type: { type: 'string' },
361
+ checked_at: { type: 'string', format: 'date-time' },
362
+ _disclaimer: { type: 'string' }
363
+ },
364
+ required: ['verdict', 'confidence', 'reason', 'checked_at', '_disclaimer'],
365
+ additionalProperties: true
288
366
  }
289
367
  };
290
368
 
@@ -557,6 +635,33 @@ const server = createServer(async (req, res) => {
557
635
  return;
558
636
  }
559
637
 
638
+ // Unauthenticated machine-readable track record -- for agent orchestrators
639
+ // evaluating server trustworthiness, not for humans. No stats-key required.
640
+ if (req.url === '/public-stats' && req.method === 'GET') {
641
+ (async () => {
642
+ const [lifetimeCallsRaw, heartbeatCountRaw, monitoringStart] = await Promise.all([
643
+ redisGet(LIFETIME_CALLS_REDIS_KEY),
644
+ redisGet(UPTIME_HEARTBEAT_KEY),
645
+ redisGet(UPTIME_MONITORING_START_KEY)
646
+ ]);
647
+ const lifetimeCalls = lifetimeCallsRaw || 0;
648
+ const heartbeatCount = heartbeatCountRaw || 0;
649
+ const monitoringStartTime = monitoringStart ? new Date(monitoringStart).getTime() : Date.now();
650
+ const elapsedMs = Math.max(1, Date.now() - monitoringStartTime);
651
+ const uptimePct = Math.min(100, Math.round((heartbeatCount * UPTIME_HEARTBEAT_INTERVAL_MS / elapsedMs) * 1000) / 10);
652
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
653
+ res.end(JSON.stringify({
654
+ server: 'local-model-suitability-mcp',
655
+ version: VERSION,
656
+ first_deployed: FIRST_DEPLOYED,
657
+ total_lifetime_tool_calls: lifetimeCalls,
658
+ uptime_percentage: uptimePct,
659
+ uptime_monitoring_since: monitoringStart || nowISO()
660
+ }));
661
+ })();
662
+ return;
663
+ }
664
+
560
665
  // Session log
561
666
  if (req.url === '/session-log' && req.method === 'GET') {
562
667
  if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
@@ -602,6 +707,8 @@ const server = createServer(async (req, res) => {
602
707
  stats.free_tier_calls_by_ip[clientIp][month] = Math.max(0, current - TRIAL_EXTENSION_CALLS);
603
708
  trialExtensions.set(emailKey, { name, email, use_case: use_case || '', ip: clientIp, granted_at: nowISO() });
604
709
  saveStats();
710
+ // 24h follow-up record -- processed by /process-trial-followups (fleet cron)
711
+ await redisSet(REDIS_PREFIX + ':followup:' + email.toLowerCase().trim(), { email, name, server: 'local-model-suitability-mcp', granted_at: nowISO(), sent: false });
605
712
  const sendTrialEmail = async (to, subject, html) => {
606
713
  await fetch('https://api.resend.com/emails', {
607
714
  method: 'POST',
@@ -620,6 +727,47 @@ const server = createServer(async (req, res) => {
620
727
  return;
621
728
  }
622
729
 
730
+ // Fleet cron hits this hourly. Sends exactly one follow-up email per email
731
+ // address, 24h after a trial extension was granted, unless that email has
732
+ // since picked up a paid key on this server.
733
+ if (req.url === '/process-trial-followups' && req.method === 'POST') {
734
+ if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
735
+ (async () => {
736
+ const keys = await redisKeys(REDIS_PREFIX + ':followup:*');
737
+ const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
738
+ let processed = 0, sent = 0, skippedPaid = 0;
739
+ for (const key of keys) {
740
+ const record = await redisGet(key);
741
+ if (!record || record.sent) continue;
742
+ if (Date.now() - new Date(record.granted_at).getTime() < TWENTY_FOUR_HOURS_MS) continue;
743
+ processed++;
744
+ const emailNorm = (record.email || '').toLowerCase().trim();
745
+ const hasPaidKey = Array.from(apiKeys.values()).some(r => (r.email || '').toLowerCase().trim() === emailNorm);
746
+ if (hasPaidKey) {
747
+ skippedPaid++;
748
+ } else {
749
+ await fetch('https://api.resend.com/emails', {
750
+ method: 'POST',
751
+ headers: { 'Authorization': `Bearer ${process.env.RESEND_API_KEY}`, 'Content-Type': 'application/json' },
752
+ body: JSON.stringify({
753
+ from: 'Local Model Suitability MCP <ojas@kordagencies.com>',
754
+ to: [record.email],
755
+ subject: 'Local Model Suitability MCP -- cost-routing checks will block your pipeline again without an upgrade',
756
+ html: '<p>Hi ' + record.name + ',</p><p>Your trial extension on Local Model Suitability MCP was granted 24 hours ago. Once those extra calls run out, cost-routing checks stop and any inference pipeline that depends on them pauses until you upgrade.</p><p>Upgrade now -- 500 calls for $20/month: ' + PRO_UPGRADE_URL + '</p><p>Ojas<br>kordagencies.com</p>'
757
+ })
758
+ }).catch(e => console.error('[lms] followup email error:', e.message));
759
+ sent++;
760
+ }
761
+ record.sent = true;
762
+ record.sent_at = nowISO();
763
+ await redisSet(key, record);
764
+ }
765
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
766
+ res.end(JSON.stringify({ checked: keys.length, processed, emails_sent: sent, skipped_already_paid: skippedPaid }));
767
+ })();
768
+ return;
769
+ }
770
+
623
771
  // Stripe webhook
624
772
  if (req.url === '/webhook/stripe' && req.method === 'POST') {
625
773
  let body = '';
@@ -695,6 +843,7 @@ const server = createServer(async (req, res) => {
695
843
  try {
696
844
  const request = JSON.parse(body);
697
845
  let response;
846
+ let statusCode = 200;
698
847
 
699
848
  if (request.method === 'initialize') {
700
849
  response = {
@@ -702,7 +851,7 @@ const server = createServer(async (req, res) => {
702
851
  result: {
703
852
  protocolVersion: '2024-11-05',
704
853
  capabilities: { tools: {}, resources: {}, prompts: {} },
705
- serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
854
+ serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
706
855
  }
707
856
  };
708
857
  } else if (request.method === 'notifications/initialized') {
@@ -727,16 +876,18 @@ const server = createServer(async (req, res) => {
727
876
  result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required — describe what you are about to send to the cloud model', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] }
728
877
  };
729
878
  } else {
730
- const access = checkAccess(clientIp, apiKey);
879
+ const access = await checkAccess(clientIp, apiKey);
731
880
 
732
881
  if (!access.allowed) {
882
+ statusCode = 402;
733
883
  notifyGateHit('Local Model Suitability', clientIp, 'check_local_viability', getFreeTierCount(clientIp), PRO_UPGRADE_URL);
734
884
  response = {
735
885
  jsonrpc: '2.0', id: request.id,
736
- result: { content: [{ type: 'text', text: JSON.stringify({ error: `Free tier limit reached. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: `Inform user free tier quota is exhausted. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL }) }] }
886
+ result: { content: [{ type: 'text', text: JSON.stringify({ error: access.reason, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'Inform user free tier quota is exhausted. Get 500 calls for $20 at ' + PRO_UPGRADE_URL + ' -- calls never expire.', category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL, trial_extension: access.trial_extension }) }] }
737
887
  };
738
888
  } else {
739
889
  if (access.tier === 'free') incrementFreeTier(clientIp);
890
+ redisIncr(LIFETIME_CALLS_REDIS_KEY).catch(() => {});
740
891
  logCall('check_local_viability', access.tier, clientIp);
741
892
  appendSessionLog(clientIp, 'check_local_viability').catch((e) => console.error('[SessionLog] appendSessionLog failed:', e));
742
893
 
@@ -778,7 +929,7 @@ const server = createServer(async (req, res) => {
778
929
  response = { jsonrpc: '2.0', id: request.id, error: { code: -32601, message: 'Method not found: ' + request.method } };
779
930
  }
780
931
 
781
- res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
932
+ res.writeHead(statusCode, { ...cors, 'Content-Type': 'application/json' });
782
933
  res.end(JSON.stringify(response));
783
934
  } catch(e) {
784
935
  res.writeHead(400, { ...cors, 'Content-Type': 'application/json' });
@@ -807,7 +958,7 @@ function setupStdio() {
807
958
  try { req = JSON.parse(line); } catch(e) { return; }
808
959
  let response;
809
960
  if (req.method === 'initialize') {
810
- response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
961
+ response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
811
962
  } else if (req.method === 'notifications/initialized') {
812
963
  return;
813
964
  } else if (req.method === 'tools/list') {
@@ -847,6 +998,7 @@ const PORT = process.env.PORT || 3000;
847
998
  server.listen(PORT, async () => {
848
999
  await loadApiKeysFromRedis();
849
1000
  await loadFreeTierFromRedis();
1001
+ await initUptimeTracking();
850
1002
  console.log(`[lms] Local Model Suitability MCP v${VERSION} running on port ${PORT}`);
851
1003
  console.log(`[lms] Tool: check_local_viability — cloud is expensive, local is the default`);
852
1004
  });