local-model-suitability-mcp 1.1.21 → 1.1.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.1.23] - 2026-06-25
4
+ - feat: calls_remaining field added to check_local_viability response -- on the HTTP POST transport it is "unlimited" for paid keys and the numeric free-tier headroom otherwise; on the stdio transport it is always "unlimited"
5
+ - feat: verdict_ttl field added (86400s/24h)
6
+ - feat: data_source_status field added (full/degraded) -- "degraded" when Anthropic responds but returns unparseable output and a fallback verdict is used; "full" otherwise, including the CONFIDENTIAL local-rule shortcut which never calls Anthropic
7
+
8
+ ## [1.1.22] - 2026-06-24
9
+ - feat: unauthenticated /public-stats endpoint -- first_deployed, lifetime tool calls, uptime %, version, for agent orchestrators evaluating server trustworthiness
10
+ - feat: /process-trial-followups endpoint + 24h follow-up record on trial-extension grant
11
+ - feat: gate response now self-contained (server + workflow impact + upgrade path in one sentence) and detects cross-server operators via shared fleet Redis, with cross-server trial-extension note
12
+ - feat: outputSchema added to check_local_viability (additive, response format unchanged)
13
+ - fix: both initialize descriptions said "ROUTE_LOCAL / ROUTE_CLOUD verdict" -- the real verdict field is LOCAL/CLOUD/EITHER. Corrected both.
14
+ - fix: the 402 gate response at the tools/call site built its own plain error text instead of using checkAccess's richer `reason` field -- now uses access.reason consistently with the rest of the fleet
15
+ - fix: smithery.yaml claimed "2 focused tools" -- this server has exactly 1 (check_local_viability)
16
+
3
17
  ## [1.1.21] - 2026-06-23
4
18
  - fix: gate returns HTTP 402 (x402 standard for non-transient quota)
5
19
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "local-model-suitability-mcp",
3
3
  "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
4
- "version": "1.1.21",
4
+ "version": "1.1.23",
5
5
  "description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD/EITHER verdict saves cost on every call.",
6
6
  "main": "src/server.js",
7
7
  "type": "module",
package/server.json CHANGED
@@ -1,36 +1,36 @@
1
- {
2
- "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
3
- "name": "io.github.OjasKord/local-model-suitability-mcp",
4
- "title": "Local Model Suitability MCP",
5
- "description": "Check if a task runs locally vs cloud. Save money on calls that don't need cloud inference.",
6
- "version": "1.1.16",
7
- "websiteUrl": "https://kordagencies.com",
8
- "repository": {
9
- "url": "https://github.com/OjasKord/local-model-suitability-mcp",
10
- "source": "github"
11
- },
12
- "packages": [
13
- {
14
- "registryType": "npm",
15
- "identifier": "local-model-suitability-mcp",
16
- "version": "1.1.16",
17
- "transport": {
18
- "type": "stdio"
19
- },
20
- "environmentVariables": [
21
- {
22
- "name": "ANTHROPIC_API_KEY",
23
- "description": "Anthropic API key for Claude routing analysis",
24
- "isRequired": true,
25
- "isSecret": true
26
- }
27
- ]
28
- }
29
- ],
30
- "remotes": [
31
- {
32
- "type": "streamable-http",
33
- "url": "https://local-model-suitability-mcp-production.up.railway.app"
34
- }
35
- ]
36
- }
1
+ {
2
+ "$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
3
+ "name": "io.github.OjasKord/local-model-suitability-mcp",
4
+ "title": "Local Model Suitability MCP",
5
+ "description": "Check if a task runs locally vs cloud. Save money on calls that don't need cloud inference.",
6
+ "version": "1.1.23",
7
+ "websiteUrl": "https://kordagencies.com",
8
+ "repository": {
9
+ "url": "https://github.com/OjasKord/local-model-suitability-mcp",
10
+ "source": "github"
11
+ },
12
+ "packages": [
13
+ {
14
+ "registryType": "npm",
15
+ "identifier": "local-model-suitability-mcp",
16
+ "version": "1.1.23",
17
+ "transport": {
18
+ "type": "stdio"
19
+ },
20
+ "environmentVariables": [
21
+ {
22
+ "name": "ANTHROPIC_API_KEY",
23
+ "description": "Anthropic API key for Claude routing analysis",
24
+ "isRequired": true,
25
+ "isSecret": true
26
+ }
27
+ ]
28
+ }
29
+ ],
30
+ "remotes": [
31
+ {
32
+ "type": "streamable-http",
33
+ "url": "https://local-model-suitability-mcp-production.up.railway.app"
34
+ }
35
+ ]
36
+ }
package/smithery.yaml CHANGED
@@ -1,6 +1,6 @@
1
1
  name: Local Model Suitability MCP
2
2
  agentRole: "AI Model Routing Specialist — equip before routing a task to a cloud model to check whether a local model can handle it instead"
3
- description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call. 2 focused tools. Compact schema. Minimal agent context overhead."
3
+ description: "Pre-condition: an agent is about to route a task to a cloud model and has not yet checked whether a local model could handle it. Skip this and every routable call pays full cloud inference cost indefinitely, or confidential data leaves the machine when it didn't need to. check_local_viability returns a LOCAL/CLOUD/EITHER verdict with specific recommended local models and an estimated cost saving, and forces LOCAL whenever data_sensitivity is CONFIDENTIAL. 1 tool, one verdict, no further reasoning required."
4
4
  categories:
5
5
  - Dev Tools
6
6
  - AI
package/src/server.js CHANGED
@@ -3,12 +3,21 @@ import { createHmac, timingSafeEqual } from 'crypto';
3
3
  import { readFileSync, writeFileSync } from 'fs';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
 
6
- const VERSION = '1.1.21';
6
+ const VERSION = '1.1.23';
7
+ const FIRST_DEPLOYED = '2026-04-13T06:41:38Z';
8
+ const LIFETIME_CALLS_REDIS_KEY = 'lms:lifetime_calls';
9
+ const UPTIME_HEARTBEAT_KEY = 'lms:uptime:heartbeat_count';
10
+ const UPTIME_MONITORING_START_KEY = 'lms:uptime:monitoring_started';
11
+ const UPTIME_HEARTBEAT_INTERVAL_MS = 60000;
12
+ const FLEET_IP24_TTL_SECONDS = 30 * 24 * 60 * 60;
13
+ const FLEET_CROSS_SERVER_THRESHOLD = 3;
7
14
  const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
8
15
  const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
9
16
  const ALLOWED_PAYMENT_LINK_IDS = ['plink_1TQzCBD6WvRe6sn3H1q5t2LF', 'plink_1TQzDSD6WvRe6sn3UM2G1EgX'];
10
17
  const PERSIST_FILE = '/tmp/lms_stats.json';
11
18
  const LEGAL_DISCLAIMER = 'AI-powered routing analysis. We do not log or store your task content. Results are for cost-optimisation guidance only. Provider maximum liability is limited to subscription fees paid in the preceding 3 months. Full terms: kordagencies.com/terms.html';
19
+ // Caching/staleness policy per tool, in seconds.
20
+ const VERDICT_TTL = { check_local_viability: 86400 };
12
21
 
13
22
  function nowISO() { return new Date().toISOString(); }
14
23
 
@@ -93,14 +102,16 @@ function incrementFreeTier(ip) {
93
102
  saveFreeTierToRedis().catch(() => {});
94
103
  }
95
104
 
96
- function checkAccess(ip, apiKey) {
105
+ async function checkAccess(ip, apiKey) {
97
106
  if (apiKey && apiKeys.has(apiKey)) {
98
107
  return { allowed: true, tier: 'paid', plan: apiKeys.get(apiKey).plan };
99
108
  }
100
109
  const count = getFreeTierCount(ip);
101
110
  const remaining = FREE_TIER_LIMIT - count;
102
111
  if (remaining <= 0) {
103
- return { allowed: false, tier: 'free', remaining: 0, reason: 'Routing to cloud without checking local viability wastes inference cost on every subsequent call — stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
112
+ recordFleetGateHit(ip).catch(() => {});
113
+ const crossServerNote = await buildCrossServerNote(ip);
114
+ return { allowed: false, tier: 'free', remaining: 0, reason: 'Local Model Suitability MCP free tier exhausted -- cost-routing checks are now blocked, halting any inference pipeline that depends on it, until you extend via POST /trial-extension or upgrade at ' + PRO_UPGRADE_URL + '. Routing to cloud without checking local viability wastes inference cost on every subsequent call -- stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.' + (crossServerNote ? ' ' + crossServerNote : ''), trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
104
115
  }
105
116
  return { allowed: true, tier: 'free', remaining, count };
106
117
  }
@@ -189,6 +200,56 @@ async function redisDelete(key) {
189
200
  } catch(e) { console.error('[Redis] redisDelete failed:', e); }
190
201
  }
191
202
 
203
+ async function redisIncr(key) {
204
+ try {
205
+ const res = await fetch(
206
+ `${UPSTASH_URL}/incr/${encodeURIComponent(key)}`,
207
+ { headers: { Authorization: `Bearer ${UPSTASH_TOKEN}` } }
208
+ );
209
+ const data = await res.json();
210
+ if (data.error) { console.error('[Redis] redisIncr error:', data.error, 'key:', key); return null; }
211
+ return data.result;
212
+ } catch(e) { console.error('[Redis] redisIncr failed:', e); return null; }
213
+ }
214
+
215
+ // ── Fleet cross-server operator detection ──────────────────────────────────────
216
+ async function recordFleetGateHit(ip) {
217
+ try {
218
+ const ip24 = truncateIp(ip);
219
+ const key = `fleet:ip24:${ip24}:${REDIS_PREFIX}`;
220
+ await redisSet(key, nowISO());
221
+ await redisExpire(key, FLEET_IP24_TTL_SECONDS);
222
+ } catch(e) { console.error('[Fleet] recordFleetGateHit failed:', e); }
223
+ }
224
+
225
+ async function checkFleetCrossServer(ip) {
226
+ try {
227
+ const ip24 = truncateIp(ip);
228
+ const keys = await redisKeys(`fleet:ip24:${ip24}:*`);
229
+ return keys.length;
230
+ } catch(e) { return 0; }
231
+ }
232
+
233
+ async function buildCrossServerNote(ip) {
234
+ const serverCount = await checkFleetCrossServer(ip);
235
+ if (serverCount >= FLEET_CROSS_SERVER_THRESHOLD) {
236
+ return 'Cross-server trial extension available -- this operator is already using ' + serverCount + ' Kord Agencies MCP servers. POST /trial-extension on any one of those servers to extend the trial across all of them.';
237
+ }
238
+ return null;
239
+ }
240
+
241
+ // ── Uptime tracking (for /public-stats) ─────────────────────────────────────────
242
+ async function initUptimeTracking() {
243
+ try {
244
+ let started = await redisGet(UPTIME_MONITORING_START_KEY);
245
+ if (!started) {
246
+ started = nowISO();
247
+ await redisSet(UPTIME_MONITORING_START_KEY, started);
248
+ }
249
+ setInterval(() => { redisIncr(UPTIME_HEARTBEAT_KEY).catch(() => {}); }, UPTIME_HEARTBEAT_INTERVAL_MS);
250
+ } catch(e) { console.error('[Uptime] initUptimeTracking failed:', e); }
251
+ }
252
+
192
253
  async function findCheckoutSessionEmail(paymentIntentId) {
193
254
  const res = await fetch(
194
255
  `https://api.stripe.com/v1/checkout/sessions?payment_intent=${encodeURIComponent(paymentIntentId)}`,
@@ -285,6 +346,25 @@ const TOOL_DEFINITION = {
285
346
  }
286
347
  },
287
348
  required: ['task']
349
+ },
350
+ outputSchema: {
351
+ type: 'object',
352
+ properties: {
353
+ verdict: { type: 'string', enum: ['LOCAL', 'CLOUD', 'EITHER'] },
354
+ confidence: { type: 'string', enum: ['HIGH', 'MEDIUM', 'LOW'] },
355
+ reason: { type: 'string' },
356
+ estimated_cost_saving: { type: 'string' },
357
+ recommended_local_models: { type: 'array', items: { type: 'string' }, description: 'Present when verdict is LOCAL or EITHER' },
358
+ cloud_justified_reason: { type: ['string', 'null'], description: 'Non-null only when verdict is CLOUD' },
359
+ data_sensitivity_override: { type: 'boolean', description: 'Present only when data_sensitivity=CONFIDENTIAL forced a LOCAL verdict' },
360
+ task_quality_threshold: { type: 'string', enum: ['PRODUCTION', 'PROTOTYPE', 'BEST_EFFORT'] },
361
+ data_sensitivity: { type: 'string', enum: ['PUBLIC', 'INTERNAL', 'CONFIDENTIAL'] },
362
+ analysis_type: { type: 'string' },
363
+ checked_at: { type: 'string', format: 'date-time' },
364
+ _disclaimer: { type: 'string' }
365
+ },
366
+ required: ['verdict', 'confidence', 'reason', 'checked_at', '_disclaimer'],
367
+ additionalProperties: true
288
368
  }
289
369
  };
290
370
 
@@ -304,6 +384,8 @@ async function checkLocalViability(task, qualityThreshold, dataSensitivity) {
304
384
  cloud_justified_reason: null,
305
385
  data_sensitivity_override: true,
306
386
  analysis_type: 'AI-powered cost routing — NOT a simple lookup',
387
+ verdict_ttl: VERDICT_TTL.check_local_viability,
388
+ data_source_status: 'full',
307
389
  _disclaimer: LEGAL_DISCLAIMER
308
390
  };
309
391
  }
@@ -351,6 +433,7 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
351
433
 
352
434
  const raw = response.content[0].text.trim();
353
435
  let parsed;
436
+ let aiDegraded = false;
354
437
  try {
355
438
  parsed = JSON.parse(raw);
356
439
  } catch(e) {
@@ -363,6 +446,7 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
363
446
  recommended_local_models: ['llama3.2:8b', 'mistral-7b'],
364
447
  cloud_justified_reason: null
365
448
  };
449
+ aiDegraded = true;
366
450
  }
367
451
 
368
452
  const _rLms = {
@@ -370,6 +454,8 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
370
454
  task_quality_threshold: quality,
371
455
  data_sensitivity: sensitivity,
372
456
  analysis_type: 'AI-powered cost routing — NOT a simple lookup',
457
+ verdict_ttl: VERDICT_TTL.check_local_viability,
458
+ data_source_status: aiDegraded ? 'degraded' : 'full',
373
459
  checked_at: nowISO(),
374
460
  _disclaimer: LEGAL_DISCLAIMER
375
461
  };
@@ -557,6 +643,33 @@ const server = createServer(async (req, res) => {
557
643
  return;
558
644
  }
559
645
 
646
+ // Unauthenticated machine-readable track record -- for agent orchestrators
647
+ // evaluating server trustworthiness, not for humans. No stats-key required.
648
+ if (req.url === '/public-stats' && req.method === 'GET') {
649
+ (async () => {
650
+ const [lifetimeCallsRaw, heartbeatCountRaw, monitoringStart] = await Promise.all([
651
+ redisGet(LIFETIME_CALLS_REDIS_KEY),
652
+ redisGet(UPTIME_HEARTBEAT_KEY),
653
+ redisGet(UPTIME_MONITORING_START_KEY)
654
+ ]);
655
+ const lifetimeCalls = lifetimeCallsRaw || 0;
656
+ const heartbeatCount = heartbeatCountRaw || 0;
657
+ const monitoringStartTime = monitoringStart ? new Date(monitoringStart).getTime() : Date.now();
658
+ const elapsedMs = Math.max(1, Date.now() - monitoringStartTime);
659
+ const uptimePct = Math.min(100, Math.round((heartbeatCount * UPTIME_HEARTBEAT_INTERVAL_MS / elapsedMs) * 1000) / 10);
660
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
661
+ res.end(JSON.stringify({
662
+ server: 'local-model-suitability-mcp',
663
+ version: VERSION,
664
+ first_deployed: FIRST_DEPLOYED,
665
+ total_lifetime_tool_calls: lifetimeCalls,
666
+ uptime_percentage: uptimePct,
667
+ uptime_monitoring_since: monitoringStart || nowISO()
668
+ }));
669
+ })();
670
+ return;
671
+ }
672
+
560
673
  // Session log
561
674
  if (req.url === '/session-log' && req.method === 'GET') {
562
675
  if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
@@ -602,6 +715,8 @@ const server = createServer(async (req, res) => {
602
715
  stats.free_tier_calls_by_ip[clientIp][month] = Math.max(0, current - TRIAL_EXTENSION_CALLS);
603
716
  trialExtensions.set(emailKey, { name, email, use_case: use_case || '', ip: clientIp, granted_at: nowISO() });
604
717
  saveStats();
718
+ // 24h follow-up record -- processed by /process-trial-followups (fleet cron)
719
+ await redisSet(REDIS_PREFIX + ':followup:' + email.toLowerCase().trim(), { email, name, server: 'local-model-suitability-mcp', granted_at: nowISO(), sent: false });
605
720
  const sendTrialEmail = async (to, subject, html) => {
606
721
  await fetch('https://api.resend.com/emails', {
607
722
  method: 'POST',
@@ -620,6 +735,47 @@ const server = createServer(async (req, res) => {
620
735
  return;
621
736
  }
622
737
 
738
+ // Fleet cron hits this hourly. Sends exactly one follow-up email per email
739
+ // address, 24h after a trial extension was granted, unless that email has
740
+ // since picked up a paid key on this server.
741
+ if (req.url === '/process-trial-followups' && req.method === 'POST') {
742
+ if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
743
+ (async () => {
744
+ const keys = await redisKeys(REDIS_PREFIX + ':followup:*');
745
+ const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
746
+ let processed = 0, sent = 0, skippedPaid = 0;
747
+ for (const key of keys) {
748
+ const record = await redisGet(key);
749
+ if (!record || record.sent) continue;
750
+ if (Date.now() - new Date(record.granted_at).getTime() < TWENTY_FOUR_HOURS_MS) continue;
751
+ processed++;
752
+ const emailNorm = (record.email || '').toLowerCase().trim();
753
+ const hasPaidKey = Array.from(apiKeys.values()).some(r => (r.email || '').toLowerCase().trim() === emailNorm);
754
+ if (hasPaidKey) {
755
+ skippedPaid++;
756
+ } else {
757
+ await fetch('https://api.resend.com/emails', {
758
+ method: 'POST',
759
+ headers: { 'Authorization': `Bearer ${process.env.RESEND_API_KEY}`, 'Content-Type': 'application/json' },
760
+ body: JSON.stringify({
761
+ from: 'Local Model Suitability MCP <ojas@kordagencies.com>',
762
+ to: [record.email],
763
+ subject: 'Local Model Suitability MCP -- cost-routing checks will block your pipeline again without an upgrade',
764
+ html: '<p>Hi ' + record.name + ',</p><p>Your trial extension on Local Model Suitability MCP was granted 24 hours ago. Once those extra calls run out, cost-routing checks stop and any inference pipeline that depends on them pauses until you upgrade.</p><p>Upgrade now -- 500 calls for $20/month: ' + PRO_UPGRADE_URL + '</p><p>Ojas<br>kordagencies.com</p>'
765
+ })
766
+ }).catch(e => console.error('[lms] followup email error:', e.message));
767
+ sent++;
768
+ }
769
+ record.sent = true;
770
+ record.sent_at = nowISO();
771
+ await redisSet(key, record);
772
+ }
773
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
774
+ res.end(JSON.stringify({ checked: keys.length, processed, emails_sent: sent, skipped_already_paid: skippedPaid }));
775
+ })();
776
+ return;
777
+ }
778
+
623
779
  // Stripe webhook
624
780
  if (req.url === '/webhook/stripe' && req.method === 'POST') {
625
781
  let body = '';
@@ -703,7 +859,7 @@ const server = createServer(async (req, res) => {
703
859
  result: {
704
860
  protocolVersion: '2024-11-05',
705
861
  capabilities: { tools: {}, resources: {}, prompts: {} },
706
- serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
862
+ serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
707
863
  }
708
864
  };
709
865
  } else if (request.method === 'notifications/initialized') {
@@ -728,22 +884,25 @@ const server = createServer(async (req, res) => {
728
884
  result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required — describe what you are about to send to the cloud model', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] }
729
885
  };
730
886
  } else {
731
- const access = checkAccess(clientIp, apiKey);
887
+ const access = await checkAccess(clientIp, apiKey);
732
888
 
733
889
  if (!access.allowed) {
734
890
  statusCode = 402;
735
891
  notifyGateHit('Local Model Suitability', clientIp, 'check_local_viability', getFreeTierCount(clientIp), PRO_UPGRADE_URL);
736
892
  response = {
737
893
  jsonrpc: '2.0', id: request.id,
738
- result: { content: [{ type: 'text', text: JSON.stringify({ error: `Free tier limit reached. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: `Inform user free tier quota is exhausted. Get 500 calls for $20 at ${PRO_UPGRADE_URL} -- calls never expire.`, category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL }) }] }
894
+ result: { content: [{ type: 'text', text: JSON.stringify({ error: access.reason, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'Inform user free tier quota is exhausted. Get 500 calls for $20 at ' + PRO_UPGRADE_URL + ' -- calls never expire.', category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL, trial_extension: access.trial_extension }) }] }
739
895
  };
740
896
  } else {
741
897
  if (access.tier === 'free') incrementFreeTier(clientIp);
898
+ redisIncr(LIFETIME_CALLS_REDIS_KEY).catch(() => {});
742
899
  logCall('check_local_viability', access.tier, clientIp);
743
900
  appendSessionLog(clientIp, 'check_local_viability').catch((e) => console.error('[SessionLog] appendSessionLog failed:', e));
901
+ const callsRemaining = access.tier === 'free' ? Math.max(0, FREE_TIER_LIMIT - getFreeTierCount(clientIp)) : 'unlimited';
744
902
 
745
903
  try {
746
904
  const result = await checkLocalViability(task, quality_threshold, data_sensitivity);
905
+ result.calls_remaining = callsRemaining;
747
906
 
748
907
  // Partial response for free tier
749
908
  if (access.tier === 'free') {
@@ -753,6 +912,7 @@ const server = createServer(async (req, res) => {
753
912
  reason: result.reason,
754
913
  analysis_type: result.analysis_type,
755
914
  checked_at: result.checked_at,
915
+ calls_remaining: result.calls_remaining,
756
916
  _disclaimer: result._disclaimer,
757
917
  upgrade_url: PRO_UPGRADE_URL
758
918
  };
@@ -809,7 +969,7 @@ function setupStdio() {
809
969
  try { req = JSON.parse(line); } catch(e) { return; }
810
970
  let response;
811
971
  if (req.method === 'initialize') {
812
- response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
972
+ response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
813
973
  } else if (req.method === 'notifications/initialized') {
814
974
  return;
815
975
  } else if (req.method === 'tools/list') {
@@ -828,6 +988,7 @@ function setupStdio() {
828
988
  } else {
829
989
  try {
830
990
  const result = await checkLocalViability(task, quality_threshold, data_sensitivity);
991
+ result.calls_remaining = 'unlimited';
831
992
  response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify(result) }] } };
832
993
  } catch(e) {
833
994
  response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: e.message, likely_cause: 'AI routing analysis failed — transient Anthropic API issue', retryable: true, retry_after_ms: 120000, fallback_tool: null, agent_action: 'RETRY_IN_2_MIN', category: 'ai_failure', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] } };
@@ -849,6 +1010,7 @@ const PORT = process.env.PORT || 3000;
849
1010
  server.listen(PORT, async () => {
850
1011
  await loadApiKeysFromRedis();
851
1012
  await loadFreeTierFromRedis();
1013
+ await initUptimeTracking();
852
1014
  console.log(`[lms] Local Model Suitability MCP v${VERSION} running on port ${PORT}`);
853
1015
  console.log(`[lms] Tool: check_local_viability — cloud is expensive, local is the default`);
854
1016
  });