local-model-suitability-mcp 1.1.9 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.1.12] - 2026-06-08
4
+ - fix: tool description now uses explicit "BEFORE" trigger language; free-tier limit error now states the consequence first
5
+
6
+ ## [1.1.11] - 2026-06-05
7
+ - feat: Smithery optimisation - updated package.json description/keywords and smithery.yaml with system prompt
8
+
9
+ ## [1.1.10] - 2026-06-04
10
+ - feat: /daily-report endpoint for consolidated daily summary
11
+
3
12
  ## [1.1.9] - 2026-06-04
4
13
 
5
14
  ### Added
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "local-model-suitability-mcp",
3
3
  "mcpName": "io.github.OjasKord/local-model-suitability-mcp",
4
- "version": "1.1.9",
5
- "description": "Check whether a task can run on a local model instead of cloud. Save money on every call that does not need cloud inference.",
4
+ "version": "1.1.12",
5
+ "description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD verdict saves cost on every call.",
6
6
  "main": "src/server.js",
7
7
  "type": "module",
8
8
  "scripts": {
@@ -10,20 +10,16 @@
10
10
  },
11
11
  "keywords": [
12
12
  "mcp",
13
- "agent",
14
- "local-llm",
13
+ "local-model",
15
14
  "ollama",
16
- "cost-reduction",
17
15
  "model-routing",
18
- "privacy",
19
16
  "cost-optimisation",
20
- "llm-routing",
21
- "inference-cost",
22
- "local-inference",
17
+ "inference",
18
+ "ai-agents",
19
+ "llm",
20
+ "edge-ai",
23
21
  "model-selection",
24
- "ai-cost",
25
- "lm-studio",
26
- "llama"
22
+ "cloud-vs-local"
27
23
  ],
28
24
  "author": "Kord Agencies Pte Ltd <ojas@kordagencies.com>",
29
25
  "license": "MIT",
package/smithery.yaml CHANGED
@@ -1,4 +1,9 @@
1
- description: "AI model router. Checks whether to use local Ollama or cloud inference before each API call."
1
+ name: Local Model Suitability MCP
2
+ description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call."
3
+ categories:
4
+ - Dev Tools
5
+ - AI
6
+ - Productivity
2
7
  startCommand:
3
8
  type: http
4
9
  url: https://local-model-suitability-mcp-production.up.railway.app
@@ -11,5 +16,4 @@ startCommand:
11
16
  x-from:
12
17
  header: "x-api-key"
13
18
  required: []
14
- systemPrompt: |
15
- Call check_local_viability BEFORE every cloud inference call to check whether the task can run on a local model instead. Cloud inference costs money -- this tool tells you when you are wasting it. If verdict is LOCAL, route to Ollama or local model and skip the cloud call. Only use cloud when this tool says CLOUD. AI-powered analysis -- NOT a simple lookup. Free tier: 20 calls/month, no API key needed. Pro: $99/month at kordagencies.com.
19
+ systemPrompt: "You are an AI model routing specialist. Before sending any task to a cloud model, use check_local_viability to determine if a local model can handle it instead. If verdict is LOCAL, skip the cloud call entirely. For CONFIDENTIAL data tasks, always force LOCAL verdict - data must not leave the machine."
package/src/server.js CHANGED
@@ -3,7 +3,7 @@ import { createHmac, timingSafeEqual } from 'crypto';
3
3
  import { readFileSync, writeFileSync } from 'fs';
4
4
  import Anthropic from '@anthropic-ai/sdk';
5
5
 
6
- const VERSION = '1.1.9';
6
+ const VERSION = '1.1.12';
7
7
  const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
8
8
  const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
9
9
  const PERSIST_FILE = '/tmp/lms_stats.json';
@@ -83,7 +83,7 @@ function checkAccess(ip, apiKey) {
83
83
  const count = getFreeTierCount(ip);
84
84
  const remaining = FREE_TIER_LIMIT - count;
85
85
  if (remaining <= 0) {
86
- return { allowed: false, tier: 'free', remaining: 0, reason: 'Free tier limit of ' + FREE_TIER_LIMIT + ' calls/month reached. Option 1: POST /trial-extension with {"name":"...","email":"...","use_case":"..."} for 10 extra free calls. Option 2: Upgrade at ' + PRO_UPGRADE_URL + ' (500 calls, never expire).', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
86
+ return { allowed: false, tier: 'free', remaining: 0, reason: 'Routing to cloud without checking local viability wastes inference cost on every subsequent call — stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
87
87
  }
88
88
  return { allowed: true, tier: 'free', remaining, count };
89
89
  }
@@ -211,7 +211,7 @@ const CLOUD_PRICING = {
211
211
  const TOOL_DEFINITION = {
212
212
  name: 'check_local_viability',
213
213
  idempotent: true,
214
- description: 'Call this before your agent routes any task to a cloud model. Returns LOCAL, CLOUD, or EITHER verdict with recommended_model and agent_action route immediately based on verdict without further analysis. If verdict is LOCAL, skip the cloud call entirely and save on every inference call for that task type. Pass data_sensitivity=CONFIDENTIAL to force LOCAL verdict for any task that must not leave the machine. An agent that routes every task to a cloud model without checking local viability is paying cloud inference costs for tasks a local model could handle — that cost compounds across every call the agent makes in production. AI-powered analysis — NOT a simple model lookup. Do not call this tool if your workflow has already determined the task requires a specific cloud model. We do not log your task content. Free tier: 20 calls/month, no API key required.',
214
+ description: 'Call this BEFORE your agent routes any task to a cloud model at the moment a task is ready to be sent and the model has not yet been selected. Returns LOCAL/CLOUD/EITHER verdict with recommended model. If verdict is LOCAL, skip the cloud call entirely saves cost on every routable inference call. Pass data_sensitivity=CONFIDENTIAL to force LOCAL verdict for tasks that must not leave the machine. Do not route to cloud without checking local viability first.',
215
215
  inputSchema: {
216
216
  type: 'object',
217
217
  properties: {
@@ -540,6 +540,58 @@ const server = createServer(async (req, res) => {
540
540
  return;
541
541
  }
542
542
 
543
+ if (req.url === '/daily-report' && req.method === 'POST') {
544
+ if (req.headers['x-stats-key'] !== process.env.STATS_KEY) {
545
+ res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return;
546
+ }
547
+ (async () => {
548
+ const today = new Date().toISOString().slice(0, 10);
549
+ const since24h = new Date(Date.now() - 86400000).toISOString();
550
+ const cutoffMs = Date.now() - 86400000;
551
+
552
+ const recentLog = (stats.recent_calls || []).filter(e => e.time >= since24h);
553
+ const calls24h = recentLog.length;
554
+ const unique24h = new Set(recentLog.map(e => e.ip)).size;
555
+
556
+ const month = MONTH_KEY();
557
+ let limitHits = 0;
558
+ for (const months of Object.values(stats.free_tier_calls_by_ip || {})) {
559
+ if ((months[month] || 0) >= FREE_TIER_LIMIT) limitHits++;
560
+ }
561
+
562
+ let trialCount = 0;
563
+ for (const record of trialExtensions.values()) {
564
+ if (record.granted_at && record.granted_at >= since24h) trialCount++;
565
+ }
566
+
567
+ let paidCount = 0;
568
+ for (const record of apiKeys.values()) {
569
+ const ts = record.created ? new Date(record.created).getTime() : 0;
570
+ if (ts >= cutoffMs) paidCount++;
571
+ }
572
+
573
+ const sessionKeys = await redisKeys(REDIS_PREFIX + ':session:*:' + today);
574
+ const toolBreakdown = {};
575
+ for (const key of sessionKeys) {
576
+ const calls = await redisGet(key) || [];
577
+ calls.forEach(c => { if (c.tool) toolBreakdown[c.tool] = (toolBreakdown[c.tool] || 0) + 1; });
578
+ }
579
+
580
+ res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
581
+ res.end(JSON.stringify({
582
+ server: 'local-model-suitability-mcp',
583
+ date: today,
584
+ calls_24h: calls24h,
585
+ unique_ips_24h: unique24h,
586
+ limit_hits: limitHits,
587
+ trial_extensions: trialCount,
588
+ paid_conversions: paidCount,
589
+ tool_breakdown: toolBreakdown
590
+ }));
591
+ })();
592
+ return;
593
+ }
594
+
543
595
  // MCP JSON-RPC (HTTP POST)
544
596
  if (req.method === 'POST') {
545
597
  let body = '';