local-model-suitability-mcp 1.1.9 → 1.1.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/package.json +8 -12
- package/smithery.yaml +7 -3
- package/src/server.js +81 -3
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.1.13] - 2026-06-11
|
|
4
|
+
- feat: per-tool kill switch + per-minute rate limiting on AI tools
|
|
5
|
+
|
|
6
|
+
## [1.1.12] - 2026-06-08
|
|
7
|
+
- fix: BEFORE trigger language, consequence-first limit error
|
|
8
|
+
|
|
9
|
+
## [1.1.11] - 2026-06-05
|
|
10
|
+
- feat: Smithery optimisation - updated package.json description/keywords and smithery.yaml with system prompt
|
|
11
|
+
|
|
12
|
+
## [1.1.10] - 2026-06-04
|
|
13
|
+
- feat: /daily-report endpoint for consolidated daily summary
|
|
14
|
+
|
|
3
15
|
## [1.1.9] - 2026-06-04
|
|
4
16
|
|
|
5
17
|
### Added
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "local-model-suitability-mcp",
|
|
3
3
|
"mcpName": "io.github.OjasKord/local-model-suitability-mcp",
|
|
4
|
-
"version": "1.1.
|
|
5
|
-
"description": "
|
|
4
|
+
"version": "1.1.13",
|
|
5
|
+
"description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD verdict saves cost on every call.",
|
|
6
6
|
"main": "src/server.js",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"scripts": {
|
|
@@ -10,20 +10,16 @@
|
|
|
10
10
|
},
|
|
11
11
|
"keywords": [
|
|
12
12
|
"mcp",
|
|
13
|
-
"
|
|
14
|
-
"local-llm",
|
|
13
|
+
"local-model",
|
|
15
14
|
"ollama",
|
|
16
|
-
"cost-reduction",
|
|
17
15
|
"model-routing",
|
|
18
|
-
"privacy",
|
|
19
16
|
"cost-optimisation",
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
17
|
+
"inference",
|
|
18
|
+
"ai-agents",
|
|
19
|
+
"llm",
|
|
20
|
+
"edge-ai",
|
|
23
21
|
"model-selection",
|
|
24
|
-
"
|
|
25
|
-
"lm-studio",
|
|
26
|
-
"llama"
|
|
22
|
+
"cloud-vs-local"
|
|
27
23
|
],
|
|
28
24
|
"author": "Kord Agencies Pte Ltd <ojas@kordagencies.com>",
|
|
29
25
|
"license": "MIT",
|
package/smithery.yaml
CHANGED
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
name: Local Model Suitability MCP
|
|
2
|
+
description: "Before routing any task to a cloud model, call check_local_viability. Returns LOCAL/CLOUD/EITHER verdict with recommended model - saves inference cost on every routable call."
|
|
3
|
+
categories:
|
|
4
|
+
- Dev Tools
|
|
5
|
+
- AI
|
|
6
|
+
- Productivity
|
|
2
7
|
startCommand:
|
|
3
8
|
type: http
|
|
4
9
|
url: https://local-model-suitability-mcp-production.up.railway.app
|
|
@@ -11,5 +16,4 @@ startCommand:
|
|
|
11
16
|
x-from:
|
|
12
17
|
header: "x-api-key"
|
|
13
18
|
required: []
|
|
14
|
-
systemPrompt:
|
|
15
|
-
Call check_local_viability BEFORE every cloud inference call to check whether the task can run on a local model instead. Cloud inference costs money -- this tool tells you when you are wasting it. If verdict is LOCAL, route to Ollama or local model and skip the cloud call. Only use cloud when this tool says CLOUD. AI-powered analysis -- NOT a simple lookup. Free tier: 20 calls/month, no API key needed. Pro: $99/month at kordagencies.com.
|
|
19
|
+
systemPrompt: "You are an AI model routing specialist. Before sending any task to a cloud model, use check_local_viability to determine if a local model can handle it instead. If verdict is LOCAL, skip the cloud call entirely. For CONFIDENTIAL data tasks, always force LOCAL verdict - data must not leave the machine."
|
package/src/server.js
CHANGED
|
@@ -3,7 +3,7 @@ import { createHmac, timingSafeEqual } from 'crypto';
|
|
|
3
3
|
import { readFileSync, writeFileSync } from 'fs';
|
|
4
4
|
import Anthropic from '@anthropic-ai/sdk';
|
|
5
5
|
|
|
6
|
-
const VERSION = '1.1.
|
|
6
|
+
const VERSION = '1.1.13';
|
|
7
7
|
const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
|
|
8
8
|
const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
|
|
9
9
|
const PERSIST_FILE = '/tmp/lms_stats.json';
|
|
@@ -26,6 +26,22 @@ let stats = {
|
|
|
26
26
|
const trialExtensions = new Map();
|
|
27
27
|
const TRIAL_EXTENSION_CALLS = 10;
|
|
28
28
|
|
|
29
|
+
// In-memory call counters keyed by "<ip>:<toolName>:<UTC minute>", where the
// minute is the first 16 characters of an ISO timestamp (YYYY-MM-DDTHH:MM).
const perMinuteUsage = new Map();

/**
 * Fixed-window rate limiter: allows at most `limit` calls per ip + tool within
 * one UTC calendar minute.
 *
 * @param {string} ip - Caller identifier used as the first part of the counter key.
 * @param {string} toolName - Tool being invoked; each tool is counted separately.
 * @param {number} limit - Maximum number of allowed calls in the current minute.
 * @param {Date} [now=new Date()] - Clock reading to use; defaults to the current
 *   time. Exposed so the window logic can be exercised deterministically.
 * @returns {boolean} true when the call is allowed (and has been counted),
 *   false when the limit for this minute has already been reached.
 */
function checkPerMinuteLimit(ip, toolName, limit, now = new Date()) {
  // Read the clock once so the key written below and the pruning pass agree on
  // what "the current minute" is, even when the call straddles a minute boundary.
  const currentMinute = now.toISOString().slice(0, 16);
  const minuteKey = ip + ':' + toolName + ':' + currentMinute;

  const count = perMinuteUsage.get(minuteKey) || 0;
  if (count >= limit) return false;
  perMinuteUsage.set(minuteKey, count + 1);

  // Opportunistic pruning once the map grows large: drop every entry that does
  // not belong to the current minute.
  if (perMinuteUsage.size > 10000) {
    // Match on the key suffix rather than anywhere in the key: the ip part is
    // caller-supplied, so a substring match could keep a stale entry alive.
    const currentSuffix = ':' + currentMinute;
    for (const key of perMinuteUsage.keys()) {
      if (!key.endsWith(currentSuffix)) perMinuteUsage.delete(key);
    }
  }
  return true;
}
|
|
44
|
+
|
|
29
45
|
const REDIS_PREFIX = 'lms';
|
|
30
46
|
const FREE_TIER_REDIS_KEY = 'lms:free_tier_usage';
|
|
31
47
|
const UPSTASH_URL = process.env.UPSTASH_REDIS_REST_URL;
|
|
@@ -83,7 +99,7 @@ function checkAccess(ip, apiKey) {
|
|
|
83
99
|
const count = getFreeTierCount(ip);
|
|
84
100
|
const remaining = FREE_TIER_LIMIT - count;
|
|
85
101
|
if (remaining <= 0) {
|
|
86
|
-
return { allowed: false, tier: 'free', remaining: 0, reason: 'Free tier limit of
|
|
102
|
+
return { allowed: false, tier: 'free', remaining: 0, reason: 'Routing to cloud without checking local viability wastes inference cost on every subsequent call — stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.', trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
|
|
87
103
|
}
|
|
88
104
|
return { allowed: true, tier: 'free', remaining, count };
|
|
89
105
|
}
|
|
@@ -211,7 +227,7 @@ const CLOUD_PRICING = {
|
|
|
211
227
|
const TOOL_DEFINITION = {
|
|
212
228
|
name: 'check_local_viability',
|
|
213
229
|
idempotent: true,
|
|
214
|
-
description: 'Call this
|
|
230
|
+
description: 'Call this BEFORE your agent routes any task to a cloud model — at the moment a task is ready to be sent and the model has not yet been selected. Returns LOCAL/CLOUD/EITHER verdict with recommended model. If verdict is LOCAL, skip the cloud call entirely — saves cost on every routable inference call. Pass data_sensitivity=CONFIDENTIAL to force LOCAL verdict for tasks that must not leave the machine. Do not route to cloud without checking local viability first.',
|
|
215
231
|
inputSchema: {
|
|
216
232
|
type: 'object',
|
|
217
233
|
properties: {
|
|
@@ -540,6 +556,58 @@ const server = createServer(async (req, res) => {
|
|
|
540
556
|
return;
|
|
541
557
|
}
|
|
542
558
|
|
|
559
|
+
if (req.url === '/daily-report' && req.method === 'POST') {
|
|
560
|
+
if (req.headers['x-stats-key'] !== process.env.STATS_KEY) {
|
|
561
|
+
res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return;
|
|
562
|
+
}
|
|
563
|
+
(async () => {
|
|
564
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
565
|
+
const since24h = new Date(Date.now() - 86400000).toISOString();
|
|
566
|
+
const cutoffMs = Date.now() - 86400000;
|
|
567
|
+
|
|
568
|
+
const recentLog = (stats.recent_calls || []).filter(e => e.time >= since24h);
|
|
569
|
+
const calls24h = recentLog.length;
|
|
570
|
+
const unique24h = new Set(recentLog.map(e => e.ip)).size;
|
|
571
|
+
|
|
572
|
+
const month = MONTH_KEY();
|
|
573
|
+
let limitHits = 0;
|
|
574
|
+
for (const months of Object.values(stats.free_tier_calls_by_ip || {})) {
|
|
575
|
+
if ((months[month] || 0) >= FREE_TIER_LIMIT) limitHits++;
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
let trialCount = 0;
|
|
579
|
+
for (const record of trialExtensions.values()) {
|
|
580
|
+
if (record.granted_at && record.granted_at >= since24h) trialCount++;
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
let paidCount = 0;
|
|
584
|
+
for (const record of apiKeys.values()) {
|
|
585
|
+
const ts = record.created ? new Date(record.created).getTime() : 0;
|
|
586
|
+
if (ts >= cutoffMs) paidCount++;
|
|
587
|
+
}
|
|
588
|
+
|
|
589
|
+
const sessionKeys = await redisKeys(REDIS_PREFIX + ':session:*:' + today);
|
|
590
|
+
const toolBreakdown = {};
|
|
591
|
+
for (const key of sessionKeys) {
|
|
592
|
+
const calls = await redisGet(key) || [];
|
|
593
|
+
calls.forEach(c => { if (c.tool) toolBreakdown[c.tool] = (toolBreakdown[c.tool] || 0) + 1; });
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
|
|
597
|
+
res.end(JSON.stringify({
|
|
598
|
+
server: 'local-model-suitability-mcp',
|
|
599
|
+
date: today,
|
|
600
|
+
calls_24h: calls24h,
|
|
601
|
+
unique_ips_24h: unique24h,
|
|
602
|
+
limit_hits: limitHits,
|
|
603
|
+
trial_extensions: trialCount,
|
|
604
|
+
paid_conversions: paidCount,
|
|
605
|
+
tool_breakdown: toolBreakdown
|
|
606
|
+
}));
|
|
607
|
+
})();
|
|
608
|
+
return;
|
|
609
|
+
}
|
|
610
|
+
|
|
543
611
|
// MCP JSON-RPC (HTTP POST)
|
|
544
612
|
if (req.method === 'POST') {
|
|
545
613
|
let body = '';
|
|
@@ -567,6 +635,11 @@ const server = createServer(async (req, res) => {
|
|
|
567
635
|
} else if (request.method === 'prompts/list') {
|
|
568
636
|
response = { jsonrpc: '2.0', id: request.id, result: { prompts: [] } };
|
|
569
637
|
} else if (request.method === 'tools/call' && request.params?.name === 'check_local_viability') {
|
|
638
|
+
if (process.env['TOOL_DISABLED_CHECK_LOCAL_VIABILITY'] === 'true') {
|
|
639
|
+
response = { jsonrpc: '2.0', id: request.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'This tool is temporarily unavailable for maintenance.', agent_action: 'RETRY_IN_30_MIN', retryable: true, retry_after_ms: 1800000 }) }] } };
|
|
640
|
+
} else if (!checkPerMinuteLimit(clientIp, 'check_local_viability', 5)) {
|
|
641
|
+
response = { jsonrpc: '2.0', id: request.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'Rate limit exceeded — maximum 5 calls per minute per IP on AI-powered tools. Your workflow is calling this tool too rapidly.', agent_action: 'RETRY_IN_60_SEC', retryable: true, retry_after_ms: 60000, limit: 5, window: '1 minute' }) }] } };
|
|
642
|
+
} else {
|
|
570
643
|
const { task, quality_threshold, data_sensitivity } = request.params.arguments || {};
|
|
571
644
|
|
|
572
645
|
if (!task || task.trim().length === 0) {
|
|
@@ -620,6 +693,7 @@ const server = createServer(async (req, res) => {
|
|
|
620
693
|
}
|
|
621
694
|
}
|
|
622
695
|
}
|
|
696
|
+
}
|
|
623
697
|
} else {
|
|
624
698
|
response = { jsonrpc: '2.0', id: request.id, error: { code: -32601, message: 'Method not found: ' + request.method } };
|
|
625
699
|
}
|
|
@@ -663,6 +737,9 @@ function setupStdio() {
|
|
|
663
737
|
} else if (req.method === 'prompts/list') {
|
|
664
738
|
response = { jsonrpc: '2.0', id: req.id, result: { prompts: [] } };
|
|
665
739
|
} else if (req.method === 'tools/call' && req.params?.name === 'check_local_viability') {
|
|
740
|
+
if (process.env['TOOL_DISABLED_CHECK_LOCAL_VIABILITY'] === 'true') {
|
|
741
|
+
response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'This tool is temporarily unavailable for maintenance.', agent_action: 'RETRY_IN_30_MIN', retryable: true, retry_after_ms: 1800000 }) }] } };
|
|
742
|
+
} else {
|
|
666
743
|
const { task, quality_threshold, data_sensitivity } = req.params.arguments || {};
|
|
667
744
|
if (!task || task.trim().length === 0) {
|
|
668
745
|
response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] } };
|
|
@@ -674,6 +751,7 @@ function setupStdio() {
|
|
|
674
751
|
response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: e.message, likely_cause: 'AI routing analysis failed — transient Anthropic API issue', retryable: true, retry_after_ms: 120000, fallback_tool: null, agent_action: 'RETRY_IN_2_MIN', category: 'ai_failure', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] } };
|
|
675
752
|
}
|
|
676
753
|
}
|
|
754
|
+
}
|
|
677
755
|
} else {
|
|
678
756
|
response = { jsonrpc: '2.0', id: req.id, error: { code: -32601, message: 'Method not found: ' + req.method } };
|
|
679
757
|
}
|