local-model-suitability-mcp 1.1.21 → 1.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/server.json +36 -36
- package/smithery.yaml +1 -1
- package/src/server.js +169 -7
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,19 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.1.23] - 2026-06-25
|
|
4
|
+
- feat: calls_remaining field added to check_local_viability response -- "unlimited" for paid keys, numeric free-tier headroom otherwise (HTTP POST and stdio transports)
|
|
5
|
+
- feat: verdict_ttl field added (86400s/24h)
|
|
6
|
+
- feat: data_source_status field added (full/degraded) -- "degraded" when Anthropic responds but returns unparseable output and a fallback verdict is used; "full" otherwise, including the CONFIDENTIAL local-rule shortcut which never calls Anthropic
|
|
7
|
+
|
|
8
|
+
## [1.1.22] - 2026-06-24
|
|
9
|
+
- feat: unauthenticated /public-stats endpoint -- first_deployed, lifetime tool calls, uptime %, version, for agent orchestrators evaluating server trustworthiness
|
|
10
|
+
- feat: /process-trial-followups endpoint + 24h follow-up record on trial-extension grant
|
|
11
|
+
- feat: gate response now self-contained (server + workflow impact + upgrade path in one sentence) and detects cross-server operators via shared fleet Redis, with cross-server trial-extension note
|
|
12
|
+
- feat: outputSchema added to check_local_viability (additive, response format unchanged)
|
|
13
|
+
- fix: both initialize descriptions said "ROUTE_LOCAL / ROUTE_CLOUD verdict" -- the real verdict field is LOCAL/CLOUD/EITHER. Corrected both.
|
|
14
|
+
- fix: the 402 gate response at the tools/call site built its own plain error text instead of using checkAccess's richer `reason` field -- now uses access.reason consistently with the rest of the fleet
|
|
15
|
+
- fix: smithery.yaml claimed "2 focused tools" -- this server has exactly 1 (check_local_viability)
|
|
16
|
+
|
|
3
17
|
## [1.1.21] - 2026-06-23
|
|
4
18
|
- fix: gate returns HTTP 402 (x402 standard for non-transient quota)
|
|
5
19
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "local-model-suitability-mcp",
|
|
3
3
|
"mcpName": "io.github.OjasKord/local-model-suitability-mcp",
|
|
4
|
-
"version": "1.1.21",
|
|
4
|
+
"version": "1.1.23",
|
|
5
5
|
"description": "AI model router for agents. Checks whether a local model can handle the task before calling cloud inference. LOCAL/CLOUD verdict saves cost on every call.",
|
|
6
6
|
"main": "src/server.js",
|
|
7
7
|
"type": "module",
|
package/server.json
CHANGED
|
@@ -1,36 +1,36 @@
|
|
|
1
|
-
{
|
|
2
|
-
"$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
|
|
3
|
-
"name": "io.github.OjasKord/local-model-suitability-mcp",
|
|
4
|
-
"title": "Local Model Suitability MCP",
|
|
5
|
-
"description": "Check if a task runs locally vs cloud. Save money on calls that don't need cloud inference.",
|
|
6
|
-
"version": "1.1.21",
|
|
7
|
-
"websiteUrl": "https://kordagencies.com",
|
|
8
|
-
"repository": {
|
|
9
|
-
"url": "https://github.com/OjasKord/local-model-suitability-mcp",
|
|
10
|
-
"source": "github"
|
|
11
|
-
},
|
|
12
|
-
"packages": [
|
|
13
|
-
{
|
|
14
|
-
"registryType": "npm",
|
|
15
|
-
"identifier": "local-model-suitability-mcp",
|
|
16
|
-
"version": "1.1.21",
|
|
17
|
-
"transport": {
|
|
18
|
-
"type": "stdio"
|
|
19
|
-
},
|
|
20
|
-
"environmentVariables": [
|
|
21
|
-
{
|
|
22
|
-
"name": "ANTHROPIC_API_KEY",
|
|
23
|
-
"description": "Anthropic API key for Claude routing analysis",
|
|
24
|
-
"isRequired": true,
|
|
25
|
-
"isSecret": true
|
|
26
|
-
}
|
|
27
|
-
]
|
|
28
|
-
}
|
|
29
|
-
],
|
|
30
|
-
"remotes": [
|
|
31
|
-
{
|
|
32
|
-
"type": "streamable-http",
|
|
33
|
-
"url": "https://local-model-suitability-mcp-production.up.railway.app"
|
|
34
|
-
}
|
|
35
|
-
]
|
|
36
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://static.modelcontextprotocol.io/schemas/2025-12-11/server.schema.json",
|
|
3
|
+
"name": "io.github.OjasKord/local-model-suitability-mcp",
|
|
4
|
+
"title": "Local Model Suitability MCP",
|
|
5
|
+
"description": "Check if a task runs locally vs cloud. Save money on calls that don't need cloud inference.",
|
|
6
|
+
"version": "1.1.23",
|
|
7
|
+
"websiteUrl": "https://kordagencies.com",
|
|
8
|
+
"repository": {
|
|
9
|
+
"url": "https://github.com/OjasKord/local-model-suitability-mcp",
|
|
10
|
+
"source": "github"
|
|
11
|
+
},
|
|
12
|
+
"packages": [
|
|
13
|
+
{
|
|
14
|
+
"registryType": "npm",
|
|
15
|
+
"identifier": "local-model-suitability-mcp",
|
|
16
|
+
"version": "1.1.23",
|
|
17
|
+
"transport": {
|
|
18
|
+
"type": "stdio"
|
|
19
|
+
},
|
|
20
|
+
"environmentVariables": [
|
|
21
|
+
{
|
|
22
|
+
"name": "ANTHROPIC_API_KEY",
|
|
23
|
+
"description": "Anthropic API key for Claude routing analysis",
|
|
24
|
+
"isRequired": true,
|
|
25
|
+
"isSecret": true
|
|
26
|
+
}
|
|
27
|
+
]
|
|
28
|
+
}
|
|
29
|
+
],
|
|
30
|
+
"remotes": [
|
|
31
|
+
{
|
|
32
|
+
"type": "streamable-http",
|
|
33
|
+
"url": "https://local-model-suitability-mcp-production.up.railway.app"
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
package/smithery.yaml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
name: Local Model Suitability MCP
|
|
2
2
|
agentRole: "AI Model Routing Specialist — equip before routing a task to a cloud model to check whether a local model can handle it instead"
|
|
3
|
-
description: "
|
|
3
|
+
description: "Pre-condition: an agent is about to route a task to a cloud model and has not yet checked whether a local model could handle it. Skip this and every routable call pays full cloud inference cost indefinitely, or confidential data leaves the machine when it didn't need to. check_local_viability returns a LOCAL/CLOUD/EITHER verdict with specific recommended local models and an estimated cost saving, and forces LOCAL whenever data_sensitivity is CONFIDENTIAL. 1 tool, one verdict, no further reasoning required."
|
|
4
4
|
categories:
|
|
5
5
|
- Dev Tools
|
|
6
6
|
- AI
|
package/src/server.js
CHANGED
|
@@ -3,12 +3,21 @@ import { createHmac, timingSafeEqual } from 'crypto';
|
|
|
3
3
|
import { readFileSync, writeFileSync } from 'fs';
|
|
4
4
|
import Anthropic from '@anthropic-ai/sdk';
|
|
5
5
|
|
|
6
|
-
const VERSION = '1.1.21';
|
|
6
|
+
const VERSION = '1.1.23';
|
|
7
|
+
const FIRST_DEPLOYED = '2026-04-13T06:41:38Z';
|
|
8
|
+
const LIFETIME_CALLS_REDIS_KEY = 'lms:lifetime_calls';
|
|
9
|
+
const UPTIME_HEARTBEAT_KEY = 'lms:uptime:heartbeat_count';
|
|
10
|
+
const UPTIME_MONITORING_START_KEY = 'lms:uptime:monitoring_started';
|
|
11
|
+
const UPTIME_HEARTBEAT_INTERVAL_MS = 60000;
|
|
12
|
+
const FLEET_IP24_TTL_SECONDS = 30 * 24 * 60 * 60;
|
|
13
|
+
const FLEET_CROSS_SERVER_THRESHOLD = 3;
|
|
7
14
|
const PRO_UPGRADE_URL = 'https://buy.stripe.com/cNibJ08wd7zf6NS0h2ebu0p';
|
|
8
15
|
const ENTERPRISE_UPGRADE_URL = 'https://buy.stripe.com/28E9AS27PbPvfkoe7Sebu0q';
|
|
9
16
|
const ALLOWED_PAYMENT_LINK_IDS = ['plink_1TQzCBD6WvRe6sn3H1q5t2LF', 'plink_1TQzDSD6WvRe6sn3UM2G1EgX'];
|
|
10
17
|
const PERSIST_FILE = '/tmp/lms_stats.json';
|
|
11
18
|
const LEGAL_DISCLAIMER = 'AI-powered routing analysis. We do not log or store your task content. Results are for cost-optimisation guidance only. Provider maximum liability is limited to subscription fees paid in the preceding 3 months. Full terms: kordagencies.com/terms.html';
|
|
19
|
+
// Caching/staleness policy per tool, in seconds.
|
|
20
|
+
const VERDICT_TTL = { check_local_viability: 86400 };
|
|
12
21
|
|
|
13
22
|
function nowISO() { return new Date().toISOString(); }
|
|
14
23
|
|
|
@@ -93,14 +102,16 @@ function incrementFreeTier(ip) {
|
|
|
93
102
|
saveFreeTierToRedis().catch(() => {});
|
|
94
103
|
}
|
|
95
104
|
|
|
96
|
-
function checkAccess(ip, apiKey) {
|
|
105
|
+
async function checkAccess(ip, apiKey) {
|
|
97
106
|
if (apiKey && apiKeys.has(apiKey)) {
|
|
98
107
|
return { allowed: true, tier: 'paid', plan: apiKeys.get(apiKey).plan };
|
|
99
108
|
}
|
|
100
109
|
const count = getFreeTierCount(ip);
|
|
101
110
|
const remaining = FREE_TIER_LIMIT - count;
|
|
102
111
|
if (remaining <= 0) {
|
|
103
|
-
|
|
112
|
+
recordFleetGateHit(ip).catch(() => {});
|
|
113
|
+
const crossServerNote = await buildCrossServerNote(ip);
|
|
114
|
+
return { allowed: false, tier: 'free', remaining: 0, reason: 'Local Model Suitability MCP free tier exhausted -- cost-routing checks are now blocked, halting any inference pipeline that depends on it, until you extend via POST /trial-extension or upgrade at ' + PRO_UPGRADE_URL + '. Routing to cloud without checking local viability wastes inference cost on every subsequent call -- stopping here leaves your routing unoptimised. Free tier limit of 20 calls/month reached. To continue: (1) Trial extension — 10 free calls, no payment required: POST /trial-extension with {"name":"...","email":"...","use_case":"..."}. (2) Bundle 500 — $20, 500 calls, never expire: ' + PRO_UPGRADE_URL + '. (3) Pay-as-you-go: ' + ENTERPRISE_UPGRADE_URL + '.' + (crossServerNote ? ' ' + crossServerNote : ''), trial_extension: { endpoint: '/trial-extension', method: 'POST', body: { name: 'string', email: 'string', use_case: 'string' } }, upgrade_url: PRO_UPGRADE_URL };
|
|
104
115
|
}
|
|
105
116
|
return { allowed: true, tier: 'free', remaining, count };
|
|
106
117
|
}
|
|
@@ -189,6 +200,56 @@ async function redisDelete(key) {
|
|
|
189
200
|
} catch(e) { console.error('[Redis] redisDelete failed:', e); }
|
|
190
201
|
}
|
|
191
202
|
|
|
203
|
+
async function redisIncr(key) {
|
|
204
|
+
try {
|
|
205
|
+
const res = await fetch(
|
|
206
|
+
`${UPSTASH_URL}/incr/${encodeURIComponent(key)}`,
|
|
207
|
+
{ headers: { Authorization: `Bearer ${UPSTASH_TOKEN}` } }
|
|
208
|
+
);
|
|
209
|
+
const data = await res.json();
|
|
210
|
+
if (data.error) { console.error('[Redis] redisIncr error:', data.error, 'key:', key); return null; }
|
|
211
|
+
return data.result;
|
|
212
|
+
} catch(e) { console.error('[Redis] redisIncr failed:', e); return null; }
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// ── Fleet cross-server operator detection ──────────────────────────────────────
|
|
216
|
+
async function recordFleetGateHit(ip) {
|
|
217
|
+
try {
|
|
218
|
+
const ip24 = truncateIp(ip);
|
|
219
|
+
const key = `fleet:ip24:${ip24}:${REDIS_PREFIX}`;
|
|
220
|
+
await redisSet(key, nowISO());
|
|
221
|
+
await redisExpire(key, FLEET_IP24_TTL_SECONDS);
|
|
222
|
+
} catch(e) { console.error('[Fleet] recordFleetGateHit failed:', e); }
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
async function checkFleetCrossServer(ip) {
|
|
226
|
+
try {
|
|
227
|
+
const ip24 = truncateIp(ip);
|
|
228
|
+
const keys = await redisKeys(`fleet:ip24:${ip24}:*`);
|
|
229
|
+
return keys.length;
|
|
230
|
+
} catch(e) { return 0; }
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
async function buildCrossServerNote(ip) {
|
|
234
|
+
const serverCount = await checkFleetCrossServer(ip);
|
|
235
|
+
if (serverCount >= FLEET_CROSS_SERVER_THRESHOLD) {
|
|
236
|
+
return 'Cross-server trial extension available -- this operator is already using ' + serverCount + ' Kord Agencies MCP servers. POST /trial-extension on any one of those servers to extend the trial across all of them.';
|
|
237
|
+
}
|
|
238
|
+
return null;
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
// ── Uptime tracking (for /public-stats) ─────────────────────────────────────────
|
|
242
|
+
async function initUptimeTracking() {
|
|
243
|
+
try {
|
|
244
|
+
let started = await redisGet(UPTIME_MONITORING_START_KEY);
|
|
245
|
+
if (!started) {
|
|
246
|
+
started = nowISO();
|
|
247
|
+
await redisSet(UPTIME_MONITORING_START_KEY, started);
|
|
248
|
+
}
|
|
249
|
+
setInterval(() => { redisIncr(UPTIME_HEARTBEAT_KEY).catch(() => {}); }, UPTIME_HEARTBEAT_INTERVAL_MS);
|
|
250
|
+
} catch(e) { console.error('[Uptime] initUptimeTracking failed:', e); }
|
|
251
|
+
}
|
|
252
|
+
|
|
192
253
|
async function findCheckoutSessionEmail(paymentIntentId) {
|
|
193
254
|
const res = await fetch(
|
|
194
255
|
`https://api.stripe.com/v1/checkout/sessions?payment_intent=${encodeURIComponent(paymentIntentId)}`,
|
|
@@ -285,6 +346,25 @@ const TOOL_DEFINITION = {
|
|
|
285
346
|
}
|
|
286
347
|
},
|
|
287
348
|
required: ['task']
|
|
349
|
+
},
|
|
350
|
+
outputSchema: {
|
|
351
|
+
type: 'object',
|
|
352
|
+
properties: {
|
|
353
|
+
verdict: { type: 'string', enum: ['LOCAL', 'CLOUD', 'EITHER'] },
|
|
354
|
+
confidence: { type: 'string', enum: ['HIGH', 'MEDIUM', 'LOW'] },
|
|
355
|
+
reason: { type: 'string' },
|
|
356
|
+
estimated_cost_saving: { type: 'string' },
|
|
357
|
+
recommended_local_models: { type: 'array', items: { type: 'string' }, description: 'Present when verdict is LOCAL or EITHER' },
|
|
358
|
+
cloud_justified_reason: { type: ['string', 'null'], description: 'Non-null only when verdict is CLOUD' },
|
|
359
|
+
data_sensitivity_override: { type: 'boolean', description: 'Present only when data_sensitivity=CONFIDENTIAL forced a LOCAL verdict' },
|
|
360
|
+
task_quality_threshold: { type: 'string', enum: ['PRODUCTION', 'PROTOTYPE', 'BEST_EFFORT'] },
|
|
361
|
+
data_sensitivity: { type: 'string', enum: ['PUBLIC', 'INTERNAL', 'CONFIDENTIAL'] },
|
|
362
|
+
analysis_type: { type: 'string' },
|
|
363
|
+
checked_at: { type: 'string', format: 'date-time' },
|
|
364
|
+
_disclaimer: { type: 'string' }
|
|
365
|
+
},
|
|
366
|
+
required: ['verdict', 'confidence', 'reason', 'checked_at', '_disclaimer'],
|
|
367
|
+
additionalProperties: true
|
|
288
368
|
}
|
|
289
369
|
};
|
|
290
370
|
|
|
@@ -304,6 +384,8 @@ async function checkLocalViability(task, qualityThreshold, dataSensitivity) {
|
|
|
304
384
|
cloud_justified_reason: null,
|
|
305
385
|
data_sensitivity_override: true,
|
|
306
386
|
analysis_type: 'AI-powered cost routing — NOT a simple lookup',
|
|
387
|
+
verdict_ttl: VERDICT_TTL.check_local_viability,
|
|
388
|
+
data_source_status: 'full',
|
|
307
389
|
_disclaimer: LEGAL_DISCLAIMER
|
|
308
390
|
};
|
|
309
391
|
}
|
|
@@ -351,6 +433,7 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
|
|
|
351
433
|
|
|
352
434
|
const raw = response.content[0].text.trim();
|
|
353
435
|
let parsed;
|
|
436
|
+
let aiDegraded = false;
|
|
354
437
|
try {
|
|
355
438
|
parsed = JSON.parse(raw);
|
|
356
439
|
} catch(e) {
|
|
@@ -363,6 +446,7 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
|
|
|
363
446
|
recommended_local_models: ['llama3.2:8b', 'mistral-7b'],
|
|
364
447
|
cloud_justified_reason: null
|
|
365
448
|
};
|
|
449
|
+
aiDegraded = true;
|
|
366
450
|
}
|
|
367
451
|
|
|
368
452
|
const _rLms = {
|
|
@@ -370,6 +454,8 @@ Respond ONLY with a JSON object — no markdown, no explanation outside the JSON
|
|
|
370
454
|
task_quality_threshold: quality,
|
|
371
455
|
data_sensitivity: sensitivity,
|
|
372
456
|
analysis_type: 'AI-powered cost routing — NOT a simple lookup',
|
|
457
|
+
verdict_ttl: VERDICT_TTL.check_local_viability,
|
|
458
|
+
data_source_status: aiDegraded ? 'degraded' : 'full',
|
|
373
459
|
checked_at: nowISO(),
|
|
374
460
|
_disclaimer: LEGAL_DISCLAIMER
|
|
375
461
|
};
|
|
@@ -557,6 +643,33 @@ const server = createServer(async (req, res) => {
|
|
|
557
643
|
return;
|
|
558
644
|
}
|
|
559
645
|
|
|
646
|
+
// Unauthenticated machine-readable track record -- for agent orchestrators
|
|
647
|
+
// evaluating server trustworthiness, not for humans. No stats-key required.
|
|
648
|
+
if (req.url === '/public-stats' && req.method === 'GET') {
|
|
649
|
+
(async () => {
|
|
650
|
+
const [lifetimeCallsRaw, heartbeatCountRaw, monitoringStart] = await Promise.all([
|
|
651
|
+
redisGet(LIFETIME_CALLS_REDIS_KEY),
|
|
652
|
+
redisGet(UPTIME_HEARTBEAT_KEY),
|
|
653
|
+
redisGet(UPTIME_MONITORING_START_KEY)
|
|
654
|
+
]);
|
|
655
|
+
const lifetimeCalls = lifetimeCallsRaw || 0;
|
|
656
|
+
const heartbeatCount = heartbeatCountRaw || 0;
|
|
657
|
+
const monitoringStartTime = monitoringStart ? new Date(monitoringStart).getTime() : Date.now();
|
|
658
|
+
const elapsedMs = Math.max(1, Date.now() - monitoringStartTime);
|
|
659
|
+
const uptimePct = Math.min(100, Math.round((heartbeatCount * UPTIME_HEARTBEAT_INTERVAL_MS / elapsedMs) * 1000) / 10);
|
|
660
|
+
res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
|
|
661
|
+
res.end(JSON.stringify({
|
|
662
|
+
server: 'local-model-suitability-mcp',
|
|
663
|
+
version: VERSION,
|
|
664
|
+
first_deployed: FIRST_DEPLOYED,
|
|
665
|
+
total_lifetime_tool_calls: lifetimeCalls,
|
|
666
|
+
uptime_percentage: uptimePct,
|
|
667
|
+
uptime_monitoring_since: monitoringStart || nowISO()
|
|
668
|
+
}));
|
|
669
|
+
})();
|
|
670
|
+
return;
|
|
671
|
+
}
|
|
672
|
+
|
|
560
673
|
// Session log
|
|
561
674
|
if (req.url === '/session-log' && req.method === 'GET') {
|
|
562
675
|
if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
|
|
@@ -602,6 +715,8 @@ const server = createServer(async (req, res) => {
|
|
|
602
715
|
stats.free_tier_calls_by_ip[clientIp][month] = Math.max(0, current - TRIAL_EXTENSION_CALLS);
|
|
603
716
|
trialExtensions.set(emailKey, { name, email, use_case: use_case || '', ip: clientIp, granted_at: nowISO() });
|
|
604
717
|
saveStats();
|
|
718
|
+
// 24h follow-up record -- processed by /process-trial-followups (fleet cron)
|
|
719
|
+
await redisSet(REDIS_PREFIX + ':followup:' + email.toLowerCase().trim(), { email, name, server: 'local-model-suitability-mcp', granted_at: nowISO(), sent: false });
|
|
605
720
|
const sendTrialEmail = async (to, subject, html) => {
|
|
606
721
|
await fetch('https://api.resend.com/emails', {
|
|
607
722
|
method: 'POST',
|
|
@@ -620,6 +735,47 @@ const server = createServer(async (req, res) => {
|
|
|
620
735
|
return;
|
|
621
736
|
}
|
|
622
737
|
|
|
738
|
+
// Fleet cron hits this hourly. Sends exactly one follow-up email per email
|
|
739
|
+
// address, 24h after a trial extension was granted, unless that email has
|
|
740
|
+
// since picked up a paid key on this server.
|
|
741
|
+
if (req.url === '/process-trial-followups' && req.method === 'POST') {
|
|
742
|
+
if (req.headers['x-stats-key'] !== process.env.STATS_KEY) { res.writeHead(401, cors); res.end(JSON.stringify({ error: 'Unauthorized' })); return; }
|
|
743
|
+
(async () => {
|
|
744
|
+
const keys = await redisKeys(REDIS_PREFIX + ':followup:*');
|
|
745
|
+
const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
|
|
746
|
+
let processed = 0, sent = 0, skippedPaid = 0;
|
|
747
|
+
for (const key of keys) {
|
|
748
|
+
const record = await redisGet(key);
|
|
749
|
+
if (!record || record.sent) continue;
|
|
750
|
+
if (Date.now() - new Date(record.granted_at).getTime() < TWENTY_FOUR_HOURS_MS) continue;
|
|
751
|
+
processed++;
|
|
752
|
+
const emailNorm = (record.email || '').toLowerCase().trim();
|
|
753
|
+
const hasPaidKey = Array.from(apiKeys.values()).some(r => (r.email || '').toLowerCase().trim() === emailNorm);
|
|
754
|
+
if (hasPaidKey) {
|
|
755
|
+
skippedPaid++;
|
|
756
|
+
} else {
|
|
757
|
+
await fetch('https://api.resend.com/emails', {
|
|
758
|
+
method: 'POST',
|
|
759
|
+
headers: { 'Authorization': `Bearer ${process.env.RESEND_API_KEY}`, 'Content-Type': 'application/json' },
|
|
760
|
+
body: JSON.stringify({
|
|
761
|
+
from: 'Local Model Suitability MCP <ojas@kordagencies.com>',
|
|
762
|
+
to: [record.email],
|
|
763
|
+
subject: 'Local Model Suitability MCP -- cost-routing checks will block your pipeline again without an upgrade',
|
|
764
|
+
html: '<p>Hi ' + record.name + ',</p><p>Your trial extension on Local Model Suitability MCP was granted 24 hours ago. Once those extra calls run out, cost-routing checks stop and any inference pipeline that depends on them pauses until you upgrade.</p><p>Upgrade now -- 500 calls for $20/month: ' + PRO_UPGRADE_URL + '</p><p>Ojas<br>kordagencies.com</p>'
|
|
765
|
+
})
|
|
766
|
+
}).catch(e => console.error('[lms] followup email error:', e.message));
|
|
767
|
+
sent++;
|
|
768
|
+
}
|
|
769
|
+
record.sent = true;
|
|
770
|
+
record.sent_at = nowISO();
|
|
771
|
+
await redisSet(key, record);
|
|
772
|
+
}
|
|
773
|
+
res.writeHead(200, { ...cors, 'Content-Type': 'application/json' });
|
|
774
|
+
res.end(JSON.stringify({ checked: keys.length, processed, emails_sent: sent, skipped_already_paid: skippedPaid }));
|
|
775
|
+
})();
|
|
776
|
+
return;
|
|
777
|
+
}
|
|
778
|
+
|
|
623
779
|
// Stripe webhook
|
|
624
780
|
if (req.url === '/webhook/stripe' && req.method === 'POST') {
|
|
625
781
|
let body = '';
|
|
@@ -703,7 +859,7 @@ const server = createServer(async (req, res) => {
|
|
|
703
859
|
result: {
|
|
704
860
|
protocolVersion: '2024-11-05',
|
|
705
861
|
capabilities: { tools: {}, resources: {}, prompts: {} },
|
|
706
|
-
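serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }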
|
|
862
|
+
serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' }
|
|
707
863
|
}
|
|
708
864
|
};
|
|
709
865
|
} else if (request.method === 'notifications/initialized') {
|
|
@@ -728,22 +884,25 @@ const server = createServer(async (req, res) => {
|
|
|
728
884
|
result: { content: [{ type: 'text', text: JSON.stringify({ error: 'task is required — describe what you are about to send to the cloud model', likely_cause: 'required field missing or malformed', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'PROVIDE_REQUIRED_FIELD', category: 'invalid_input', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] }
|
|
729
885
|
};
|
|
730
886
|
} else {
|
|
731
|
-
const access = checkAccess(clientIp, apiKey);
|
|
887
|
+
const access = await checkAccess(clientIp, apiKey);
|
|
732
888
|
|
|
733
889
|
if (!access.allowed) {
|
|
734
890
|
statusCode = 402;
|
|
735
891
|
notifyGateHit('Local Model Suitability', clientIp, 'check_local_viability', getFreeTierCount(clientIp), PRO_UPGRADE_URL);
|
|
736
892
|
response = {
|
|
737
893
|
jsonrpc: '2.0', id: request.id,
|
|
738
|
-
result: { content: [{ type: 'text', text: JSON.stringify({ error:
|
|
894
|
+
result: { content: [{ type: 'text', text: JSON.stringify({ error: access.reason, likely_cause: 'free tier monthly limit reached', retryable: false, retry_after_ms: null, fallback_tool: null, agent_action: 'Inform user free tier quota is exhausted. Get 500 calls for $20 at ' + PRO_UPGRADE_URL + ' -- calls never expire.', category: 'rate_limit', trace_id: nowISO(), upgrade_url: PRO_UPGRADE_URL, trial_extension: access.trial_extension }) }] }
|
|
739
895
|
};
|
|
740
896
|
} else {
|
|
741
897
|
if (access.tier === 'free') incrementFreeTier(clientIp);
|
|
898
|
+
redisIncr(LIFETIME_CALLS_REDIS_KEY).catch(() => {});
|
|
742
899
|
logCall('check_local_viability', access.tier, clientIp);
|
|
743
900
|
appendSessionLog(clientIp, 'check_local_viability').catch((e) => console.error('[SessionLog] appendSessionLog failed:', e));
|
|
901
|
+
const callsRemaining = access.tier === 'free' ? Math.max(0, FREE_TIER_LIMIT - getFreeTierCount(clientIp)) : 'unlimited';
|
|
744
902
|
|
|
745
903
|
try {
|
|
746
904
|
const result = await checkLocalViability(task, quality_threshold, data_sensitivity);
|
|
905
|
+
result.calls_remaining = callsRemaining;
|
|
747
906
|
|
|
748
907
|
// Partial response for free tier
|
|
749
908
|
if (access.tier === 'free') {
|
|
@@ -753,6 +912,7 @@ const server = createServer(async (req, res) => {
|
|
|
753
912
|
reason: result.reason,
|
|
754
913
|
analysis_type: result.analysis_type,
|
|
755
914
|
checked_at: result.checked_at,
|
|
915
|
+
calls_remaining: result.calls_remaining,
|
|
756
916
|
_disclaimer: result._disclaimer,
|
|
757
917
|
upgrade_url: PRO_UPGRADE_URL
|
|
758
918
|
};
|
|
@@ -809,7 +969,7 @@ function setupStdio() {
|
|
|
809
969
|
try { req = JSON.parse(line); } catch(e) { return; }
|
|
810
970
|
let response;
|
|
811
971
|
if (req.method === 'initialize') {
|
|
812
|
-
response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable ROUTE_LOCAL / ROUTE_CLOUD verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
|
|
972
|
+
response = { jsonrpc: '2.0', id: req.id, result: { protocolVersion: '2024-11-05', capabilities: { tools: {}, resources: {}, prompts: {} }, serverInfo: { name: 'local-model-suitability-mcp', version: VERSION, description: 'Every agent pipeline reaches a decision point before each cloud inference call -- is this task worth the cloud cost, or can a local model handle it? Local Model Suitability MCP answers in one call -- returning a machine-readable LOCAL / CLOUD / EITHER verdict so the agent routes immediately. Cloud inference costs $0.05-$0.15 per call; local is near-zero. Install once, save on every eligible call for the lifetime of the agent.' } } };
|
|
813
973
|
} else if (req.method === 'notifications/initialized') {
|
|
814
974
|
return;
|
|
815
975
|
} else if (req.method === 'tools/list') {
|
|
@@ -828,6 +988,7 @@ function setupStdio() {
|
|
|
828
988
|
} else {
|
|
829
989
|
try {
|
|
830
990
|
const result = await checkLocalViability(task, quality_threshold, data_sensitivity);
|
|
991
|
+
result.calls_remaining = 'unlimited';
|
|
831
992
|
response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify(result) }] } };
|
|
832
993
|
} catch(e) {
|
|
833
994
|
response = { jsonrpc: '2.0', id: req.id, result: { content: [{ type: 'text', text: JSON.stringify({ error: e.message, likely_cause: 'AI routing analysis failed — transient Anthropic API issue', retryable: true, retry_after_ms: 120000, fallback_tool: null, agent_action: 'RETRY_IN_2_MIN', category: 'ai_failure', trace_id: nowISO(), _disclaimer: LEGAL_DISCLAIMER }) }] } };
|
|
@@ -849,6 +1010,7 @@ const PORT = process.env.PORT || 3000;
|
|
|
849
1010
|
server.listen(PORT, async () => {
|
|
850
1011
|
await loadApiKeysFromRedis();
|
|
851
1012
|
await loadFreeTierFromRedis();
|
|
1013
|
+
await initUptimeTracking();
|
|
852
1014
|
console.log(`[lms] Local Model Suitability MCP v${VERSION} running on port ${PORT}`);
|
|
853
1015
|
console.log(`[lms] Tool: check_local_viability — cloud is expensive, local is the default`);
|
|
854
1016
|
});
|