prism-mcp-server 17.0.0 → 17.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +94 -11
- package/dist/tools/ledgerHandlers.js +37 -7
- package/dist/tools/prismInferHandler.js +46 -6
- package/dist/tools/skillRouting.js +39 -0
- package/dist/utils/braveApi.js +52 -0
- package/dist/utils/entitlements.js +136 -0
- package/dist/utils/phiGuard.js +88 -0
- package/dist/utils/synaluxSearch.js +195 -0
- package/package.json +2 -2
- package/dist/agent/agentTools.js +0 -453
- package/dist/agent/mcpBridge.js +0 -234
- package/dist/agent/platformUtils.js +0 -470
- package/dist/agent/terminalUI.js +0 -198
- package/dist/auth.js +0 -218
- package/dist/darkfactory/cloudDelegate.js +0 -173
- package/dist/env-preload.cjs +0 -2
- package/dist/plugins/pluginManager.js +0 -199
- package/dist/prism-cloud.js +0 -110
- package/dist/scm/ciPipeline.js +0 -220
- package/dist/start-with-env.sh +0 -5
- package/dist/sync/encryptedSync.js +0 -172
- package/dist/sync/synaluxProxy.js +0 -177
- package/dist/tools/adaptiveDefinitions.js +0 -148
- package/dist/tools/projects.js +0 -214
- package/dist/utils/changelogGenerator.js +0 -158
- package/dist/utils/fallbackClient.js +0 -52
- package/dist/utils/memoryAttestation.js +0 -163
- package/dist/utils/rbac.js +0 -321
- package/dist/utils/tavilyApi.js +0 -70
- package/dist/vm/quotaEnforcer.js +0 -192
package/README.md
CHANGED
|
@@ -200,7 +200,7 @@ HRR acts as Tier 0 — if confidence is high, FTS5 is skipped entirely. Falls th
|
|
|
200
200
|
|
|
201
201
|
Top-1 = correct word is tile #1. MRR = Mean Reciprocal Rank. Zero Top-5 regressions in any scenario. HRR encodes bigrams + trigrams from every spoken phrase; probes take ~0.2ms — safe on every keystroke. All Synalux apps (clinical, AAC, PrismCoach) share HRR via the portal `/api/v1/hrr` endpoint.
|
|
202
202
|
|
|
203
|
-
**
|
|
203
|
+
**Memory retrieval comparison:**
|
|
204
204
|
|
|
205
205
|
| System | Retrieval | Offline | Cost | Latency |
|
|
206
206
|
|--------|-----------|---------|------|---------|
|
|
@@ -512,16 +512,99 @@ Cascade (14B→32B): **100.0%** · Opus solo: 98.3% · Opus engaged: **0% of req
|
|
|
512
512
|
|
|
513
513
|
## Plans
|
|
514
514
|
|
|
515
|
-
|
|
|
516
|
-
|
|
517
|
-
| **
|
|
518
|
-
| **
|
|
519
|
-
| **
|
|
520
|
-
| **
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
515
|
+
| | **Free** | **Standard $19/mo** | **Advanced $49/mo** | **Enterprise $99/mo** |
|
|
516
|
+
|---|---|---|---|---|
|
|
517
|
+
| **Local model ceiling** | up to 4b | up to 14b | up to 32b | up to 32b |
|
|
518
|
+
| **Daily inference limit** | 50 | 200 | 2,000 | 100,000 |
|
|
519
|
+
| **Max output tokens** | 512 | 1,024 | 2,048 | 4,096 |
|
|
520
|
+
| **Cloud fallback** | — | Portal cascade (14b → 32b) | Portal cascade (14b → 32b → Claude Opus) | Priority cascade + Claude Opus |
|
|
521
|
+
| **L3 grounding verifier** | — | ✓ | ✓ | ✓ |
|
|
522
|
+
| **Knowledge search** | limited | unlimited | unlimited | unlimited |
|
|
523
|
+
| **Session memory** | limited | unlimited | unlimited | unlimited |
|
|
524
|
+
| **Analytics dashboard** | — | ✓ | ✓ | ✓ |
|
|
525
|
+
| **HIPAA BAA** | — | — | — | ✓ |
|
|
526
|
+
|
|
527
|
+
### What free users get
|
|
528
|
+
|
|
529
|
+
- Local Ollama inference with models up to 4b (prism-coder:1b7 and prism-coder:4b)
|
|
530
|
+
- 50 calls/day, 512 max output tokens per call
|
|
531
|
+
- Local SQLite storage for session memory and knowledge
|
|
532
|
+
- All open-weight models available to pull via `ollama pull`
|
|
533
|
+
|
|
534
|
+
### What paid users get
|
|
535
|
+
|
|
536
|
+
- **Higher model ceilings** — Standard unlocks 14b, Advanced/Enterprise unlock 32b
|
|
537
|
+
- **Cloud fallback** — when local Ollama is down or underpowered, inference routes through the Synalux portal cascade (14b → 32b; Advanced and Enterprise add Claude Opus)
|
|
538
|
+
- **L3 grounding verifier** — evidence-based claim verification that rejects hallucinated outputs
|
|
539
|
+
- **Unlimited knowledge search and session memory** — no caps on stored context
|
|
540
|
+
- **Analytics dashboard** — usage metrics, latency tracking, model performance
|
|
541
|
+
- **Higher daily limits and token caps** — see table above
|
|
542
|
+
|
|
543
|
+
All on-device models are open-weight and free to run locally via Ollama. The subscription gates cloud features, higher model tiers, and increased limits.
|
|
544
|
+
|
|
545
|
+
14-day free trial on all paid plans. [Subscribe →](https://synalux.ai/pricing)
|
|
546
|
+
|
|
547
|
+
### Why Prism MCP
|
|
548
|
+
|
|
549
|
+
**Pricing — flat-rate, not per-seat:**
|
|
550
|
+
|
|
551
|
+
| | **Prism MCP** | GitHub Copilot | Cursor | Windsurf | Amazon Q | Tabnine |
|
|
552
|
+
|---|---|---|---|---|---|---|
|
|
553
|
+
| **Individual** | **$19/mo** | $10/mo | $20/mo | $15-20/mo | $19/mo | $39/mo |
|
|
554
|
+
| **Team (5 devs)** | **$49/mo flat** | $95/mo | $200/mo | $200/mo | $95/mo | $295/mo |
|
|
555
|
+
| **Enterprise** | **$99/mo flat** | $195/mo | $1,000/mo | Custom | Custom | Custom |
|
|
556
|
+
|
|
557
|
+
**Features — full stack vs single-purpose:**
|
|
558
|
+
|
|
559
|
+
| | **Prism MCP** | GitHub Copilot | Cursor | Windsurf | Amazon Q | Tabnine | Devin |
|
|
560
|
+
|---|---|---|---|---|---|---|---|
|
|
561
|
+
| **Web IDE** | **Synalux Coder** | github.dev | — | — | Console | — | Browser |
|
|
562
|
+
| **VS Code extension** | **Yes** | Yes | N/A (is a fork) | N/A (is a fork) | Yes | Yes | No |
|
|
563
|
+
| **MCP server** | **Native** | No | Partial | No | No | No | No |
|
|
564
|
+
| **Works with Claude Code** | **Yes** | No | N/A | No | No | No | No |
|
|
565
|
+
| **Local inference (Ollama)** | **1.7B–32B fleet** | No | No | No | No | No | No |
|
|
566
|
+
| **Cloud fallback** | **14b→32b→Opus** | Cloud only | Cloud only | Cloud only | Cloud only | Cloud only | Cloud only |
|
|
567
|
+
| **Works offline** | **Yes** | No | No | No | No | No | No |
|
|
568
|
+
| **Open-weight models** | **HuggingFace** | Proprietary | Proprietary | Proprietary | Proprietary | Proprietary | Proprietary |
|
|
569
|
+
| **Persistent memory** | **Cross-session** | No | No | No | No | No | Partial |
|
|
570
|
+
| **Cognitive routing** | **Episodic/semantic/procedural** | No | No | No | No | No | No |
|
|
571
|
+
| **Session drift detection** | **HRR-based** | No | No | No | No | No | No |
|
|
572
|
+
| **Codebase indexing** | **Knowledge ingest (MCP + webhook + REST)** | Partial | Yes | Yes | Yes | Yes | Yes |
|
|
573
|
+
| **L3 grounding verifier** | **Evidence-based** | No | No | No | No | No | No |
|
|
574
|
+
| **Multi-agent hivemind** | **Shared Mind Palace** | No | No | No | No | No | No |
|
|
575
|
+
| **Analytics dashboard** | **Yes** | No | Yes | Yes | Yes | No | Yes |
|
|
576
|
+
| **HIPAA / air-gapped** | **On-prem, no BAA needed** | Requires BAA | No | No | Partial | No | No |
|
|
577
|
+
| **Data stays local** | **Yes** | No | No | No | No | No | No |
|
|
578
|
+
|
|
579
|
+
**vs local AI tools:**
|
|
580
|
+
|
|
581
|
+
| | **Prism MCP** | Ollama | LM Studio | Jan.ai | Mem0 | Zep |
|
|
582
|
+
|---|---|---|---|---|---|---|
|
|
583
|
+
| **Local inference** | 1.7B–32B cascade | Any GGUF | Any GGUF | Any GGUF | No | No |
|
|
584
|
+
| **Cloud fallback** | Automatic | No | No | Partial | Cloud only | Cloud only |
|
|
585
|
+
| **Persistent memory** | Cross-session | No | No | No | Yes | Yes |
|
|
586
|
+
| **Knowledge ingestion** | MCP + GitHub webhook + REST | No | No | No | Partial | No |
|
|
587
|
+
| **Cognitive routing** | 3-store (episodic/semantic/procedural) | No | No | No | No | Temporal graph |
|
|
588
|
+
| **Grounding verifier** | L3 evidence-based | No | No | No | No | No |
|
|
589
|
+
| **Drift detection** | HRR-based | No | No | No | No | No |
|
|
590
|
+
| **MCP server** | Native | No | No | No | No | No |
|
|
591
|
+
| **Web IDE** | Synalux Coder | No | No | No | No | No |
|
|
592
|
+
| **VS Code extension** | Yes | No | No | No | No | No |
|
|
593
|
+
| **Analytics** | Dashboard + Datadog | No | No | No | Yes | Yes |
|
|
594
|
+
| **Price** | $0–99/mo flat | Free | Free/$10/user | Free | $249/mo | $99/mo |
|
|
595
|
+
|
|
596
|
+
**Why developers choose Prism:**
|
|
597
|
+
- **Full IDE experience** — Synalux Coder (web) + VS Code extension + MCP for Claude Code, Cursor, JetBrains
|
|
598
|
+
- **Local-first** — your code and context never leave your machine unless you opt in to cloud
|
|
599
|
+
- **Flat-rate pricing** — $49/mo for your whole team, not $40/seat/mo
|
|
600
|
+
- **Works offline** — airplane, hospital, air-gapped classified environments
|
|
601
|
+
- **Open models** — prism-coder weights are on HuggingFace, not locked behind an API
|
|
602
|
+
- **Memory that persists** — cognitive routing stores episodic, semantic, and procedural memory across sessions
|
|
603
|
+
- **Drift detection** — HRR-based session monitoring catches when your AI agent goes off-track
|
|
604
|
+
- **Grounding verification** — L3 verifier rejects hallucinated outputs before they reach you
|
|
605
|
+
- **Codebase indexing** — knowledge ingestion via MCP tool, GitHub webhooks, or REST API
|
|
606
|
+
- **Multi-agent ready** — Hivemind lets multiple agents share the same Mind Palace with role-scoped context
|
|
607
|
+
- **HIPAA without paperwork** — local inference means no BAA required, PHI never leaves the device
|
|
525
608
|
|
|
526
609
|
---
|
|
527
610
|
|
|
@@ -3,6 +3,7 @@ import * as nodePath from "node:path";
|
|
|
3
3
|
import * as os from "node:os";
|
|
4
4
|
import { randomUUID } from "node:crypto";
|
|
5
5
|
import { redactSettings, toMarkdown } from "./commonHelpers.js";
|
|
6
|
+
import { scanAndRedactPHI } from "../utils/phiGuard.js";
|
|
6
7
|
import * as fflate from "fflate";
|
|
7
8
|
import { buildVaultDirectory } from "../utils/vaultExporter.js";
|
|
8
9
|
/**
|
|
@@ -60,9 +61,11 @@ import { notifyResourceUpdate } from "../server.js";
|
|
|
60
61
|
* Zero-latency (pure regex, no API calls). Runs on every save.
|
|
61
62
|
*/
|
|
62
63
|
export function sanitizeMemoryInput(text) {
    const controlTag = /<\/?(?:system|user_input|instruction|anti_pattern|desired_pattern|assistant|tool_call|prism_memory)[^>]*>/gi;
    // Strip repeatedly until nothing changes. A single pass can be defeated by
    // nesting one tag inside another (e.g. "<sys<system>tem>"): deleting the
    // inner tag splices the surrounding fragments into a fresh "<system>".
    // Every pass that changes the text shortens it, so the loop terminates.
    let stripped = text;
    let previous;
    do {
        previous = stripped;
        stripped = previous.replace(controlTag, '');
    } while (stripped !== previous);
    // HIPAA: redact PHI before storage — SSN, DOB, MRN, patient names, etc.
    return scanAndRedactPHI(stripped.trim()).redacted;
}
|
|
67
70
|
/** Sanitize each string in an array (for decisions[], todos[], etc.) */
|
|
68
71
|
function sanitizeArray(arr) {
|
|
@@ -853,17 +856,29 @@ export async function sessionLoadContextHandler(args) {
|
|
|
853
856
|
.fetchSkillContent(missing).catch(() => ({}));
|
|
854
857
|
debugLog(`[session_load_context] Synalux skill content fetched: ${Object.keys(synaluxContent).join(", ") || "none"}`);
|
|
855
858
|
}
|
|
859
|
+
const SKILL_BLOCK_CAP = 30_000;
|
|
860
|
+
const skippedSkills = [];
|
|
856
861
|
for (const skillName of skillsToLoad) {
|
|
857
862
|
if (loadedSkills.includes(skillName))
|
|
858
863
|
continue;
|
|
859
|
-
|
|
864
|
+
if (skillBlock.length >= SKILL_BLOCK_CAP) {
|
|
865
|
+
skippedSkills.push(skillName);
|
|
866
|
+
debugLog(`[session_load_context] Skill "${skillName}" skipped — block cap ${SKILL_BLOCK_CAP} reached`);
|
|
867
|
+
continue;
|
|
868
|
+
}
|
|
860
869
|
const content = synaluxContent[skillName] || await getSetting(`skill:${skillName}`, "");
|
|
861
870
|
if (content && content.trim()) {
|
|
871
|
+
const trimmed = content.trim();
|
|
872
|
+
if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP && loadedSkills.length > 0) {
|
|
873
|
+
skippedSkills.push(skillName);
|
|
874
|
+
debugLog(`[session_load_context] Skill "${skillName}" skipped — would exceed cap (${skillBlock.length}+${trimmed.length} > ${SKILL_BLOCK_CAP})`);
|
|
875
|
+
continue;
|
|
876
|
+
}
|
|
862
877
|
const source = synaluxContent[skillName] ? "synalux" : "local-platform";
|
|
863
|
-
skillBlock += `\n\n[📜 SKILL: ${skillName}]\n${
|
|
878
|
+
skillBlock += `\n\n[📜 SKILL: ${skillName}]\n${trimmed}`;
|
|
864
879
|
loadedSkills.push(skillName);
|
|
865
880
|
skillLoaded = true;
|
|
866
|
-
debugLog(`[session_load_context] Skill "${skillName}" loaded (${source}) for project="${project}"`);
|
|
881
|
+
debugLog(`[session_load_context] Skill "${skillName}" loaded (${source}) for project="${project}" [${skillBlock.length}/${SKILL_BLOCK_CAP} chars]`);
|
|
867
882
|
}
|
|
868
883
|
}
|
|
869
884
|
// ─── User-Local Skills ──────────────────────────────────────
|
|
@@ -876,10 +891,15 @@ export async function sessionLoadContextHandler(args) {
|
|
|
876
891
|
for (const [k, v] of Object.entries(allSettings)) {
|
|
877
892
|
if (!k.startsWith(prefix) || !v)
|
|
878
893
|
continue;
|
|
894
|
+
if (skillBlock.length >= SKILL_BLOCK_CAP)
|
|
895
|
+
break;
|
|
879
896
|
const skillName = k.replace(prefix, "");
|
|
880
897
|
if (loadedSkills.includes(skillName))
|
|
881
898
|
continue;
|
|
882
|
-
|
|
899
|
+
const trimmed = v.trim();
|
|
900
|
+
if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP && loadedSkills.length > 0)
|
|
901
|
+
continue;
|
|
902
|
+
skillBlock += `\n\n[📜 USER SKILL: ${skillName}]\n${trimmed}`;
|
|
883
903
|
loadedSkills.push(skillName);
|
|
884
904
|
skillLoaded = true;
|
|
885
905
|
debugLog(`[session_load_context] User-local skill "${skillName}" loaded`);
|
|
@@ -888,22 +908,32 @@ export async function sessionLoadContextHandler(args) {
|
|
|
888
908
|
// ─── Memory-Based Skill Discovery ──────────────────────────
|
|
889
909
|
// If recent handoff/ledger mentions a platform skill name, auto-load it.
|
|
890
910
|
// Only scans platform skill: keys — user_skill: discovery is not automatic.
|
|
891
|
-
if (formattedContext.length > 0) {
|
|
911
|
+
if (formattedContext.length > 0 && skillBlock.length < SKILL_BLOCK_CAP) {
|
|
892
912
|
const contextText = formattedContext.toLowerCase();
|
|
893
913
|
const allSkillKeys = await storage.getAllSettings?.() || {};
|
|
894
914
|
for (const [k, v] of Object.entries(allSkillKeys)) {
|
|
895
915
|
if (!k.startsWith("skill:") || !v)
|
|
896
916
|
continue;
|
|
917
|
+
if (skillBlock.length >= SKILL_BLOCK_CAP)
|
|
918
|
+
break;
|
|
897
919
|
const skillName = k.replace("skill:", "");
|
|
898
920
|
if (loadedSkills.includes(skillName))
|
|
899
921
|
continue;
|
|
900
922
|
if (contextText.includes(skillName.replace(/-/g, " ")) || contextText.includes(skillName)) {
|
|
901
|
-
|
|
923
|
+
const trimmed = v.trim();
|
|
924
|
+
if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP && loadedSkills.length > 0) {
|
|
925
|
+
skippedSkills.push(skillName);
|
|
926
|
+
continue;
|
|
927
|
+
}
|
|
928
|
+
skillBlock += `\n\n[📜 CONTEXT SKILL: ${skillName}]\n${trimmed}`;
|
|
902
929
|
loadedSkills.push(skillName);
|
|
903
930
|
debugLog(`[session_load_context] Context-triggered skill "${skillName}"`);
|
|
904
931
|
}
|
|
905
932
|
}
|
|
906
933
|
}
|
|
934
|
+
if (skippedSkills.length > 0) {
|
|
935
|
+
skillBlock += `\n\n[⏭️ ${skippedSkills.length} skills skipped (cap ${SKILL_BLOCK_CAP} chars): ${skippedSkills.join(", ")}]`;
|
|
936
|
+
}
|
|
907
937
|
// ─── Agent Greeting Block ────────────────────────────────────
|
|
908
938
|
// Shows agent identity (name + role) and skill status after briefing.
|
|
909
939
|
let greetingBlock = "";
|
|
@@ -25,6 +25,8 @@ import { getAvailableMemoryBytes } from "../utils/availableMemory.js";
|
|
|
25
25
|
import { PRISM_SYNALUX_BASE_URL, PRISM_LOCAL_LLM_URL, } from "../config.js";
|
|
26
26
|
import { debugLog } from "../utils/logger.js";
|
|
27
27
|
import { verifyGrounding } from "../utils/groundingVerifier.js";
|
|
28
|
+
import { getEntitlements, clampCeiling } from "../utils/entitlements.js";
|
|
29
|
+
import { ddLog } from "../utils/ddLogger.js";
|
|
28
30
|
// ─── Tool Definition ────────────────────────────────────────────
|
|
29
31
|
export const PRISM_INFER_TOOL = {
|
|
30
32
|
name: "prism_infer",
|
|
@@ -273,12 +275,47 @@ async function callSynaluxInference(prompt, maxTokens, timeoutMs) {
|
|
|
273
275
|
}
|
|
274
276
|
export async function runInfer(args, deps) {
|
|
275
277
|
const t0 = Date.now();
|
|
276
|
-
const maxTokens = Math.min(args.max_tokens ?? 1024, 8192);
|
|
277
278
|
const temperature = args.temperature ?? 0;
|
|
278
|
-
|
|
279
|
+
// ── Entitlement enforcement ──────────────────────────────────
|
|
280
|
+
// Fetch user's plan limits (cached 1hr). Free users without auth
|
|
281
|
+
// get 4b ceiling, 50 calls/day, 512 max tokens.
|
|
282
|
+
const ent = deps.entitlements ?? await getEntitlements();
|
|
283
|
+
// Clamp model ceiling to what the plan allows
|
|
284
|
+
const effectiveCeiling = clampCeiling(args.model_ceiling, ent.model_ceiling);
|
|
285
|
+
// Clamp max_tokens to plan limit
|
|
286
|
+
const maxTokens = Math.min(args.max_tokens ?? 1024, ent.max_tokens, 8192);
|
|
287
|
+
// Cloud fallback only for paid plans
|
|
288
|
+
const allowCloud = args.cloud_fallback === true && ent.features.cloud_fallback;
|
|
289
|
+
// Verification only for paid plans (free users skip L3 grounding)
|
|
290
|
+
const canVerify = ent.features.grounding_verifier;
|
|
279
291
|
const freeBytes = deps.freemem();
|
|
280
292
|
const ramFreeMb = Math.round(freeBytes / (1024 * 1024));
|
|
281
293
|
const attempts = [];
|
|
294
|
+
// Strip verification args if plan lacks grounding_verifier
|
|
295
|
+
const gatedArgs = canVerify ? args : { ...args, verify: false, evidence: undefined };
|
|
296
|
+
debugLog(`[prism_infer] plan=${ent.plan} ceiling=${effectiveCeiling} max_tokens=${maxTokens} cloud=${allowCloud} verify=${canVerify}`);
|
|
297
|
+
// Log tier enforcement to Datadog for monetization visibility
|
|
298
|
+
const ceilingClamped = effectiveCeiling !== (args.model_ceiling ?? ent.model_ceiling);
|
|
299
|
+
const tokensClamped = maxTokens < (args.max_tokens ?? 1024);
|
|
300
|
+
const cloudBlocked = args.cloud_fallback === true && !allowCloud;
|
|
301
|
+
const verifierBlocked = (args.verify === true || (args.evidence?.length ?? 0) > 0) && !canVerify;
|
|
302
|
+
if (ceilingClamped || tokensClamped || cloudBlocked || verifierBlocked) {
|
|
303
|
+
ddLog("info", "prism_infer.tier_enforcement", {
|
|
304
|
+
plan: ent.plan,
|
|
305
|
+
requested_ceiling: args.model_ceiling,
|
|
306
|
+
effective_ceiling: effectiveCeiling,
|
|
307
|
+
ceiling_clamped: ceilingClamped,
|
|
308
|
+
requested_tokens: args.max_tokens,
|
|
309
|
+
effective_tokens: maxTokens,
|
|
310
|
+
tokens_clamped: tokensClamped,
|
|
311
|
+
cloud_requested: args.cloud_fallback,
|
|
312
|
+
cloud_allowed: allowCloud,
|
|
313
|
+
cloud_blocked: cloudBlocked,
|
|
314
|
+
verify_requested: args.verify,
|
|
315
|
+
verify_allowed: canVerify,
|
|
316
|
+
verify_blocked: verifierBlocked,
|
|
317
|
+
});
|
|
318
|
+
}
|
|
282
319
|
// Discover which tags Ollama actually has + which are already warm.
|
|
283
320
|
// Already-loaded models don't need RAM headroom — they're reusing
|
|
284
321
|
// memory Ollama allocated previously.
|
|
@@ -292,8 +329,8 @@ export async function runInfer(args, deps) {
|
|
|
292
329
|
// so the caller can see exactly why each tier was bypassed.
|
|
293
330
|
if (installed) {
|
|
294
331
|
// Find start index from ceiling — if no ceiling, start at the top (32B).
|
|
295
|
-
const ceilStart =
|
|
296
|
-
? Math.max(0, MODEL_TIERS.findIndex(t => t.tag.endsWith(
|
|
332
|
+
const ceilStart = effectiveCeiling
|
|
333
|
+
? Math.max(0, MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling) || t.tag === effectiveCeiling))
|
|
297
334
|
: 0;
|
|
298
335
|
let anyViable = false;
|
|
299
336
|
for (let i = ceilStart; i < MODEL_TIERS.length; i++) {
|
|
@@ -318,13 +355,14 @@ export async function runInfer(args, deps) {
|
|
|
318
355
|
const timeout = args.timeout_ms ?? DEFAULT_TIMEOUTS[tier.tag] ?? 60_000;
|
|
319
356
|
const result = await deps.callLocal(deps.ollamaUrl, ollamaName, args.prompt, args.system, maxTokens, temperature, timeout);
|
|
320
357
|
if (result.ok) {
|
|
321
|
-
return await applyVerification(result.text,
|
|
358
|
+
return await applyVerification(result.text, gatedArgs, deps, {
|
|
322
359
|
backend: `ollama-${tier.tag.replace("prism-coder:", "")}`,
|
|
323
360
|
model_picked: tier.tag,
|
|
324
361
|
ram_free_mb: ramFreeMb,
|
|
325
362
|
latency_ms: Date.now() - t0,
|
|
326
363
|
used_cloud: false,
|
|
327
364
|
attempts,
|
|
365
|
+
plan: ent.plan,
|
|
328
366
|
});
|
|
329
367
|
}
|
|
330
368
|
attempts.push({ tier: tier.tag, reason: result.reason });
|
|
@@ -340,13 +378,14 @@ export async function runInfer(args, deps) {
|
|
|
340
378
|
const cloudTimeout = args.timeout_ms ?? 90_000;
|
|
341
379
|
const cloud = await deps.callCloud(args.prompt, maxTokens, cloudTimeout);
|
|
342
380
|
if (cloud.ok && cloud.output) {
|
|
343
|
-
return await applyVerification(cloud.output,
|
|
381
|
+
return await applyVerification(cloud.output, gatedArgs, deps, {
|
|
344
382
|
backend: cloud.backend ?? "synalux",
|
|
345
383
|
model_picked: null,
|
|
346
384
|
ram_free_mb: ramFreeMb,
|
|
347
385
|
latency_ms: Date.now() - t0,
|
|
348
386
|
used_cloud: true,
|
|
349
387
|
attempts,
|
|
388
|
+
plan: ent.plan,
|
|
350
389
|
});
|
|
351
390
|
}
|
|
352
391
|
attempts.push({ tier: "synalux", reason: cloud.reason ?? "unknown" });
|
|
@@ -408,6 +447,7 @@ export async function prismInferHandler(args) {
|
|
|
408
447
|
debugLog(`[prism_infer] backend=${result.backend} model=${result.model_picked} latency=${result.latency_ms}ms free=${result.ram_free_mb}MB`);
|
|
409
448
|
const header = `[prism_infer] backend=${result.backend}` +
|
|
410
449
|
` model=${result.model_picked ?? "n/a"}` +
|
|
450
|
+
` plan=${result.plan ?? "unknown"}` +
|
|
411
451
|
` free_ram=${result.ram_free_mb}MB` +
|
|
412
452
|
` latency=${result.latency_ms}ms` +
|
|
413
453
|
` used_cloud=${result.used_cloud}` +
|
|
@@ -81,6 +81,45 @@ export async function resolveSkillsForProject(project) {
|
|
|
81
81
|
user_local: table.user_local ?? OFFLINE_FALLBACK.user_local,
|
|
82
82
|
};
|
|
83
83
|
}
|
|
84
|
+
/**
 * Pick additional skills for a prompt from the routing table's
 * `prompt_keywords` map (regex source string → list of skill names).
 *
 * The routing table is refreshed first when the cached copy is missing or
 * older than CACHE_TTL_MS; concurrent callers await the same in-flight fetch.
 * Each pattern is compiled case-insensitively and tested against the prompt.
 *
 * @param prompt - User prompt text to test against every pattern.
 * @param baseSkills - Skills already selected; these are never returned again.
 * @returns Newly matched skill names, deduplicated, in routing-table order.
 */
export async function resolveSkillsForPrompt(prompt, baseSkills = []) {
    const startedAt = Date.now();
    const needsRefresh = !cached || startedAt - cached.fetchedAt > CACHE_TTL_MS;
    if (needsRefresh) {
        if (!inflight) {
            const storeTable = (table) => {
                cached = { table, fetchedAt: Date.now() };
                return table;
            };
            const clearInflight = () => { inflight = null; };
            inflight = fetchOnce().then(storeTable).finally(clearInflight);
        }
        await inflight;
    }
    const keywordMap = cached.table.prompt_keywords;
    if (!keywordMap) {
        return [];
    }
    const seen = new Set(baseSkills);
    const additions = [];
    for (const [pattern, skills] of Object.entries(keywordMap)) {
        try {
            const matcher = new RegExp(pattern, 'i');
            if (!matcher.test(prompt)) {
                continue;
            }
            for (const skill of skills) {
                if (seen.has(skill)) {
                    continue;
                }
                seen.add(skill);
                additions.push(skill);
            }
        }
        catch {
            // Invalid regex in routing table — skip silently
        }
    }
    return additions;
}
|
|
84
123
|
/** Force a re-fetch on the next call. Exposed for tests + admin tooling. */
|
|
85
124
|
export function _invalidateRoutingCache() {
|
|
86
125
|
cached = null;
|
package/dist/utils/braveApi.js
CHANGED
|
@@ -29,8 +29,20 @@
|
|
|
29
29
|
* The Brave Answers endpoint uses a separate BRAVE_ANSWERS_API_KEY via Bearer token.
|
|
30
30
|
*/
|
|
31
31
|
import { BRAVE_API_KEY, BRAVE_ANSWERS_API_KEY } from "../config.js";
|
|
32
|
+
import { SYNALUX_SEARCH_AVAILABLE, synaluxWebSearch, synaluxWebSearchRaw, synaluxLocalSearch, synaluxLocalSearchRaw, synaluxBraveAnswers, } from "./synaluxSearch.js";
|
|
33
|
+
import { debugLog } from "./logger.js";
|
|
32
34
|
// Brave Answers API call (AI Grounding/OpenAI-compatible)
|
|
33
35
|
export async function performBraveAnswers(query, model = "brave") {
|
|
36
|
+
// Route through Synalux portal when available
|
|
37
|
+
if (SYNALUX_SEARCH_AVAILABLE) {
|
|
38
|
+
try {
|
|
39
|
+
return await synaluxBraveAnswers(query, model);
|
|
40
|
+
}
|
|
41
|
+
catch (err) {
|
|
42
|
+
debugLog(`[braveApi] Synalux answers failed, falling back to Brave: ${err instanceof Error ? err.message : String(err)}`);
|
|
43
|
+
// Fall through to direct Brave API
|
|
44
|
+
}
|
|
45
|
+
}
|
|
34
46
|
if (!BRAVE_ANSWERS_API_KEY) {
|
|
35
47
|
throw new Error("BRAVE_ANSWERS_API_KEY is not configured");
|
|
36
48
|
}
|
|
@@ -62,6 +74,16 @@ export async function performBraveAnswers(query, model = "brave") {
|
|
|
62
74
|
}
|
|
63
75
|
// Raw web search API call
|
|
64
76
|
export async function performWebSearchRaw(query, count = 10, offset = 0) {
|
|
77
|
+
// Route through Synalux portal when available (offset=0 only — portal doesn't support offset)
|
|
78
|
+
if (SYNALUX_SEARCH_AVAILABLE && offset === 0) {
|
|
79
|
+
try {
|
|
80
|
+
return await synaluxWebSearchRaw(query, count);
|
|
81
|
+
}
|
|
82
|
+
catch (err) {
|
|
83
|
+
debugLog(`[braveApi] Synalux search failed, falling back to Brave: ${err instanceof Error ? err.message : String(err)}`);
|
|
84
|
+
// Fall through to direct Brave API
|
|
85
|
+
}
|
|
86
|
+
}
|
|
65
87
|
const url = new URL("https://api.search.brave.com/res/v1/web/search");
|
|
66
88
|
url.searchParams.set("q", query);
|
|
67
89
|
url.searchParams.set("count", Math.min(count, 20).toString()); // API limit
|
|
@@ -81,6 +103,16 @@ export async function performWebSearchRaw(query, count = 10, offset = 0) {
|
|
|
81
103
|
}
|
|
82
104
|
// Web search API call
|
|
83
105
|
export async function performWebSearch(query, count = 10, offset = 0) {
|
|
106
|
+
// Route through Synalux portal when available (offset=0 only — portal doesn't support offset)
|
|
107
|
+
if (SYNALUX_SEARCH_AVAILABLE && offset === 0) {
|
|
108
|
+
try {
|
|
109
|
+
return await synaluxWebSearch(query, count);
|
|
110
|
+
}
|
|
111
|
+
catch (err) {
|
|
112
|
+
debugLog(`[braveApi] Synalux search failed, falling back to Brave: ${err instanceof Error ? err.message : String(err)}`);
|
|
113
|
+
// Fall through to direct Brave API
|
|
114
|
+
}
|
|
115
|
+
}
|
|
84
116
|
const textData = await performWebSearchRaw(query, count, offset);
|
|
85
117
|
const data = JSON.parse(textData);
|
|
86
118
|
// Extract just web results
|
|
@@ -136,6 +168,16 @@ function chunkArray(arr, size) {
|
|
|
136
168
|
}
|
|
137
169
|
// Raw local search API call with poi/details payload
|
|
138
170
|
export async function performLocalSearchRaw(query, count = 5) {
|
|
171
|
+
// Route through Synalux portal when available
|
|
172
|
+
if (SYNALUX_SEARCH_AVAILABLE) {
|
|
173
|
+
try {
|
|
174
|
+
return await synaluxLocalSearchRaw(query, count);
|
|
175
|
+
}
|
|
176
|
+
catch (err) {
|
|
177
|
+
debugLog(`[braveApi] Synalux local search raw failed, falling back to Brave: ${err instanceof Error ? err.message : String(err)}`);
|
|
178
|
+
// Fall through to direct Brave API
|
|
179
|
+
}
|
|
180
|
+
}
|
|
139
181
|
// Initial search to get location IDs
|
|
140
182
|
const webUrl = new URL("https://api.search.brave.com/res/v1/web/search");
|
|
141
183
|
webUrl.searchParams.set("q", query);
|
|
@@ -190,6 +232,16 @@ export async function performLocalSearchRaw(query, count = 5) {
|
|
|
190
232
|
}
|
|
191
233
|
// Local search API call with poi details
|
|
192
234
|
export async function performLocalSearch(query, count = 5) {
|
|
235
|
+
// Route through Synalux portal when available
|
|
236
|
+
if (SYNALUX_SEARCH_AVAILABLE) {
|
|
237
|
+
try {
|
|
238
|
+
return await synaluxLocalSearch(query, count);
|
|
239
|
+
}
|
|
240
|
+
catch (err) {
|
|
241
|
+
debugLog(`[braveApi] Synalux local search failed, falling back to Brave: ${err instanceof Error ? err.message : String(err)}`);
|
|
242
|
+
// Fall through to direct Brave API
|
|
243
|
+
}
|
|
244
|
+
}
|
|
193
245
|
const rawData = await performLocalSearchRaw(query, count);
|
|
194
246
|
const parsed = JSON.parse(rawData);
|
|
195
247
|
if (parsed.source === "web_fallback") {
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Prism Entitlements — Plan-Based Feature & Model Gating
|
|
3
|
+
* ═══════════════════════════════════════════════════════════
|
|
4
|
+
* Fetches the user's plan entitlements from the Synalux portal
|
|
5
|
+
* and caches them locally. Used by prism_infer and other tools
|
|
6
|
+
* to enforce model ceiling, max_tokens, and feature gates.
|
|
7
|
+
*
|
|
8
|
+
* Unauthenticated users (no SYNALUX_API_KEY) get free-tier defaults.
|
|
9
|
+
* Authenticated users get their plan from the portal (1-hour cache).
|
|
10
|
+
*/
|
|
11
|
+
import { getSynaluxJwt } from "./synaluxJwt.js";
|
|
12
|
+
import { PRISM_SYNALUX_BASE_URL, SYNALUX_CONFIGURED } from "../config.js";
|
|
13
|
+
import { debugLog } from "./logger.js";
|
|
14
|
+
// ── Free-tier defaults (no auth) ──────────────────────────────────
/**
 * Entitlements returned whenever a portal plan is not available:
 * no Synalux auth configured, JWT exchange failed, portal error,
 * or a malformed portal response.
 */
export const FREE_ENTITLEMENTS = {
    plan: "free",
    // Largest local model tier this plan may use (a TIER_ORDER entry).
    model_ceiling: "4b",
    daily_infer_limit: 50,
    // Upper bound applied to a request's max_tokens.
    max_tokens: 512,
    // Feature gates; every gate is off on the free tier.
    features: {
        cloud_fallback: false,
        grounding_verifier: false,
        knowledge_search_unlimited: false,
        session_memory_unlimited: false,
        analytics_dashboard: false,
    },
    upgrade_url: "https://synalux.ai/pricing",
};
// ── Cache ─────────────────────────────────────────────────────────
// Lifetime of a cached entitlement record: 1 hour, in milliseconds.
const CACHE_TTL_MS = 3_600_000;
// Last fetched record as { entitlements, expiresAt }, or null when empty.
let cache = null;
// Promise of the fetch currently running, or null; shared by concurrent callers.
let inFlight = null;
// ── Model tier ordering for ceiling enforcement ───────────────────
// Smallest to largest; a tier's index is its rank when comparing ceilings.
const TIER_ORDER = ["1b7", "4b", "8b", "14b", "32b"];
|
|
35
|
+
/**
 * Report whether the `requested` tier ranks above the `ceiling` tier
 * in TIER_ORDER.
 * e.g. ceilingExceeded("14b", "4b") → true (14b > 4b ceiling)
 *
 * If either argument is not a TIER_ORDER entry the answer is false.
 */
export function ceilingExceeded(requested, ceiling) {
    const [requestedRank, ceilingRank] = [requested, ceiling].map((tier) => TIER_ORDER.indexOf(tier));
    const bothKnown = requestedRank !== -1 && ceilingRank !== -1;
    return bothKnown && requestedRank > ceilingRank;
}
|
|
46
|
+
/**
 * Clamp a requested model ceiling to the plan's maximum.
 * Returns the lower of the two ceilings as a TIER_ORDER entry.
 *
 * Tier tokens are normalised before lookup: surrounding whitespace is
 * dropped, case is ignored, and anything up to the last ":" is removed, so
 * "prism-coder:4b" and "4B" both resolve to "4b". Without this, a full model
 * tag was treated as unknown and silently replaced by the plan ceiling, which
 * could raise the ceiling above what the caller asked for.
 *
 * @param requested - Caller-supplied ceiling; falsy means "no preference".
 * @param planCeiling - Maximum tier the plan allows.
 * @returns `planCeiling` when `requested` is missing or unrecognised; the
 *   normalised requested tier when the plan ceiling is unrecognised;
 *   otherwise the smaller of the two tiers.
 */
export function clampCeiling(requested, planCeiling) {
    if (!requested)
        return planCeiling;
    // Rank of a tier token in TIER_ORDER, or -1 when it cannot be recognised.
    const rankOf = (tier) => typeof tier === "string"
        ? TIER_ORDER.indexOf(tier.trim().toLowerCase().split(":").pop())
        : -1;
    const reqIdx = rankOf(requested);
    const planIdx = rankOf(planCeiling);
    if (reqIdx === -1)
        return planCeiling;
    if (planIdx === -1)
        return TIER_ORDER[reqIdx];
    return TIER_ORDER[Math.min(reqIdx, planIdx)];
}
|
|
61
|
+
// ── Fetch ─────────────────────────────────────────────────────────
|
|
62
|
+
/**
 * Fetch the user's plan entitlements from the Synalux portal.
 *
 * Every failure falls back to FREE_ENTITLEMENTS (the same object, so
 * callers can detect a fallback by identity): no auth configured, JWT
 * missing or its lookup throwing, a non-2xx response, a response without
 * `plan` / `model_ceiling`, or any fetch/parse error.
 *
 * A successful response is normalised so consumers can read every field
 * safely: fields the portal omitted take the free-tier value, `max_tokens`
 * must be a positive finite number, and `features` always contains every
 * free-tier gate.
 *
 * @returns {Promise<object>} Normalised entitlements, or FREE_ENTITLEMENTS.
 */
async function fetchEntitlements() {
    if (!SYNALUX_CONFIGURED || !PRISM_SYNALUX_BASE_URL) {
        debugLog("[entitlements] no Synalux auth configured — free tier");
        return FREE_ENTITLEMENTS;
    }
    try {
        // Inside the try so a throwing JWT exchange also degrades to free tier.
        const jwt = await getSynaluxJwt();
        if (!jwt) {
            debugLog("[entitlements] JWT exchange failed — free tier fallback");
            return FREE_ENTITLEMENTS;
        }
        const url = `${PRISM_SYNALUX_BASE_URL}/api/v1/prism/entitlements`;
        const res = await fetch(url, {
            method: "GET",
            headers: { Authorization: `Bearer ${jwt}` },
            signal: AbortSignal.timeout(10_000),
            redirect: "error",
        });
        if (!res.ok) {
            debugLog(`[entitlements] portal HTTP ${res.status} — free tier fallback`);
            return FREE_ENTITLEMENTS;
        }
        const data = (await res.json());
        if (!data.plan || !data.model_ceiling) {
            debugLog("[entitlements] malformed response — free tier fallback");
            return FREE_ENTITLEMENTS;
        }
        // A non-numeric max_tokens would turn the caller's Math.min() into NaN.
        const maxTokens = Number.isFinite(data.max_tokens) && data.max_tokens > 0
            ? data.max_tokens
            : FREE_ENTITLEMENTS.max_tokens;
        // A missing features object would make `ent.features.x` throw.
        const portalFeatures = typeof data.features === "object" && data.features !== null
            ? data.features
            : {};
        const entitlements = {
            ...FREE_ENTITLEMENTS,
            ...data,
            max_tokens: maxTokens,
            features: { ...FREE_ENTITLEMENTS.features, ...portalFeatures },
        };
        debugLog(`[entitlements] plan=${entitlements.plan} ceiling=${entitlements.model_ceiling} ` +
            `daily=${entitlements.daily_infer_limit} max_tokens=${entitlements.max_tokens}`);
        return entitlements;
    }
    catch (err) {
        debugLog(`[entitlements] fetch error: ${err instanceof Error ? err.message : String(err)} — free tier fallback`);
        return FREE_ENTITLEMENTS;
    }
}
|
|
98
|
+
// ── Public API ────────────────────────────────────────────────────
|
|
99
|
+
/**
 * Get the current user's entitlements.
 * Concurrent callers share a single in-flight fetch.
 *
 * A plan fetched from the portal is cached for CACHE_TTL_MS (1 hour).
 * When the fetch yields the FREE_ENTITLEMENTS fallback object, the result is
 * cached for at most 5 minutes instead, so a transient portal or JWT failure
 * does not pin a paying user to free-tier limits for a whole hour.
 *
 * @returns {Promise<object>} The cached or freshly fetched entitlements.
 */
export async function getEntitlements() {
    const now = Date.now();
    if (cache && cache.expiresAt > now) {
        return cache.entitlements;
    }
    if (inFlight)
        return inFlight;
    // Retry window after a fallback; never longer than the normal TTL.
    const fallbackTtlMs = Math.min(5 * 60 * 1000, CACHE_TTL_MS);
    inFlight = (async () => {
        try {
            const ent = await fetchEntitlements();
            // Identity check: fallback paths hand back the shared free-tier object.
            const ttlMs = ent === FREE_ENTITLEMENTS ? fallbackTtlMs : CACHE_TTL_MS;
            cache = { entitlements: ent, expiresAt: Date.now() + ttlMs };
            return ent;
        }
        finally {
            inFlight = null;
        }
    })();
    return inFlight;
}
|
|
122
|
+
/**
 * Drop the cached entitlements so the next lookup fetches them again
 * (for example once the user has upgraded their plan).
 * Only the cache record is cleared; a fetch already in flight is left alone.
 */
export function invalidateEntitlements() {
    cache = null;
}
|
|
128
|
+
/**
 * Test-only: return the module to its initial state by clearing both the
 * cached record and the in-flight fetch marker.
 */
export function _resetEntitlementsForTest() {
    inFlight = null;
    cache = null;
}
|
|
133
|
+
/**
 * Test-only: seed the cache with `ent`, expiring `ttlMs` milliseconds from
 * now (defaults to the regular cache TTL).
 */
export function _setCacheForTest(ent, ttlMs = CACHE_TTL_MS) {
    const expiresAt = Date.now() + ttlMs;
    cache = { entitlements: ent, expiresAt };
}
|