prism-mcp-server 18.0.2 → 19.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/server.js CHANGED
@@ -66,6 +66,7 @@ import { startDarkFactoryRunner } from "./darkfactory/runner.js";
66
66
  import { getSyncBus } from "./sync/factory.js";
67
67
  import { startDashboardServer } from "./dashboard/server.js";
68
68
  import { acquireLock, registerShutdownHandlers } from "./lifecycle.js";
69
+ import { verifyBehaviorHandler } from "./tools/behavioralVerifierHandler.js";
69
70
  // ─── v2.3.6 FIX: Use Storage Abstraction for Prompts/Resources ───
70
71
  // CRITICAL FIX: Previously imported supabaseRpc/supabaseGet directly,
71
72
  // which bypassed the storage abstraction layer and caused the server
@@ -106,6 +107,8 @@ SESSION_BACKFILL_LINKS_TOOL, SESSION_SYNTHESIZE_EDGES_TOOL, SESSION_COGNITIVE_RO
106
107
  SESSION_TASK_ROUTE_TOOL,
107
108
  // Session Drift Detection
108
109
  SESSION_DETECT_DRIFT_TOOL,
110
+ // Behavioral Verifier
111
+ VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs,
109
112
  // v12: Developer Onboarding & Enterprise Observability
110
113
  ONBOARDING_WIZARD_TOOL, EXTRACT_ENTITIES_TOOL, API_ANALYTICS_TOOL, BACKUP_DATABASE_TOOL, CONFIGURE_NOTIFICATIONS_TOOL, QUERY_MEMORY_NATURAL_TOOL,
111
114
  // v15.5: Knowledge Ingestion
@@ -230,6 +233,7 @@ function buildSessionMemoryTools(autoloadList) {
230
233
  SESSION_SYNTHESIZE_EDGES_TOOL, // session_synthesize_edges — inferred semantic graph enrichment
231
234
  SESSION_COGNITIVE_ROUTE_TOOL, // session_cognitive_route — HDC policy-gated concept routing (v6.5)
232
235
  SESSION_DETECT_DRIFT_TOOL, // session_detect_drift — semantic goal drift detection (synalux)
236
+ VERIFY_BEHAVIOR_TOOL, // verify_behavior — behavioral verification via Synalux portal
233
237
  // ─── v6.1: Storage Hygiene tool ───
234
238
  MAINTENANCE_VACUUM_TOOL, // maintenance_vacuum — reclaim SQLite disk space post-purge
235
239
  // ─── v12.1: Developer Onboarding & Framework Bridge ───
@@ -892,6 +896,11 @@ export function createServer() {
892
896
  throw new Error("Session memory not configured. Set SUPABASE_URL and SUPABASE_KEY.");
893
897
  result = await sessionDetectDriftHandler(args);
894
898
  break;
899
+ case "verify_behavior":
900
+ if (!isVerifyBehaviorArgs(args))
901
+ throw new Error("file_path and change_summary required.");
902
+ result = await verifyBehaviorHandler(args);
903
+ break;
895
904
  // ─── v7.3: Dark Factory Pipeline Tools ───
896
905
  case "session_start_pipeline":
897
906
  if (!SESSION_MEMORY_ENABLED)
@@ -0,0 +1,80 @@
1
+ /**
2
+ * Behavioral Verifier — thin client to Synalux portal API.
3
+ *
4
+ * Calls POST /api/v1/prism/verify-behavior with the file path
5
+ * and returns a domain-specific scenario the agent must answer
6
+ * before editing the file.
7
+ *
8
+ * FAIL-CLOSED: if the portal is unreachable, returns a generic
9
+ * verification challenge rather than skipping verification.
10
+ */
11
+ import { PRISM_SYNALUX_BASE_URL, SYNALUX_CONFIGURED } from "../config.js";
12
+ import { getSynaluxJwt } from "../utils/synaluxJwt.js";
13
+ import { debugLog } from "../utils/logger.js";
14
+ const FALLBACK_SCENARIO = [
15
+ "⚠️ BEHAVIORAL VERIFICATION (OFFLINE MODE)",
16
+ "",
17
+ "Portal unreachable — using generic verification.",
18
+ "Before editing this file, answer ALL of these:",
19
+ "",
20
+ "1. What does the end user experience BEFORE vs AFTER this change?",
21
+ "2. Does this endpoint verify the caller owns/belongs-to the resource?",
22
+ "3. Can a user from workspace A access workspace B's data by guessing an ID?",
23
+ "4. If this is a revert, was the original change actually correct?",
24
+ "",
25
+ "Answer concretely. If you cannot, READ THE FILE FIRST.",
26
+ ].join("\n");
27
+ export async function verifyBehaviorHandler(args) {
28
+ if (!SYNALUX_CONFIGURED || !PRISM_SYNALUX_BASE_URL) {
29
+ return FALLBACK_SCENARIO;
30
+ }
31
+ const jwt = await getSynaluxJwt();
32
+ if (!jwt) {
33
+ debugLog("[verify-behavior] JWT unavailable — fail-closed with generic scenario");
34
+ return FALLBACK_SCENARIO;
35
+ }
36
+ try {
37
+ const url = `${PRISM_SYNALUX_BASE_URL}/api/v1/prism/verify-behavior`;
38
+ const res = await fetch(url, {
39
+ method: "POST",
40
+ headers: {
41
+ "Authorization": `Bearer ${jwt}`,
42
+ "Content-Type": "application/json",
43
+ },
44
+ body: JSON.stringify({
45
+ file_path: args.file_path,
46
+ change_summary: args.change_summary,
47
+ workspace_id: args.workspace_id,
48
+ }),
49
+ signal: AbortSignal.timeout(5_000),
50
+ });
51
+ if (!res.ok) {
52
+ debugLog(`[verify-behavior] portal returned ${res.status} — fail-closed`);
53
+ return FALLBACK_SCENARIO;
54
+ }
55
+ const data = (await res.json());
56
+ return formatResult(data);
57
+ }
58
+ catch (err) {
59
+ debugLog(`[verify-behavior] error: ${err.message} — fail-closed`);
60
+ return FALLBACK_SCENARIO;
61
+ }
62
+ }
63
+ function formatResult(data) {
64
+ if (!data.requires_verification) {
65
+ return JSON.stringify({ requires_verification: false, reason: data.reason || "non-behavioral file" });
66
+ }
67
+ return [
68
+ `⚠️ BEHAVIORAL VERIFICATION REQUIRED`,
69
+ `Domain: ${data.domain}`,
70
+ ``,
71
+ `Before making this edit, answer this scenario:`,
72
+ ``,
73
+ data.scenario || "(generic) Describe what the end user experiences BEFORE vs AFTER this change.",
74
+ ``,
75
+ `RULES:`,
76
+ ...(data.rules || []).map((r, i) => `${i + 1}. ${r}`),
77
+ ``,
78
+ `Answer the scenario in your next message before proceeding with the edit.`,
79
+ ].join("\n");
80
+ }
@@ -57,6 +57,8 @@ export { onboardingWizardHandler, extractEntitiesHandler, apiAnalyticsHandler, b
57
57
  // ── Session Drift Detection ──
58
58
  export { SESSION_DETECT_DRIFT_TOOL, isSessionDetectDriftArgs } from "./sessionMemoryDefinitions.js";
59
59
  export { sessionDetectDriftHandler } from "./sessionDriftHandler.js";
60
+ export { VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs } from "./sessionMemoryDefinitions.js";
61
+ export { verifyBehaviorHandler } from "./behavioralVerifierHandler.js";
60
62
  // ── Knowledge Ingestion (v15.5 — Open Interface) ──
61
63
  // Chunks source code, generates Q&A via Claude Haiku, stores in knowledge graph.
62
64
  // Three entry points: MCP tool, REST API, GitHub webhook.
@@ -840,50 +840,47 @@ export async function sessionLoadContextHandler(args) {
840
840
  }
841
841
  }
842
842
  // ─── Project-Aware Skill Injection ──────────────────────────
843
- // Routing (WHICH skills + user_local policy): Synalux /api/v1/skills/routing.
844
- // Content (WHAT):
845
- // Platform skills → Synalux /api/v1/skills/content (DB first, filesystem fallback)
846
- // local SQLite skill:<name> (free tier / offline fallback)
847
- // User-local skills → local SQLite user_skill:<name>
848
- // ONLY when user_local.enabled=true in routing table
849
- // OR session_load_context called with user_local=true.
850
- // Users CANNOT write to the platform skill: namespace.
843
+ // Skills are priority-sorted and cap-aware. Protected skills always load
844
+ // (they bypass the cap check). This prevents the silent-truncation bug
845
+ // where important behavioral skills were dropped because large low-priority
846
+ // skills consumed the budget first.
851
847
  const { resolveSkillsForProject } = await import("./skillRouting.js");
852
848
  const resolved = await resolveSkillsForProject(project);
853
- const skillsToLoad = resolved.names;
849
+ const sortedSkills = resolved.skills;
854
850
  const userLocalPolicy = resolved.user_local;
855
- // Paid tier: batch-fetch platform skill content from Synalux in one request.
856
851
  let synaluxContent = {};
857
852
  if (SYNALUX_CONFIGURED && storage && typeof storage.fetchSkillContent === "function") {
858
- const missing = skillsToLoad.filter(n => !loadedSkills.includes(n));
853
+ const missing = sortedSkills.map(s => s.name).filter(n => !loadedSkills.includes(n));
859
854
  synaluxContent = await storage
860
855
  .fetchSkillContent(missing).catch(() => ({}));
861
856
  debugLog(`[session_load_context] Synalux skill content fetched: ${Object.keys(synaluxContent).join(", ") || "none"}`);
862
857
  }
863
- const SKILL_BLOCK_CAP = 30_000;
858
+ const SKILL_BLOCK_CAP = 40_000;
864
859
  const skippedSkills = [];
865
- for (const skillName of skillsToLoad) {
866
- if (loadedSkills.includes(skillName))
860
+ for (const entry of sortedSkills) {
861
+ if (loadedSkills.includes(entry.name))
867
862
  continue;
868
- if (skillBlock.length >= SKILL_BLOCK_CAP) {
869
- skippedSkills.push(skillName);
870
- debugLog(`[session_load_context] Skill "${skillName}" skipped — block cap ${SKILL_BLOCK_CAP} reached`);
863
+ const content = synaluxContent[entry.name] || await getSetting(`skill:${entry.name}`, "");
864
+ if (!content || !content.trim())
871
865
  continue;
872
- }
873
- const content = synaluxContent[skillName] || await getSetting(`skill:${skillName}`, "");
874
- if (content && content.trim()) {
875
- const trimmed = content.trim();
876
- if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP && loadedSkills.length > 0) {
877
- skippedSkills.push(skillName);
878
- debugLog(`[session_load_context] Skill "${skillName}" skipped — would exceed cap (${skillBlock.length}+${trimmed.length} > ${SKILL_BLOCK_CAP})`);
879
- continue;
880
- }
881
- const source = synaluxContent[skillName] ? "synalux" : "local-platform";
882
- skillBlock += `\n\n[📜 SKILL: ${skillName}]\n${trimmed}`;
883
- loadedSkills.push(skillName);
866
+ const trimmed = content.trim();
867
+ if (entry.protected) {
868
+ skillBlock += `\n\n[📜 SKILL: ${entry.name}]\n${trimmed}`;
869
+ loadedSkills.push(entry.name);
884
870
  skillLoaded = true;
885
- debugLog(`[session_load_context] Skill "${skillName}" loaded (${source}) for project="${project}" [${skillBlock.length}/${SKILL_BLOCK_CAP} chars]`);
871
+ debugLog(`[session_load_context] Skill "${entry.name}" loaded (protected, p${entry.priority}) [${skillBlock.length} chars]`);
872
+ continue;
873
+ }
874
+ if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP) {
875
+ skippedSkills.push(entry.name);
876
+ debugLog(`[session_load_context] Skill "${entry.name}" skipped — would exceed cap (${skillBlock.length}+${trimmed.length} > ${SKILL_BLOCK_CAP})`);
877
+ continue;
886
878
  }
879
+ const source = synaluxContent[entry.name] ? "synalux" : "local-platform";
880
+ skillBlock += `\n\n[📜 SKILL: ${entry.name}]\n${trimmed}`;
881
+ loadedSkills.push(entry.name);
882
+ skillLoaded = true;
883
+ debugLog(`[session_load_context] Skill "${entry.name}" loaded (${source}, p${entry.priority}) [${skillBlock.length}/${SKILL_BLOCK_CAP} chars]`);
887
884
  }
888
885
  // ─── User-Local Skills ──────────────────────────────────────
889
886
  // Loaded ONLY when user_local.enabled=true (set in Synalux routing table
@@ -936,7 +933,7 @@ export async function sessionLoadContextHandler(args) {
936
933
  }
937
934
  }
938
935
  if (skippedSkills.length > 0) {
939
- skillBlock += `\n\n[⏭️ ${skippedSkills.length} skills skipped (cap ${SKILL_BLOCK_CAP} chars): ${skippedSkills.join(", ")}]`;
936
+ skillBlock += `\n\n[⚠️ ${skippedSkills.length} skills TRUNCATED by ${SKILL_BLOCK_CAP}-char cap — NOT loaded: ${skippedSkills.join(", ")}. These rules are NOT in your context. Do not claim to follow them.]`;
940
937
  }
941
938
  // ─── Agent Greeting Block ────────────────────────────────────
942
939
  // Shows agent identity (name + role) and skill status after briefing.
@@ -60,7 +60,7 @@ export const PRISM_INFER_TOOL = {
60
60
  },
61
61
  model_ceiling: {
62
62
  type: "string",
63
- enum: ["32b", "14b", "8b", "1b7"],
63
+ enum: ["32b", "14b", "4b", "2b"],
64
64
  description: "Cap the largest tier the picker may select. e.g. '14b' forbids 32B even if RAM allows.",
65
65
  },
66
66
  cloud_fallback: {
@@ -70,7 +70,7 @@ export const PRISM_INFER_TOOL = {
70
70
  },
71
71
  timeout_ms: {
72
72
  type: "number",
73
- description: "Override per-call timeout. Default scales with model size: 32B=120s, 14B=60s, 8B=30s, 1.7B=15s.",
73
+ description: "Override per-call timeout. Default scales with model size: 32B=120s, 14B=60s, 4B=20s, 1.7B=15s.",
74
74
  },
75
75
  evidence: {
76
76
  type: "array",
@@ -91,12 +91,12 @@ export const PRISM_INFER_TOOL = {
91
91
  type: "boolean",
92
92
  description: "Enable the L3 grounding verifier. Default: true when `evidence` is provided, " +
93
93
  "false otherwise. When enabled, the model's draft is checked by a different model " +
94
- "(prism-coder:4b by default) against the supplied `evidence`. Drafts with " +
94
+ "(qwen3.5:4b by default) against the supplied `evidence`. Drafts with " +
95
95
  "NEUTRAL or CONTRADICTED claims are refused.",
96
96
  },
97
97
  verifier_model: {
98
98
  type: "string",
99
- description: "Override the verifier model. Default: prism-coder:4b.",
99
+ description: "Override the verifier model. Default: qwen3.5:4b.",
100
100
  },
101
101
  verifier_timeout_ms: {
102
102
  type: "number",
@@ -124,7 +124,7 @@ export function isPrismInferArgs(args) {
124
124
  if (a.timeout_ms !== undefined && typeof a.timeout_ms !== "number")
125
125
  return false;
126
126
  if (a.model_ceiling !== undefined &&
127
- !["32b", "14b", "8b", "1b7"].includes(a.model_ceiling))
127
+ !["32b", "14b", "4b", "2b"].includes(a.model_ceiling))
128
128
  return false;
129
129
  if (a.verify !== undefined && typeof a.verify !== "boolean")
130
130
  return false;
@@ -149,8 +149,8 @@ export function isPrismInferArgs(args) {
149
149
  const DEFAULT_TIMEOUTS = {
150
150
  "prism-coder:32b": 120_000,
151
151
  "prism-coder:14b": 60_000,
152
- "prism-coder:8b": 30_000,
153
- "prism-coder:1b7": 15_000,
152
+ "qwen3.5:4b": 20_000,
153
+ "prism-coder:2b": 15_000,
154
154
  };
155
155
  /** List Ollama-installed tags. Returns null if Ollama unreachable. */
156
156
  export async function listOllamaTags(url = PRISM_LOCAL_LLM_URL) {
@@ -330,7 +330,7 @@ export async function runInfer(args, deps) {
330
330
  if (installed) {
331
331
  // Find start index from ceiling — if no ceiling, start at the top (32B).
332
332
  const ceilStart = effectiveCeiling
333
- ? Math.max(0, MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling) || t.tag === effectiveCeiling))
333
+ ? Math.max(0, MODEL_TIERS.findIndex(t => t.tag.endsWith(`:${effectiveCeiling}`)))
334
334
  : 0;
335
335
  let anyViable = false;
336
336
  for (let i = ceilStart; i < MODEL_TIERS.length; i++) {
@@ -1750,3 +1750,43 @@ export function isSessionDetectDriftArgs(args) {
1750
1750
  return false;
1751
1751
  return true;
1752
1752
  }
1753
+ // ─── Behavioral Verifier ────────────────────────────────────────
1754
+ export const VERIFY_BEHAVIOR_TOOL = {
1755
+ name: "verify_behavior",
1756
+ description: "Call BEFORE editing behavioral source files (API routes, ordering logic, billing, auth, migrations). " +
1757
+ "Returns a domain-specific scenario you must answer to demonstrate understanding of the end-user impact. " +
1758
+ "Example: editing a KDS route returns 'A cook has a 3-item ticket. One item is voided. What should the cook see?' " +
1759
+ "Answer the scenario concretely before proceeding with the edit.",
1760
+ inputSchema: {
1761
+ type: "object",
1762
+ properties: {
1763
+ file_path: {
1764
+ type: "string",
1765
+ description: "Path of the file about to be edited.",
1766
+ },
1767
+ change_summary: {
1768
+ type: "string",
1769
+ description: "Brief description of the intended change.",
1770
+ },
1771
+ project: {
1772
+ type: "string",
1773
+ description: "Optional project identifier for workspace-scoped scenarios.",
1774
+ },
1775
+ workspace_id: {
1776
+ type: "string",
1777
+ description: "Optional workspace ID for custom scenarios.",
1778
+ },
1779
+ },
1780
+ required: ["file_path", "change_summary"],
1781
+ },
1782
+ };
1783
+ export function isVerifyBehaviorArgs(a) {
1784
+ if (!a || typeof a !== "object")
1785
+ return false;
1786
+ const o = a;
1787
+ if (typeof o.file_path !== "string" || !o.file_path.trim())
1788
+ return false;
1789
+ if (typeof o.change_summary !== "string")
1790
+ return false;
1791
+ return true;
1792
+ }
@@ -19,7 +19,11 @@
19
19
  // Minimal fallback when synalux is unreachable.
20
20
  const OFFLINE_FALLBACK = {
21
21
  version: 1,
22
- universal: ['bcba_ai_assistant'],
22
+ universal: [
23
+ { name: 'prime-directive', priority: 0, protected: true },
24
+ { name: 'evidence-first-protocol', priority: 1, protected: true },
25
+ { name: 'bcba_ai_assistant', priority: 20 },
26
+ ],
23
27
  projects: {},
24
28
  user_local: { enabled: false, key_prefix: 'user_skill:' },
25
29
  };
@@ -56,6 +60,12 @@ async function fetchOnce() {
56
60
  * skills. Also returns the user_local policy so callers know whether to
57
61
  * load user_skill:* entries from local SQLite.
58
62
  */
63
+ function normalizeEntry(entry, defaultPriority) {
64
+ if (typeof entry === 'string') {
65
+ return { name: entry, priority: defaultPriority, protected: false };
66
+ }
67
+ return { name: entry.name, priority: entry.priority ?? defaultPriority, protected: entry.protected ?? false };
68
+ }
59
69
  export async function resolveSkillsForProject(project) {
60
70
  const now = Date.now();
61
71
  if (!cached || now - cached.fetchedAt > CACHE_TTL_MS) {
@@ -68,16 +78,31 @@ export async function resolveSkillsForProject(project) {
68
78
  await inflight;
69
79
  }
70
80
  const table = cached.table;
71
- const out = new Set(table.universal);
81
+ const seen = new Set();
82
+ const skills = [];
83
+ for (let i = 0; i < table.universal.length; i++) {
84
+ const entry = normalizeEntry(table.universal[i], i);
85
+ if (!seen.has(entry.name)) {
86
+ seen.add(entry.name);
87
+ skills.push(entry);
88
+ }
89
+ }
72
90
  const projectLower = project.toLowerCase();
73
- for (const [pattern, skills] of Object.entries(table.projects)) {
91
+ let projectPriority = 100;
92
+ for (const [pattern, projectSkills] of Object.entries(table.projects)) {
74
93
  if (projectLower.includes(pattern)) {
75
- for (const s of skills)
76
- out.add(s);
94
+ for (const s of projectSkills) {
95
+ if (!seen.has(s)) {
96
+ seen.add(s);
97
+ skills.push({ name: s, priority: projectPriority++, protected: false });
98
+ }
99
+ }
77
100
  }
78
101
  }
102
+ skills.sort((a, b) => a.priority - b.priority);
79
103
  return {
80
- names: Array.from(out),
104
+ names: skills.map(s => s.name),
105
+ skills,
81
106
  user_local: table.user_local ?? OFFLINE_FALLBACK.user_local,
82
107
  };
83
108
  }
@@ -32,7 +32,7 @@ const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
32
32
  let cache = null;
33
33
  let inFlight = null;
34
34
  // ── Model tier ordering for ceiling enforcement ───────────────────
35
- const TIER_ORDER = ["1b7", "4b", "8b", "14b", "32b"];
35
+ const TIER_ORDER = ["2b", "4b", "14b", "32b"];
36
36
  /**
37
37
  * Returns true if `requested` exceeds `ceiling`.
38
38
  * e.g. ceilingExceeded("14b", "4b") → true (14b > 4b ceiling)
@@ -9,9 +9,9 @@
9
9
  * stateless MCP), pointed at free-form generation instead of tool-call
10
10
  * responses.
11
11
  *
12
- * Cascade role: prism-coder:4b is the default verifier (fast, 2.5GB).
12
+ * Cascade role: qwen3.5:4b is the default verifier (fast, ~3.4GB).
13
13
  * 14b drafts; 4b verifies. Different model = Patronus rule satisfied.
14
- * Falls back to 1b7 on devices with <4GB free RAM.
14
+ * Falls back to 2b on devices with <4GB free RAM.
15
15
  *
16
16
  * Failure modes:
17
17
  * - Verifier model unreachable / timeout → fail-closed refusal
@@ -93,7 +93,7 @@ function refusalText(action, failedClaim) {
93
93
  }
94
94
  }
95
95
  export async function verifyGrounding(opts) {
96
- const verifierModel = opts.verifierModel ?? "prism-coder:4b";
96
+ const verifierModel = opts.verifierModel ?? "qwen3.5:4b";
97
97
  const timeoutMs = opts.timeoutMs ?? 2000;
98
98
  const ollamaUrl = opts.ollamaUrl ?? PRISM_LOCAL_LLM_URL;
99
99
  const fetchImpl = opts.fetchImpl ?? fetch;
@@ -1,11 +1,12 @@
1
1
  /**
2
2
  * RAM-Gated Local Model Picker
3
3
  * ─────────────────────────────────────────────────────────────
4
- * Cascade: 14b (default) → 4b (verifier) → 32b (complex only).
4
+ * Cascade: 14b (default) → 4b (verifier) → 2b (mobile) → 32b (complex only).
5
5
  *
6
6
  * The default ceiling is "14b" — NOT "32b". This means:
7
7
  * - 14b is the primary model for routing + general inference
8
8
  * - 4b is used as the grounding verifier (fast, small)
9
+ * - 2b is the mobile/iPhone first gate (Qwen3.5-4B Q3_K_M, 99.1% BFCL)
9
10
  * - 32b is only loaded when caller explicitly passes ceiling="32b"
10
11
  * or when the task requires maximum quality (complex code gen, etc.)
11
12
  *
@@ -15,9 +16,8 @@
15
16
  * tag weights need free ctx role
16
17
  * prism-coder:32b ~19 GB ≥ 24 GB 32K complex (on-demand)
17
18
  * prism-coder:14b ~ 9 GB ≥ 12 GB 32K default router
18
- * prism-coder:8b ~ 5 GB 7 GB 32K fallback
19
- * prism-coder:4b ~ 2.5 GB ≥ 4 GB 8K verifier + mobile
20
- * prism-coder:1b7 ~ 2 GB ≥ 3 GB 8K watch + ultra-low RAM
19
+ * qwen3.5:4b ~ 3.4 GB ≥ 5 GB 32K verifier (Q4_K_M, 100%)
20
+ * prism-coder:2b ~ 2.3 GB ≥ 3 GB 8K mobile / iPhone (Q3_K_M, 99.1%)
21
21
  *
22
22
  * Below 3 GB free → no local pick (caller must use cloud).
23
23
  */
@@ -29,9 +29,8 @@ const GB = 1024 ** 3;
29
29
  export const MODEL_TIERS = [
30
30
  { tag: 'prism-coder:32b', weightsGb: 19, minFreeGb: 24, ctxTokens: 32_768 },
31
31
  { tag: 'prism-coder:14b', weightsGb: 9, minFreeGb: 12, ctxTokens: 32_768 },
32
- { tag: 'prism-coder:8b', weightsGb: 5, minFreeGb: 7, ctxTokens: 32_768 },
33
- { tag: 'prism-coder:4b', weightsGb: 2.5, minFreeGb: 4, ctxTokens: 8_192 },
34
- { tag: 'prism-coder:1b7', weightsGb: 2, minFreeGb: 3, ctxTokens: 8_192 },
32
+ { tag: 'qwen3.5:4b', weightsGb: 3.4, minFreeGb: 5, ctxTokens: 32_768 },
33
+ { tag: 'prism-coder:2b', weightsGb: 2.3, minFreeGb: 3, ctxTokens: 8_192 },
35
34
  ];
36
35
  /**
37
36
  * True when `installed` matches `tierTag` either as a bare tag
@@ -58,7 +57,7 @@ export function pickLocalModel(freeBytes, ceiling, available) {
58
57
  if (!Number.isFinite(freeBytes) || freeBytes <= 0)
59
58
  return null;
60
59
  const effectiveCeiling = ceiling || DEFAULT_CEILING;
61
- const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling) || t.tag === effectiveCeiling);
60
+ const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(`:${effectiveCeiling}`));
62
61
  const startIdx = ceilingIdx >= 0 ? ceilingIdx : 0;
63
62
  for (let i = startIdx; i < MODEL_TIERS.length; i++) {
64
63
  const tier = MODEL_TIERS[i];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prism-mcp-server",
3
- "version": "18.0.2",
3
+ "version": "19.0.0",
4
4
  "mcpName": "io.github.dcostenco/prism-coder",
5
5
  "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
6
6
  "module": "index.ts",