prism-mcp-server 18.0.2 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -521,10 +521,10 @@ scmCmd
521
521
  });
522
522
  // ─── prism register-models ────────────────────────────────────
523
523
  // Convenience: alias namespaced HF-style prism-coder tags
524
- // (`dcostenco/prism-coder:14b`) to the bare tags (`prism-coder:14b`)
524
+ // (`dcostenco/prism-coder:9b`) to the bare tags (`prism-coder:9b`)
525
525
  // some external tooling expects. The MCP picker handles both forms
526
526
  // natively as of v15.5, so this command is OPTIONAL — useful only
527
- // when a user wants to run `ollama run prism-coder:14b` directly,
527
+ // when a user wants to run `ollama run prism-coder:9b` directly,
528
528
  // or for tools that pre-date the picker's namespace fallback.
529
529
  program
530
530
  .command('register-models')
package/dist/server.js CHANGED
@@ -66,6 +66,7 @@ import { startDarkFactoryRunner } from "./darkfactory/runner.js";
66
66
  import { getSyncBus } from "./sync/factory.js";
67
67
  import { startDashboardServer } from "./dashboard/server.js";
68
68
  import { acquireLock, registerShutdownHandlers } from "./lifecycle.js";
69
+ import { verifyBehaviorHandler } from "./tools/behavioralVerifierHandler.js";
69
70
  // ─── v2.3.6 FIX: Use Storage Abstraction for Prompts/Resources ───
70
71
  // CRITICAL FIX: Previously imported supabaseRpc/supabaseGet directly,
71
72
  // which bypassed the storage abstraction layer and caused the server
@@ -106,6 +107,8 @@ SESSION_BACKFILL_LINKS_TOOL, SESSION_SYNTHESIZE_EDGES_TOOL, SESSION_COGNITIVE_RO
106
107
  SESSION_TASK_ROUTE_TOOL,
107
108
  // Session Drift Detection
108
109
  SESSION_DETECT_DRIFT_TOOL,
110
+ // Behavioral Verifier
111
+ VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs,
109
112
  // v12: Developer Onboarding & Enterprise Observability
110
113
  ONBOARDING_WIZARD_TOOL, EXTRACT_ENTITIES_TOOL, API_ANALYTICS_TOOL, BACKUP_DATABASE_TOOL, CONFIGURE_NOTIFICATIONS_TOOL, QUERY_MEMORY_NATURAL_TOOL,
111
114
  // v15.5: Knowledge Ingestion
@@ -230,6 +233,7 @@ function buildSessionMemoryTools(autoloadList) {
230
233
  SESSION_SYNTHESIZE_EDGES_TOOL, // session_synthesize_edges — inferred semantic graph enrichment
231
234
  SESSION_COGNITIVE_ROUTE_TOOL, // session_cognitive_route — HDC policy-gated concept routing (v6.5)
232
235
  SESSION_DETECT_DRIFT_TOOL, // session_detect_drift — semantic goal drift detection (synalux)
236
+ VERIFY_BEHAVIOR_TOOL, // verify_behavior — behavioral verification via Synalux portal
233
237
  // ─── v6.1: Storage Hygiene tool ───
234
238
  MAINTENANCE_VACUUM_TOOL, // maintenance_vacuum — reclaim SQLite disk space post-purge
235
239
  // ─── v12.1: Developer Onboarding & Framework Bridge ───
@@ -892,6 +896,11 @@ export function createServer() {
892
896
  throw new Error("Session memory not configured. Set SUPABASE_URL and SUPABASE_KEY.");
893
897
  result = await sessionDetectDriftHandler(args);
894
898
  break;
899
+ case "verify_behavior":
900
+ if (!isVerifyBehaviorArgs(args))
901
+ throw new Error("file_path and change_summary required.");
902
+ result = await verifyBehaviorHandler(args);
903
+ break;
895
904
  // ─── v7.3: Dark Factory Pipeline Tools ───
896
905
  case "session_start_pipeline":
897
906
  if (!SESSION_MEMORY_ENABLED)
@@ -1268,7 +1268,7 @@ export class SqliteStorage {
1268
1268
  FROM session_ledger
1269
1269
  WHERE project = ? AND user_id = ? AND role = ?
1270
1270
  AND event_type = 'correction'
1271
- AND importance >= 3
1271
+ AND importance >= 0
1272
1272
  AND deleted_at IS NULL
1273
1273
  AND archived_at IS NULL
1274
1274
  ORDER BY importance DESC
@@ -2323,10 +2323,12 @@ export class SqliteStorage {
2323
2323
  SET importance = MAX(0, importance - 1)
2324
2324
  WHERE project = ? AND user_id = ?
2325
2325
  AND importance > 0
2326
+ AND importance < 10
2326
2327
  AND event_type != 'session'
2327
2328
  AND created_at < datetime('now', '-' || ? || ' days')
2329
+ AND (last_accessed_at IS NULL OR last_accessed_at < datetime('now', '-' || ? || ' days'))
2328
2330
  AND deleted_at IS NULL`,
2329
- args: [project, userId, decayDays],
2331
+ args: [project, userId, decayDays, decayDays],
2330
2332
  });
2331
2333
  const decayed = result.rowsAffected || 0;
2332
2334
  if (decayed > 0) {
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Behavioral Verifier — thin client to Synalux portal API.
3
+ *
4
+ * Calls POST /api/v1/prism/verify-behavior with the file path
5
+ * and returns a domain-specific scenario the agent must answer
6
+ * before editing the file.
7
+ *
8
+ * FAIL-CLOSED: if the portal is unreachable, returns a generic
9
+ * verification challenge rather than skipping verification.
10
+ */
11
+ import { PRISM_SYNALUX_BASE_URL, SYNALUX_CONFIGURED } from "../config.js";
12
+ import { getSynaluxJwt } from "../utils/synaluxJwt.js";
13
/**
 * Generic challenge returned whenever a portal-supplied scenario cannot be
 * obtained (Synalux not configured, no JWT, non-2xx response, or any error
 * while talking to the portal).
 */
const FALLBACK_SCENARIO = [
    "⚠️ BEHAVIORAL VERIFICATION (OFFLINE MODE)",
    "",
    "Portal unreachable — using generic verification.",
    "Before editing this file, answer ALL of these:",
    "",
    "1. What does the end user experience BEFORE vs AFTER this change?",
    "2. Does this endpoint verify the caller owns/belongs-to the resource?",
    "3. Can a user from workspace A access workspace B's data by guessing an ID?",
    "4. If this is a revert, was the original change actually correct?",
    "",
    "Answer concretely. If you cannot, READ THE FILE FIRST.",
].join("\n");

/**
 * Ask the Synalux portal for a behavioral-verification scenario.
 *
 * Sends `file_path`, `change_summary` and `workspace_id` to
 * POST /api/v1/prism/verify-behavior with a 5 second timeout and returns the
 * text produced by `formatResult`.
 *
 * FAIL-CLOSED: every failure path resolves to FALLBACK_SCENARIO instead of
 * throwing, so verification is never skipped because of an outage.
 *
 * @param {{file_path: string, change_summary: string, workspace_id?: string}} args
 * @returns {Promise<string>} Scenario text, or the generic fallback.
 */
export async function verifyBehaviorHandler(args) {
    if (!SYNALUX_CONFIGURED || !PRISM_SYNALUX_BASE_URL) {
        return FALLBACK_SCENARIO;
    }
    try {
        // The JWT lookup lives inside the try so that a rejected token
        // exchange also ends in the fallback rather than a rejected promise.
        const jwt = await getSynaluxJwt();
        if (!jwt) {
            console.error("[verify-behavior] ⚠️ JWT unavailable — fail-closed with generic scenario");
            return FALLBACK_SCENARIO;
        }
        const url = `${PRISM_SYNALUX_BASE_URL}/api/v1/prism/verify-behavior`;
        const res = await fetch(url, {
            method: "POST",
            headers: {
                "Authorization": `Bearer ${jwt}`,
                "Content-Type": "application/json",
            },
            body: JSON.stringify({
                file_path: args.file_path,
                change_summary: args.change_summary,
                workspace_id: args.workspace_id,
            }),
            signal: AbortSignal.timeout(5_000),
        });
        if (!res.ok) {
            console.error(`[verify-behavior] ⚠️ portal returned ${res.status} — fail-closed. URL: ${url}`);
            return FALLBACK_SCENARIO;
        }
        const data = await res.json();
        return formatResult(data);
    }
    catch (err) {
        // Anything can be thrown; only Error instances carry `.message`.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[verify-behavior] ⚠️ VERIFICATION FAILED: ${reason} — using generic fallback`);
        return FALLBACK_SCENARIO;
    }
}
62
/**
 * Render the portal's verify-behavior payload as the text shown to the agent.
 *
 * - A payload whose `requires_verification` is falsy yields a small JSON
 *   string carrying the portal's `reason` (default "non-behavioral file").
 * - Otherwise a multi-line challenge is built from `domain`, `scenario`
 *   and `rules`; absent fields degrade to neutral placeholders.
 *
 * @param {object} data Parsed JSON body returned by the portal.
 * @returns {string} Text to hand back to the caller.
 * @throws {TypeError} When `data` is not an object. verifyBehaviorHandler
 *   invokes this inside its try block, so the throw becomes the generic
 *   fallback scenario rather than a skipped verification.
 */
function formatResult(data) {
    if (data === null || typeof data !== "object") {
        throw new TypeError("malformed verify-behavior response (expected a JSON object)");
    }
    if (!data.requires_verification) {
        return JSON.stringify({ requires_verification: false, reason: data.reason || "non-behavioral file" });
    }
    // Anything other than an array is treated as "no rules" instead of
    // crashing on `.map`.
    const rules = Array.isArray(data.rules) ? data.rules : [];
    const lines = [
        `⚠️ BEHAVIORAL VERIFICATION REQUIRED`,
        `Domain: ${data.domain || "unknown"}`,
        ``,
        `Before making this edit, answer this scenario:`,
        ``,
        data.scenario || "(generic) Describe what the end user experiences BEFORE vs AFTER this change.",
        ``,
    ];
    if (rules.length > 0) {
        // Only emit the section when there is something to list.
        lines.push(`RULES:`, ...rules.map((rule, index) => `${index + 1}. ${rule}`), ``);
    }
    lines.push(`Answer the scenario in your next message before proceeding with the edit.`);
    return lines.join("\n");
}
@@ -57,6 +57,8 @@ export { onboardingWizardHandler, extractEntitiesHandler, apiAnalyticsHandler, b
57
57
  // ── Session Drift Detection ──
58
58
  export { SESSION_DETECT_DRIFT_TOOL, isSessionDetectDriftArgs } from "./sessionMemoryDefinitions.js";
59
59
  export { sessionDetectDriftHandler } from "./sessionDriftHandler.js";
60
+ export { VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs } from "./sessionMemoryDefinitions.js";
61
+ export { verifyBehaviorHandler } from "./behavioralVerifierHandler.js";
60
62
  // ── Knowledge Ingestion (v15.5 — Open Interface) ──
61
63
  // Chunks source code, generates Q&A via Claude Haiku, stores in knowledge graph.
62
64
  // Three entry points: MCP tool, REST API, GitHub webhook.
@@ -840,50 +840,47 @@ export async function sessionLoadContextHandler(args) {
840
840
  }
841
841
  }
842
842
  // ─── Project-Aware Skill Injection ──────────────────────────
843
- // Routing (WHICH skills + user_local policy): Synalux /api/v1/skills/routing.
844
- // Content (WHAT):
845
- // Platform skills → Synalux /api/v1/skills/content (DB first, filesystem fallback)
846
- // local SQLite skill:<name> (free tier / offline fallback)
847
- // User-local skills → local SQLite user_skill:<name>
848
- // ONLY when user_local.enabled=true in routing table
849
- // OR session_load_context called with user_local=true.
850
- // Users CANNOT write to the platform skill: namespace.
843
+ // Skills are priority-sorted and cap-aware. Protected skills always load
844
+ // (they bypass the cap check). This prevents the silent-truncation bug
845
+ // where important behavioral skills were dropped because large low-priority
846
+ // skills consumed the budget first.
851
847
  const { resolveSkillsForProject } = await import("./skillRouting.js");
852
848
  const resolved = await resolveSkillsForProject(project);
853
- const skillsToLoad = resolved.names;
849
+ const sortedSkills = resolved.skills;
854
850
  const userLocalPolicy = resolved.user_local;
855
- // Paid tier: batch-fetch platform skill content from Synalux in one request.
856
851
  let synaluxContent = {};
857
852
  if (SYNALUX_CONFIGURED && storage && typeof storage.fetchSkillContent === "function") {
858
- const missing = skillsToLoad.filter(n => !loadedSkills.includes(n));
853
+ const missing = sortedSkills.map(s => s.name).filter(n => !loadedSkills.includes(n));
859
854
  synaluxContent = await storage
860
855
  .fetchSkillContent(missing).catch(() => ({}));
861
856
  debugLog(`[session_load_context] Synalux skill content fetched: ${Object.keys(synaluxContent).join(", ") || "none"}`);
862
857
  }
863
- const SKILL_BLOCK_CAP = 30_000;
858
+ const SKILL_BLOCK_CAP = 40_000;
864
859
  const skippedSkills = [];
865
- for (const skillName of skillsToLoad) {
866
- if (loadedSkills.includes(skillName))
860
+ for (const entry of sortedSkills) {
861
+ if (loadedSkills.includes(entry.name))
867
862
  continue;
868
- if (skillBlock.length >= SKILL_BLOCK_CAP) {
869
- skippedSkills.push(skillName);
870
- debugLog(`[session_load_context] Skill "${skillName}" skipped — block cap ${SKILL_BLOCK_CAP} reached`);
863
+ const content = synaluxContent[entry.name] || await getSetting(`skill:${entry.name}`, "");
864
+ if (!content || !content.trim())
871
865
  continue;
872
- }
873
- const content = synaluxContent[skillName] || await getSetting(`skill:${skillName}`, "");
874
- if (content && content.trim()) {
875
- const trimmed = content.trim();
876
- if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP && loadedSkills.length > 0) {
877
- skippedSkills.push(skillName);
878
- debugLog(`[session_load_context] Skill "${skillName}" skipped — would exceed cap (${skillBlock.length}+${trimmed.length} > ${SKILL_BLOCK_CAP})`);
879
- continue;
880
- }
881
- const source = synaluxContent[skillName] ? "synalux" : "local-platform";
882
- skillBlock += `\n\n[📜 SKILL: ${skillName}]\n${trimmed}`;
883
- loadedSkills.push(skillName);
866
+ const trimmed = content.trim();
867
+ if (entry.protected) {
868
+ skillBlock += `\n\n[📜 SKILL: ${entry.name}]\n${trimmed}`;
869
+ loadedSkills.push(entry.name);
884
870
  skillLoaded = true;
885
- debugLog(`[session_load_context] Skill "${skillName}" loaded (${source}) for project="${project}" [${skillBlock.length}/${SKILL_BLOCK_CAP} chars]`);
871
+ debugLog(`[session_load_context] Skill "${entry.name}" loaded (protected, p${entry.priority}) [${skillBlock.length} chars]`);
872
+ continue;
873
+ }
874
+ if (skillBlock.length + trimmed.length > SKILL_BLOCK_CAP) {
875
+ skippedSkills.push(entry.name);
876
+ debugLog(`[session_load_context] Skill "${entry.name}" skipped — would exceed cap (${skillBlock.length}+${trimmed.length} > ${SKILL_BLOCK_CAP})`);
877
+ continue;
886
878
  }
879
+ const source = synaluxContent[entry.name] ? "synalux" : "local-platform";
880
+ skillBlock += `\n\n[📜 SKILL: ${entry.name}]\n${trimmed}`;
881
+ loadedSkills.push(entry.name);
882
+ skillLoaded = true;
883
+ debugLog(`[session_load_context] Skill "${entry.name}" loaded (${source}, p${entry.priority}) [${skillBlock.length}/${SKILL_BLOCK_CAP} chars]`);
887
884
  }
888
885
  // ─── User-Local Skills ──────────────────────────────────────
889
886
  // Loaded ONLY when user_local.enabled=true (set in Synalux routing table
@@ -936,7 +933,7 @@ export async function sessionLoadContextHandler(args) {
936
933
  }
937
934
  }
938
935
  if (skippedSkills.length > 0) {
939
- skillBlock += `\n\n[⏭️ ${skippedSkills.length} skills skipped (cap ${SKILL_BLOCK_CAP} chars): ${skippedSkills.join(", ")}]`;
936
+ skillBlock += `\n\n[⚠️ ${skippedSkills.length} skills TRUNCATED by ${SKILL_BLOCK_CAP}-char cap — NOT loaded: ${skippedSkills.join(", ")}. These rules are NOT in your context. Do not claim to follow them.]`;
940
937
  }
941
938
  // ─── Agent Greeting Block ────────────────────────────────────
942
939
  // Shows agent identity (name + role) and skill status after briefing.
@@ -980,15 +977,17 @@ export async function sessionLoadContextHandler(args) {
980
977
  // Build the response object before v4.0 augmentations
981
978
  // SECURITY: Wrap output in boundary tags to prevent context confusion.
982
979
  // The LLM sees <prism_memory context="historical"> and knows this is data, not instructions.
983
- let responseText = `${MEMORY_BOUNDARY_PREFIX}📋 Session context for "${project}" (${level}):\n\n${formattedContext.trim()}${splitBrainWarning}${driftReport}${briefingBlock}${sdmRecallBlock}${greetingBlock}${visualMemoryBlock}${skillBlock}${versionNote}`;
984
- // ─── v4.0: Behavioral Warnings Injection ───────────────────
985
- // If loadContext returned behavioral_warnings, add them to the
986
- // formatted output so the agent sees them prominently.
980
+ // ─── v19.1: Behavioral Warnings BEFORE skills (protected from truncation) ───
981
+ // Corrections must surface prominently. Placed before skillBlock so the
982
+ // skill budget cannot push them out. Capped at 2,000 chars.
987
983
  const behavWarnings = data?.behavioral_warnings;
984
+ let behavBlock = '';
988
985
  if (behavWarnings && behavWarnings.length > 0) {
989
- responseText += `\n\n[⚠️ BEHAVIORAL WARNINGS]\n` +
986
+ const rawBlock = `\n\n[⚠️ BEHAVIORAL WARNINGS — DO NOT IGNORE]\n` +
990
987
  behavWarnings.map(w => `- ${w.summary} (importance: ${w.importance})`).join("\n");
988
+ behavBlock = [...rawBlock].slice(0, 2000).join('');
991
989
  }
990
+ let responseText = `${MEMORY_BOUNDARY_PREFIX}📋 Session context for "${project}" (${level}):\n\n${formattedContext.trim()}${splitBrainWarning}${driftReport}${briefingBlock}${sdmRecallBlock}${greetingBlock}${visualMemoryBlock}${behavBlock}${skillBlock}${versionNote}`;
992
991
  // ─── v9.4.7: ABA Precision Protocol (foundational) ────────
993
992
  // Injected into EVERY session load so the agent always operates
994
993
  // under these behavioral rules. Never truncated (placed before
@@ -2,7 +2,7 @@
2
2
  * prism_infer — local-first inference tool
3
3
  * ─────────────────────────────────────────────────────────────
4
4
  * Save the caller's cloud tokens by routing to a local prism-coder
5
- * model via Ollama. Tiers (32B/14B/8B/1.7B) auto-selected by free
5
+ * model via Ollama. Tiers (32B/9B/4B/2B) auto-selected by free
6
6
  * RAM, then capped by `model_ceiling` and the set of tags that are
7
7
  * actually pulled into Ollama.
8
8
  *
@@ -12,7 +12,7 @@
12
12
  * 4. On local fail, if cloud_fallback=true:
13
13
  * - exchange synalux_sk_ → JWT (cached)
14
14
  * - POST synalux portal /api/v1/prism-aac/inference
15
- * - portal runs its own cascade (14B/32B/Claude by tier)
15
+ * - portal runs its own cascade (9B/32B/Claude by tier)
16
16
  * 5. Return { output, backend, model_picked, ram_free_mb, latency_ms, used_cloud }
17
17
  *
18
18
  * `prism_infer` is a thin client. It never calls Anthropic / OpenRouter
@@ -24,16 +24,15 @@ import { getSynaluxJwt, invalidateSynaluxJwt } from "../utils/synaluxJwt.js";
24
24
  import { getAvailableMemoryBytes } from "../utils/availableMemory.js";
25
25
  import { PRISM_SYNALUX_BASE_URL, PRISM_LOCAL_LLM_URL, } from "../config.js";
26
26
  import { debugLog } from "../utils/logger.js";
27
- import { verifyGrounding } from "../utils/groundingVerifier.js";
28
27
  import { getEntitlements, clampCeiling } from "../utils/entitlements.js";
29
28
  import { ddLog } from "../utils/ddLogger.js";
30
29
  // ─── Tool Definition ────────────────────────────────────────────
31
30
  export const PRISM_INFER_TOOL = {
32
31
  name: "prism_infer",
33
32
  description: "Run an inference on a local prism-coder model (Ollama) to save cloud tokens. " +
34
- "Picks the largest viable tier — 32B / 14B / 8B / 1.7B — based on free RAM at call time, " +
33
+ "Picks the largest viable tier — 32B / 9B / 8B / 1.7B — based on free RAM at call time, " +
35
34
  "clamped by `model_ceiling` and what is actually pulled in Ollama. " +
36
- "Falls through to the synalux portal cloud cascade (14B → 32B → Claude Opus 4.7) " +
35
+ "Falls through to the synalux portal cloud cascade (9B → 32B → Claude Opus 4.7) " +
37
36
  "only when local is unviable AND `cloud_fallback=true`. " +
38
37
  "Use this for code generation, summarisation, classification, or any synth task you would " +
39
38
  "otherwise hand to the cloud model — it costs $0 when the local hit succeeds.",
@@ -60,8 +59,8 @@ export const PRISM_INFER_TOOL = {
60
59
  },
61
60
  model_ceiling: {
62
61
  type: "string",
63
- enum: ["32b", "14b", "8b", "1b7"],
64
- description: "Cap the largest tier the picker may select. e.g. '14b' forbids 32B even if RAM allows.",
62
+ enum: ["32b", "9b", "4b", "2b"],
63
+ description: "Cap the largest tier the picker may select. e.g. '9b' forbids 32B even if RAM allows.",
65
64
  },
66
65
  cloud_fallback: {
67
66
  type: "boolean",
@@ -70,7 +69,7 @@ export const PRISM_INFER_TOOL = {
70
69
  },
71
70
  timeout_ms: {
72
71
  type: "number",
73
- description: "Override per-call timeout. Default scales with model size: 32B=120s, 14B=60s, 8B=30s, 1.7B=15s.",
72
+ description: "Override per-call timeout. Default scales with model size: 32B=120s, 9B=60s, 4B=20s, 1.7B=15s.",
74
73
  },
75
74
  evidence: {
76
75
  type: "array",
@@ -91,12 +90,12 @@ export const PRISM_INFER_TOOL = {
91
90
  type: "boolean",
92
91
  description: "Enable the L3 grounding verifier. Default: true when `evidence` is provided, " +
93
92
  "false otherwise. When enabled, the model's draft is checked by a different model " +
94
- "(prism-coder:4b by default) against the supplied `evidence`. Drafts with " +
93
+ "(qwen3.5:4b by default) against the supplied `evidence`. Drafts with " +
95
94
  "NEUTRAL or CONTRADICTED claims are refused.",
96
95
  },
97
96
  verifier_model: {
98
97
  type: "string",
99
- description: "Override the verifier model. Default: prism-coder:4b.",
98
+ description: "Override the verifier model. Default: qwen3.5:4b.",
100
99
  },
101
100
  verifier_timeout_ms: {
102
101
  type: "number",
@@ -124,7 +123,7 @@ export function isPrismInferArgs(args) {
124
123
  if (a.timeout_ms !== undefined && typeof a.timeout_ms !== "number")
125
124
  return false;
126
125
  if (a.model_ceiling !== undefined &&
127
- !["32b", "14b", "8b", "1b7"].includes(a.model_ceiling))
126
+ !["32b", "9b", "4b", "2b"].includes(a.model_ceiling))
128
127
  return false;
129
128
  if (a.verify !== undefined && typeof a.verify !== "boolean")
130
129
  return false;
@@ -148,9 +147,9 @@ export function isPrismInferArgs(args) {
148
147
  // ─── Ollama helpers ────────────────────────────────────────────
149
148
  const DEFAULT_TIMEOUTS = {
150
149
  "prism-coder:32b": 120_000,
151
- "prism-coder:14b": 60_000,
152
- "prism-coder:8b": 30_000,
153
- "prism-coder:1b7": 15_000,
150
+ "prism-coder:9b": 60_000,
151
+ "prism-coder:4b": 20_000,
152
+ "prism-coder:2b": 15_000,
154
153
  };
155
154
  /** List Ollama-installed tags. Returns null if Ollama unreachable. */
156
155
  export async function listOllamaTags(url = PRISM_LOCAL_LLM_URL) {
@@ -330,7 +329,7 @@ export async function runInfer(args, deps) {
330
329
  if (installed) {
331
330
  // Find start index from ceiling — if no ceiling, start at the top (32B).
332
331
  const ceilStart = effectiveCeiling
333
- ? Math.max(0, MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling) || t.tag === effectiveCeiling))
332
+ ? Math.max(0, MODEL_TIERS.findIndex(t => t.tag.endsWith(`:${effectiveCeiling}`)))
334
333
  : 0;
335
334
  let anyViable = false;
336
335
  for (let i = ceilStart; i < MODEL_TIERS.length; i++) {
@@ -407,10 +406,10 @@ export async function runInfer(args, deps) {
407
406
  */
408
407
  async function applyVerification(draft, args, deps, partial) {
409
408
  const shouldVerify = args.verify ?? (args.evidence !== undefined && args.evidence.length > 0);
410
- if (!shouldVerify) {
409
+ if (!shouldVerify || !deps.callVerifier) {
411
410
  return { ...partial, output: draft };
412
411
  }
413
- const verifier = deps.callVerifier ?? verifyGrounding;
412
+ const verifier = deps.callVerifier;
414
413
  const outcome = await verifier({
415
414
  draft,
416
415
  evidence: args.evidence ?? [],
@@ -1750,3 +1750,43 @@ export function isSessionDetectDriftArgs(args) {
1750
1750
  return false;
1751
1751
  return true;
1752
1752
  }
1753
+ // ─── Behavioral Verifier ────────────────────────────────────────
1754
/** MCP tool definition for `verify_behavior`. */
export const VERIFY_BEHAVIOR_TOOL = {
    name: "verify_behavior",
    description: "Call BEFORE editing behavioral source files (API routes, ordering logic, billing, auth, migrations). " +
        "Returns a domain-specific scenario you must answer to demonstrate understanding of the end-user impact. " +
        "Example: editing a KDS route returns 'A cook has a 3-item ticket. One item is voided. What should the cook see?' " +
        "Answer the scenario concretely before proceeding with the edit.",
    inputSchema: {
        type: "object",
        properties: {
            file_path: {
                type: "string",
                description: "Path of the file about to be edited.",
            },
            change_summary: {
                type: "string",
                description: "Brief description of the intended change.",
            },
            project: {
                type: "string",
                description: "Optional project identifier for workspace-scoped scenarios.",
            },
            workspace_id: {
                type: "string",
                description: "Optional workspace ID for custom scenarios.",
            },
        },
        required: ["file_path", "change_summary"],
    },
};

/**
 * Runtime guard for `verify_behavior` arguments.
 *
 * Both required fields must be non-blank strings, matching the schema's
 * `required` list. The optional `project` and `workspace_id` must be strings
 * when supplied (null/undefined count as "not supplied").
 *
 * @param {unknown} a Raw tool-call arguments.
 * @returns {boolean} True when the arguments satisfy the tool's input schema.
 */
export function isVerifyBehaviorArgs(a) {
    if (!a || typeof a !== "object") {
        return false;
    }
    const { file_path, change_summary, project, workspace_id } = a;
    if (typeof file_path !== "string" || !file_path.trim()) {
        return false;
    }
    // A blank summary carries no information; treat it as missing.
    if (typeof change_summary !== "string" || !change_summary.trim()) {
        return false;
    }
    if (project != null && typeof project !== "string") {
        return false;
    }
    if (workspace_id != null && typeof workspace_id !== "string") {
        return false;
    }
    return true;
}
@@ -19,7 +19,11 @@
19
19
  // Minimal fallback when synalux is unreachable.
20
20
  const OFFLINE_FALLBACK = {
21
21
  version: 1,
22
- universal: ['bcba_ai_assistant'],
22
+ universal: [
23
+ { name: 'prime-directive', priority: 0, protected: true },
24
+ { name: 'evidence-first-protocol', priority: 1, protected: true },
25
+ { name: 'bcba_ai_assistant', priority: 20 },
26
+ ],
23
27
  projects: {},
24
28
  user_local: { enabled: false, key_prefix: 'user_skill:' },
25
29
  };
@@ -56,6 +60,12 @@ async function fetchOnce() {
56
60
  * skills. Also returns the user_local policy so callers know whether to
57
61
  * load user_skill:* entries from local SQLite.
58
62
  */
63
/**
 * Normalize one routing-table entry to `{ name, priority, protected }`.
 *
 * Bare string entries become unprotected skills at `defaultPriority`.
 * Object entries keep their own `priority` / `protected` when present.
 *
 * @param {string | {name: string, priority?: number, protected?: boolean}} entry
 * @param {number} defaultPriority Priority used when the entry has no usable one.
 * @returns {{name: string, priority: number, protected: boolean}}
 */
function normalizeEntry(entry, defaultPriority) {
    if (typeof entry === 'string') {
        return { name: entry, priority: defaultPriority, protected: false };
    }
    // The resolved list is ordered with a numeric `a.priority - b.priority`
    // comparator; a value that does not coerce to a finite number would turn
    // that into NaN, so fall back to the default instead.
    const coerced = entry.priority == null ? Number.NaN : Number(entry.priority);
    const priority = Number.isFinite(coerced) ? coerced : defaultPriority;
    return { name: entry.name, priority, protected: entry.protected ?? false };
}
59
69
  export async function resolveSkillsForProject(project) {
60
70
  const now = Date.now();
61
71
  if (!cached || now - cached.fetchedAt > CACHE_TTL_MS) {
@@ -68,16 +78,31 @@ export async function resolveSkillsForProject(project) {
68
78
  await inflight;
69
79
  }
70
80
  const table = cached.table;
71
- const out = new Set(table.universal);
81
+ const seen = new Set();
82
+ const skills = [];
83
+ for (let i = 0; i < table.universal.length; i++) {
84
+ const entry = normalizeEntry(table.universal[i], i);
85
+ if (!seen.has(entry.name)) {
86
+ seen.add(entry.name);
87
+ skills.push(entry);
88
+ }
89
+ }
72
90
  const projectLower = project.toLowerCase();
73
- for (const [pattern, skills] of Object.entries(table.projects)) {
91
+ let projectPriority = 100;
92
+ for (const [pattern, projectSkills] of Object.entries(table.projects)) {
74
93
  if (projectLower.includes(pattern)) {
75
- for (const s of skills)
76
- out.add(s);
94
+ for (const s of projectSkills) {
95
+ if (!seen.has(s)) {
96
+ seen.add(s);
97
+ skills.push({ name: s, priority: projectPriority++, protected: false });
98
+ }
99
+ }
77
100
  }
78
101
  }
102
+ skills.sort((a, b) => a.priority - b.priority);
79
103
  return {
80
- names: Array.from(out),
104
+ names: skills.map(s => s.name),
105
+ skills,
81
106
  user_local: table.user_local ?? OFFLINE_FALLBACK.user_local,
82
107
  };
83
108
  }
@@ -6,7 +6,7 @@
6
6
  * to enforce model ceiling, max_tokens, and feature gates.
7
7
  *
8
8
  * Unauthenticated users (no SYNALUX_API_KEY) get free-tier defaults.
9
- * Authenticated users get their plan from the portal (1-hour cache).
9
+ * Authenticated users get their plan from the portal (5-minute cache).
10
10
  */
11
11
  import { getSynaluxJwt } from "./synaluxJwt.js";
12
12
  import { PRISM_SYNALUX_BASE_URL, SYNALUX_CONFIGURED } from "../config.js";
@@ -32,10 +32,10 @@ const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
32
32
  let cache = null;
33
33
  let inFlight = null;
34
34
  // ── Model tier ordering for ceiling enforcement ───────────────────
35
- const TIER_ORDER = ["1b7", "4b", "8b", "14b", "32b"];
35
+ const TIER_ORDER = ["2b", "4b", "9b", "32b"];
36
36
  /**
37
37
  * Returns true if `requested` exceeds `ceiling`.
38
- * e.g. ceilingExceeded("14b", "4b") → true (14b > 4b ceiling)
38
+ * e.g. ceilingExceeded("9b", "4b") → true (9b > 4b ceiling)
39
39
  */
40
40
  export function ceilingExceeded(requested, ceiling) {
41
41
  const reqIdx = TIER_ORDER.indexOf(requested);
@@ -79,12 +79,18 @@ async function fetchEntitlements() {
79
79
  redirect: "error",
80
80
  });
81
81
  if (!res.ok) {
82
- debugLog(`[entitlements] portal HTTP ${res.status} — free tier fallback`);
82
+ debugLog(`[entitlements] portal HTTP ${res.status}`);
83
+ if (cache) {
84
+ debugLog("[entitlements] using last-known-good (safety fail-closed)");
85
+ return cache.entitlements;
86
+ }
83
87
  return FREE_ENTITLEMENTS;
84
88
  }
85
89
  const data = (await res.json());
86
90
  if (!data.plan || !data.model_ceiling) {
87
- debugLog("[entitlements] malformed response — free tier fallback");
91
+ debugLog("[entitlements] malformed response");
92
+ if (cache)
93
+ return cache.entitlements;
88
94
  return FREE_ENTITLEMENTS;
89
95
  }
90
96
  debugLog(`[entitlements] plan=${data.plan} ceiling=${data.model_ceiling} ` +
@@ -92,7 +98,14 @@ async function fetchEntitlements() {
92
98
  return data;
93
99
  }
94
100
  catch (err) {
95
- debugLog(`[entitlements] fetch error: ${err instanceof Error ? err.message : String(err)} — free tier fallback`);
101
+ debugLog(`[entitlements] fetch error: ${err instanceof Error ? err.message : String(err)}`);
102
+ // F1 fix: fail-closed — keep last-known-good entitlements on fetch error.
103
+ // Safety controls (grounding_verifier) must not degrade on availability failures.
104
+ if (cache) {
105
+ debugLog("[entitlements] using last-known-good (safety fail-closed)");
106
+ return cache.entitlements;
107
+ }
108
+ debugLog("[entitlements] no cached entitlements — free tier fallback (cold start)");
96
109
  return FREE_ENTITLEMENTS;
97
110
  }
98
111
  }
@@ -111,7 +124,14 @@ export async function getEntitlements() {
111
124
  inFlight = (async () => {
112
125
  try {
113
126
  const ent = await fetchEntitlements();
114
- cache = { entitlements: ent, expiresAt: Date.now() + CACHE_TTL_MS };
127
+ // Only update cache if this is a REAL fetch (not a cached fallback).
128
+ // fetchEntitlements returns cache.entitlements on error — detect by
129
+ // checking if the returned object is the exact same reference.
130
+ const isFallback = cache && ent === cache.entitlements;
131
+ if (!isFallback) {
132
+ cache = { entitlements: ent, expiresAt: Date.now() + CACHE_TTL_MS };
133
+ }
134
+ // On fallback: DON'T refresh expiresAt — let it expire so we retry.
115
135
  return ent;
116
136
  }
117
137
  finally {
@@ -9,9 +9,9 @@
9
9
  * stateless MCP), pointed at free-form generation instead of tool-call
10
10
  * responses.
11
11
  *
12
- * Cascade role: prism-coder:4b is the default verifier (fast, 2.5GB).
12
+ * Cascade role: qwen3.5:4b is the default verifier (fast, 2.5GB).
13
13
  * 9b drafts; 4b verifies. Different model = Patronus rule satisfied.
14
- * Falls back to 1b7 on devices with <4GB free RAM.
14
+ * Falls back to 2b on devices with <4GB free RAM.
15
15
  *
16
16
  * Failure modes:
17
17
  * - Verifier model unreachable / timeout → fail-closed refusal
@@ -93,7 +93,7 @@ function refusalText(action, failedClaim) {
93
93
  }
94
94
  }
95
95
  export async function verifyGrounding(opts) {
96
- const verifierModel = opts.verifierModel ?? "prism-coder:4b";
96
+ const verifierModel = opts.verifierModel ?? "qwen3.5:4b";
97
97
  const timeoutMs = opts.timeoutMs ?? 2000;
98
98
  const ollamaUrl = opts.ollamaUrl ?? PRISM_LOCAL_LLM_URL;
99
99
  const fetchImpl = opts.fetchImpl ?? fetch;