prism-mcp-server 18.0.2 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,22 @@
1
1
  /**
2
2
  * RAM-Gated Local Model Picker
3
3
  * ─────────────────────────────────────────────────────────────
4
- * Cascade: 14b (default) → 4b (verifier) → 32b (complex only).
4
+ * Cascade: 9b (default) → 4b (verifier) → 2b (mobile) → 32b (complex only).
5
5
  *
6
- * The default ceiling is "14b" — NOT "32b". This means:
7
- * - 14b is the primary model for routing + general inference
6
+ * The default ceiling is "9b" — NOT "32b". This means:
7
+ * - 9b is the primary model for routing + general inference (Qwen3.5-9B, 100% BFCL)
8
8
  * - 4b is used as the grounding verifier (fast, small)
9
+ * - 2b is the mobile/iPhone first gate (Qwen3.5-2B, 99.1% BFCL)
9
10
  * - 32b is only loaded when caller explicitly passes ceiling="32b"
10
11
  * or when the task requires maximum quality (complex code gen, etc.)
11
12
  *
12
- * This saves 10GB+ RAM on most devices and keeps response times fast.
13
- * The 14b achieves 100% on eval_300 — same as 32b.
13
+ * This saves 13GB+ RAM vs 32b and keeps response times fast.
14
14
  *
15
15
  * tag weights need free ctx role
16
16
  * prism-coder:32b ~19 GB ≥ 24 GB 32K complex (on-demand)
17
- * prism-coder:14b ~ 9 GB 12 GB 32K default router
18
- * prism-coder:8b ~ 5 GB 7 GB 32K fallback
19
- * prism-coder:4b ~ 2.5 GB ≥ 4 GB 8K verifier + mobile
20
- * prism-coder:1b7 ~ 2 GB ≥ 3 GB 8K watch + ultra-low RAM
17
+ * prism-coder:9b ~ 5.8 GB ≥ 8 GB 32K default router (Qwen3.5, 100% BFCL)
18
+ * prism-coder:4b ~ 3.4 GB ≥ 5 GB 32K verifier (Qwen3.5, 100%)
19
+ * prism-coder:2b ~ 2.3 GB ≥ 3 GB 8K mobile / iPhone (Qwen3.5, 99.1%)
21
20
  *
22
21
  * Below 3 GB free → no local pick (caller must use cloud).
23
22
  */
@@ -28,10 +27,9 @@ const GB = 1024 ** 3;
28
27
  */
29
28
  export const MODEL_TIERS = [
30
29
  { tag: 'prism-coder:32b', weightsGb: 19, minFreeGb: 24, ctxTokens: 32_768 },
31
- { tag: 'prism-coder:14b', weightsGb: 9, minFreeGb: 12, ctxTokens: 32_768 },
32
- { tag: 'prism-coder:8b', weightsGb: 5, minFreeGb: 7, ctxTokens: 32_768 },
33
- { tag: 'prism-coder:4b', weightsGb: 2.5, minFreeGb: 4, ctxTokens: 8_192 },
34
- { tag: 'prism-coder:1b7', weightsGb: 2, minFreeGb: 3, ctxTokens: 8_192 },
30
+ { tag: 'prism-coder:9b', weightsGb: 5.8, minFreeGb: 8, ctxTokens: 32_768 },
31
+ { tag: 'prism-coder:4b', weightsGb: 3.4, minFreeGb: 5, ctxTokens: 32_768 },
32
+ { tag: 'prism-coder:2b', weightsGb: 2.3, minFreeGb: 3, ctxTokens: 8_192 },
35
33
  ];
36
34
  /**
37
35
  * True when `installed` matches `tierTag` either as a bare tag
@@ -44,21 +42,21 @@ export const MODEL_TIERS = [
44
42
  function tagMatches(installed, tierTag) {
45
43
  return installed === tierTag || installed.endsWith(`/${tierTag}`);
46
44
  }
47
- /** Default ceiling: 14b. Pass ceiling="32b" explicitly for max quality. */
48
- export const DEFAULT_CEILING = "14b";
45
+ /** Default ceiling: 9b. Pass ceiling="32b" explicitly for max quality. */
46
+ export const DEFAULT_CEILING = "9b";
49
47
  /**
50
48
  * Pick the best viable tier for the given free RAM.
51
- * Default ceiling is 14b — use ceiling="32b" only for complex tasks.
49
+ * Default ceiling is 9b — use ceiling="32b" only for complex tasks.
52
50
  *
53
51
  * @param freeBytes Result of os.freemem() — binary bytes
54
- * @param ceiling Cap tier. Default "14b". Pass "32b" for complex tasks.
52
+ * @param ceiling Cap tier. Default "9b". Pass "32b" for complex tasks.
55
53
  * @param available Optional whitelist of installed Ollama tags.
56
54
  */
57
55
  export function pickLocalModel(freeBytes, ceiling, available) {
58
56
  if (!Number.isFinite(freeBytes) || freeBytes <= 0)
59
57
  return null;
60
58
  const effectiveCeiling = ceiling || DEFAULT_CEILING;
61
- const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling) || t.tag === effectiveCeiling);
59
+ const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(`:${effectiveCeiling}`));
62
60
  const startIdx = ceilingIdx >= 0 ? ceilingIdx : 0;
63
61
  for (let i = startIdx; i < MODEL_TIERS.length; i++) {
64
62
  const tier = MODEL_TIERS[i];
@@ -15,8 +15,9 @@ export class Gatekeeper {
15
15
  console.warn(`\n⚠️ [OVERRIDDEN] Verification Gate bypassed via administrator override.`);
16
16
  // Enforce immutability and record audit trail context via environment variables
17
17
  validatedResult.gate_override = true;
18
+ // F19 fix: process.env.USER is trivially spoofable — log it but note it's unauthenticated.
18
19
  const actor = process.env.USER || process.env.USERNAME || 'unknown_user';
19
- validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass by ${actor}`;
20
+ validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass (unauthenticated env.USER=${actor})`;
20
21
  return { canContinue: true, validatedResult };
21
22
  }
22
23
  switch (validatedResult.gate_action) {
@@ -196,7 +196,12 @@ export class VerificationRunner {
196
196
  * Throws an error if the hash does not match, ensuring test integrity.
197
197
  */
198
198
  static verifyRubricHash(tests, harness) {
199
- const computed = computeRubricHash(tests);
199
+ // F11 fix: include min_pass_rate in hash verification when harness has it.
200
+ // Hash with min_pass_rate when the harness defines it; otherwise hash the tests alone (backward compat).
201
+ const minRate = harness.min_pass_rate;
202
+ const computed = minRate !== undefined
203
+ ? computeRubricHash(tests, minRate)
204
+ : computeRubricHash(tests);
200
205
  if (computed !== harness.rubric_hash) {
201
206
  throw new Error(`Rubric hash mismatch. Expected ${harness.rubric_hash}, but computeRubricHash returned ${computed}. The tests have been modified since the harness was created.`);
202
207
  }
@@ -405,7 +410,7 @@ export class VerificationRunner {
405
410
  if (!targetCheck.ok) {
406
411
  return { passed: false, error: `HTTP target blocked: ${targetCheck.reason}` };
407
412
  }
408
- const res = await fetch(a.target);
413
+ const res = await fetch(a.target, { redirect: "error" });
409
414
  return res.status === a.expected
410
415
  ? { passed: true }
411
416
  : { passed: false, error: `Expected status ${a.expected}, got ${res.status} for ${a.target}` };
@@ -56,8 +56,16 @@ export const TestSuiteSchema = z.object({
56
56
  * @param tests - The array of TestAssertion to hash
57
57
  * @returns Lowercase hex SHA-256 digest
58
58
  */
59
- export function computeRubricHash(tests) {
59
+ export function computeRubricHash(tests, minPassRate) {
60
60
  const sorted = [...tests].sort((a, b) => a.id.localeCompare(b.id));
61
+ // F11 fix: when minPassRate is provided, include it in the hash so the
62
+ // threshold can't be changed without invalidating the rubric.
63
+ // When omitted, hash only tests (backward compatible with existing harnesses).
64
+ if (minPassRate !== undefined) {
65
+ return createHash("sha256")
66
+ .update(JSON.stringify({ tests: sorted, min_pass_rate: minPassRate }))
67
+ .digest("hex");
68
+ }
61
69
  return createHash("sha256")
62
70
  .update(JSON.stringify(sorted))
63
71
  .digest("hex");
@@ -44,6 +44,18 @@ export function resolveEffectiveSeverity(assertionSeverity, defaultSeverity) {
44
44
  */
45
45
  export function evaluateSeverityGates(results, config) {
46
46
  const failures = results.filter(r => !r.passed && !r.skipped);
47
+ // F10 fix: skipped critical (gate/abort) assertions count as failures.
48
+ // Crafting depends_on to skip critical checks must not neutralize the gate.
49
+ const skippedCritical = results.filter(r => r.skipped && (r.severity === 'gate' || r.severity === 'abort'));
50
+ if (skippedCritical.length > 0) {
51
+ const ids = skippedCritical.map(r => r.id).join(", ");
52
+ const hasAbort = skippedCritical.some(r => r.severity === 'abort');
53
+ return {
54
+ action: hasAbort ? "abort" : "block",
55
+ failed_assertions: skippedCritical,
56
+ summary: `${hasAbort ? 'ABORT' : 'BLOCKED'}: ${skippedCritical.length} critical assertion(s) were skipped [${ids}] — treating as failures.`
57
+ };
58
+ }
47
59
  if (failures.length === 0) {
48
60
  return {
49
61
  action: "continue",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prism-mcp-server",
3
- "version": "18.0.2",
3
+ "version": "19.0.1",
4
4
  "mcpName": "io.github.dcostenco/prism-coder",
5
5
  "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
6
6
  "module": "index.ts",