npm - prism-mcp-server - Versions diffs - 18.0.2 → 19.0.1 - Mend

prism-mcp-server 18.0.2 → 19.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/README.md +295 -763
package/dist/cli.js +2 -2
package/dist/server.js +9 -0
package/dist/storage/sqlite.js +4 -2
package/dist/tools/behavioralVerifierHandler.js +79 -0
package/dist/tools/index.js +2 -0
package/dist/tools/ledgerHandlers.js +35 -36
package/dist/tools/prismInferHandler.js +16 -17
package/dist/tools/sessionMemoryDefinitions.js +40 -0
package/dist/tools/skillRouting.js +31 -6
package/dist/utils/entitlements.js +27 -7
package/dist/utils/groundingVerifier.js +3 -3
package/dist/utils/modelPicker.js +16 -18
package/dist/verification/gatekeeper.js +2 -1
package/dist/verification/runner.js +7 -2
package/dist/verification/schema.js +9 -1
package/dist/verification/severityPolicy.js +12 -0
package/package.json +1 -1

package/dist/utils/modelPicker.js CHANGED Viewed

@@ -1,23 +1,22 @@
 /**
  * RAM-Gated Local Model Picker
  * ─────────────────────────────────────────────────────────────
- * Cascade: 14b (default) → 4b (verifier) → 32b (complex only).
+ * Cascade: 9b (default) → 4b (verifier) → 2b (mobile) → 32b (complex only).
  *
- * The default ceiling is "14b" — NOT "32b". This means:
- *   - 14b is the primary model for routing + general inference
+ * The default ceiling is "9b" — NOT "32b". This means:
+ *   - 9b is the primary model for routing + general inference (Qwen3.5-9B, 100% BFCL)
  *   - 4b is used as the grounding verifier (fast, small)
+ *   - 2b is the mobile/iPhone first gate (Qwen3.5-2B, 99.1% BFCL)
  *   - 32b is only loaded when caller explicitly passes ceiling="32b"
  *     or when the task requires maximum quality (complex code gen, etc.)
  *
- * This saves 10GB+ RAM on most devices and keeps response times fast.
- * The 14b achieves 100% on eval_300 — same as 32b.
+ * This saves 13GB+ RAM vs 32b and keeps response times fast.
  *
  *   tag                 weights   need free   ctx     role
  *   prism-coder:32b     ~19 GB    ≥ 24 GB     32K    complex (on-demand)
- *   prism-coder:14b     ~ 9 GB    ≥ 12 GB     32K    default router
- *   prism-coder:8b      ~ 5 GB    ≥  7 GB     32K    fallback
- *   prism-coder:4b      ~ 2.5 GB  ≥  4 GB      8K    verifier + mobile
- *   prism-coder:1b7     ~ 2 GB    ≥  3 GB      8K    watch + ultra-low RAM
+ *   prism-coder:9b      ~ 5.8 GB  ≥  8 GB     32K    default router (Qwen3.5, 100% BFCL)
+ *   prism-coder:4b      ~ 3.4 GB  ≥  5 GB     32K    verifier (Qwen3.5, 100%)
+ *   prism-coder:2b      ~ 2.3 GB  ≥  3 GB      8K    mobile / iPhone (Qwen3.5, 99.1%)
  *
  * Below 3 GB free → no local pick (caller must use cloud).
  */
@@ -28,10 +27,9 @@ const GB = 1024 ** 3;
  */
 export const MODEL_TIERS = [
     { tag: 'prism-coder:32b', weightsGb: 19, minFreeGb: 24, ctxTokens: 32_768 },
-    { tag: 'prism-coder:14b', weightsGb: 9, minFreeGb: 12, ctxTokens: 32_768 },
-    { tag: 'prism-coder:8b', weightsGb: 5, minFreeGb: 7, ctxTokens: 32_768 },
-    { tag: 'prism-coder:4b', weightsGb: 2.5, minFreeGb: 4, ctxTokens: 8_192 },
-    { tag: 'prism-coder:1b7', weightsGb: 2, minFreeGb: 3, ctxTokens: 8_192 },
+    { tag: 'prism-coder:9b', weightsGb: 5.8, minFreeGb: 8, ctxTokens: 32_768 },
+    { tag: 'prism-coder:4b', weightsGb: 3.4, minFreeGb: 5, ctxTokens: 32_768 },
+    { tag: 'prism-coder:2b', weightsGb: 2.3, minFreeGb: 3, ctxTokens: 8_192 },
 ];
 /**
  * True when `installed` matches `tierTag` either as a bare tag
@@ -44,21 +42,21 @@ export const MODEL_TIERS = [
 function tagMatches(installed, tierTag) {
     // Exact match: the installed tag is the bare tier tag itself.
     if (installed === tierTag) {
         return true;
     }
     // Otherwise accept any prefixed form whose final path segment is the tier tag.
     const prefixedSuffix = `/${tierTag}`;
     return installed.endsWith(prefixedSuffix);
 }
-/** Default ceiling: 14b. Pass ceiling="32b" explicitly for max quality. */
-export const DEFAULT_CEILING = "14b";
+/** Default ceiling: 9b. Pass ceiling="32b" explicitly for max quality. */
+export const DEFAULT_CEILING = "9b";
 /**
  * Pick the best viable tier for the given free RAM.
- * Default ceiling is 14b — use ceiling="32b" only for complex tasks.
+ * Default ceiling is 9b — use ceiling="32b" only for complex tasks.
  *
  * @param freeBytes  Result of os.freemem() — binary bytes
- * @param ceiling    Cap tier. Default "14b". Pass "32b" for complex tasks.
+ * @param ceiling    Cap tier. Default "9b". Pass "32b" for complex tasks.
  * @param available  Optional whitelist of installed Ollama tags.
  */
 export function pickLocalModel(freeBytes, ceiling, available) {
     if (!Number.isFinite(freeBytes) || freeBytes <= 0)
         return null;
     const effectiveCeiling = ceiling || DEFAULT_CEILING;
-    const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling) || t.tag === effectiveCeiling);
+    const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(`:${effectiveCeiling}`));
     const startIdx = ceilingIdx >= 0 ? ceilingIdx : 0;
     for (let i = startIdx; i < MODEL_TIERS.length; i++) {
         const tier = MODEL_TIERS[i];

package/dist/verification/gatekeeper.js CHANGED Viewed

@@ -15,8 +15,9 @@ export class Gatekeeper {
             console.warn(`\n⚠️  [OVERRIDDEN] Verification Gate bypassed via administrator override.`);
             // Enforce immutability and record audit trail context via environment variables
             validatedResult.gate_override = true;
+            // F19 fix: process.env.USER is trivially spoofable — log it but note it's unauthenticated.
             const actor = process.env.USER || process.env.USERNAME || 'unknown_user';
-            validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass by ${actor}`;
+            validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass (unauthenticated env.USER=${actor})`;
             return { canContinue: true, validatedResult };
         }
         switch (validatedResult.gate_action) {

package/dist/verification/runner.js CHANGED Viewed

@@ -196,7 +196,12 @@ export class VerificationRunner {
      * Throws an error if the hash does not match, ensuring test integrity.
      */
     static verifyRubricHash(tests, harness) {
-        const computed = computeRubricHash(tests);
+        // F11 fix: include min_pass_rate in hash verification when harness has it.
+        // No retry/fallback on mismatch: min_pass_rate is hashed only when the harness defines it; otherwise only the tests are hashed (backward compat).
+        const minRate = harness.min_pass_rate;
+        const computed = minRate !== undefined
+            ? computeRubricHash(tests, minRate)
+            : computeRubricHash(tests);
         if (computed !== harness.rubric_hash) {
             throw new Error(`Rubric hash mismatch. Expected ${harness.rubric_hash}, but computeRubricHash returned ${computed}. The tests have been modified since the harness was created.`);
         }
@@ -405,7 +410,7 @@ export class VerificationRunner {
                     if (!targetCheck.ok) {
                         return { passed: false, error: `HTTP target blocked: ${targetCheck.reason}` };
                     }
-                    const res = await fetch(a.target);
+                    const res = await fetch(a.target, { redirect: "error" });
                     return res.status === a.expected
                         ? { passed: true }
                         : { passed: false, error: `Expected status ${a.expected}, got ${res.status} for ${a.target}` };

package/dist/verification/schema.js CHANGED Viewed

@@ -56,8 +56,16 @@ export const TestSuiteSchema = z.object({
  * @param tests - The array of TestAssertion to hash
  * @returns Lowercase hex SHA-256 digest
  */
-export function computeRubricHash(tests) {
+export function computeRubricHash(tests, minPassRate) {
     const sorted = [...tests].sort((a, b) => a.id.localeCompare(b.id));
+    // F11 fix: when minPassRate is provided, include it in the hash so the
+    // threshold can't be changed without invalidating the rubric.
+    // When omitted, hash only tests (backward compatible with existing harnesses).
+    if (minPassRate !== undefined) {
+        return createHash("sha256")
+            .update(JSON.stringify({ tests: sorted, min_pass_rate: minPassRate }))
+            .digest("hex");
+    }
     return createHash("sha256")
         .update(JSON.stringify(sorted))
         .digest("hex");

package/dist/verification/severityPolicy.js CHANGED Viewed

@@ -44,6 +44,18 @@ export function resolveEffectiveSeverity(assertionSeverity, defaultSeverity) {
  */
 export function evaluateSeverityGates(results, config) {
     const failures = results.filter(r => !r.passed && !r.skipped);
+    // F10 fix: skipped critical (gate/abort) assertions count as failures.
+    // Crafting depends_on to skip critical checks must not neutralize the gate.
+    const skippedCritical = results.filter(r => r.skipped && (r.severity === 'gate' || r.severity === 'abort'));
+    if (skippedCritical.length > 0) {
+        const ids = skippedCritical.map(r => r.id).join(", ");
+        const hasAbort = skippedCritical.some(r => r.severity === 'abort');
+        return {
+            action: hasAbort ? "abort" : "block",
+            failed_assertions: skippedCritical,
+            summary: `${hasAbort ? 'ABORT' : 'BLOCKED'}: ${skippedCritical.length} critical assertion(s) were skipped [${ids}] — treating as failures.`
+        };
+    }
     if (failures.length === 0) {
         return {
             action: "continue",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "prism-mcp-server",
-  "version": "18.0.2",
+  "version": "19.0.1",
   "mcpName": "io.github.dcostenco/prism-coder",
   "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
   "module": "index.ts",