prism-mcp-server 18.0.2 → 19.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +295 -763
- package/dist/cli.js +2 -2
- package/dist/server.js +9 -0
- package/dist/storage/sqlite.js +4 -2
- package/dist/tools/behavioralVerifierHandler.js +79 -0
- package/dist/tools/index.js +2 -0
- package/dist/tools/ledgerHandlers.js +35 -36
- package/dist/tools/prismInferHandler.js +16 -17
- package/dist/tools/sessionMemoryDefinitions.js +40 -0
- package/dist/tools/skillRouting.js +31 -6
- package/dist/utils/entitlements.js +27 -7
- package/dist/utils/groundingVerifier.js +3 -3
- package/dist/utils/modelPicker.js +16 -18
- package/dist/verification/gatekeeper.js +2 -1
- package/dist/verification/runner.js +7 -2
- package/dist/verification/schema.js +9 -1
- package/dist/verification/severityPolicy.js +12 -0
- package/package.json +1 -1
|
@@ -1,23 +1,22 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* RAM-Gated Local Model Picker
|
|
3
3
|
* ─────────────────────────────────────────────────────────────
|
|
4
|
-
* Cascade:
|
|
4
|
+
* Cascade: 9b (default) → 4b (verifier) → 2b (mobile) → 32b (complex only).
|
|
5
5
|
*
|
|
6
|
-
* The default ceiling is "
|
|
7
|
-
* -
|
|
6
|
+
* The default ceiling is "9b" — NOT "32b". This means:
|
|
7
|
+
* - 9b is the primary model for routing + general inference (Qwen3.5-9B, 100% BFCL)
|
|
8
8
|
* - 4b is used as the grounding verifier (fast, small)
|
|
9
|
+
* - 2b is the mobile/iPhone first gate (Qwen3.5-2B, 99.1% BFCL)
|
|
9
10
|
* - 32b is only loaded when caller explicitly passes ceiling="32b"
|
|
10
11
|
* or when the task requires maximum quality (complex code gen, etc.)
|
|
11
12
|
*
|
|
12
|
-
* This saves
|
|
13
|
-
* The 14b achieves 100% on eval_300 — same as 32b.
|
|
13
|
+
* This saves 13GB+ RAM vs 32b and keeps response times fast.
|
|
14
14
|
*
|
|
15
15
|
* tag weights need free ctx role
|
|
16
16
|
* prism-coder:32b ~19 GB ≥ 24 GB 32K complex (on-demand)
|
|
17
|
-
* prism-coder:
|
|
18
|
-
* prism-coder:
|
|
19
|
-
* prism-coder:
|
|
20
|
-
* prism-coder:1b7 ~ 2 GB ≥ 3 GB 8K watch + ultra-low RAM
|
|
17
|
+
* prism-coder:9b ~ 5.8 GB ≥ 8 GB 32K default router (Qwen3.5, 100% BFCL)
|
|
18
|
+
* prism-coder:4b ~ 3.4 GB ≥ 5 GB 32K verifier (Qwen3.5, 100%)
|
|
19
|
+
* prism-coder:2b ~ 2.3 GB ≥ 3 GB 8K mobile / iPhone (Qwen3.5, 99.1%)
|
|
21
20
|
*
|
|
22
21
|
* Below 3 GB free → no local pick (caller must use cloud).
|
|
23
22
|
*/
|
|
@@ -28,10 +27,9 @@ const GB = 1024 ** 3;
|
|
|
28
27
|
*/
|
|
29
28
|
export const MODEL_TIERS = [
|
|
30
29
|
{ tag: 'prism-coder:32b', weightsGb: 19, minFreeGb: 24, ctxTokens: 32_768 },
|
|
31
|
-
{ tag: 'prism-coder:
|
|
32
|
-
{ tag: 'prism-coder:
|
|
33
|
-
{ tag: 'prism-coder:
|
|
34
|
-
{ tag: 'prism-coder:1b7', weightsGb: 2, minFreeGb: 3, ctxTokens: 8_192 },
|
|
30
|
+
{ tag: 'prism-coder:9b', weightsGb: 5.8, minFreeGb: 8, ctxTokens: 32_768 },
|
|
31
|
+
{ tag: 'prism-coder:4b', weightsGb: 3.4, minFreeGb: 5, ctxTokens: 32_768 },
|
|
32
|
+
{ tag: 'prism-coder:2b', weightsGb: 2.3, minFreeGb: 3, ctxTokens: 8_192 },
|
|
35
33
|
];
|
|
36
34
|
/**
|
|
37
35
|
* True when `installed` matches `tierTag` either as a bare tag
|
|
@@ -44,21 +42,21 @@ export const MODEL_TIERS = [
|
|
|
44
42
|
function tagMatches(installed, tierTag) {
|
|
45
43
|
return installed === tierTag || installed.endsWith(`/${tierTag}`);
|
|
46
44
|
}
|
|
47
|
-
/** Default ceiling:
|
|
48
|
-
export const DEFAULT_CEILING = "
|
|
45
|
+
/** Default ceiling: 9b. Pass ceiling="32b" explicitly for max quality. */
|
|
46
|
+
export const DEFAULT_CEILING = "9b";
|
|
49
47
|
/**
|
|
50
48
|
* Pick the best viable tier for the given free RAM.
|
|
51
|
-
* Default ceiling is
|
|
49
|
+
* Default ceiling is 9b — use ceiling="32b" only for complex tasks.
|
|
52
50
|
*
|
|
53
51
|
* @param freeBytes Result of os.freemem() — binary bytes
|
|
54
|
-
* @param ceiling Cap tier. Default "
|
|
52
|
+
* @param ceiling Cap tier. Default "9b". Pass "32b" for complex tasks.
|
|
55
53
|
* @param available Optional whitelist of installed Ollama tags.
|
|
56
54
|
*/
|
|
57
55
|
export function pickLocalModel(freeBytes, ceiling, available) {
|
|
58
56
|
if (!Number.isFinite(freeBytes) || freeBytes <= 0)
|
|
59
57
|
return null;
|
|
60
58
|
const effectiveCeiling = ceiling || DEFAULT_CEILING;
|
|
61
|
-
const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(effectiveCeiling)
|
|
59
|
+
const ceilingIdx = MODEL_TIERS.findIndex(t => t.tag.endsWith(`:${effectiveCeiling}`));
|
|
62
60
|
const startIdx = ceilingIdx >= 0 ? ceilingIdx : 0;
|
|
63
61
|
for (let i = startIdx; i < MODEL_TIERS.length; i++) {
|
|
64
62
|
const tier = MODEL_TIERS[i];
|
|
@@ -15,8 +15,9 @@ export class Gatekeeper {
|
|
|
15
15
|
console.warn(`\n⚠️ [OVERRIDDEN] Verification Gate bypassed via administrator override.`);
|
|
16
16
|
// Enforce immutability and record audit trail context via environment variables
|
|
17
17
|
validatedResult.gate_override = true;
|
|
18
|
+
// F19 fix: process.env.USER is trivially spoofable — log it but note it's unauthenticated.
|
|
18
19
|
const actor = process.env.USER || process.env.USERNAME || 'unknown_user';
|
|
19
|
-
validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass
|
|
20
|
+
validatedResult.override_reason = validatedResult.override_reason || `CLI --force bypass (unauthenticated env.USER=${actor})`;
|
|
20
21
|
return { canContinue: true, validatedResult };
|
|
21
22
|
}
|
|
22
23
|
switch (validatedResult.gate_action) {
|
|
@@ -196,7 +196,12 @@ export class VerificationRunner {
|
|
|
196
196
|
* Throws an error if the hash does not match, ensuring test integrity.
|
|
197
197
|
*/
|
|
198
198
|
static verifyRubricHash(tests, harness) {
|
|
199
|
-
|
|
199
|
+
// F11 fix: include min_pass_rate in hash verification when harness has it.
|
|
200
|
+
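// Hash with min_pass_rate when the harness defines it; otherwise hash tests only (backward compat for harnesses without min_pass_rate). There is no retry: a mismatch throws below.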
|
|
201
|
+
const minRate = harness.min_pass_rate;
|
|
202
|
+
const computed = minRate !== undefined
|
|
203
|
+
? computeRubricHash(tests, minRate)
|
|
204
|
+
: computeRubricHash(tests);
|
|
200
205
|
if (computed !== harness.rubric_hash) {
|
|
201
206
|
throw new Error(`Rubric hash mismatch. Expected ${harness.rubric_hash}, but computeRubricHash returned ${computed}. The tests have been modified since the harness was created.`);
|
|
202
207
|
}
|
|
@@ -405,7 +410,7 @@ export class VerificationRunner {
|
|
|
405
410
|
if (!targetCheck.ok) {
|
|
406
411
|
return { passed: false, error: `HTTP target blocked: ${targetCheck.reason}` };
|
|
407
412
|
}
|
|
408
|
-
const res = await fetch(a.target);
|
|
413
|
+
const res = await fetch(a.target, { redirect: "error" });
|
|
409
414
|
return res.status === a.expected
|
|
410
415
|
? { passed: true }
|
|
411
416
|
: { passed: false, error: `Expected status ${a.expected}, got ${res.status} for ${a.target}` };
|
|
@@ -56,8 +56,16 @@ export const TestSuiteSchema = z.object({
|
|
|
56
56
|
* @param tests - The array of TestAssertion to hash
|
|
57
57
|
* @returns Lowercase hex SHA-256 digest
|
|
58
58
|
*/
|
|
59
|
-
export function computeRubricHash(tests) {
|
|
59
|
+
export function computeRubricHash(tests, minPassRate) {
|
|
60
60
|
const sorted = [...tests].sort((a, b) => a.id.localeCompare(b.id));
|
|
61
|
+
// F11 fix: when minPassRate is provided, include it in the hash so the
|
|
62
|
+
// threshold can't be changed without invalidating the rubric.
|
|
63
|
+
// When omitted, hash only tests (backward compatible with existing harnesses).
|
|
64
|
+
if (minPassRate !== undefined) {
|
|
65
|
+
return createHash("sha256")
|
|
66
|
+
.update(JSON.stringify({ tests: sorted, min_pass_rate: minPassRate }))
|
|
67
|
+
.digest("hex");
|
|
68
|
+
}
|
|
61
69
|
return createHash("sha256")
|
|
62
70
|
.update(JSON.stringify(sorted))
|
|
63
71
|
.digest("hex");
|
|
@@ -44,6 +44,18 @@ export function resolveEffectiveSeverity(assertionSeverity, defaultSeverity) {
|
|
|
44
44
|
*/
|
|
45
45
|
export function evaluateSeverityGates(results, config) {
|
|
46
46
|
const failures = results.filter(r => !r.passed && !r.skipped);
|
|
47
|
+
// F10 fix: skipped critical (gate/abort) assertions count as failures.
|
|
48
|
+
// Crafting depends_on to skip critical checks must not neutralize the gate.
|
|
49
|
+
const skippedCritical = results.filter(r => r.skipped && (r.severity === 'gate' || r.severity === 'abort'));
|
|
50
|
+
if (skippedCritical.length > 0) {
|
|
51
|
+
const ids = skippedCritical.map(r => r.id).join(", ");
|
|
52
|
+
const hasAbort = skippedCritical.some(r => r.severity === 'abort');
|
|
53
|
+
return {
|
|
54
|
+
action: hasAbort ? "abort" : "block",
|
|
55
|
+
failed_assertions: skippedCritical,
|
|
56
|
+
summary: `${hasAbort ? 'ABORT' : 'BLOCKED'}: ${skippedCritical.length} critical assertion(s) were skipped [${ids}] — treating as failures.`
|
|
57
|
+
};
|
|
58
|
+
}
|
|
47
59
|
if (failures.length === 0) {
|
|
48
60
|
return {
|
|
49
61
|
action: "continue",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "19.0.1",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-coder",
|
|
5
5
|
"description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
|
|
6
6
|
"module": "index.ts",
|