npm - prism-mcp-server - Versions diffs - 19.2.0 → 19.2.2 - Mend

prism-mcp-server 19.2.0 → 19.2.2

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/tools/ledgerHandlers.js +4 -6
package/dist/tools/prismInferHandler.js +4 -1
package/dist/tools/taskRouterHandler.js +20 -0
package/dist/utils/ddLogger.js +3 -0
package/dist/utils/inferenceMetrics.js +73 -44
package/package.json +1 -1

package/dist/tools/ledgerHandlers.js CHANGED Viewed

@@ -89,7 +89,7 @@ const MEMORY_BOUNDARY_SUFFIX = '\n</prism_memory>';
  * After saving, generates an embedding vector for the entry via fire-and-forget.
  */
 import { computeEffectiveImportance, recordMemoryAccess } from "../utils/cognitiveMemory.js";
-import { fetchPortalInferenceMetrics, markSessionStart } from "../utils/inferenceMetrics.js";
+import { formatInferenceMetrics, resetInferenceMetrics } from "../utils/inferenceMetrics.js";
 export async function sessionSaveLedgerHandler(args) {
     if (!isSessionSaveLedgerArgs(args)) {
         throw new Error("Invalid arguments for session_save_ledger");
@@ -230,8 +230,7 @@ export async function sessionSaveLedgerHandler(args) {
     storage.decayImportance(project, PRISM_USER_ID, 30).catch((err) => {
         debugLog(`[session_save_ledger] Background decay failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
     });
-    // Fetch inference metrics from portal (thin-client: portal is authority)
-    const metricsBlock = await fetchPortalInferenceMetrics();
+    const metricsBlock = formatInferenceMetrics();
     return {
         content: [{
                 type: "text",
@@ -552,7 +551,7 @@ export async function sessionSaveHandoffHandler(args, server) {
         // Dynamic import itself failed — module not found or similar
         console.error("[FactMerger] Module load failed (non-fatal): " + err));
     }
-    const metricsBlock = await fetchPortalInferenceMetrics();
+    const metricsBlock = formatInferenceMetrics();
     // Build response text based on whether a CRDT merge occurred
     const responseText = isMerged
         ? `🔄 Auto-merged conflict for "${project}" (v${expected_version} → v${newVersion})\n` +
@@ -582,8 +581,7 @@ export async function sessionLoadContextHandler(args) {
     if (!isSessionLoadContextArgs(args)) {
         throw new Error("Invalid arguments for session_load_context");
     }
-    // Mark session boundary — portal metrics fetched with since=this timestamp
-    markSessionStart();
+    resetInferenceMetrics();
     const { project, level = "standard", role } = args;
     const maxTokens = args.max_tokens
         || parseInt(await getSetting("max_tokens", "0"), 10) || undefined; // v4.0: arg > dashboard setting > none

package/dist/tools/prismInferHandler.js CHANGED Viewed

@@ -29,6 +29,7 @@ import { ddLog } from "../utils/ddLogger.js";
 import { stripThink } from "../utils/thinkStrip.js";
 import { passesQualityGate } from "../utils/qualityGate.js";
 import { checkInputSafety, checkOutputSafety } from "../utils/safetyGate.js";
+import { recordInference } from "../utils/inferenceMetrics.js";
 // ─── Tool Definition ────────────────────────────────────────────
 export const PRISM_INFER_TOOL = {
     name: "prism_infer",
@@ -525,7 +526,9 @@ export async function prismInferHandler(args) {
             ollamaUrl: PRISM_LOCAL_LLM_URL,
         });
         debugLog(`[prism_infer] backend=${result.backend} model=${result.model_picked} latency=${result.latency_ms}ms free=${result.ram_free_mb}MB`);
-        // Forward per-call metrics to portal (thin-client pattern).
+        // Local accumulator — sole source of the user-facing metrics block.
+        recordInference(result);
+        // Best-effort portal forwarding (independent analytics stream).
         // safety_gate excluded — logging crisis filter triggers is a HIPAA concern.
         if (result.backend !== "safety_gate") {
             ddLog("info", "prism_infer.usage", {

package/dist/tools/taskRouterHandler.js CHANGED Viewed

@@ -18,6 +18,7 @@
  */
 import { isSessionTaskRouteArgs, } from "./sessionMemoryDefinitions.js";
 import { getStorage } from "../storage/index.js";
+import { getSetting } from "../storage/configStorage.js";
 import { getExperienceBias } from "./routerExperience.js";
 import { toKeywordArray } from "../utils/keywordExtractor.js";
 import { callLocalLlm } from "../utils/localLlm.js";
@@ -281,6 +282,25 @@ export async function sessionTaskRouteHandler(args) {
             isError: true,
         };
     }
+    // Delegation opt-in gate: if delegation_enabled is not "true", always route to host.
+    // This enforces the prism-infer-delegation skill's "off by default" rule in code.
+    const delegationEnabled = await getSetting("delegation_enabled", "false");
+    if (delegationEnabled !== "true") {
+        return {
+            content: [{
+                    type: "text",
+                    text: JSON.stringify({
+                        target: "host",
+                        confidence: 1.0,
+                        complexity_score: 5,
+                        rationale: "Delegation is off (default). Enable with: configure_notifications({setting: 'delegation_enabled', value: 'true'}) or via the Prism dashboard.",
+                        recommended_tool: null,
+                        delegation_enabled: false,
+                    }),
+                }],
+            isError: false,
+        };
+    }
     const result = computeRoute(args);
     // v7.2.0: Experience-based bias adjustment
     if (args.project) {

package/dist/utils/ddLogger.js CHANGED Viewed

@@ -128,3 +128,6 @@ export function ddInfo(message, context) {
 export function ddWarn(message, context) {
     ddLog("warn", message, context);
 }
+if (!TELEMETRY_WRITE_TOKEN && process.env.PRISM_DEBUG_LOGGING) {
+    console.info("[prism-mcp] Portal telemetry not configured (no TELEMETRY_WRITE_TOKEN). Session metrics work locally — this is normal for offline/free-tier use.");
+}

package/dist/utils/inferenceMetrics.js CHANGED Viewed

@@ -1,62 +1,91 @@
 /**
- * Inference metrics — thin-client fetch from Synalux portal.
+ * Inference metrics — local accumulator for user-facing display.
  *
- * Prism forwards per-call metrics via ddLog("prism_infer.usage").
- * The portal aggregates them in app_telemetry. This module fetches
- * the aggregated summary on demand (session_save_ledger/handoff).
+ * The local accumulator is the SOLE source for the session metrics block
+ * shown in session_save_ledger/handoff. It tracks what THIS prism process
+ * did THIS session — prism is the natural and only complete source for
+ * this data (the portal only sees what prism forwards).
+ *
+ * Portal forwarding (ddLog → /api/v1/telemetry) is a separate, best-effort
+ * analytics stream that the display path never depends on. If the portal
+ * is down, unconfigured, or the token is missing, users still see metrics.
  */
-import { getSynaluxJwt } from "./synaluxJwt.js";
-import { PRISM_SYNALUX_BASE_URL } from "../config.js";
 import { debugLog } from "./logger.js";
-let sessionStartedAt = new Date().toISOString();
-export function markSessionStart() {
-    sessionStartedAt = new Date().toISOString();
-}
-async function fetchMetrics() {
-    if (!PRISM_SYNALUX_BASE_URL)
-        return { metrics: null, error: "no_portal_url" };
-    const jwt = await getSynaluxJwt();
-    if (!jwt)
-        return { metrics: null, error: "jwt_unavailable" };
-    try {
-        const url = `${PRISM_SYNALUX_BASE_URL}/api/v1/telemetry/inference-metrics?since=${encodeURIComponent(sessionStartedAt)}`;
-        const res = await fetch(url, {
-            headers: { "Authorization": `Bearer ${jwt}` },
-            signal: AbortSignal.timeout(5_000),
-        });
-        if (!res.ok) {
-            debugLog(`[inference-metrics] portal returned ${res.status}`);
-            return { metrics: null, error: `portal_${res.status}` };
-        }
-        return { metrics: (await res.json()) };
+const byModel = {};
+let localCalls = 0;
+let cloudCalls = 0;
+let totalPromptTokens = 0;
+let totalCompletionTokens = 0;
+let totalLatencyMs = 0;
+export function recordInference(result) {
+    if (result.backend === "safety_gate")
+        return;
+    const key = result.model_picked ?? result.backend;
+    if (result.used_cloud) {
+        cloudCalls++;
+    }
+    else {
+        localCalls++;
     }
-    catch (err) {
-        const msg = err instanceof Error ? err.message : String(err);
-        debugLog(`[inference-metrics] fetch failed: ${msg}`);
-        return { metrics: null, error: msg };
+    const pt = result.prompt_tokens ?? 0;
+    const ct = result.completion_tokens ?? 0;
+    totalPromptTokens += pt;
+    totalCompletionTokens += ct;
+    totalLatencyMs += result.latency_ms;
+    if (!byModel[key]) {
+        byModel[key] = { calls: 0, promptTokens: 0, completionTokens: 0, totalLatencyMs: 0 };
     }
+    byModel[key].calls++;
+    byModel[key].promptTokens += pt;
+    byModel[key].completionTokens += ct;
+    byModel[key].totalLatencyMs += result.latency_ms;
 }
-export async function fetchPortalInferenceMetrics() {
-    const { metrics, error } = await fetchMetrics();
-    if (!metrics) {
-        if (error)
-            debugLog(`[inference-metrics] unavailable: ${error}`);
-        return "";
+export function getInferenceSnapshot() {
+    const total = localCalls + cloudCalls;
+    const modelCopy = {};
+    for (const [k, v] of Object.entries(byModel)) {
+        modelCopy[k] = { ...v };
+    }
+    return {
+        localCalls,
+        cloudCalls,
+        totalCalls: total,
+        localPct: total > 0 ? Math.round((localCalls / total) * 100) : 0,
+        cloudPct: total > 0 ? 100 - Math.round((localCalls / total) * 100) : 0,
+        totalPromptTokens,
+        totalCompletionTokens,
+        totalTokens: totalPromptTokens + totalCompletionTokens,
+        avgLatencyMs: total > 0 ? Math.round(totalLatencyMs / total) : 0,
+        byModel: modelCopy,
+    };
+}
+export function resetInferenceMetrics() {
+    localCalls = 0;
+    cloudCalls = 0;
+    totalPromptTokens = 0;
+    totalCompletionTokens = 0;
+    totalLatencyMs = 0;
+    for (const key of Object.keys(byModel)) {
+        delete byModel[key];
     }
-    if (metrics.total_calls === 0)
+    debugLog("[inference-metrics] Session metrics reset");
+}
+export function formatInferenceMetrics() {
+    const snap = getInferenceSnapshot();
+    if (snap.totalCalls === 0)
         return "";
     const lines = [
         `\n📊 Inference Metrics (this session):`,
-        `  Total calls: ${metrics.total_calls} — Local: ${metrics.local_calls} (${metrics.local_pct}%) | Cloud: ${metrics.cloud_calls} (${metrics.cloud_pct}%)`,
-        `  Tokens: ${metrics.total_prompt_tokens.toLocaleString()} in + ${metrics.total_completion_tokens.toLocaleString()} out = ${metrics.total_tokens.toLocaleString()} total`,
-        `  Avg latency: ${metrics.avg_latency_ms}ms`,
+        `  Total calls: ${snap.totalCalls} — Local: ${snap.localCalls} (${snap.localPct}%) | Cloud: ${snap.cloudCalls} (${snap.cloudPct}%)`,
+        `  Tokens: ${snap.totalPromptTokens.toLocaleString()} in + ${snap.totalCompletionTokens.toLocaleString()} out = ${snap.totalTokens.toLocaleString()} total`,
+        `  Avg latency: ${snap.avgLatencyMs}ms`,
     ];
-    const models = Object.entries(metrics.by_model).sort((a, b) => b[1].calls - a[1].calls);
+    const models = Object.entries(snap.byModel).sort((a, b) => b[1].calls - a[1].calls);
     if (models.length > 1) {
         lines.push(`  By model:`);
         for (const [name, stats] of models) {
-            const tokens = stats.prompt_tokens + stats.completion_tokens;
-            const avgMs = stats.calls > 0 ? Math.round(stats.total_latency_ms / stats.calls) : 0;
+            const tokens = stats.promptTokens + stats.completionTokens;
+            const avgMs = stats.calls > 0 ? Math.round(stats.totalLatencyMs / stats.calls) : 0;
             lines.push(`    ${name}: ${stats.calls} calls, ${tokens.toLocaleString()} tokens, avg ${avgMs}ms`);
         }
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "prism-mcp-server",
-  "version": "19.2.0",
+  "version": "19.2.2",
   "mcpName": "io.github.dcostenco/prism-coder",
   "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
   "module": "index.ts",