prism-mcp-server 19.2.0 → 19.2.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -89,7 +89,7 @@ const MEMORY_BOUNDARY_SUFFIX = '\n</prism_memory>';
89
89
  * After saving, generates an embedding vector for the entry via fire-and-forget.
90
90
  */
91
91
  import { computeEffectiveImportance, recordMemoryAccess } from "../utils/cognitiveMemory.js";
92
- import { fetchPortalInferenceMetrics, markSessionStart } from "../utils/inferenceMetrics.js";
92
+ import { formatInferenceMetrics, resetInferenceMetrics } from "../utils/inferenceMetrics.js";
93
93
  export async function sessionSaveLedgerHandler(args) {
94
94
  if (!isSessionSaveLedgerArgs(args)) {
95
95
  throw new Error("Invalid arguments for session_save_ledger");
@@ -230,8 +230,7 @@ export async function sessionSaveLedgerHandler(args) {
230
230
  storage.decayImportance(project, PRISM_USER_ID, 30).catch((err) => {
231
231
  debugLog(`[session_save_ledger] Background decay failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
232
232
  });
233
- // Fetch inference metrics from portal (thin-client: portal is authority)
234
- const metricsBlock = await fetchPortalInferenceMetrics();
233
+ const metricsBlock = formatInferenceMetrics();
235
234
  return {
236
235
  content: [{
237
236
  type: "text",
@@ -552,7 +551,7 @@ export async function sessionSaveHandoffHandler(args, server) {
552
551
  // Dynamic import itself failed — module not found or similar
553
552
  console.error("[FactMerger] Module load failed (non-fatal): " + err));
554
553
  }
555
- const metricsBlock = await fetchPortalInferenceMetrics();
554
+ const metricsBlock = formatInferenceMetrics();
556
555
  // Build response text based on whether a CRDT merge occurred
557
556
  const responseText = isMerged
558
557
  ? `🔄 Auto-merged conflict for "${project}" (v${expected_version} → v${newVersion})\n` +
@@ -582,8 +581,7 @@ export async function sessionLoadContextHandler(args) {
582
581
  if (!isSessionLoadContextArgs(args)) {
583
582
  throw new Error("Invalid arguments for session_load_context");
584
583
  }
585
- // Mark session boundary — portal metrics fetched with since=this timestamp
586
- markSessionStart();
584
+ resetInferenceMetrics();
587
585
  const { project, level = "standard", role } = args;
588
586
  const maxTokens = args.max_tokens
589
587
  || parseInt(await getSetting("max_tokens", "0"), 10) || undefined; // v4.0: arg > dashboard setting > none
@@ -29,6 +29,7 @@ import { ddLog } from "../utils/ddLogger.js";
29
29
  import { stripThink } from "../utils/thinkStrip.js";
30
30
  import { passesQualityGate } from "../utils/qualityGate.js";
31
31
  import { checkInputSafety, checkOutputSafety } from "../utils/safetyGate.js";
32
+ import { recordInference } from "../utils/inferenceMetrics.js";
32
33
  // ─── Tool Definition ────────────────────────────────────────────
33
34
  export const PRISM_INFER_TOOL = {
34
35
  name: "prism_infer",
@@ -525,7 +526,9 @@ export async function prismInferHandler(args) {
525
526
  ollamaUrl: PRISM_LOCAL_LLM_URL,
526
527
  });
527
528
  debugLog(`[prism_infer] backend=${result.backend} model=${result.model_picked} latency=${result.latency_ms}ms free=${result.ram_free_mb}MB`);
528
- // Forward per-call metrics to portal (thin-client pattern).
529
+ // Local accumulator — sole source of the user-facing metrics block.
530
+ recordInference(result);
531
+ // Best-effort portal forwarding (independent analytics stream).
529
532
  // safety_gate excluded — logging crisis filter triggers is a HIPAA concern.
530
533
  if (result.backend !== "safety_gate") {
531
534
  ddLog("info", "prism_infer.usage", {
@@ -18,6 +18,7 @@
18
18
  */
19
19
  import { isSessionTaskRouteArgs, } from "./sessionMemoryDefinitions.js";
20
20
  import { getStorage } from "../storage/index.js";
21
+ import { getSetting } from "../storage/configStorage.js";
21
22
  import { getExperienceBias } from "./routerExperience.js";
22
23
  import { toKeywordArray } from "../utils/keywordExtractor.js";
23
24
  import { callLocalLlm } from "../utils/localLlm.js";
@@ -281,6 +282,25 @@ export async function sessionTaskRouteHandler(args) {
281
282
  isError: true,
282
283
  };
283
284
  }
285
+ // Delegation opt-in gate: if delegation_enabled is not "true", always route to host.
286
+ // This enforces the prism-infer-delegation skill's "off by default" rule in code.
287
+ const delegationEnabled = await getSetting("delegation_enabled", "false");
288
+ if (delegationEnabled !== "true") {
289
+ return {
290
+ content: [{
291
+ type: "text",
292
+ text: JSON.stringify({
293
+ target: "host",
294
+ confidence: 1.0,
295
+ complexity_score: 5,
296
+ rationale: "Delegation is off (default). Enable with: configure_notifications({setting: 'delegation_enabled', value: 'true'}) or via the Prism dashboard.",
297
+ recommended_tool: null,
298
+ delegation_enabled: false,
299
+ }),
300
+ }],
301
+ isError: false,
302
+ };
303
+ }
284
304
  const result = computeRoute(args);
285
305
  // v7.2.0: Experience-based bias adjustment
286
306
  if (args.project) {
@@ -128,3 +128,6 @@ export function ddInfo(message, context) {
128
128
  export function ddWarn(message, context) {
129
129
  ddLog("warn", message, context);
130
130
  }
131
+ if (!TELEMETRY_WRITE_TOKEN && process.env.PRISM_DEBUG_LOGGING) {
132
+ console.info("[prism-mcp] Portal telemetry not configured (no TELEMETRY_WRITE_TOKEN). Session metrics work locally — this is normal for offline/free-tier use.");
133
+ }
@@ -1,62 +1,91 @@
1
1
  /**
2
- * Inference metrics — thin-client fetch from Synalux portal.
2
+ * Inference metrics — local accumulator for user-facing display.
3
3
  *
4
- * Prism forwards per-call metrics via ddLog("prism_infer.usage").
5
- * The portal aggregates them in app_telemetry. This module fetches
6
- * the aggregated summary on demand (session_save_ledger/handoff).
4
+ * The local accumulator is the SOLE source for the session metrics block
5
+ * shown in session_save_ledger/handoff. It tracks what THIS prism process
6
+ * did THIS session — prism is the natural and only complete source for
7
+ * this data (the portal only sees what prism forwards).
8
+ *
9
+ * Portal forwarding (ddLog → /api/v1/telemetry) is a separate, best-effort
10
+ * analytics stream that the display path never depends on. If the portal
11
+ * is down, unconfigured, or the token is missing, users still see metrics.
7
12
  */
8
- import { getSynaluxJwt } from "./synaluxJwt.js";
9
- import { PRISM_SYNALUX_BASE_URL } from "../config.js";
10
13
  import { debugLog } from "./logger.js";
11
- let sessionStartedAt = new Date().toISOString();
12
- export function markSessionStart() {
13
- sessionStartedAt = new Date().toISOString();
14
- }
15
- async function fetchMetrics() {
16
- if (!PRISM_SYNALUX_BASE_URL)
17
- return { metrics: null, error: "no_portal_url" };
18
- const jwt = await getSynaluxJwt();
19
- if (!jwt)
20
- return { metrics: null, error: "jwt_unavailable" };
21
- try {
22
- const url = `${PRISM_SYNALUX_BASE_URL}/api/v1/telemetry/inference-metrics?since=${encodeURIComponent(sessionStartedAt)}`;
23
- const res = await fetch(url, {
24
- headers: { "Authorization": `Bearer ${jwt}` },
25
- signal: AbortSignal.timeout(5_000),
26
- });
27
- if (!res.ok) {
28
- debugLog(`[inference-metrics] portal returned ${res.status}`);
29
- return { metrics: null, error: `portal_${res.status}` };
30
- }
31
- return { metrics: (await res.json()) };
14
+ const byModel = {};
15
+ let localCalls = 0;
16
+ let cloudCalls = 0;
17
+ let totalPromptTokens = 0;
18
+ let totalCompletionTokens = 0;
19
+ let totalLatencyMs = 0;
20
+ export function recordInference(result) {
21
+ if (result.backend === "safety_gate")
22
+ return;
23
+ const key = result.model_picked ?? result.backend;
24
+ if (result.used_cloud) {
25
+ cloudCalls++;
26
+ }
27
+ else {
28
+ localCalls++;
32
29
  }
33
- catch (err) {
34
- const msg = err instanceof Error ? err.message : String(err);
35
- debugLog(`[inference-metrics] fetch failed: ${msg}`);
36
- return { metrics: null, error: msg };
30
+ const pt = result.prompt_tokens ?? 0;
31
+ const ct = result.completion_tokens ?? 0;
32
+ totalPromptTokens += pt;
33
+ totalCompletionTokens += ct;
34
+ totalLatencyMs += result.latency_ms;
35
+ if (!byModel[key]) {
36
+ byModel[key] = { calls: 0, promptTokens: 0, completionTokens: 0, totalLatencyMs: 0 };
37
37
  }
38
+ byModel[key].calls++;
39
+ byModel[key].promptTokens += pt;
40
+ byModel[key].completionTokens += ct;
41
+ byModel[key].totalLatencyMs += result.latency_ms;
38
42
  }
39
- export async function fetchPortalInferenceMetrics() {
40
- const { metrics, error } = await fetchMetrics();
41
- if (!metrics) {
42
- if (error)
43
- debugLog(`[inference-metrics] unavailable: ${error}`);
44
- return "";
43
+ export function getInferenceSnapshot() {
44
+ const total = localCalls + cloudCalls;
45
+ const modelCopy = {};
46
+ for (const [k, v] of Object.entries(byModel)) {
47
+ modelCopy[k] = { ...v };
48
+ }
49
+ return {
50
+ localCalls,
51
+ cloudCalls,
52
+ totalCalls: total,
53
+ localPct: total > 0 ? Math.round((localCalls / total) * 100) : 0,
54
+ cloudPct: total > 0 ? 100 - Math.round((localCalls / total) * 100) : 0,
55
+ totalPromptTokens,
56
+ totalCompletionTokens,
57
+ totalTokens: totalPromptTokens + totalCompletionTokens,
58
+ avgLatencyMs: total > 0 ? Math.round(totalLatencyMs / total) : 0,
59
+ byModel: modelCopy,
60
+ };
61
+ }
62
+ export function resetInferenceMetrics() {
63
+ localCalls = 0;
64
+ cloudCalls = 0;
65
+ totalPromptTokens = 0;
66
+ totalCompletionTokens = 0;
67
+ totalLatencyMs = 0;
68
+ for (const key of Object.keys(byModel)) {
69
+ delete byModel[key];
45
70
  }
46
- if (metrics.total_calls === 0)
71
+ debugLog("[inference-metrics] Session metrics reset");
72
+ }
73
+ export function formatInferenceMetrics() {
74
+ const snap = getInferenceSnapshot();
75
+ if (snap.totalCalls === 0)
47
76
  return "";
48
77
  const lines = [
49
78
  `\n📊 Inference Metrics (this session):`,
50
- ` Total calls: ${metrics.total_calls} — Local: ${metrics.local_calls} (${metrics.local_pct}%) | Cloud: ${metrics.cloud_calls} (${metrics.cloud_pct}%)`,
51
- ` Tokens: ${metrics.total_prompt_tokens.toLocaleString()} in + ${metrics.total_completion_tokens.toLocaleString()} out = ${metrics.total_tokens.toLocaleString()} total`,
52
- ` Avg latency: ${metrics.avg_latency_ms}ms`,
79
+ ` Total calls: ${snap.totalCalls} — Local: ${snap.localCalls} (${snap.localPct}%) | Cloud: ${snap.cloudCalls} (${snap.cloudPct}%)`,
80
+ ` Tokens: ${snap.totalPromptTokens.toLocaleString()} in + ${snap.totalCompletionTokens.toLocaleString()} out = ${snap.totalTokens.toLocaleString()} total`,
81
+ ` Avg latency: ${snap.avgLatencyMs}ms`,
53
82
  ];
54
- const models = Object.entries(metrics.by_model).sort((a, b) => b[1].calls - a[1].calls);
83
+ const models = Object.entries(snap.byModel).sort((a, b) => b[1].calls - a[1].calls);
55
84
  if (models.length > 1) {
56
85
  lines.push(` By model:`);
57
86
  for (const [name, stats] of models) {
58
- const tokens = stats.prompt_tokens + stats.completion_tokens;
59
- const avgMs = stats.calls > 0 ? Math.round(stats.total_latency_ms / stats.calls) : 0;
87
+ const tokens = stats.promptTokens + stats.completionTokens;
88
+ const avgMs = stats.calls > 0 ? Math.round(stats.totalLatencyMs / stats.calls) : 0;
60
89
  lines.push(` ${name}: ${stats.calls} calls, ${tokens.toLocaleString()} tokens, avg ${avgMs}ms`);
61
90
  }
62
91
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prism-mcp-server",
3
- "version": "19.2.0",
3
+ "version": "19.2.2",
4
4
  "mcpName": "io.github.dcostenco/prism-coder",
5
5
  "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
6
6
  "module": "index.ts",