prism-mcp-server 19.2.0 → 19.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -89,7 +89,7 @@ const MEMORY_BOUNDARY_SUFFIX = '\n</prism_memory>';
|
|
|
89
89
|
* After saving, generates an embedding vector for the entry via fire-and-forget.
|
|
90
90
|
*/
|
|
91
91
|
import { computeEffectiveImportance, recordMemoryAccess } from "../utils/cognitiveMemory.js";
|
|
92
|
-
import {
|
|
92
|
+
import { formatInferenceMetrics, resetInferenceMetrics } from "../utils/inferenceMetrics.js";
|
|
93
93
|
export async function sessionSaveLedgerHandler(args) {
|
|
94
94
|
if (!isSessionSaveLedgerArgs(args)) {
|
|
95
95
|
throw new Error("Invalid arguments for session_save_ledger");
|
|
@@ -230,8 +230,7 @@ export async function sessionSaveLedgerHandler(args) {
|
|
|
230
230
|
storage.decayImportance(project, PRISM_USER_ID, 30).catch((err) => {
|
|
231
231
|
debugLog(`[session_save_ledger] Background decay failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
232
232
|
});
|
|
233
|
-
|
|
234
|
-
const metricsBlock = await fetchPortalInferenceMetrics();
|
|
233
|
+
const metricsBlock = formatInferenceMetrics();
|
|
235
234
|
return {
|
|
236
235
|
content: [{
|
|
237
236
|
type: "text",
|
|
@@ -552,7 +551,7 @@ export async function sessionSaveHandoffHandler(args, server) {
|
|
|
552
551
|
// Dynamic import itself failed — module not found or similar
|
|
553
552
|
console.error("[FactMerger] Module load failed (non-fatal): " + err));
|
|
554
553
|
}
|
|
555
|
-
const metricsBlock =
|
|
554
|
+
const metricsBlock = formatInferenceMetrics();
|
|
556
555
|
// Build response text based on whether a CRDT merge occurred
|
|
557
556
|
const responseText = isMerged
|
|
558
557
|
? `🔄 Auto-merged conflict for "${project}" (v${expected_version} → v${newVersion})\n` +
|
|
@@ -582,8 +581,7 @@ export async function sessionLoadContextHandler(args) {
|
|
|
582
581
|
if (!isSessionLoadContextArgs(args)) {
|
|
583
582
|
throw new Error("Invalid arguments for session_load_context");
|
|
584
583
|
}
|
|
585
|
-
|
|
586
|
-
markSessionStart();
|
|
584
|
+
resetInferenceMetrics();
|
|
587
585
|
const { project, level = "standard", role } = args;
|
|
588
586
|
const maxTokens = args.max_tokens
|
|
589
587
|
|| parseInt(await getSetting("max_tokens", "0"), 10) || undefined; // v4.0: arg > dashboard setting > none
|
|
@@ -29,6 +29,7 @@ import { ddLog } from "../utils/ddLogger.js";
|
|
|
29
29
|
import { stripThink } from "../utils/thinkStrip.js";
|
|
30
30
|
import { passesQualityGate } from "../utils/qualityGate.js";
|
|
31
31
|
import { checkInputSafety, checkOutputSafety } from "../utils/safetyGate.js";
|
|
32
|
+
import { recordInference } from "../utils/inferenceMetrics.js";
|
|
32
33
|
// ─── Tool Definition ────────────────────────────────────────────
|
|
33
34
|
export const PRISM_INFER_TOOL = {
|
|
34
35
|
name: "prism_infer",
|
|
@@ -525,7 +526,9 @@ export async function prismInferHandler(args) {
|
|
|
525
526
|
ollamaUrl: PRISM_LOCAL_LLM_URL,
|
|
526
527
|
});
|
|
527
528
|
debugLog(`[prism_infer] backend=${result.backend} model=${result.model_picked} latency=${result.latency_ms}ms free=${result.ram_free_mb}MB`);
|
|
528
|
-
//
|
|
529
|
+
// Local accumulator — sole source of the user-facing metrics block.
|
|
530
|
+
recordInference(result);
|
|
531
|
+
// Best-effort portal forwarding (independent analytics stream).
|
|
529
532
|
// safety_gate excluded — logging crisis filter triggers is a HIPAA concern.
|
|
530
533
|
if (result.backend !== "safety_gate") {
|
|
531
534
|
ddLog("info", "prism_infer.usage", {
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
*/
|
|
19
19
|
import { isSessionTaskRouteArgs, } from "./sessionMemoryDefinitions.js";
|
|
20
20
|
import { getStorage } from "../storage/index.js";
|
|
21
|
+
import { getSetting } from "../storage/configStorage.js";
|
|
21
22
|
import { getExperienceBias } from "./routerExperience.js";
|
|
22
23
|
import { toKeywordArray } from "../utils/keywordExtractor.js";
|
|
23
24
|
import { callLocalLlm } from "../utils/localLlm.js";
|
|
@@ -281,6 +282,25 @@ export async function sessionTaskRouteHandler(args) {
|
|
|
281
282
|
isError: true,
|
|
282
283
|
};
|
|
283
284
|
}
|
|
285
|
+
// Delegation opt-in gate: if delegation_enabled is not "true", always route to host.
|
|
286
|
+
// This enforces the prism-infer-delegation skill's "off by default" rule in code.
|
|
287
|
+
const delegationEnabled = await getSetting("delegation_enabled", "false");
|
|
288
|
+
if (delegationEnabled !== "true") {
|
|
289
|
+
return {
|
|
290
|
+
content: [{
|
|
291
|
+
type: "text",
|
|
292
|
+
text: JSON.stringify({
|
|
293
|
+
target: "host",
|
|
294
|
+
confidence: 1.0,
|
|
295
|
+
complexity_score: 5,
|
|
296
|
+
rationale: "Delegation is off (default). Enable with: configure_notifications({setting: 'delegation_enabled', value: 'true'}) or via the Prism dashboard.",
|
|
297
|
+
recommended_tool: null,
|
|
298
|
+
delegation_enabled: false,
|
|
299
|
+
}),
|
|
300
|
+
}],
|
|
301
|
+
isError: false,
|
|
302
|
+
};
|
|
303
|
+
}
|
|
284
304
|
const result = computeRoute(args);
|
|
285
305
|
// v7.2.0: Experience-based bias adjustment
|
|
286
306
|
if (args.project) {
|
package/dist/utils/ddLogger.js
CHANGED
|
@@ -128,3 +128,6 @@ export function ddInfo(message, context) {
|
|
|
128
128
|
/**
 * Warn-level convenience wrapper around ddLog.
 *
 * @param message - Log message, passed through to ddLog unchanged.
 * @param context - Optional structured context, passed through to ddLog unchanged.
 */
export function ddWarn(message, context) {
    const level = "warn";
    ddLog(level, message, context);
}
|
|
131
|
+
// One-time, load-time notice shown only when debug logging is requested and
// no telemetry write token is configured.
// Written to stderr (console.error) rather than stdout (console.info):
// NOTE(review): assumes this server is run over the MCP stdio transport, where
// stdout carries the JSON-RPC stream and a stray line there would corrupt it.
// stderr also matches the other diagnostics in this package.
if (!TELEMETRY_WRITE_TOKEN && process.env.PRISM_DEBUG_LOGGING) {
    console.error("[prism-mcp] Portal telemetry not configured (no TELEMETRY_WRITE_TOKEN). Session metrics work locally — this is normal for offline/free-tier use.");
}
|
|
@@ -1,62 +1,91 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Inference metrics —
|
|
2
|
+
* Inference metrics — local accumulator for user-facing display.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* the
|
|
4
|
+
* The local accumulator is the SOLE source for the session metrics block
|
|
5
|
+
* shown in session_save_ledger/handoff. It tracks what THIS prism process
|
|
6
|
+
* did THIS session — prism is the natural and only complete source for
|
|
7
|
+
* this data (the portal only sees what prism forwards).
|
|
8
|
+
*
|
|
9
|
+
* Portal forwarding (ddLog → /api/v1/telemetry) is a separate, best-effort
|
|
10
|
+
* analytics stream that the display path never depends on. If the portal
|
|
11
|
+
* is down, unconfigured, or the token is missing, users still see metrics.
|
|
7
12
|
*/
|
|
8
|
-
import { getSynaluxJwt } from "./synaluxJwt.js";
|
|
9
|
-
import { PRISM_SYNALUX_BASE_URL } from "../config.js";
|
|
10
13
|
import { debugLog } from "./logger.js";
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
});
|
|
27
|
-
if (!res.ok) {
|
|
28
|
-
debugLog(`[inference-metrics] portal returned ${res.status}`);
|
|
29
|
-
return { metrics: null, error: `portal_${res.status}` };
|
|
30
|
-
}
|
|
31
|
-
return { metrics: (await res.json()) };
|
|
14
|
+
// Per-model counters, keyed by model name (or backend when no model was picked).
// Null-prototype on purpose: with a plain {} a key such as "constructor" or
// "__proto__" would resolve to an inherited member, skip initialisation below,
// and the counters would be written onto a shared built-in object.
const byModel = Object.create(null);
let localCalls = 0;
let cloudCalls = 0;
let totalPromptTokens = 0;
let totalCompletionTokens = 0;
let totalLatencyMs = 0;

/**
 * Coerce a metric field to a finite number; anything else counts as 0.
 * Keeps a single malformed result from turning the running totals into NaN.
 */
function toFiniteMetric(value) {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
}

/**
 * Fold one inference result into the module-level session counters.
 *
 * Results whose backend is "safety_gate" are ignored entirely. Every other
 * result increments either the cloud or the local call counter (based on
 * `result.used_cloud`) and adds its tokens and latency to both the session
 * totals and the per-model bucket.
 *
 * @param result - Inference result; reads `backend`, `model_picked`,
 *   `used_cloud`, `prompt_tokens`, `completion_tokens` and `latency_ms`.
 *   Missing or non-numeric token/latency fields are counted as 0.
 */
export function recordInference(result) {
    if (result.backend === "safety_gate") {
        return;
    }
    const key = result.model_picked ?? result.backend;
    if (result.used_cloud) {
        cloudCalls++;
    }
    else {
        localCalls++;
    }
    const pt = toFiniteMetric(result.prompt_tokens);
    const ct = toFiniteMetric(result.completion_tokens);
    // Latency gets the same defensive treatment as the token counts: a missing
    // latency_ms must not poison the average for the rest of the session.
    const latency = toFiniteMetric(result.latency_ms);
    totalPromptTokens += pt;
    totalCompletionTokens += ct;
    totalLatencyMs += latency;
    if (!byModel[key]) {
        byModel[key] = { calls: 0, promptTokens: 0, completionTokens: 0, totalLatencyMs: 0 };
    }
    const bucket = byModel[key];
    bucket.calls++;
    bucket.promptTokens += pt;
    bucket.completionTokens += ct;
    bucket.totalLatencyMs += latency;
}
|
|
39
|
-
export
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
43
|
+
/**
 * Build a point-in-time view of the session counters.
 *
 * The per-model entries are shallow-copied, so callers can modify the
 * returned object without touching the live accumulator.
 *
 * @returns Totals, local/cloud split (whole-number percentages that sum to
 *   100 when there is at least one call, otherwise both 0), token sums,
 *   rounded average latency, and the copied per-model stats.
 */
export function getInferenceSnapshot() {
    const totalCalls = localCalls + cloudCalls;
    const hasCalls = totalCalls > 0;
    // The cloud share is derived from the rounded local share so the two
    // always add up to exactly 100.
    const localShare = hasCalls ? Math.round((localCalls / totalCalls) * 100) : 0;
    const cloudShare = hasCalls ? 100 - localShare : 0;
    const perModel = {};
    for (const name of Object.keys(byModel)) {
        perModel[name] = Object.assign({}, byModel[name]);
    }
    return {
        localCalls,
        cloudCalls,
        totalCalls,
        localPct: localShare,
        cloudPct: cloudShare,
        totalPromptTokens,
        totalCompletionTokens,
        totalTokens: totalPromptTokens + totalCompletionTokens,
        avgLatencyMs: hasCalls ? Math.round(totalLatencyMs / totalCalls) : 0,
        byModel: perModel,
    };
}
|
|
62
|
+
/**
 * Zero every session counter and empty the per-model table in place, then
 * emit a debug log line. The byModel object itself is kept (it is a const
 * binding); only its keys are removed.
 */
export function resetInferenceMetrics() {
    localCalls = cloudCalls = 0;
    totalPromptTokens = totalCompletionTokens = 0;
    totalLatencyMs = 0;
    Object.keys(byModel).forEach((name) => {
        delete byModel[name];
    });
    debugLog("[inference-metrics] Session metrics reset");
}
|
|
73
|
+
export function formatInferenceMetrics() {
|
|
74
|
+
const snap = getInferenceSnapshot();
|
|
75
|
+
if (snap.totalCalls === 0)
|
|
47
76
|
return "";
|
|
48
77
|
const lines = [
|
|
49
78
|
`\n📊 Inference Metrics (this session):`,
|
|
50
|
-
` Total calls: ${
|
|
51
|
-
` Tokens: ${
|
|
52
|
-
` Avg latency: ${
|
|
79
|
+
` Total calls: ${snap.totalCalls} — Local: ${snap.localCalls} (${snap.localPct}%) | Cloud: ${snap.cloudCalls} (${snap.cloudPct}%)`,
|
|
80
|
+
` Tokens: ${snap.totalPromptTokens.toLocaleString()} in + ${snap.totalCompletionTokens.toLocaleString()} out = ${snap.totalTokens.toLocaleString()} total`,
|
|
81
|
+
` Avg latency: ${snap.avgLatencyMs}ms`,
|
|
53
82
|
];
|
|
54
|
-
const models = Object.entries(
|
|
83
|
+
const models = Object.entries(snap.byModel).sort((a, b) => b[1].calls - a[1].calls);
|
|
55
84
|
if (models.length > 1) {
|
|
56
85
|
lines.push(` By model:`);
|
|
57
86
|
for (const [name, stats] of models) {
|
|
58
|
-
const tokens = stats.
|
|
59
|
-
const avgMs = stats.calls > 0 ? Math.round(stats.
|
|
87
|
+
const tokens = stats.promptTokens + stats.completionTokens;
|
|
88
|
+
const avgMs = stats.calls > 0 ? Math.round(stats.totalLatencyMs / stats.calls) : 0;
|
|
60
89
|
lines.push(` ${name}: ${stats.calls} calls, ${tokens.toLocaleString()} tokens, avg ${avgMs}ms`);
|
|
61
90
|
}
|
|
62
91
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "19.2.
|
|
3
|
+
"version": "19.2.2",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-coder",
|
|
5
5
|
"description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
|
|
6
6
|
"module": "index.ts",
|