prism-mcp-server 19.2.1 → 19.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -93,7 +93,7 @@ Every `prism_infer` call tracks which model handled it (local Ollama vs cloud) a
93
93
  synalux-27b: 2 calls, 1,500 tokens, avg 1,100ms
94
94
  ```
95
95
 
96
- Local calls use actual Ollama token counts; cloud calls use estimates. Metrics are aggregated by the Synalux portal — Prism is a thin client that forwards per-call data and fetches the summary on demand.
96
+ Local calls use actual Ollama token counts (`prompt_eval_count` / `eval_count` from Ollama); cloud calls use char/4 estimates. Metrics are tracked locally — no portal dependency, no env vars, works offline. Per-call data is also forwarded to the Synalux portal as best-effort analytics (independent of the display).
97
97
 
98
98
  ### Session Drift Detection
99
99
 
@@ -363,7 +363,7 @@ All on-device models are free to run locally via Ollama on every tier. A subscri
363
363
  | Cloud Coder (Web IDE) | -- | 100/day | 1,000/day | 100,000/day |
364
364
  | Cloud search | -- | 50/day | 500/day | 100,000/day |
365
365
  | Max output tokens | 512 | 1,024 | 2,048 | 4,096 |
366
- | Cloud fallback | -- | Claude Sonnet 4 | Claude Sonnet 4 | Priority + Sonnet 4 |
366
+ | Cloud fallback | -- | Claude Opus 4.7 | Claude Opus 4.7 | Priority + Opus 4.7 |
367
367
  | Grounding verifier (fact-check AI output) | -- | ✅ | ✅ | ✅ |
368
368
  | Memory sync (cloud) | -- | ✅ | ✅ | ✅ |
369
369
  | Knowledge / session memory | limited | unlimited | unlimited | unlimited |
@@ -389,6 +389,7 @@ Prism exposes 40+ MCP tools. The core memory loop:
389
389
  | `verify_behavior` | Pre-edit scenario challenge — catch bad changes before they happen |
390
390
  | `knowledge_ingest` | Teach Prism a codebase or document |
391
391
  | `prism_infer` | Local-first inference (route/chat/code modes, thinking, cloud escalation) |
392
+ | `inference_metrics` | Session delegation stats on demand (call count, tokens, local/cloud split) |
392
393
 
393
394
  ### `prism_infer` — local-first inference with cloud escalation
394
395
 
@@ -553,6 +554,7 @@ Routing is automatic: `9b → 4b → cloud fallback` on desktop/server, `2b →
553
554
  | `PRISM_SYNALUX_API_KEY` | Paid-tier portal key (`synalux_sk_...`) | -- (local if unset) |
554
555
  | `LOCAL_LLM_URL` | Ollama endpoint | `http://localhost:11434` |
555
556
  | `PRISM_FORCE_LOCAL` | Force local SQLite regardless of credentials | `false` |
557
+ | `TELEMETRY_WRITE_TOKEN` | Portal analytics token (optional — metrics display works without it) | -- |
556
558
 
557
559
  With no variables set, Prism runs fully local. Set `PRISM_SYNALUX_API_KEY` (and leave `PRISM_STORAGE=auto`) to use the cloud backend.
558
560
 
@@ -561,11 +563,11 @@ With no variables set, Prism runs fully local. Set `PRISM_SYNALUX_API_KEY` (and
561
563
  ## Testing
562
564
 
563
565
  ```bash
564
- npm test # full suite (vitest)
566
+ npm test # full suite (vitest) — 95 files, 2841 tests
565
567
  npm test -- --coverage # coverage report
566
568
  ```
567
569
 
568
- Coverage spans HRR retrieval, knowledge ingestion, the inference cascade and grounding verifier, compaction, the model picker, and storage round-trips.
570
+ Coverage spans HRR retrieval, knowledge ingestion, the inference cascade and grounding verifier, inference metrics, telemetry allowlist, delegation gate, compaction, the model picker, and storage round-trips.
569
571
 
570
572
  ---
571
573
 
package/dist/server.js CHANGED
@@ -79,6 +79,7 @@ import { sanitizeMcpOutput } from "./utils/sanitizer.js";
79
79
  import { getTracer, initTelemetry } from "./utils/telemetry.js";
80
80
  import { context as otelContext, trace, SpanStatusCode } from "@opentelemetry/api";
81
81
  import { ddInfo, ddError as ddLogError } from "./utils/ddLogger.js";
82
+ import { inferenceMetricsHandler } from "./utils/inferenceMetrics.js";
82
83
  // ─── Import Tool Definitions (schemas) and Handlers (implementations) ─────
83
84
  import { WEB_SEARCH_TOOL, BRAVE_WEB_SEARCH_CODE_MODE_TOOL, LOCAL_SEARCH_TOOL, BRAVE_LOCAL_SEARCH_CODE_MODE_TOOL, CODE_MODE_TRANSFORM_TOOL, BRAVE_ANSWERS_TOOL, RESEARCH_PAPER_ANALYSIS_TOOL, webSearchHandler, braveWebSearchCodeModeHandler, localSearchHandler, braveLocalSearchCodeModeHandler, codeModeTransformHandler, braveAnswersHandler, researchPaperAnalysisHandler, } from "./tools/index.js";
84
85
  // Session memory tools — only used if Supabase is configured
@@ -112,7 +113,9 @@ VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs,
112
113
  // v12: Developer Onboarding & Enterprise Observability
113
114
  ONBOARDING_WIZARD_TOOL, EXTRACT_ENTITIES_TOOL, API_ANALYTICS_TOOL, BACKUP_DATABASE_TOOL, CONFIGURE_NOTIFICATIONS_TOOL, QUERY_MEMORY_NATURAL_TOOL,
114
115
  // v15.5: Knowledge Ingestion
115
- KNOWLEDGE_INGEST_TOOL, sessionSaveLedgerHandler, sessionSaveHandoffHandler, sessionLoadContextHandler, knowledgeSearchHandler, knowledgeForgetHandler,
116
+ KNOWLEDGE_INGEST_TOOL,
117
+ // v19.2: Inference Metrics
118
+ INFERENCE_METRICS_TOOL, sessionSaveLedgerHandler, sessionSaveHandoffHandler, sessionLoadContextHandler, knowledgeSearchHandler, knowledgeForgetHandler,
116
119
  // ─── v0.4.0: New tool handlers ───
117
120
  compactLedgerHandler, sessionSearchMemoryHandler, backfillEmbeddingsHandler, sessionBackfillLinksHandler, sessionSynthesizeEdgesHandler, sessionCognitiveRouteHandler,
118
121
  // ─── v2.0: Time Travel handlers ───
@@ -246,6 +249,8 @@ function buildSessionMemoryTools(autoloadList) {
246
249
  QUERY_MEMORY_NATURAL_TOOL, // query_memory_natural — NL → structured memory search
247
250
  // ─── v15.5: Knowledge Ingestion ───
248
251
  KNOWLEDGE_INGEST_TOOL, // knowledge_ingest — chunk code, gen Q&A, store in graph
252
+ // ─── v19.2: Inference Metrics ───
253
+ INFERENCE_METRICS_TOOL, // inference_metrics — read-only session delegation stats
249
254
  ];
250
255
  }
251
256
  // ─── v0.4.0: Resource Subscription Tracking ──────────────────────
@@ -960,6 +965,9 @@ export function createServer() {
960
965
  throw new Error("Session memory not configured.");
961
966
  result = await knowledgeIngestHandler(args);
962
967
  break;
968
+ case "inference_metrics":
969
+ result = await inferenceMetricsHandler();
970
+ break;
963
971
  default:
964
972
  result = {
965
973
  content: [{ type: "text", text: `Unknown tool: ${name}` }],
@@ -63,6 +63,7 @@ export { verifyBehaviorHandler } from "./behavioralVerifierHandler.js";
63
63
  // Chunks source code, generates Q&A via Claude Haiku, stores in knowledge graph.
64
64
  // Three entry points: MCP tool, REST API, GitHub webhook.
65
65
  export { KNOWLEDGE_INGEST_TOOL } from "./ingestDefinitions.js";
66
+ export { INFERENCE_METRICS_TOOL } from "./sessionMemoryDefinitions.js";
66
67
  export { knowledgeIngestHandler, handleGitHubWebhook, ingestKnowledge, isIngestArgs } from "./ingestHandler.js";
67
68
  // ── v15.4: prism_infer — local-first inference (RAM-gated cascade) ──
68
69
  // Always available. Saves caller's cloud tokens by routing to local
@@ -1790,3 +1790,15 @@ export function isVerifyBehaviorArgs(a) {
1790
1790
  return false;
1791
1791
  return true;
1792
1792
  }
1793
+ // ─── v19.2: Inference Metrics Tool ──────────────────────────
1794
+ export const INFERENCE_METRICS_TOOL = {
1795
+ name: "inference_metrics",
1796
+ description: "Returns the current session's local-model inference metrics — call count, " +
1797
+ "local vs cloud split, token totals, per-model breakdown, and average latency. " +
1798
+ "Read-only, no arguments. Reflects prism_infer delegation usage only, not the " +
1799
+ "host model's (Claude's) own token spend (use /cost for that).",
1800
+ inputSchema: {
1801
+ type: "object",
1802
+ properties: {},
1803
+ },
1804
+ };
@@ -18,6 +18,7 @@
18
18
  */
19
19
  import { isSessionTaskRouteArgs, } from "./sessionMemoryDefinitions.js";
20
20
  import { getStorage } from "../storage/index.js";
21
+ import { getSetting } from "../storage/configStorage.js";
21
22
  import { getExperienceBias } from "./routerExperience.js";
22
23
  import { toKeywordArray } from "../utils/keywordExtractor.js";
23
24
  import { callLocalLlm } from "../utils/localLlm.js";
@@ -281,6 +282,25 @@ export async function sessionTaskRouteHandler(args) {
281
282
  isError: true,
282
283
  };
283
284
  }
285
+ // Delegation opt-in gate: if delegation_enabled is not "true", always route to host.
286
+ // This enforces the prism-infer-delegation skill's "off by default" rule in code.
287
+ const delegationEnabled = await getSetting("delegation_enabled", "false");
288
+ if (delegationEnabled !== "true") {
289
+ return {
290
+ content: [{
291
+ type: "text",
292
+ text: JSON.stringify({
293
+ target: "host",
294
+ confidence: 1.0,
295
+ complexity_score: 5,
296
+ rationale: "Delegation is off (default). Enable with: configure_notifications({setting: 'delegation_enabled', value: 'true'}) or via the Prism dashboard.",
297
+ recommended_tool: null,
298
+ delegation_enabled: false,
299
+ }),
300
+ }],
301
+ isError: false,
302
+ };
303
+ }
284
304
  const result = computeRoute(args);
285
305
  // v7.2.0: Experience-based bias adjustment
286
306
  if (args.project) {
@@ -70,12 +70,21 @@ export function resetInferenceMetrics() {
70
70
  }
71
71
  debugLog("[inference-metrics] Session metrics reset");
72
72
  }
73
+ export async function inferenceMetricsHandler() {
74
+ const block = formatInferenceMetrics();
75
+ return {
76
+ content: [{
77
+ type: "text",
78
+ text: block || "No prism_infer calls this session. Metrics track local-model delegation only — not the host model's (Claude's) token spend.",
79
+ }],
80
+ };
81
+ }
73
82
  export function formatInferenceMetrics() {
74
83
  const snap = getInferenceSnapshot();
75
84
  if (snap.totalCalls === 0)
76
85
  return "";
77
86
  const lines = [
78
- `\n📊 Inference Metrics (this session):`,
87
+ `\n📊 Inference Metrics — local-model delegation (this session):`,
79
88
  ` Total calls: ${snap.totalCalls} — Local: ${snap.localCalls} (${snap.localPct}%) | Cloud: ${snap.cloudCalls} (${snap.cloudPct}%)`,
80
89
  ` Tokens: ${snap.totalPromptTokens.toLocaleString()} in + ${snap.totalCompletionTokens.toLocaleString()} out = ${snap.totalTokens.toLocaleString()} total`,
81
90
  ` Avg latency: ${snap.avgLatencyMs}ms`,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prism-mcp-server",
3
- "version": "19.2.1",
3
+ "version": "19.2.4",
4
4
  "mcpName": "io.github.dcostenco/prism-coder",
5
5
  "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
6
6
  "module": "index.ts",