npm - prism-mcp-server - Versions diffs - 19.2.1 → 19.2.4 - Mend

prism-mcp-server 19.2.1 → 19.2.4

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md +6 -4
package/dist/server.js +9 -1
package/dist/tools/index.js +1 -0
package/dist/tools/sessionMemoryDefinitions.js +12 -0
package/dist/tools/taskRouterHandler.js +20 -0
package/dist/utils/inferenceMetrics.js +10 -1
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -93,7 +93,7 @@ Every `prism_infer` call tracks which model handled it (local Ollama vs cloud) a
     synalux-27b: 2 calls, 1,500 tokens, avg 1,100ms
 ```
-Local calls use actual Ollama token counts; cloud calls use estimates. Metrics are aggregated by the Synalux portal — Prism is a thin client that forwards per-call data and fetches the summary on demand.
+Local calls use actual Ollama token counts (`prompt_eval_count` / `eval_count` from Ollama); cloud calls use char/4 estimates. Metrics are tracked locally — no portal dependency, no env vars, works offline. Per-call data is also forwarded to the Synalux portal as best-effort analytics (independent of the display).
 ### Session Drift Detection
@@ -363,7 +363,7 @@ All on-device models are free to run locally via Ollama on every tier. A subscri
 | Cloud Coder (Web IDE) | -- | 100/day | 1,000/day | 100,000/day |
 | Cloud search | -- | 50/day | 500/day | 100,000/day |
 | Max output tokens | 512 | 1,024 | 2,048 | 4,096 |
-| Cloud fallback | -- | Claude Sonnet 4 | Claude Sonnet 4 | Priority + Sonnet 4 |
+| Cloud fallback | -- | Claude Opus 4.7 | Claude Opus 4.7 | Priority + Opus 4.7 |
 | Grounding verifier (fact-check AI output) | -- | ✅ | ✅ | ✅ |
 | Memory sync (cloud) | -- | ✅ | ✅ | ✅ |
 | Knowledge / session memory | limited | unlimited | unlimited | unlimited |
@@ -389,6 +389,7 @@ Prism exposes 40+ MCP tools. The core memory loop:
 | `verify_behavior` | Pre-edit scenario challenge — catch bad changes before they happen |
 | `knowledge_ingest` | Teach Prism a codebase or document |
 | `prism_infer` | Local-first inference (route/chat/code modes, thinking, cloud escalation) |
+| `inference_metrics` | Session delegation stats on demand (call count, tokens, local/cloud split) |
 ### `prism_infer` — local-first inference with cloud escalation
@@ -553,6 +554,7 @@ Routing is automatic: `9b → 4b → cloud fallback` on desktop/server, `2b →
 | `PRISM_SYNALUX_API_KEY` | Paid-tier portal key (`synalux_sk_...`) | -- (local if unset) |
 | `LOCAL_LLM_URL` | Ollama endpoint | `http://localhost:11434` |
 | `PRISM_FORCE_LOCAL` | Force local SQLite regardless of credentials | `false` |
+| `TELEMETRY_WRITE_TOKEN` | Portal analytics token (optional — metrics display works without it) | -- |
 With no variables set, Prism runs fully local. Set `PRISM_SYNALUX_API_KEY` (and leave `PRISM_STORAGE=auto`) to use the cloud backend.
@@ -561,11 +563,11 @@ With no variables set, Prism runs fully local. Set `PRISM_SYNALUX_API_KEY` (and
 ## Testing
 ```bash
-npm test                 # full suite (vitest)
+npm test                 # full suite (vitest) — 95 files, 2841 tests
 npm test -- --coverage   # coverage report
 ```
-Coverage spans HRR retrieval, knowledge ingestion, the inference cascade and grounding verifier, compaction, the model picker, and storage round-trips.
+Coverage spans HRR retrieval, knowledge ingestion, the inference cascade and grounding verifier, inference metrics, telemetry allowlist, delegation gate, compaction, the model picker, and storage round-trips.
 ---

package/dist/server.js CHANGED Viewed

@@ -79,6 +79,7 @@ import { sanitizeMcpOutput } from "./utils/sanitizer.js";
 import { getTracer, initTelemetry } from "./utils/telemetry.js";
 import { context as otelContext, trace, SpanStatusCode } from "@opentelemetry/api";
 import { ddInfo, ddError as ddLogError } from "./utils/ddLogger.js";
+import { inferenceMetricsHandler } from "./utils/inferenceMetrics.js";
 // ─── Import Tool Definitions (schemas) and Handlers (implementations) ─────
 import { WEB_SEARCH_TOOL, BRAVE_WEB_SEARCH_CODE_MODE_TOOL, LOCAL_SEARCH_TOOL, BRAVE_LOCAL_SEARCH_CODE_MODE_TOOL, CODE_MODE_TRANSFORM_TOOL, BRAVE_ANSWERS_TOOL, RESEARCH_PAPER_ANALYSIS_TOOL, webSearchHandler, braveWebSearchCodeModeHandler, localSearchHandler, braveLocalSearchCodeModeHandler, codeModeTransformHandler, braveAnswersHandler, researchPaperAnalysisHandler, } from "./tools/index.js";
 // Session memory tools — only used if Supabase is configured
@@ -112,7 +113,9 @@ VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs,
 // v12: Developer Onboarding & Enterprise Observability
 ONBOARDING_WIZARD_TOOL, EXTRACT_ENTITIES_TOOL, API_ANALYTICS_TOOL, BACKUP_DATABASE_TOOL, CONFIGURE_NOTIFICATIONS_TOOL, QUERY_MEMORY_NATURAL_TOOL,
 // v15.5: Knowledge Ingestion
-KNOWLEDGE_INGEST_TOOL, sessionSaveLedgerHandler, sessionSaveHandoffHandler, sessionLoadContextHandler, knowledgeSearchHandler, knowledgeForgetHandler,
+KNOWLEDGE_INGEST_TOOL,
+// v19.2: Inference Metrics
+INFERENCE_METRICS_TOOL, sessionSaveLedgerHandler, sessionSaveHandoffHandler, sessionLoadContextHandler, knowledgeSearchHandler, knowledgeForgetHandler,
 // ─── v0.4.0: New tool handlers ───
 compactLedgerHandler, sessionSearchMemoryHandler, backfillEmbeddingsHandler, sessionBackfillLinksHandler, sessionSynthesizeEdgesHandler, sessionCognitiveRouteHandler,
 // ─── v2.0: Time Travel handlers ───
@@ -246,6 +249,8 @@ function buildSessionMemoryTools(autoloadList) {
         QUERY_MEMORY_NATURAL_TOOL, // query_memory_natural — NL → structured memory search
         // ─── v15.5: Knowledge Ingestion ───
         KNOWLEDGE_INGEST_TOOL, // knowledge_ingest — chunk code, gen Q&A, store in graph
+        // ─── v19.2: Inference Metrics ───
+        INFERENCE_METRICS_TOOL, // inference_metrics — read-only session delegation stats
     ];
 }
 // ─── v0.4.0: Resource Subscription Tracking ──────────────────────
@@ -960,6 +965,9 @@ export function createServer() {
                             throw new Error("Session memory not configured.");
                         result = await knowledgeIngestHandler(args);
                         break;
+                    case "inference_metrics":
+                        result = await inferenceMetricsHandler();
+                        break;
                     default:
                         result = {
                             content: [{ type: "text", text: `Unknown tool: ${name}` }],

package/dist/tools/index.js CHANGED Viewed

@@ -63,6 +63,7 @@ export { verifyBehaviorHandler } from "./behavioralVerifierHandler.js";
 // Chunks source code, generates Q&A via Claude Haiku, stores in knowledge graph.
 // Three entry points: MCP tool, REST API, GitHub webhook.
 export { KNOWLEDGE_INGEST_TOOL } from "./ingestDefinitions.js";
+export { INFERENCE_METRICS_TOOL } from "./sessionMemoryDefinitions.js";
 export { knowledgeIngestHandler, handleGitHubWebhook, ingestKnowledge, isIngestArgs } from "./ingestHandler.js";
 // ── v15.4: prism_infer — local-first inference (RAM-gated cascade) ──
 // Always available. Saves caller's cloud tokens by routing to local

package/dist/tools/sessionMemoryDefinitions.js CHANGED Viewed

@@ -1790,3 +1790,15 @@ export function isVerifyBehaviorArgs(a) {
         return false;
     return true;
 }
+// ─── v19.2: Inference Metrics Tool ──────────────────────────
+export const INFERENCE_METRICS_TOOL = {
+    name: "inference_metrics",
+    description: "Returns the current session's local-model inference metrics — call count, " +
+        "local vs cloud split, token totals, per-model breakdown, and average latency. " +
+        "Read-only, no arguments. Reflects prism_infer delegation usage only, not the " +
+        "host model's (Claude's) own token spend (use /cost for that).",
+    inputSchema: {
+        type: "object",
+        properties: {},
+    },
+};

package/dist/tools/taskRouterHandler.js CHANGED Viewed

@@ -18,6 +18,7 @@
  */
 import { isSessionTaskRouteArgs, } from "./sessionMemoryDefinitions.js";
 import { getStorage } from "../storage/index.js";
+import { getSetting } from "../storage/configStorage.js";
 import { getExperienceBias } from "./routerExperience.js";
 import { toKeywordArray } from "../utils/keywordExtractor.js";
 import { callLocalLlm } from "../utils/localLlm.js";
@@ -281,6 +282,25 @@ export async function sessionTaskRouteHandler(args) {
             isError: true,
         };
     }
+    // Delegation opt-in gate: if delegation_enabled is not "true", always route to host.
+    // This enforces the prism-infer-delegation skill's "off by default" rule in code.
+    const delegationEnabled = await getSetting("delegation_enabled", "false");
+    if (delegationEnabled !== "true") {
+        return {
+            content: [{
+                    type: "text",
+                    text: JSON.stringify({
+                        target: "host",
+                        confidence: 1.0,
+                        complexity_score: 5,
+                        rationale: "Delegation is off (default). Enable with: configure_notifications({setting: 'delegation_enabled', value: 'true'}) or via the Prism dashboard.",
+                        recommended_tool: null,
+                        delegation_enabled: false,
+                    }),
+                }],
+            isError: false,
+        };
+    }
     const result = computeRoute(args);
     // v7.2.0: Experience-based bias adjustment
     if (args.project) {

package/dist/utils/inferenceMetrics.js CHANGED Viewed

@@ -70,12 +70,21 @@ export function resetInferenceMetrics() {
     }
     debugLog("[inference-metrics] Session metrics reset");
 }
+export async function inferenceMetricsHandler() {
+    const block = formatInferenceMetrics();
+    return {
+        content: [{
+                type: "text",
+                text: block || "No prism_infer calls this session. Metrics track local-model delegation only — not the host model's (Claude's) token spend.",
+            }],
+    };
+}
 export function formatInferenceMetrics() {
     const snap = getInferenceSnapshot();
     if (snap.totalCalls === 0)
         return "";
     const lines = [
-        `\n📊 Inference Metrics (this session):`,
+        `\n📊 Inference Metrics — local-model delegation (this session):`,
         `  Total calls: ${snap.totalCalls} — Local: ${snap.localCalls} (${snap.localPct}%) | Cloud: ${snap.cloudCalls} (${snap.cloudPct}%)`,
         `  Tokens: ${snap.totalPromptTokens.toLocaleString()} in + ${snap.totalCompletionTokens.toLocaleString()} out = ${snap.totalTokens.toLocaleString()} total`,
         `  Avg latency: ${snap.avgLatencyMs}ms`,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "prism-mcp-server",
-  "version": "19.2.1",
+  "version": "19.2.4",
   "mcpName": "io.github.dcostenco/prism-coder",
   "description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
   "module": "index.ts",