prism-mcp-server 19.2.2 → 19.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/server.js +9 -1
- package/dist/tools/index.js +1 -0
- package/dist/tools/sessionMemoryDefinitions.js +12 -0
- package/dist/utils/inferenceMetrics.js +10 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -93,7 +93,7 @@ Every `prism_infer` call tracks which model handled it (local Ollama vs cloud) a
|
|
|
93
93
|
synalux-27b: 2 calls, 1,500 tokens, avg 1,100ms
|
|
94
94
|
```
|
|
95
95
|
|
|
96
|
-
Local calls use actual Ollama token counts; cloud calls use estimates. Metrics are
|
|
96
|
+
Local calls use actual Ollama token counts (`prompt_eval_count` / `eval_count` from Ollama); cloud calls use char/4 estimates. Metrics are tracked locally — no portal dependency, no env vars, works offline. Per-call data is also forwarded to the Synalux portal as best-effort analytics (independent of the display).
|
|
97
97
|
|
|
98
98
|
### Session Drift Detection
|
|
99
99
|
|
|
@@ -363,7 +363,7 @@ All on-device models are free to run locally via Ollama on every tier. A subscri
|
|
|
363
363
|
| Cloud Coder (Web IDE) | -- | 100/day | 1,000/day | 100,000/day |
|
|
364
364
|
| Cloud search | -- | 50/day | 500/day | 100,000/day |
|
|
365
365
|
| Max output tokens | 512 | 1,024 | 2,048 | 4,096 |
|
|
366
|
-
| Cloud fallback | -- | Claude
|
|
366
|
+
| Cloud fallback | -- | Claude Opus 4.7 | Claude Opus 4.7 | Priority + Opus 4.7 |
|
|
367
367
|
| Grounding verifier (fact-check AI output) | -- | ✅ | ✅ | ✅ |
|
|
368
368
|
| Memory sync (cloud) | -- | ✅ | ✅ | ✅ |
|
|
369
369
|
| Knowledge / session memory | limited | unlimited | unlimited | unlimited |
|
|
@@ -389,6 +389,7 @@ Prism exposes 40+ MCP tools. The core memory loop:
|
|
|
389
389
|
| `verify_behavior` | Pre-edit scenario challenge — catch bad changes before they happen |
|
|
390
390
|
| `knowledge_ingest` | Teach Prism a codebase or document |
|
|
391
391
|
| `prism_infer` | Local-first inference (route/chat/code modes, thinking, cloud escalation) |
|
|
392
|
+
| `inference_metrics` | Session delegation stats on demand (call count, tokens, local/cloud split) |
|
|
392
393
|
|
|
393
394
|
### `prism_infer` — local-first inference with cloud escalation
|
|
394
395
|
|
|
@@ -553,6 +554,7 @@ Routing is automatic: `9b → 4b → cloud fallback` on desktop/server, `2b →
|
|
|
553
554
|
| `PRISM_SYNALUX_API_KEY` | Paid-tier portal key (`synalux_sk_...`) | -- (local if unset) |
|
|
554
555
|
| `LOCAL_LLM_URL` | Ollama endpoint | `http://localhost:11434` |
|
|
555
556
|
| `PRISM_FORCE_LOCAL` | Force local SQLite regardless of credentials | `false` |
|
|
557
|
+
| `TELEMETRY_WRITE_TOKEN` | Portal analytics token (optional — metrics display works without it) | -- |
|
|
556
558
|
|
|
557
559
|
With no variables set, Prism runs fully local. Set `PRISM_SYNALUX_API_KEY` (and leave `PRISM_STORAGE=auto`) to use the cloud backend.
|
|
558
560
|
|
|
@@ -561,11 +563,11 @@ With no variables set, Prism runs fully local. Set `PRISM_SYNALUX_API_KEY` (and
|
|
|
561
563
|
## Testing
|
|
562
564
|
|
|
563
565
|
```bash
|
|
564
|
-
npm test # full suite (vitest)
|
|
566
|
+
npm test # full suite (vitest) — 95 files, 2841 tests
|
|
565
567
|
npm test -- --coverage # coverage report
|
|
566
568
|
```
|
|
567
569
|
|
|
568
|
-
Coverage spans HRR retrieval, knowledge ingestion, the inference cascade and grounding verifier, compaction, the model picker, and storage round-trips.
|
|
570
|
+
Coverage spans HRR retrieval, knowledge ingestion, the inference cascade and grounding verifier, inference metrics, telemetry allowlist, delegation gate, compaction, the model picker, and storage round-trips.
|
|
569
571
|
|
|
570
572
|
---
|
|
571
573
|
|
package/dist/server.js
CHANGED
|
@@ -79,6 +79,7 @@ import { sanitizeMcpOutput } from "./utils/sanitizer.js";
|
|
|
79
79
|
import { getTracer, initTelemetry } from "./utils/telemetry.js";
|
|
80
80
|
import { context as otelContext, trace, SpanStatusCode } from "@opentelemetry/api";
|
|
81
81
|
import { ddInfo, ddError as ddLogError } from "./utils/ddLogger.js";
|
|
82
|
+
import { inferenceMetricsHandler } from "./utils/inferenceMetrics.js";
|
|
82
83
|
// ─── Import Tool Definitions (schemas) and Handlers (implementations) ─────
|
|
83
84
|
import { WEB_SEARCH_TOOL, BRAVE_WEB_SEARCH_CODE_MODE_TOOL, LOCAL_SEARCH_TOOL, BRAVE_LOCAL_SEARCH_CODE_MODE_TOOL, CODE_MODE_TRANSFORM_TOOL, BRAVE_ANSWERS_TOOL, RESEARCH_PAPER_ANALYSIS_TOOL, webSearchHandler, braveWebSearchCodeModeHandler, localSearchHandler, braveLocalSearchCodeModeHandler, codeModeTransformHandler, braveAnswersHandler, researchPaperAnalysisHandler, } from "./tools/index.js";
|
|
84
85
|
// Session memory tools — only used if Supabase is configured
|
|
@@ -112,7 +113,9 @@ VERIFY_BEHAVIOR_TOOL, isVerifyBehaviorArgs,
|
|
|
112
113
|
// v12: Developer Onboarding & Enterprise Observability
|
|
113
114
|
ONBOARDING_WIZARD_TOOL, EXTRACT_ENTITIES_TOOL, API_ANALYTICS_TOOL, BACKUP_DATABASE_TOOL, CONFIGURE_NOTIFICATIONS_TOOL, QUERY_MEMORY_NATURAL_TOOL,
|
|
114
115
|
// v15.5: Knowledge Ingestion
|
|
115
|
-
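KNOWLEDGE_INGEST_TOOL, sessionSaveLedgerHandler, sessionSaveHandoffHandler, sessionLoadContextHandler, knowledgeSearchHandler, knowledgeForgetHandler,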
|
|
116
|
+
KNOWLEDGE_INGEST_TOOL,
|
|
117
|
+
// v19.2: Inference Metrics
|
|
118
|
+
INFERENCE_METRICS_TOOL, sessionSaveLedgerHandler, sessionSaveHandoffHandler, sessionLoadContextHandler, knowledgeSearchHandler, knowledgeForgetHandler,
|
|
116
119
|
// ─── v0.4.0: New tool handlers ───
|
|
117
120
|
compactLedgerHandler, sessionSearchMemoryHandler, backfillEmbeddingsHandler, sessionBackfillLinksHandler, sessionSynthesizeEdgesHandler, sessionCognitiveRouteHandler,
|
|
118
121
|
// ─── v2.0: Time Travel handlers ───
|
|
@@ -246,6 +249,8 @@ function buildSessionMemoryTools(autoloadList) {
|
|
|
246
249
|
QUERY_MEMORY_NATURAL_TOOL, // query_memory_natural — NL → structured memory search
|
|
247
250
|
// ─── v15.5: Knowledge Ingestion ───
|
|
248
251
|
KNOWLEDGE_INGEST_TOOL, // knowledge_ingest — chunk code, gen Q&A, store in graph
|
|
252
|
+
// ─── v19.2: Inference Metrics ───
|
|
253
|
+
INFERENCE_METRICS_TOOL, // inference_metrics — read-only session delegation stats
|
|
249
254
|
];
|
|
250
255
|
}
|
|
251
256
|
// ─── v0.4.0: Resource Subscription Tracking ──────────────────────
|
|
@@ -960,6 +965,9 @@ export function createServer() {
|
|
|
960
965
|
throw new Error("Session memory not configured.");
|
|
961
966
|
result = await knowledgeIngestHandler(args);
|
|
962
967
|
break;
|
|
968
|
+
case "inference_metrics":
|
|
969
|
+
result = await inferenceMetricsHandler();
|
|
970
|
+
break;
|
|
963
971
|
default:
|
|
964
972
|
result = {
|
|
965
973
|
content: [{ type: "text", text: `Unknown tool: ${name}` }],
|
package/dist/tools/index.js
CHANGED
|
@@ -63,6 +63,7 @@ export { verifyBehaviorHandler } from "./behavioralVerifierHandler.js";
|
|
|
63
63
|
// Chunks source code, generates Q&A via Claude Haiku, stores in knowledge graph.
|
|
64
64
|
// Three entry points: MCP tool, REST API, GitHub webhook.
|
|
65
65
|
export { KNOWLEDGE_INGEST_TOOL } from "./ingestDefinitions.js";
|
|
66
|
+
export { INFERENCE_METRICS_TOOL } from "./sessionMemoryDefinitions.js";
|
|
66
67
|
export { knowledgeIngestHandler, handleGitHubWebhook, ingestKnowledge, isIngestArgs } from "./ingestHandler.js";
|
|
67
68
|
// ── v15.4: prism_infer — local-first inference (RAM-gated cascade) ──
|
|
68
69
|
// Always available. Saves caller's cloud tokens by routing to local
|
|
package/dist/tools/sessionMemoryDefinitions.js
CHANGED
|
@@ -1790,3 +1790,15 @@ export function isVerifyBehaviorArgs(a) {
|
|
|
1790
1790
|
return false;
|
|
1791
1791
|
return true;
|
|
1792
1792
|
}
|
|
1793
|
+
// ─── v19.2: Inference Metrics Tool ──────────────────────────
|
|
1794
|
+
export const INFERENCE_METRICS_TOOL = {
|
|
1795
|
+
name: "inference_metrics",
|
|
1796
|
+
description: "Returns the current session's local-model inference metrics — call count, " +
|
|
1797
|
+
"local vs cloud split, token totals, per-model breakdown, and average latency. " +
|
|
1798
|
+
"Read-only, no arguments. Reflects prism_infer delegation usage only, not the " +
|
|
1799
|
+
"host model's (Claude's) own token spend (use /cost for that).",
|
|
1800
|
+
inputSchema: {
|
|
1801
|
+
type: "object",
|
|
1802
|
+
properties: {},
|
|
1803
|
+
},
|
|
1804
|
+
};
|
|
package/dist/utils/inferenceMetrics.js
CHANGED
|
@@ -70,12 +70,21 @@ export function resetInferenceMetrics() {
|
|
|
70
70
|
}
|
|
71
71
|
debugLog("[inference-metrics] Session metrics reset");
|
|
72
72
|
}
|
|
73
|
+
export async function inferenceMetricsHandler() {
|
|
74
|
+
const block = formatInferenceMetrics();
|
|
75
|
+
return {
|
|
76
|
+
content: [{
|
|
77
|
+
type: "text",
|
|
78
|
+
text: block || "No prism_infer calls this session. Metrics track local-model delegation only — not the host model's (Claude's) token spend.",
|
|
79
|
+
}],
|
|
80
|
+
};
|
|
81
|
+
}
|
|
73
82
|
export function formatInferenceMetrics() {
|
|
74
83
|
const snap = getInferenceSnapshot();
|
|
75
84
|
if (snap.totalCalls === 0)
|
|
76
85
|
return "";
|
|
77
86
|
const lines = [
|
|
78
|
-
`\n📊 Inference Metrics (this session):`,
|
|
87
|
+
`\n📊 Inference Metrics — local-model delegation (this session):`,
|
|
79
88
|
` Total calls: ${snap.totalCalls} — Local: ${snap.localCalls} (${snap.localPct}%) | Cloud: ${snap.cloudCalls} (${snap.cloudPct}%)`,
|
|
80
89
|
` Tokens: ${snap.totalPromptTokens.toLocaleString()} in + ${snap.totalCompletionTokens.toLocaleString()} out = ${snap.totalTokens.toLocaleString()} total`,
|
|
81
90
|
` Avg latency: ${snap.avgLatencyMs}ms`,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "19.2.2",
|
|
3
|
+
"version": "19.2.4",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-coder",
|
|
5
5
|
"description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
|
|
6
6
|
"module": "index.ts",
|