prism-mcp-server 19.1.0 → 19.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -0
- package/dist/config.js +4 -4
- package/dist/tools/compactionHandler.js +2 -2
- package/dist/tools/ledgerHandlers.js +7 -0
- package/dist/tools/prismInferHandler.js +48 -7
- package/dist/tools/taskRouterHandler.js +2 -2
- package/dist/utils/ddLogger.js +60 -19
- package/dist/utils/inferenceMetrics.js +93 -0
- package/dist/utils/localLlm.js +2 -2
- package/dist/utils/nerExtractor.js +1 -1
- package/dist/utils/qualityGate.js +28 -4
- package/dist/utils/safetyGate.js +104 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -78,6 +78,23 @@ Every session is logged with files changed, decisions made, and TODOs. Search, f
|
|
|
78
78
|
<img src="docs/session-ledger.jpg" alt="Session Ledger — 93 sessions, 847 decisions logged across 12 projects" width="700" />
|
|
79
79
|
</p>
|
|
80
80
|
|
|
81
|
+
### Inference Metrics — see where your tokens go
|
|
82
|
+
|
|
83
|
+
Every `prism_infer` call tracks which model handled it (local Ollama vs cloud) and how many tokens were consumed. When you save a session, Prism shows a summary:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
📊 Inference Metrics (this session):
|
|
87
|
+
Total calls: 12 — Local: 10 (83%) | Cloud: 2 (17%)
|
|
88
|
+
Tokens: 8,420 in + 3,150 out = 11,570 total
|
|
89
|
+
Avg latency: 1290ms
|
|
90
|
+
By model:
|
|
91
|
+
prism-coder:27b: 6 calls, 7,200 tokens, avg 1800ms
|
|
92
|
+
prism-coder:9b: 4 calls, 2,870 tokens, avg 620ms
|
|
93
|
+
synalux-27b: 2 calls, 1,500 tokens, avg 1100ms
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
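Local calls use actual Ollama token counts; cloud calls use estimates (about one token per four characters). The summary is accumulated locally by the Prism process, so it is shown even when the Synalux portal is unreachable or not configured; per-call usage is also forwarded to the portal on a best-effort basis as a separate analytics stream.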
|
|
97
|
+
|
|
81
98
|
### Session Drift Detection
|
|
82
99
|
|
|
83
100
|
Long agent sessions can wander from their original goal. `session_detect_drift` compares current work against the stated goal and returns `on_track / minor_drift / major_drift` so the agent can self-correct.
|
|
@@ -204,6 +221,91 @@ python3 tests/benchmarks/prism-routing-100/benchmark.py --models 2b 4b 9b 27b
|
|
|
204
221
|
|
|
205
222
|
**Memory uplift (LoCoMo-Plus, self-published).** A separate long-context dialogue benchmark ([dcostenco/Locomo-Plus](https://github.com/dcostenco/Locomo-Plus)) measures how much structured memory helps a base model retain multi-day context. Results show large gains when a model is paired with Prism memory versus running raw. Note this benchmark is authored, run, and LLM-judged by this project — treat it as a reproducible demonstration, not an independent third-party result, and run it yourself with the commands in that repo.
|
|
206
223
|
|
|
224
|
+
### Code Generation Quality (27B vs Claude Opus)
|
|
225
|
+
|
|
226
|
+
Three progressively harder Python tasks run through `prism_infer(mode:"code", think:true)` on the local 27B and compared with Claude Opus. Both produce correct, production-quality code. The 27B is slightly more verbose (docstrings, examples); Opus is slightly tighter (`__slots__`, early-exit DFS). On routine coding the 27B at $0 replaces cloud calls entirely.
|
|
227
|
+
|
|
228
|
+
| Task | Local 27B | Claude Opus | Verdict |
|
|
229
|
+
|------|-----------|-------------|---------|
|
|
230
|
+
| Fibonacci with memoization | `@lru_cache`, ValueError on negative, docstring | Nested `_fib` to keep cache private | Both correct, equivalent |
|
|
231
|
+
| LRU Cache (OrderedDict, O(1)) | `Any` keys, isinstance capacity check, `__repr__` | `Hashable` key type (more precise), same ops | Both correct, Opus marginally tighter |
|
|
232
|
+
| Trie with autocomplete | `.lower()` normalization, collect+sort+slice | `__slots__` on TrieNode, early-exit DFS at limit | Both correct, Opus slightly more optimized |
|
|
233
|
+
|
|
234
|
+
<details>
|
|
235
|
+
<summary>Local 27B output — Trie with autocomplete (hardest task)</summary>
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
class TrieNode:
|
|
239
|
+
def __init__(self):
|
|
240
|
+
self.children: dict[str, 'TrieNode'] = {}
|
|
241
|
+
self.is_end_of_word: bool = False
|
|
242
|
+
|
|
243
|
+
class Trie:
|
|
244
|
+
def __init__(self):
|
|
245
|
+
self.root: TrieNode = TrieNode()
|
|
246
|
+
|
|
247
|
+
def insert(self, word: str) -> None:
|
|
248
|
+
node = self.root
|
|
249
|
+
for char in word.lower():
|
|
250
|
+
if char not in node.children:
|
|
251
|
+
node.children[char] = TrieNode()
|
|
252
|
+
node = node.children[char]
|
|
253
|
+
node.is_end_of_word = True
|
|
254
|
+
|
|
255
|
+
def search(self, word: str) -> bool:
|
|
256
|
+
node = self._get_node(word.lower())
|
|
257
|
+
return node is not None and node.is_end_of_word
|
|
258
|
+
|
|
259
|
+
def starts_with(self, prefix: str) -> bool:
|
|
260
|
+
return self._get_node(prefix.lower()) is not None
|
|
261
|
+
|
|
262
|
+
def autocomplete(self, prefix: str, limit: int = 5) -> list[str]:
|
|
263
|
+
node = self._get_node(prefix.lower())
|
|
264
|
+
if node is None:
|
|
265
|
+
return []
|
|
266
|
+
results: list[str] = []
|
|
267
|
+
self._collect_words(node, prefix.lower(), results)
|
|
268
|
+
results.sort()
|
|
269
|
+
return results[:limit]
|
|
270
|
+
|
|
271
|
+
def _get_node(self, key: str) -> 'TrieNode | None':
|
|
272
|
+
node = self.root
|
|
273
|
+
for char in key:
|
|
274
|
+
if char not in node.children:
|
|
275
|
+
return None
|
|
276
|
+
node = node.children[char]
|
|
277
|
+
return node
|
|
278
|
+
|
|
279
|
+
def _collect_words(self, node: TrieNode, prefix: str, results: list[str]) -> None:
|
|
280
|
+
if node.is_end_of_word:
|
|
281
|
+
results.append(prefix)
|
|
282
|
+
for char, child in sorted(node.children.items()):
|
|
283
|
+
self._collect_words(child, prefix + char, results)
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
</details>
|
|
287
|
+
|
|
288
|
+
| Metric | Local 27B | Cloud (Opus) |
|
|
289
|
+
|--------|-----------|-------------|
|
|
290
|
+
| Latency (Trie task) | ~30s | ~8s |
|
|
291
|
+
| Cost | $0 | ~$0.05 |
|
|
292
|
+
| Think mode | Enabled (stripped before serving) | N/A |
|
|
293
|
+
| Quality gate | Passed (no escalation needed) | N/A |
|
|
294
|
+
|
|
295
|
+
### Cloud Escalation in Practice (`cloud_fallback: true`)
|
|
296
|
+
|
|
297
|
+
The same three tasks with `cloud_fallback: true` — the quality gate decides whether local output is good enough or needs cloud escalation.
|
|
298
|
+
|
|
299
|
+
| Task | used_cloud | Quality Gate | Latency | What happened |
|
|
300
|
+
|------|:----------:|-------------|---------|---------------|
|
|
301
|
+
| Fibonacci (simple) | **no** | Passed | 11s | 27B served directly, $0 |
|
|
302
|
+
| LRU Cache (medium) | **no** | Passed | 21s | 27B served directly, $0 |
|
|
303
|
+
| Trie (hard) | **yes** | `loop_detected` | 55s | 27B looped → gate caught it → escalated to cloud 27B |
|
|
304
|
+
|
|
305
|
+
The quality gate detected repeated sentences (the same sentence appearing at least 3 times among at least 6 sentences total) in the 27B's Trie output and escalated automatically. The cloud fallback returned clean code. On a second run of the same prompt, the 27B produced clean output without escalation — the loop is stochastic, not systematic.
|
|
306
|
+
|
|
307
|
+
**Takeaway:** for ~80–90% of coding tasks, the 27B handles everything locally at $0. The quality gate + cloud escalation exists as a safety net for the remaining cases where the local model loops, truncates, or produces empty output. Paid tiers get automatic escalation; free tier gets the local result with a warning.
|
|
308
|
+
|
|
207
309
|
---
|
|
208
310
|
|
|
209
311
|
## Why Prism Coder
|
package/dist/config.js
CHANGED
|
@@ -307,11 +307,11 @@ const rawTiebreakerEpsilon = parseFloat(process.env.PRISM_TURBOQUANT_TIEBREAKER_
|
|
|
307
307
|
export const PRISM_TURBOQUANT_TIEBREAKER_EPSILON = Number.isFinite(rawTiebreakerEpsilon) && rawTiebreakerEpsilon >= 0
|
|
308
308
|
? rawTiebreakerEpsilon
|
|
309
309
|
: 0;
|
|
310
|
-
// ─── v9.x: Local LLM (prism-coder
|
|
310
|
+
// ─── v9.x: Local LLM (prism-coder) Integration ────────────────────────────
|
|
311
311
|
// Enables background tasks (compaction, task-router fallback, pipeline ops)
|
|
312
312
|
// to use a local Ollama model instead of the cloud LLM provider.
|
|
313
313
|
//
|
|
314
|
-
// Default model is prism-coder:
|
|
314
|
+
// Default model is prism-coder:9b — fine-tuned on Prism tool schemas.
|
|
315
315
|
// Disabled by default so existing deployments are unaffected.
|
|
316
316
|
//
|
|
317
317
|
// Set PRISM_LOCAL_LLM_ENABLED=true to activate.
|
|
@@ -319,10 +319,10 @@ export const PRISM_TURBOQUANT_TIEBREAKER_EPSILON = Number.isFinite(rawTiebreaker
|
|
|
319
319
|
// Set PRISM_LOCAL_LLM_URL to override the Ollama endpoint (default: localhost:11434).
|
|
320
320
|
// Set PRISM_LOCAL_LLM_TIMEOUT_MS to override per-call timeout (default: 60000, max: 300000).
|
|
321
321
|
// Set PRISM_STRICT_LOCAL_MODE=true to block cloud fallback when local LLM is enabled (HIPAA).
|
|
322
|
-
/** Master switch — enables the local prism-coder
|
|
322
|
+
/** Master switch — enables the local prism-coder LLM for background tasks. */
|
|
323
323
|
export const PRISM_LOCAL_LLM_ENABLED = process.env.PRISM_LOCAL_LLM_ENABLED === "true"; // Opt-in, default false
|
|
324
324
|
/** Ollama model tag to use for local LLM calls. */
|
|
325
|
-
export const PRISM_LOCAL_LLM_MODEL = (process.env.PRISM_LOCAL_LLM_MODEL || "prism-coder:
|
|
325
|
+
export const PRISM_LOCAL_LLM_MODEL = (process.env.PRISM_LOCAL_LLM_MODEL || "prism-coder:9b").trim();
|
|
326
326
|
/** Ollama base URL. Override for remote Ollama instances. */
|
|
327
327
|
export const PRISM_LOCAL_LLM_URL = (process.env.PRISM_LOCAL_LLM_URL || "http://localhost:11434").trim();
|
|
328
328
|
/** Per-call timeout in ms. Prevents stalled background tasks. Capped at 300s. */
|
|
@@ -108,7 +108,7 @@ function parseCompactionResponse(response, source) {
|
|
|
108
108
|
}
|
|
109
109
|
async function summarizeEntries(entries) {
|
|
110
110
|
const prompt = buildCompactionPrompt(entries);
|
|
111
|
-
// ── Path 1: Local LLM (prism-coder:
|
|
111
|
+
// ── Path 1: Local LLM (prism-coder:9b) ───────────────────────────
|
|
112
112
|
if (PRISM_LOCAL_LLM_ENABLED) {
|
|
113
113
|
debugLog(`[compact_ledger] Attempting local LLM summarization (${entries.length} entries)`);
|
|
114
114
|
const localResponse = await callLocalLlm(prompt);
|
|
@@ -123,7 +123,7 @@ async function summarizeEntries(entries) {
|
|
|
123
123
|
if (PRISM_STRICT_LOCAL_MODE) {
|
|
124
124
|
throw new Error("[HIPAA] Local LLM failed and PRISM_STRICT_LOCAL_MODE=true. " +
|
|
125
125
|
"Cloud fallback is blocked to prevent unauthorized PHI disclosure. " +
|
|
126
|
-
"Ensure Ollama is running and prism-coder:
|
|
126
|
+
"Ensure Ollama is running and prism-coder:9b is available.");
|
|
127
127
|
}
|
|
128
128
|
debugLog(`[compact_ledger] Local LLM returned null — falling back to cloud LLM`);
|
|
129
129
|
}
|
|
@@ -89,6 +89,7 @@ const MEMORY_BOUNDARY_SUFFIX = '\n</prism_memory>';
|
|
|
89
89
|
* After saving, generates an embedding vector for the entry via fire-and-forget.
|
|
90
90
|
*/
|
|
91
91
|
import { computeEffectiveImportance, recordMemoryAccess } from "../utils/cognitiveMemory.js";
|
|
92
|
+
import { formatInferenceMetrics, resetInferenceMetrics } from "../utils/inferenceMetrics.js";
|
|
92
93
|
export async function sessionSaveLedgerHandler(args) {
|
|
93
94
|
if (!isSessionSaveLedgerArgs(args)) {
|
|
94
95
|
throw new Error("Invalid arguments for session_save_ledger");
|
|
@@ -229,6 +230,7 @@ export async function sessionSaveLedgerHandler(args) {
|
|
|
229
230
|
storage.decayImportance(project, PRISM_USER_ID, 30).catch((err) => {
|
|
230
231
|
debugLog(`[session_save_ledger] Background decay failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
231
232
|
});
|
|
233
|
+
const metricsBlock = formatInferenceMetrics();
|
|
232
234
|
return {
|
|
233
235
|
content: [{
|
|
234
236
|
type: "text",
|
|
@@ -238,6 +240,7 @@ export async function sessionSaveLedgerHandler(args) {
|
|
|
238
240
|
(files_changed?.length ? `Files changed: ${files_changed.length}\n` : "") +
|
|
239
241
|
(decisions?.length ? `Decisions: ${decisions.length}\n` : "") +
|
|
240
242
|
`📊 Embedding generation queued for semantic search.` +
|
|
243
|
+
metricsBlock +
|
|
241
244
|
resolverNote,
|
|
242
245
|
}],
|
|
243
246
|
isError: false,
|
|
@@ -548,11 +551,13 @@ export async function sessionSaveHandoffHandler(args, server) {
|
|
|
548
551
|
// Dynamic import itself failed — module not found or similar
|
|
549
552
|
console.error("[FactMerger] Module load failed (non-fatal): " + err));
|
|
550
553
|
}
|
|
554
|
+
const metricsBlock = formatInferenceMetrics();
|
|
551
555
|
// Build response text based on whether a CRDT merge occurred
|
|
552
556
|
const responseText = isMerged
|
|
553
557
|
? `🔄 Auto-merged conflict for "${project}" (v${expected_version} → v${newVersion})\n` +
|
|
554
558
|
`Strategy: ${JSON.stringify(mergeStrategy)}\n` +
|
|
555
559
|
(last_summary ? `Summary: ${last_summary}\n` : "") +
|
|
560
|
+
metricsBlock +
|
|
556
561
|
`\n🔑 Remember: pass expected_version: ${newVersion} on your next save ` +
|
|
557
562
|
`to maintain concurrency control.`
|
|
558
563
|
: `✅ Handoff ${data.status || "saved"} for project "${project}" ` +
|
|
@@ -561,6 +566,7 @@ export async function sessionSaveHandoffHandler(args, server) {
|
|
|
561
566
|
(open_todos?.length ? `Open TODOs: ${open_todos.length} items\n` : "") +
|
|
562
567
|
(active_branch ? `Active branch: ${active_branch}\n` : "") +
|
|
563
568
|
`📊 Embedding generation queued for semantic search.\n` +
|
|
569
|
+
metricsBlock +
|
|
564
570
|
`\n🔑 Remember: pass expected_version: ${newVersion} on your next save ` +
|
|
565
571
|
`to maintain concurrency control.`;
|
|
566
572
|
return {
|
|
@@ -575,6 +581,7 @@ export async function sessionLoadContextHandler(args) {
|
|
|
575
581
|
if (!isSessionLoadContextArgs(args)) {
|
|
576
582
|
throw new Error("Invalid arguments for session_load_context");
|
|
577
583
|
}
|
|
584
|
+
resetInferenceMetrics();
|
|
578
585
|
const { project, level = "standard", role } = args;
|
|
579
586
|
const maxTokens = args.max_tokens
|
|
580
587
|
|| parseInt(await getSetting("max_tokens", "0"), 10) || undefined; // v4.0: arg > dashboard setting > none
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* prism_infer — local-first inference tool
|
|
3
3
|
* ─────────────────────────────────────────────────────────────
|
|
4
4
|
* Save the caller's cloud tokens by routing to a local prism-coder
|
|
5
|
-
* model via Ollama. Tiers (27B/9B/
|
|
5
|
+
* model via Ollama. Tiers (27B/9B/4B/2B) auto-selected by free
|
|
6
6
|
* RAM, then capped by `model_ceiling` and the set of tags that are
|
|
7
7
|
* actually pulled into Ollama.
|
|
8
8
|
*
|
|
@@ -28,11 +28,13 @@ import { getEntitlements, clampCeiling } from "../utils/entitlements.js";
|
|
|
28
28
|
import { ddLog } from "../utils/ddLogger.js";
|
|
29
29
|
import { stripThink } from "../utils/thinkStrip.js";
|
|
30
30
|
import { passesQualityGate } from "../utils/qualityGate.js";
|
|
31
|
+
import { checkInputSafety, checkOutputSafety } from "../utils/safetyGate.js";
|
|
32
|
+
import { recordInference } from "../utils/inferenceMetrics.js";
|
|
31
33
|
// ─── Tool Definition ────────────────────────────────────────────
|
|
32
34
|
export const PRISM_INFER_TOOL = {
|
|
33
35
|
name: "prism_infer",
|
|
34
36
|
description: "Run an inference on a local prism-coder model (Ollama) to save cloud tokens. " +
|
|
35
|
-
"Picks the largest viable tier — 27B / 9B /
|
|
37
|
+
"Picks the largest viable tier — 27B / 9B / 4B / 2B — based on free RAM at call time, " +
|
|
36
38
|
"clamped by `model_ceiling` and what is actually pulled in Ollama. " +
|
|
37
39
|
"Falls through to the synalux portal cloud cascade (9B → 27B → Claude Opus 4.7) " +
|
|
38
40
|
"only when local is unviable AND `cloud_fallback=true`. " +
|
|
@@ -71,7 +73,7 @@ export const PRISM_INFER_TOOL = {
|
|
|
71
73
|
},
|
|
72
74
|
timeout_ms: {
|
|
73
75
|
type: "number",
|
|
74
|
-
description: "Override per-call timeout. Default scales with model size: 27B=120s, 9B=60s, 4B=20s,
|
|
76
|
+
description: "Override per-call timeout. Default scales with model size: 27B=120s, 9B=60s, 4B=20s, 2B=15s.",
|
|
75
77
|
},
|
|
76
78
|
evidence: {
|
|
77
79
|
type: "array",
|
|
@@ -242,7 +244,7 @@ async function callOllamaGenerate(url, model, prompt, system, maxTokens, tempera
|
|
|
242
244
|
const text = (data.message?.content ?? "").trim();
|
|
243
245
|
if (!text)
|
|
244
246
|
return { ok: false, reason: "empty_response" };
|
|
245
|
-
return { ok: true, text, doneReason: data.done_reason };
|
|
247
|
+
return { ok: true, text, doneReason: data.done_reason, promptTokens: data.prompt_eval_count, completionTokens: data.eval_count };
|
|
246
248
|
}
|
|
247
249
|
catch (err) {
|
|
248
250
|
const name = err instanceof Error ? err.name : "Unknown";
|
|
@@ -300,6 +302,19 @@ async function callSynaluxInference(prompt, maxTokens, timeoutMs) {
|
|
|
300
302
|
export async function runInfer(args, deps) {
|
|
301
303
|
const t0 = Date.now();
|
|
302
304
|
const temperature = args.temperature ?? 0;
|
|
305
|
+
// ── L1 Safety — deterministic input interception ────────────
|
|
306
|
+
const safetyIntercept = checkInputSafety(args.prompt);
|
|
307
|
+
if (safetyIntercept) {
|
|
308
|
+
return {
|
|
309
|
+
output: safetyIntercept,
|
|
310
|
+
backend: "safety_gate",
|
|
311
|
+
model_picked: null,
|
|
312
|
+
ram_free_mb: Math.round(deps.freemem() / (1024 * 1024)),
|
|
313
|
+
latency_ms: Date.now() - t0,
|
|
314
|
+
used_cloud: false,
|
|
315
|
+
attempts: [{ tier: "l1_safety", reason: "crisis_or_medical_intercept" }],
|
|
316
|
+
};
|
|
317
|
+
}
|
|
303
318
|
// ── Entitlement enforcement ──────────────────────────────────
|
|
304
319
|
// Fetch user's plan limits (cached 1hr). Free users without auth
|
|
305
320
|
// get 4b ceiling, 50 calls/day, 512 max tokens.
|
|
@@ -392,7 +407,7 @@ export async function runInfer(args, deps) {
|
|
|
392
407
|
debugLog(`[prism_infer] quality gate FAIL (${gate.reason}) — escalating to cloud`);
|
|
393
408
|
attempts.push({ tier: tier.tag, reason: `quality_gate:${gate.reason}` });
|
|
394
409
|
if (gate.reason === "hard_truncation" || gate.reason === "loop_detected") {
|
|
395
|
-
localDraft = { output, tier: tier.tag };
|
|
410
|
+
localDraft = { output, tier: tier.tag, promptTokens: result.promptTokens, completionTokens: result.completionTokens };
|
|
396
411
|
}
|
|
397
412
|
break;
|
|
398
413
|
}
|
|
@@ -408,6 +423,8 @@ export async function runInfer(args, deps) {
|
|
|
408
423
|
used_cloud: false,
|
|
409
424
|
attempts,
|
|
410
425
|
plan: ent.plan,
|
|
426
|
+
prompt_tokens: result.promptTokens,
|
|
427
|
+
completion_tokens: result.completionTokens,
|
|
411
428
|
});
|
|
412
429
|
}
|
|
413
430
|
attempts.push({ tier: tier.tag, reason: result.reason });
|
|
@@ -431,6 +448,8 @@ export async function runInfer(args, deps) {
|
|
|
431
448
|
used_cloud: true,
|
|
432
449
|
attempts,
|
|
433
450
|
plan: ent.plan,
|
|
451
|
+
prompt_tokens: Math.ceil(args.prompt.length / 4),
|
|
452
|
+
completion_tokens: Math.ceil(cloud.output.length / 4),
|
|
434
453
|
});
|
|
435
454
|
}
|
|
436
455
|
attempts.push({ tier: "synalux", reason: cloud.reason ?? "unknown" });
|
|
@@ -449,6 +468,8 @@ export async function runInfer(args, deps) {
|
|
|
449
468
|
used_cloud: false,
|
|
450
469
|
attempts,
|
|
451
470
|
plan: ent.plan,
|
|
471
|
+
prompt_tokens: localDraft.promptTokens,
|
|
472
|
+
completion_tokens: localDraft.completionTokens,
|
|
452
473
|
quality_gate_failed: true,
|
|
453
474
|
});
|
|
454
475
|
}
|
|
@@ -464,9 +485,11 @@ export async function runInfer(args, deps) {
|
|
|
464
485
|
* field so callers can route refusals separately from successes.
|
|
465
486
|
*/
|
|
466
487
|
async function applyVerification(draft, args, deps, partial) {
|
|
488
|
+
// L1 output safety — intercept dangerous model-generated content
|
|
489
|
+
const safeDraft = checkOutputSafety(draft);
|
|
467
490
|
const shouldVerify = args.verify ?? (args.evidence !== undefined && args.evidence.length > 0);
|
|
468
491
|
if (!shouldVerify || !deps.callVerifier) {
|
|
469
|
-
return { ...partial, output:
|
|
492
|
+
return { ...partial, output: safeDraft };
|
|
470
493
|
}
|
|
471
494
|
const verifier = deps.callVerifier;
|
|
472
495
|
const outcome = await verifier({
|
|
@@ -478,7 +501,7 @@ async function applyVerification(draft, args, deps, partial) {
|
|
|
478
501
|
});
|
|
479
502
|
return {
|
|
480
503
|
...partial,
|
|
481
|
-
output: outcome.finalText,
|
|
504
|
+
output: checkOutputSafety(outcome.finalText),
|
|
482
505
|
verification: {
|
|
483
506
|
action: outcome.action,
|
|
484
507
|
verifierChain: outcome.verifierChain,
|
|
@@ -503,12 +526,30 @@ export async function prismInferHandler(args) {
|
|
|
503
526
|
ollamaUrl: PRISM_LOCAL_LLM_URL,
|
|
504
527
|
});
|
|
505
528
|
debugLog(`[prism_infer] backend=${result.backend} model=${result.model_picked} latency=${result.latency_ms}ms free=${result.ram_free_mb}MB`);
|
|
529
|
+
// Local accumulator — sole source of the user-facing metrics block.
|
|
530
|
+
recordInference(result);
|
|
531
|
+
// Best-effort portal forwarding (independent analytics stream).
|
|
532
|
+
// safety_gate excluded — logging crisis filter triggers is a HIPAA concern.
|
|
533
|
+
if (result.backend !== "safety_gate") {
|
|
534
|
+
ddLog("info", "prism_infer.usage", {
|
|
535
|
+
backend: result.backend,
|
|
536
|
+
model: result.model_picked ?? result.backend,
|
|
537
|
+
used_cloud: result.used_cloud,
|
|
538
|
+
prompt_tokens: result.prompt_tokens ?? 0,
|
|
539
|
+
completion_tokens: result.completion_tokens ?? 0,
|
|
540
|
+
latency_ms: result.latency_ms,
|
|
541
|
+
});
|
|
542
|
+
}
|
|
543
|
+
const tokenStr = result.prompt_tokens != null || result.completion_tokens != null
|
|
544
|
+
? ` tokens=${result.prompt_tokens ?? "?"}in/${result.completion_tokens ?? "?"}out`
|
|
545
|
+
: "";
|
|
506
546
|
const header = `[prism_infer] backend=${result.backend}` +
|
|
507
547
|
` model=${result.model_picked ?? "n/a"}` +
|
|
508
548
|
` plan=${result.plan ?? "unknown"}` +
|
|
509
549
|
` free_ram=${result.ram_free_mb}MB` +
|
|
510
550
|
` latency=${result.latency_ms}ms` +
|
|
511
551
|
` used_cloud=${result.used_cloud}` +
|
|
552
|
+
tokenStr +
|
|
512
553
|
(result.quality_gate_failed ? ` quality_gate_failed=true` : "") +
|
|
513
554
|
(result.verification ? ` verify=${result.verification.action}` : "") +
|
|
514
555
|
(result.attempts.length ? ` attempts=${JSON.stringify(result.attempts)}` : "");
|
|
@@ -317,7 +317,7 @@ export async function sessionTaskRouteHandler(args) {
|
|
|
317
317
|
delete result._rawComposite;
|
|
318
318
|
// ── v9.x: Local LLM second-opinion for low-confidence cases ──────────────
|
|
319
319
|
// When confidence is below the threshold AND local LLM is enabled,
|
|
320
|
-
// ask prism-coder:
|
|
320
|
+
// ask prism-coder:9b to break the tie. This is purely additive — if the
|
|
321
321
|
// LLM call fails or times out, the original heuristic result is returned.
|
|
322
322
|
if (PRISM_LOCAL_LLM_ENABLED &&
|
|
323
323
|
result.confidence < PRISM_TASK_ROUTER_CONFIDENCE_THRESHOLD) {
|
|
@@ -350,7 +350,7 @@ export async function sessionTaskRouteHandler(args) {
|
|
|
350
350
|
}
|
|
351
351
|
// ─── Local LLM Route Classifier ──────────────────────────────
|
|
352
352
|
/**
|
|
353
|
-
* Ask prism-coder:
|
|
353
|
+
* Ask prism-coder:9b to classify a task description as "claw" or "host".
|
|
354
354
|
* Returns the string or null if the model is unavailable / response unparseable.
|
|
355
355
|
* Called only when heuristic confidence is below the threshold.
|
|
356
356
|
*/
|
package/dist/utils/ddLogger.js
CHANGED
|
@@ -8,9 +8,17 @@
|
|
|
8
8
|
* Env: PRISM_SYNALUX_BASE_URL (default https://synalux.ai)
|
|
9
9
|
*/
|
|
10
10
|
const SYNALUX_BASE = process.env.PRISM_SYNALUX_BASE_URL || "https://synalux.ai";
|
|
11
|
+
const TELEMETRY_WRITE_TOKEN = process.env.TELEMETRY_WRITE_TOKEN || "";
|
|
11
12
|
const DD_API_KEY = process.env.DD_API_KEY || "";
|
|
12
13
|
const DD_SITE = process.env.DD_SITE || "datadoghq.com";
|
|
13
14
|
const SERVICE = "prism-mcp";
|
|
15
|
+
const CONTEXT_ALLOWLIST = new Set([
|
|
16
|
+
"backend", "model", "used_cloud", "prompt_tokens", "completion_tokens",
|
|
17
|
+
"latency_ms", "plan", "requested_ceiling", "effective_ceiling",
|
|
18
|
+
"ceiling_clamped", "requested_tokens", "effective_tokens", "tokens_clamped",
|
|
19
|
+
"cloud_requested", "cloud_allowed", "cloud_blocked",
|
|
20
|
+
"verify_requested", "verify_allowed", "verify_blocked",
|
|
21
|
+
]);
|
|
14
22
|
const queue = [];
|
|
15
23
|
let flushTimer = null;
|
|
16
24
|
const FLUSH_INTERVAL_MS = 5_000;
|
|
@@ -26,31 +34,61 @@ async function flush() {
|
|
|
26
34
|
return;
|
|
27
35
|
const batch = queue.splice(0, MAX_BATCH);
|
|
28
36
|
// Primary: Synalux portal → Supabase (always available)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
37
|
+
if (TELEMETRY_WRITE_TOKEN) {
|
|
38
|
+
try {
|
|
39
|
+
await fetch(`${SYNALUX_BASE}/api/v1/telemetry`, {
|
|
40
|
+
method: "POST",
|
|
41
|
+
headers: {
|
|
42
|
+
"Content-Type": "application/json",
|
|
43
|
+
"Authorization": `Bearer ${TELEMETRY_WRITE_TOKEN}`,
|
|
44
|
+
"X-Prism-Client": "prism-mcp",
|
|
45
|
+
},
|
|
46
|
+
body: JSON.stringify(batch.map(e => {
|
|
47
|
+
const ctx = {};
|
|
48
|
+
for (const [k, v] of Object.entries(e)) {
|
|
49
|
+
if (CONTEXT_ALLOWLIST.has(k))
|
|
50
|
+
ctx[k] = v;
|
|
51
|
+
}
|
|
52
|
+
return {
|
|
53
|
+
service: SERVICE,
|
|
54
|
+
event_type: e.status === "error" ? "error" : "action",
|
|
55
|
+
message: e.message,
|
|
56
|
+
context: ctx,
|
|
57
|
+
user_id: e.user_id,
|
|
58
|
+
user_plan: e.user_plan,
|
|
59
|
+
};
|
|
60
|
+
})),
|
|
61
|
+
signal: AbortSignal.timeout(5_000),
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
catch {
|
|
65
|
+
// Silent — don't crash the MCP server
|
|
66
|
+
}
|
|
46
67
|
}
|
|
47
68
|
// Secondary: Datadog Logs (if API key is set AND Logs product is enabled)
|
|
69
|
+
// Same allowlist applied — both sinks get identical filtered context.
|
|
48
70
|
if (DD_API_KEY) {
|
|
49
71
|
try {
|
|
50
72
|
await fetch(`https://http-intake.logs.${DD_SITE}/api/v2/logs`, {
|
|
51
73
|
method: "POST",
|
|
52
74
|
headers: { "Content-Type": "application/json", "DD-API-KEY": DD_API_KEY },
|
|
53
|
-
body: JSON.stringify(batch
|
|
75
|
+
body: JSON.stringify(batch.map(e => {
|
|
76
|
+
const ctx = {};
|
|
77
|
+
for (const [k, v] of Object.entries(e)) {
|
|
78
|
+
if (CONTEXT_ALLOWLIST.has(k))
|
|
79
|
+
ctx[k] = v;
|
|
80
|
+
}
|
|
81
|
+
return {
|
|
82
|
+
ddsource: "nodejs",
|
|
83
|
+
ddtags: e.ddtags,
|
|
84
|
+
hostname: e.hostname,
|
|
85
|
+
service: SERVICE,
|
|
86
|
+
status: e.status,
|
|
87
|
+
message: e.message,
|
|
88
|
+
...ctx,
|
|
89
|
+
timestamp: e.timestamp,
|
|
90
|
+
};
|
|
91
|
+
})),
|
|
54
92
|
signal: AbortSignal.timeout(5_000),
|
|
55
93
|
});
|
|
56
94
|
}
|
|
@@ -68,7 +106,7 @@ export function ddLog(level, message, context) {
|
|
|
68
106
|
hostname: process.env.HOSTNAME || "prism-mcp",
|
|
69
107
|
service: SERVICE,
|
|
70
108
|
status: level,
|
|
71
|
-
message,
|
|
109
|
+
message: message.slice(0, 200),
|
|
72
110
|
...context,
|
|
73
111
|
timestamp: new Date().toISOString(),
|
|
74
112
|
});
|
|
@@ -90,3 +128,6 @@ export function ddInfo(message, context) {
|
|
|
90
128
|
export function ddWarn(message, context) {
|
|
91
129
|
ddLog("warn", message, context);
|
|
92
130
|
}
|
|
131
|
+
if (!TELEMETRY_WRITE_TOKEN && process.env.PRISM_DEBUG_LOGGING) {
|
|
132
|
+
console.info("[prism-mcp] Portal telemetry not configured (no TELEMETRY_WRITE_TOKEN). Session metrics work locally — this is normal for offline/free-tier use.");
|
|
133
|
+
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference metrics — local accumulator for user-facing display.
|
|
3
|
+
*
|
|
4
|
+
* The local accumulator is the SOLE source for the session metrics block
|
|
5
|
+
* shown in session_save_ledger/handoff. It tracks what THIS prism process
|
|
6
|
+
* did THIS session — prism is the natural and only complete source for
|
|
7
|
+
* this data (the portal only sees what prism forwards).
|
|
8
|
+
*
|
|
9
|
+
* Portal forwarding (ddLog → /api/v1/telemetry) is a separate, best-effort
|
|
10
|
+
* analytics stream that the display path never depends on. If the portal
|
|
11
|
+
* is down, unconfigured, or the token is missing, users still see metrics.
|
|
12
|
+
*/
|
|
13
|
+
import { debugLog } from "./logger.js";
|
|
14
|
+
const byModel = {};
|
|
15
|
+
let localCalls = 0;
|
|
16
|
+
let cloudCalls = 0;
|
|
17
|
+
let totalPromptTokens = 0;
|
|
18
|
+
let totalCompletionTokens = 0;
|
|
19
|
+
let totalLatencyMs = 0;
|
|
20
|
+
export function recordInference(result) {
|
|
21
|
+
if (result.backend === "safety_gate")
|
|
22
|
+
return;
|
|
23
|
+
const key = result.model_picked ?? result.backend;
|
|
24
|
+
if (result.used_cloud) {
|
|
25
|
+
cloudCalls++;
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
localCalls++;
|
|
29
|
+
}
|
|
30
|
+
const pt = result.prompt_tokens ?? 0;
|
|
31
|
+
const ct = result.completion_tokens ?? 0;
|
|
32
|
+
totalPromptTokens += pt;
|
|
33
|
+
totalCompletionTokens += ct;
|
|
34
|
+
totalLatencyMs += result.latency_ms;
|
|
35
|
+
if (!byModel[key]) {
|
|
36
|
+
byModel[key] = { calls: 0, promptTokens: 0, completionTokens: 0, totalLatencyMs: 0 };
|
|
37
|
+
}
|
|
38
|
+
byModel[key].calls++;
|
|
39
|
+
byModel[key].promptTokens += pt;
|
|
40
|
+
byModel[key].completionTokens += ct;
|
|
41
|
+
byModel[key].totalLatencyMs += result.latency_ms;
|
|
42
|
+
}
|
|
43
|
+
export function getInferenceSnapshot() {
|
|
44
|
+
const total = localCalls + cloudCalls;
|
|
45
|
+
const modelCopy = {};
|
|
46
|
+
for (const [k, v] of Object.entries(byModel)) {
|
|
47
|
+
modelCopy[k] = { ...v };
|
|
48
|
+
}
|
|
49
|
+
return {
|
|
50
|
+
localCalls,
|
|
51
|
+
cloudCalls,
|
|
52
|
+
totalCalls: total,
|
|
53
|
+
localPct: total > 0 ? Math.round((localCalls / total) * 100) : 0,
|
|
54
|
+
cloudPct: total > 0 ? 100 - Math.round((localCalls / total) * 100) : 0,
|
|
55
|
+
totalPromptTokens,
|
|
56
|
+
totalCompletionTokens,
|
|
57
|
+
totalTokens: totalPromptTokens + totalCompletionTokens,
|
|
58
|
+
avgLatencyMs: total > 0 ? Math.round(totalLatencyMs / total) : 0,
|
|
59
|
+
byModel: modelCopy,
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
export function resetInferenceMetrics() {
|
|
63
|
+
localCalls = 0;
|
|
64
|
+
cloudCalls = 0;
|
|
65
|
+
totalPromptTokens = 0;
|
|
66
|
+
totalCompletionTokens = 0;
|
|
67
|
+
totalLatencyMs = 0;
|
|
68
|
+
for (const key of Object.keys(byModel)) {
|
|
69
|
+
delete byModel[key];
|
|
70
|
+
}
|
|
71
|
+
debugLog("[inference-metrics] Session metrics reset");
|
|
72
|
+
}
|
|
73
|
+
/**
 * Render the current inference metrics as a multi-line, human-readable report.
 *
 * @returns {string} An empty string when no calls were recorded; otherwise a
 *   block starting with a newline that lists call counts, token totals and
 *   average latency. A per-model breakdown (most calls first) is appended only
 *   when more than one model was used.
 */
export function formatInferenceMetrics() {
    const metrics = getInferenceSnapshot();
    if (metrics.totalCalls === 0) {
        return "";
    }
    const report = [];
    report.push(`\n📊 Inference Metrics (this session):`);
    report.push(` Total calls: ${metrics.totalCalls} — Local: ${metrics.localCalls} (${metrics.localPct}%) | Cloud: ${metrics.cloudCalls} (${metrics.cloudPct}%)`);
    report.push(` Tokens: ${metrics.totalPromptTokens.toLocaleString()} in + ${metrics.totalCompletionTokens.toLocaleString()} out = ${metrics.totalTokens.toLocaleString()} total`);
    report.push(` Avg latency: ${metrics.avgLatencyMs}ms`);
    // Busiest models first.
    const ranked = Object.entries(metrics.byModel).sort(([, left], [, right]) => right.calls - left.calls);
    if (ranked.length > 1) {
        report.push(` By model:`);
        ranked.forEach(([name, stats]) => {
            const tokens = stats.promptTokens + stats.completionTokens;
            let avgMs = 0;
            if (stats.calls > 0) {
                avgMs = Math.round(stats.totalLatencyMs / stats.calls);
            }
            report.push(` ${name}: ${stats.calls} calls, ${tokens.toLocaleString()} tokens, avg ${avgMs}ms`);
        });
    }
    return report.join("\n");
}
|
package/dist/utils/localLlm.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Local LLM Client — Ollama/prism-coder:
|
|
2
|
+
* Local LLM Client — Ollama/prism-coder:9b Integration (v1.0.0)
|
|
3
3
|
* ──────────────────────────────────────────────────────────────────
|
|
4
4
|
* Thin HTTP wrapper around the Ollama /api/chat endpoint.
|
|
5
5
|
*
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* - Silent fail: returning null instead of throwing ensures callers
|
|
10
10
|
* can fall back to Gemini without crashing the MCP server.
|
|
11
11
|
* - Fire-and-forget safe: wrapped in try/catch, never propagates.
|
|
12
|
-
* - Default model: prism-coder:
|
|
12
|
+
* - Default model: prism-coder:9b — fine-tuned on Prism tool schemas,
|
|
13
13
|
* 8192-token context, Q8_0 quantization, ~8.1GB RAM footprint.
|
|
14
14
|
*
|
|
15
15
|
* FEATURE FLAG:
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
*
|
|
17
17
|
* Architecture:
|
|
18
18
|
* 1. Rule-based extraction (fast, zero-cost, always available)
|
|
19
|
-
* 2. Local LLM extraction (optional, higher quality, uses prism-coder:
|
|
19
|
+
* 2. Local LLM extraction (optional, higher quality, uses prism-coder:9b)
|
|
20
20
|
* 3. Merged + deduplicated results
|
|
21
21
|
*/
|
|
22
22
|
import { debugLog } from "./logger.js";
|
|
@@ -27,11 +27,20 @@ export function passesQualityGate(stripped, thinkOnly, finishReason) {
|
|
|
27
27
|
if (finishReason === "length") {
|
|
28
28
|
return { pass: false, reason: "hard_truncation" };
|
|
29
29
|
}
|
|
30
|
-
// Signal 4: Exact-loop
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
// Signal 4: Exact-loop detection (two passes).
|
|
31
|
+
//
|
|
32
|
+
// Pass A (prose-only, threshold ≥3): strip structural markdown that
|
|
33
|
+
// naturally repeats (code blocks, tables, headings, bold labels).
|
|
34
|
+
// Catches loops in explanatory text.
|
|
35
|
+
const proseOnly = stripped
|
|
36
|
+
.replace(/```[\s\S]*?```/g, "")
|
|
37
|
+
.replace(/^\|.*\|$/gm, "")
|
|
38
|
+
.replace(/^#{1,6}\s+.*$/gm, "")
|
|
39
|
+
.replace(/^[\s*-]*\*{1,2}[^*]+\*{1,2}:?\s*$/gm, "");
|
|
40
|
+
const proseSentences = proseOnly.split(/[.!?\n]+/).map(s => s.trim()).filter(s => s.length > 10);
|
|
41
|
+
if (proseSentences.length >= 6) {
|
|
33
42
|
const counts = new Map();
|
|
34
|
-
for (const s of
|
|
43
|
+
for (const s of proseSentences) {
|
|
35
44
|
const key = s.toLowerCase();
|
|
36
45
|
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
37
46
|
if ((counts.get(key) ?? 0) >= 3) {
|
|
@@ -39,5 +48,20 @@ export function passesQualityGate(stripped, thinkOnly, finishReason) {
|
|
|
39
48
|
}
|
|
40
49
|
}
|
|
41
50
|
}
|
|
51
|
+
// Pass B (full text, threshold ≥5): catches egregious loops hidden
|
|
52
|
+
// inside fake code blocks or other structural elements. Higher
|
|
53
|
+
// threshold avoids false positives on legitimate code patterns
|
|
54
|
+
// (e.g. `node = self.root` × 4 is fine, × 5 is suspicious).
|
|
55
|
+
const allSentences = stripped.split(/[.!?\n]+/).map(s => s.trim()).filter(s => s.length > 10);
|
|
56
|
+
if (allSentences.length >= 10) {
|
|
57
|
+
const counts = new Map();
|
|
58
|
+
for (const s of allSentences) {
|
|
59
|
+
const key = s.toLowerCase();
|
|
60
|
+
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
61
|
+
if ((counts.get(key) ?? 0) >= 5) {
|
|
62
|
+
return { pass: false, reason: "loop_detected" };
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
42
66
|
return { pass: true };
|
|
43
67
|
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* L1 Safety Gate — deterministic crisis/medical interception for prism_infer.
|
|
3
|
+
*
|
|
4
|
+
* Runs BEFORE model output reaches the caller. High precision, low recall:
|
|
5
|
+
* only intercepts patterns that unambiguously indicate self-harm intent
|
|
6
|
+
* (input) or dangerous instructions (output). Generic clinical/pharmacological
|
|
7
|
+
* terms ("dose of", "milligrams", "lethal") are NOT intercepted — they appear
|
|
8
|
+
* in routine BCBA/medical notes.
|
|
9
|
+
*
|
|
10
|
+
* This is a backstop, not a comprehensive filter. The model's own safety
|
|
11
|
+
* training is the primary layer.
|
|
12
|
+
*/
|
|
13
|
+
// ── Input: first-person crisis expressions ───────────────────────────────────
|
|
14
|
+
const CRISIS_INPUT_RE = [
|
|
15
|
+
// English
|
|
16
|
+
/hurt(?:ing)?\s+(?:my)?self/i,
|
|
17
|
+
/kill\s+(?:my)?self/i,
|
|
18
|
+
/end\s+my\s+life/i,
|
|
19
|
+
/want\s+to\s+die/i,
|
|
20
|
+
/want\s+to\s+(?:commit\s+)?suicide/i,
|
|
21
|
+
/cut(?:ting)?\s+(?:my)?self/i,
|
|
22
|
+
/(?:hang|hanging)\s+(?:my)?self/i,
|
|
23
|
+
/jump(?:ing)?\s+off/i,
|
|
24
|
+
/how\s+(?:many|much|to).*(?:pills|overdose|die)/i,
|
|
25
|
+
// Spanish — block hyperbole nouns only (NOT hunger/thirst/cold — may be literal for neglected child)
|
|
26
|
+
/quiero\s+morir(?!\s+de\s+(?:risa|la\s+risa|vergüenza|ganas|envidia|aburrimiento)\b)/i,
|
|
27
|
+
/(?:voy\s+a\s+)?matarme(?!\s+(?:estudiando|trabajando|riendo|de\s+(?:risa|la\s+risa))\b)/i,
|
|
28
|
+
/hacerme\s+da[ñn]o/i,
|
|
29
|
+
/(?:quiero\s+)?suicidarme/i,
|
|
30
|
+
// French — block hyperbole nouns only (NOT faim/soif/chaud/froid)
|
|
31
|
+
/je\s+veux\s+mourir(?!\s+d[e']\s*(?:rire|honte|envie|ennui)\b)/i,
|
|
32
|
+
/(?:vais\s+)?me\s+tuer(?!\s+[àa]\s+(?:rire|travailler|étudier)\b)/i,
|
|
33
|
+
/me\s+suicider/i,
|
|
34
|
+
// Portuguese — block hyperbole nouns only (NOT fome/sede/calor/frio/sono)
|
|
35
|
+
/quero\s+morrer(?!\s+de\s+(?:rir|vergonha|inveja|tédio)\b)/i,
|
|
36
|
+
/(?:vou\s+)?me\s+matar(?!\s+(?:estudando|trabalhando|rindo|de\s+(?:rir|tanto))\b)/i,
|
|
37
|
+
// Russian — block hyperbole nouns (смех/стыд/зависть/скука)
|
|
38
|
+
// Russian — NOT голода/жары/холода (may be literal)
|
|
39
|
+
/хочу\s+умереть(?!\s+от\s+(?:смеха|стыда|зависти|скуки)(?:\s|$))/i,
|
|
40
|
+
/убить\s+себя/i,
|
|
41
|
+
/покончить\s+с\s+собой/i,
|
|
42
|
+
// Arabic (alef variants handled in normalize)
|
|
43
|
+
/[اأإآ]ر[يى]د\s+[اأإآ]ن\s+[اأإآ]موت/,
|
|
44
|
+
/قتل\s+نفس[يى]/,
|
|
45
|
+
/[اأإآ]نه[يى]\s+ح[يى][اأإآ]ت[يى]/,
|
|
46
|
+
// German — block hyperbole nouns (Lachen/Scham/Neid/Langeweile)
|
|
47
|
+
// German — NOT hunger/kälte (may be literal)
|
|
48
|
+
/(?:ich\s+)?will\s+sterben(?!\s+vor\s+(?:lachen|scham|neid|langeweile)\b)/i,
|
|
49
|
+
/mich\s+umbringen/i,
|
|
50
|
+
// Ukrainian — block hyperbole nouns (сміх/сором/заздрість/нудьга)
|
|
51
|
+
// Ukrainian — NOT голоду/холоду (may be literal)
|
|
52
|
+
/хочу\s+померти(?!\s+від\s+(?:сміху|сорому|заздрості|нудьги)(?:\s|$))/i,
|
|
53
|
+
/вбити\s+себе/i,
|
|
54
|
+
];
|
|
55
|
+
const MEDICAL_INPUT_RE = [
|
|
56
|
+
/(?:how\s+much|what\s+dose)\s+.*(?:should\s+I|do\s+I|can\s+I)\s+(?:inject|take|give)/i,
|
|
57
|
+
/(?:dose|dosage)\s+(?:should\s+I|do\s+I|can\s+I)\s+(?:inject|take|give)/i,
|
|
58
|
+
];
|
|
59
|
+
// ── Output: model providing dangerous instructions ───────────────────────────
|
|
60
|
+
const CRISIS_OUTPUT_RE = [
|
|
61
|
+
/\bhow many pills\s+to\b/i,
|
|
62
|
+
/\boverdose amount\b/i,
|
|
63
|
+
/\bnoose\b/i,
|
|
64
|
+
/\bhanging yourself\b/i,
|
|
65
|
+
/\bsuicide method\b/i,
|
|
66
|
+
/\bmethod of suicide\b/i,
|
|
67
|
+
/\bself[- ]?harm method\b/i,
|
|
68
|
+
/\bhow to (?:kill|hang|poison|overdose)\s+(?:your|my|him|her|them|one)self\b/i,
|
|
69
|
+
/\bhow to overdose\b/i,
|
|
70
|
+
];
|
|
71
|
+
const MEDICAL_OUTPUT_RE = [
|
|
72
|
+
/\binject this (?:amount|dose|much)\b/i,
|
|
73
|
+
/\btake this (?:amount|many|dose)\b/i,
|
|
74
|
+
/\byou should (?:inject|take)\s+\d+\s+units\b/i,
|
|
75
|
+
];
|
|
76
|
+
// ── Responses ────────────────────────────────────────────────────────────────
|
|
77
|
+
export const CRISIS_RESPONSE = "I'm concerned about your safety. Please call or text 988 (Suicide & Crisis Lifeline) right now — available 24/7. If in immediate danger, call 911. International: findahelpline.com. You are not alone.";
|
|
78
|
+
export const MEDICAL_RESPONSE = "I cannot provide specific medical dosing advice. Please contact your doctor or pharmacist. For emergencies, call 911.";
|
|
79
|
+
// ── API ──────────────────────────────────────────────────────────────────────
|
|
80
|
+
/**
 * Canonicalize text before it is tested against the safety patterns.
 *
 * Steps, in order:
 *   1. Remove format characters (\p{Cf}, e.g. zero-width spaces/joiners) so
 *      they cannot split a phrase or keep a base letter apart from its mark.
 *   2. Compose to NFC. The patterns are written with precomposed letters
 *      (й, é, ü, ñ …); without this step, decomposed (NFD) input would have
 *      the mark stripped in step 4 and turn into a different letter
 *      (й → и, é → e), silently missing the patterns and their exemptions.
 *   3. Lowercase.
 *   4. Remove the remaining non-spacing marks (\p{Mn}) — Arabic harakat and
 *      any combining marks that have no precomposed form.
 *   5. Remove the Arabic tatweel and fold hamza/madda alef variants to a
 *      bare alef.
 *   6. Collapse every whitespace run to a single space.
 *
 * @param {string} text Raw input or model output.
 * @returns {string} The normalized text used for pattern matching only.
 */
function normalize(text) {
    return text
        .replace(/\p{Cf}/gu, "")
        .normalize("NFC")
        .toLowerCase()
        .replace(/\p{Mn}/gu, "") // Arabic harakat + remaining combining marks
        .replace(/ـ/g, "")
        .replace(/[أإآ]/g, "ا")
        .replace(/\s+/g, " ");
}
|
|
89
|
+
/**
 * Screen caller input against the crisis and medical-dosing patterns.
 *
 * Crisis patterns are checked first and take precedence over medical ones.
 *
 * @param {string} text Raw input text.
 * @returns {string|null} CRISIS_RESPONSE or MEDICAL_RESPONSE when a pattern
 *   matches the normalized text, otherwise null.
 */
export function checkInputSafety(text) {
    const normalized = normalize(text);
    const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(normalized));
    if (matchesAny(CRISIS_INPUT_RE)) {
        return CRISIS_RESPONSE;
    }
    return matchesAny(MEDICAL_INPUT_RE) ? MEDICAL_RESPONSE : null;
}
|
|
97
|
+
/**
 * Screen model output against the crisis and medical-dosing patterns.
 *
 * Crisis patterns are checked first and take precedence over medical ones.
 *
 * @param {string} response Model output.
 * @returns {string} CRISIS_RESPONSE or MEDICAL_RESPONSE when a pattern matches
 *   the normalized output; otherwise the original `response`, unmodified.
 */
export function checkOutputSafety(response) {
    const normalized = normalize(response);
    const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(normalized));
    if (matchesAny(CRISIS_OUTPUT_RE)) {
        return CRISIS_RESPONSE;
    }
    if (matchesAny(MEDICAL_OUTPUT_RE)) {
        return MEDICAL_RESPONSE;
    }
    return response;
}
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "19.1
|
|
3
|
+
"version": "19.2.1",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-coder",
|
|
5
|
-
"description": "Prism Coder
|
|
5
|
+
"description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
|
|
6
6
|
"module": "index.ts",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"main": "dist/server.js",
|