prism-mcp-server 19.1.0 → 19.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +102 -0
- package/dist/config.js +4 -4
- package/dist/tools/compactionHandler.js +2 -2
- package/dist/tools/ledgerHandlers.js +9 -0
- package/dist/tools/prismInferHandler.js +45 -7
- package/dist/tools/taskRouterHandler.js +2 -2
- package/dist/utils/ddLogger.js +57 -19
- package/dist/utils/inferenceMetrics.js +64 -0
- package/dist/utils/localLlm.js +2 -2
- package/dist/utils/nerExtractor.js +1 -1
- package/dist/utils/qualityGate.js +28 -4
- package/dist/utils/safetyGate.js +104 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -78,6 +78,23 @@ Every session is logged with files changed, decisions made, and TODOs. Search, f
|
|
|
78
78
|
<img src="docs/session-ledger.jpg" alt="Session Ledger — 93 sessions, 847 decisions logged across 12 projects" width="700" />
|
|
79
79
|
</p>
|
|
80
80
|
|
|
81
|
+
### Inference Metrics — see where your tokens go
|
|
82
|
+
|
|
83
|
+
Every `prism_infer` call tracks which model handled it (local Ollama vs cloud) and how many tokens were consumed. When you save a session, Prism shows a summary:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
📊 Inference Metrics (this session):
|
|
87
|
+
Total calls: 12 — Local: 10 (83%) | Cloud: 2 (17%)
|
|
88
|
+
Tokens: 8,420 in + 3,150 out = 11,570 total
|
|
89
|
+
Avg latency: 1,240ms
|
|
90
|
+
By model:
|
|
91
|
+
prism-coder:27b: 6 calls, 7,200 tokens, avg 1,800ms
|
|
92
|
+
prism-coder:9b: 4 calls, 2,870 tokens, avg 620ms
|
|
93
|
+
synalux-27b: 2 calls, 1,500 tokens, avg 1,100ms
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Local calls use actual Ollama token counts; cloud calls use estimates. Metrics are aggregated by the Synalux portal — Prism is a thin client that forwards per-call data and fetches the summary on demand.
|
|
97
|
+
|
|
81
98
|
### Session Drift Detection
|
|
82
99
|
|
|
83
100
|
Long agent sessions can wander from their original goal. `session_detect_drift` compares current work against the stated goal and returns `on_track / minor_drift / major_drift` so the agent can self-correct.
|
|
@@ -204,6 +221,91 @@ python3 tests/benchmarks/prism-routing-100/benchmark.py --models 2b 4b 9b 27b
|
|
|
204
221
|
|
|
205
222
|
**Memory uplift (LoCoMo-Plus, self-published).** A separate long-context dialogue benchmark ([dcostenco/Locomo-Plus](https://github.com/dcostenco/Locomo-Plus)) measures how much structured memory helps a base model retain multi-day context. Results show large gains when a model is paired with Prism memory versus running raw. Note this benchmark is authored, run, and LLM-judged by this project — treat it as a reproducible demonstration, not an independent third-party result, and run it yourself with the commands in that repo.
|
|
206
223
|
|
|
224
|
+
### Code Generation Quality (27B vs Claude Opus)
|
|
225
|
+
|
|
226
|
+
Three progressively harder Python tasks run through `prism_infer(mode:"code", think:true)` on the local 27B and compared with Claude Opus. Both produce correct, production-quality code. The 27B is slightly more verbose (docstrings, examples); Opus is slightly tighter (`__slots__`, early-exit DFS). On routine coding the 27B at $0 replaces cloud calls entirely.
|
|
227
|
+
|
|
228
|
+
| Task | Local 27B | Claude Opus | Verdict |
|
|
229
|
+
|------|-----------|-------------|---------|
|
|
230
|
+
| Fibonacci with memoization | `@lru_cache`, ValueError on negative, docstring | Nested `_fib` to keep cache private | Both correct, equivalent |
|
|
231
|
+
| LRU Cache (OrderedDict, O(1)) | `Any` keys, isinstance capacity check, `__repr__` | `Hashable` key type (more precise), same ops | Both correct, Opus marginally tighter |
|
|
232
|
+
| Trie with autocomplete | `.lower()` normalization, collect+sort+slice | `__slots__` on TrieNode, early-exit DFS at limit | Both correct, Opus slightly more optimized |
|
|
233
|
+
|
|
234
|
+
<details>
|
|
235
|
+
<summary>Local 27B output — Trie with autocomplete (hardest task)</summary>
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
class TrieNode:
|
|
239
|
+
def __init__(self):
|
|
240
|
+
self.children: dict[str, 'TrieNode'] = {}
|
|
241
|
+
self.is_end_of_word: bool = False
|
|
242
|
+
|
|
243
|
+
class Trie:
|
|
244
|
+
def __init__(self):
|
|
245
|
+
self.root: TrieNode = TrieNode()
|
|
246
|
+
|
|
247
|
+
def insert(self, word: str) -> None:
|
|
248
|
+
node = self.root
|
|
249
|
+
for char in word.lower():
|
|
250
|
+
if char not in node.children:
|
|
251
|
+
node.children[char] = TrieNode()
|
|
252
|
+
node = node.children[char]
|
|
253
|
+
node.is_end_of_word = True
|
|
254
|
+
|
|
255
|
+
def search(self, word: str) -> bool:
|
|
256
|
+
node = self._get_node(word.lower())
|
|
257
|
+
return node is not None and node.is_end_of_word
|
|
258
|
+
|
|
259
|
+
def starts_with(self, prefix: str) -> bool:
|
|
260
|
+
return self._get_node(prefix.lower()) is not None
|
|
261
|
+
|
|
262
|
+
def autocomplete(self, prefix: str, limit: int = 5) -> list[str]:
|
|
263
|
+
node = self._get_node(prefix.lower())
|
|
264
|
+
if node is None:
|
|
265
|
+
return []
|
|
266
|
+
results: list[str] = []
|
|
267
|
+
self._collect_words(node, prefix.lower(), results)
|
|
268
|
+
results.sort()
|
|
269
|
+
return results[:limit]
|
|
270
|
+
|
|
271
|
+
def _get_node(self, key: str) -> 'TrieNode | None':
|
|
272
|
+
node = self.root
|
|
273
|
+
for char in key:
|
|
274
|
+
if char not in node.children:
|
|
275
|
+
return None
|
|
276
|
+
node = node.children[char]
|
|
277
|
+
return node
|
|
278
|
+
|
|
279
|
+
def _collect_words(self, node: TrieNode, prefix: str, results: list[str]) -> None:
|
|
280
|
+
if node.is_end_of_word:
|
|
281
|
+
results.append(prefix)
|
|
282
|
+
for char, child in sorted(node.children.items()):
|
|
283
|
+
self._collect_words(child, prefix + char, results)
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
</details>
|
|
287
|
+
|
|
288
|
+
| Metric | Local 27B | Cloud (Opus) |
|
|
289
|
+
|--------|-----------|-------------|
|
|
290
|
+
| Latency (Trie task) | ~30s | ~8s |
|
|
291
|
+
| Cost | $0 | ~$0.05 |
|
|
292
|
+
| Think mode | Enabled (stripped before serving) | N/A |
|
|
293
|
+
| Quality gate | Passed (no escalation needed) | N/A |
|
|
294
|
+
|
|
295
|
+
### Cloud Escalation in Practice (`cloud_fallback: true`)
|
|
296
|
+
|
|
297
|
+
The same three tasks with `cloud_fallback: true` — the quality gate decides whether local output is good enough or needs cloud escalation.
|
|
298
|
+
|
|
299
|
+
| Task | used_cloud | Quality Gate | Latency | What happened |
|
|
300
|
+
|------|:----------:|-------------|---------|---------------|
|
|
301
|
+
| Fibonacci (simple) | **no** | Passed | 11s | 27B served directly, $0 |
|
|
302
|
+
| LRU Cache (medium) | **no** | Passed | 21s | 27B served directly, $0 |
|
|
303
|
+
| Trie (hard) | **yes** | `loop_detected` | 55s | 27B looped → gate caught it → escalated to cloud 27B |
|
|
304
|
+
|
|
305
|
+
The quality gate detected repeated sentences (≥3 of the same sentence in ≥6 total) in the 27B's Trie output and escalated automatically. The cloud fallback returned clean code. On a second run of the same prompt, the 27B produced clean output without escalation — the loop is stochastic, not systematic.
|
|
306
|
+
|
|
307
|
+
**Takeaway:** for ~80–90% of coding tasks, the 27B handles everything locally at $0. The quality gate + cloud escalation exists as a safety net for the remaining cases where the local model loops, truncates, or produces empty output. Paid tiers get automatic escalation; free tier gets the local result with a warning.
|
|
308
|
+
|
|
207
309
|
---
|
|
208
310
|
|
|
209
311
|
## Why Prism Coder
|
package/dist/config.js
CHANGED
|
@@ -307,11 +307,11 @@ const rawTiebreakerEpsilon = parseFloat(process.env.PRISM_TURBOQUANT_TIEBREAKER_
|
|
|
307
307
|
export const PRISM_TURBOQUANT_TIEBREAKER_EPSILON = Number.isFinite(rawTiebreakerEpsilon) && rawTiebreakerEpsilon >= 0
|
|
308
308
|
? rawTiebreakerEpsilon
|
|
309
309
|
: 0;
|
|
310
|
-
// ─── v9.x: Local LLM (prism-coder
|
|
310
|
+
// ─── v9.x: Local LLM (prism-coder) Integration ────────────────────────────
|
|
311
311
|
// Enables background tasks (compaction, task-router fallback, pipeline ops)
|
|
312
312
|
// to use a local Ollama model instead of the cloud LLM provider.
|
|
313
313
|
//
|
|
314
|
-
// Default model is prism-coder:
|
|
314
|
+
// Default model is prism-coder:9b — fine-tuned on Prism tool schemas.
|
|
315
315
|
// Disabled by default so existing deployments are unaffected.
|
|
316
316
|
//
|
|
317
317
|
// Set PRISM_LOCAL_LLM_ENABLED=true to activate.
|
|
@@ -319,10 +319,10 @@ export const PRISM_TURBOQUANT_TIEBREAKER_EPSILON = Number.isFinite(rawTiebreaker
|
|
|
319
319
|
// Set PRISM_LOCAL_LLM_URL to override the Ollama endpoint (default: localhost:11434).
|
|
320
320
|
// Set PRISM_LOCAL_LLM_TIMEOUT_MS to override per-call timeout (default: 60000, max: 300000).
|
|
321
321
|
// Set PRISM_STRICT_LOCAL_MODE=true to block cloud fallback when local LLM is enabled (HIPAA).
|
|
322
|
-
/** Master switch — enables the local prism-coder
|
|
322
|
+
/** Master switch — enables the local prism-coder LLM for background tasks. */
|
|
323
323
|
export const PRISM_LOCAL_LLM_ENABLED = process.env.PRISM_LOCAL_LLM_ENABLED === "true"; // Opt-in, default false
|
|
324
324
|
/** Ollama model tag to use for local LLM calls. */
|
|
325
|
-
export const PRISM_LOCAL_LLM_MODEL = (process.env.PRISM_LOCAL_LLM_MODEL || "prism-coder:
|
|
325
|
+
export const PRISM_LOCAL_LLM_MODEL = (process.env.PRISM_LOCAL_LLM_MODEL || "prism-coder:9b").trim();
|
|
326
326
|
/** Ollama base URL. Override for remote Ollama instances. */
|
|
327
327
|
export const PRISM_LOCAL_LLM_URL = (process.env.PRISM_LOCAL_LLM_URL || "http://localhost:11434").trim();
|
|
328
328
|
/** Per-call timeout in ms. Prevents stalled background tasks. Capped at 300s. */
|
package/dist/tools/compactionHandler.js
CHANGED
|
@@ -108,7 +108,7 @@ function parseCompactionResponse(response, source) {
|
|
|
108
108
|
}
|
|
109
109
|
async function summarizeEntries(entries) {
|
|
110
110
|
const prompt = buildCompactionPrompt(entries);
|
|
111
|
-
// ── Path 1: Local LLM (prism-coder:
|
|
111
|
+
// ── Path 1: Local LLM (prism-coder:9b) ───────────────────────────
|
|
112
112
|
if (PRISM_LOCAL_LLM_ENABLED) {
|
|
113
113
|
debugLog(`[compact_ledger] Attempting local LLM summarization (${entries.length} entries)`);
|
|
114
114
|
const localResponse = await callLocalLlm(prompt);
|
|
@@ -123,7 +123,7 @@ async function summarizeEntries(entries) {
|
|
|
123
123
|
if (PRISM_STRICT_LOCAL_MODE) {
|
|
124
124
|
throw new Error("[HIPAA] Local LLM failed and PRISM_STRICT_LOCAL_MODE=true. " +
|
|
125
125
|
"Cloud fallback is blocked to prevent unauthorized PHI disclosure. " +
|
|
126
|
-
"Ensure Ollama is running and prism-coder:
|
|
126
|
+
"Ensure Ollama is running and prism-coder:9b is available.");
|
|
127
127
|
}
|
|
128
128
|
debugLog(`[compact_ledger] Local LLM returned null — falling back to cloud LLM`);
|
|
129
129
|
}
|
package/dist/tools/ledgerHandlers.js
CHANGED
|
@@ -89,6 +89,7 @@ const MEMORY_BOUNDARY_SUFFIX = '\n</prism_memory>';
|
|
|
89
89
|
* After saving, generates an embedding vector for the entry via fire-and-forget.
|
|
90
90
|
*/
|
|
91
91
|
import { computeEffectiveImportance, recordMemoryAccess } from "../utils/cognitiveMemory.js";
|
|
92
|
+
import { fetchPortalInferenceMetrics, markSessionStart } from "../utils/inferenceMetrics.js";
|
|
92
93
|
export async function sessionSaveLedgerHandler(args) {
|
|
93
94
|
if (!isSessionSaveLedgerArgs(args)) {
|
|
94
95
|
throw new Error("Invalid arguments for session_save_ledger");
|
|
@@ -229,6 +230,8 @@ export async function sessionSaveLedgerHandler(args) {
|
|
|
229
230
|
storage.decayImportance(project, PRISM_USER_ID, 30).catch((err) => {
|
|
230
231
|
debugLog(`[session_save_ledger] Background decay failed (non-fatal): ${err instanceof Error ? err.message : String(err)}`);
|
|
231
232
|
});
|
|
233
|
+
// Fetch inference metrics from portal (thin-client: portal is authority)
|
|
234
|
+
const metricsBlock = await fetchPortalInferenceMetrics();
|
|
232
235
|
return {
|
|
233
236
|
content: [{
|
|
234
237
|
type: "text",
|
|
@@ -238,6 +241,7 @@ export async function sessionSaveLedgerHandler(args) {
|
|
|
238
241
|
(files_changed?.length ? `Files changed: ${files_changed.length}\n` : "") +
|
|
239
242
|
(decisions?.length ? `Decisions: ${decisions.length}\n` : "") +
|
|
240
243
|
`📊 Embedding generation queued for semantic search.` +
|
|
244
|
+
metricsBlock +
|
|
241
245
|
resolverNote,
|
|
242
246
|
}],
|
|
243
247
|
isError: false,
|
|
@@ -548,11 +552,13 @@ export async function sessionSaveHandoffHandler(args, server) {
|
|
|
548
552
|
// Dynamic import itself failed — module not found or similar
|
|
549
553
|
console.error("[FactMerger] Module load failed (non-fatal): " + err));
|
|
550
554
|
}
|
|
555
|
+
const metricsBlock = await fetchPortalInferenceMetrics();
|
|
551
556
|
// Build response text based on whether a CRDT merge occurred
|
|
552
557
|
const responseText = isMerged
|
|
553
558
|
? `🔄 Auto-merged conflict for "${project}" (v${expected_version} → v${newVersion})\n` +
|
|
554
559
|
`Strategy: ${JSON.stringify(mergeStrategy)}\n` +
|
|
555
560
|
(last_summary ? `Summary: ${last_summary}\n` : "") +
|
|
561
|
+
metricsBlock +
|
|
556
562
|
`\n🔑 Remember: pass expected_version: ${newVersion} on your next save ` +
|
|
557
563
|
`to maintain concurrency control.`
|
|
558
564
|
: `✅ Handoff ${data.status || "saved"} for project "${project}" ` +
|
|
@@ -561,6 +567,7 @@ export async function sessionSaveHandoffHandler(args, server) {
|
|
|
561
567
|
(open_todos?.length ? `Open TODOs: ${open_todos.length} items\n` : "") +
|
|
562
568
|
(active_branch ? `Active branch: ${active_branch}\n` : "") +
|
|
563
569
|
`📊 Embedding generation queued for semantic search.\n` +
|
|
570
|
+
metricsBlock +
|
|
564
571
|
`\n🔑 Remember: pass expected_version: ${newVersion} on your next save ` +
|
|
565
572
|
`to maintain concurrency control.`;
|
|
566
573
|
return {
|
|
@@ -575,6 +582,8 @@ export async function sessionLoadContextHandler(args) {
|
|
|
575
582
|
if (!isSessionLoadContextArgs(args)) {
|
|
576
583
|
throw new Error("Invalid arguments for session_load_context");
|
|
577
584
|
}
|
|
585
|
+
// Mark session boundary — portal metrics fetched with since=this timestamp
|
|
586
|
+
markSessionStart();
|
|
578
587
|
const { project, level = "standard", role } = args;
|
|
579
588
|
const maxTokens = args.max_tokens
|
|
580
589
|
|| parseInt(await getSetting("max_tokens", "0"), 10) || undefined; // v4.0: arg > dashboard setting > none
|
package/dist/tools/prismInferHandler.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* prism_infer — local-first inference tool
|
|
3
3
|
* ─────────────────────────────────────────────────────────────
|
|
4
4
|
* Save the caller's cloud tokens by routing to a local prism-coder
|
|
5
|
-
* model via Ollama. Tiers (27B/9B/
|
|
5
|
+
* model via Ollama. Tiers (27B/9B/4B/2B) auto-selected by free
|
|
6
6
|
* RAM, then capped by `model_ceiling` and the set of tags that are
|
|
7
7
|
* actually pulled into Ollama.
|
|
8
8
|
*
|
|
@@ -28,11 +28,12 @@ import { getEntitlements, clampCeiling } from "../utils/entitlements.js";
|
|
|
28
28
|
import { ddLog } from "../utils/ddLogger.js";
|
|
29
29
|
import { stripThink } from "../utils/thinkStrip.js";
|
|
30
30
|
import { passesQualityGate } from "../utils/qualityGate.js";
|
|
31
|
+
import { checkInputSafety, checkOutputSafety } from "../utils/safetyGate.js";
|
|
31
32
|
// ─── Tool Definition ────────────────────────────────────────────
|
|
32
33
|
export const PRISM_INFER_TOOL = {
|
|
33
34
|
name: "prism_infer",
|
|
34
35
|
description: "Run an inference on a local prism-coder model (Ollama) to save cloud tokens. " +
|
|
35
|
-
"Picks the largest viable tier — 27B / 9B /
|
|
36
|
+
"Picks the largest viable tier — 27B / 9B / 4B / 2B — based on free RAM at call time, " +
|
|
36
37
|
"clamped by `model_ceiling` and what is actually pulled in Ollama. " +
|
|
37
38
|
"Falls through to the synalux portal cloud cascade (9B → 27B → Claude Opus 4.7) " +
|
|
38
39
|
"only when local is unviable AND `cloud_fallback=true`. " +
|
|
@@ -71,7 +72,7 @@ export const PRISM_INFER_TOOL = {
|
|
|
71
72
|
},
|
|
72
73
|
timeout_ms: {
|
|
73
74
|
type: "number",
|
|
74
|
-
description: "Override per-call timeout. Default scales with model size: 27B=120s, 9B=60s, 4B=20s,
|
|
75
|
+
description: "Override per-call timeout. Default scales with model size: 27B=120s, 9B=60s, 4B=20s, 2B=15s.",
|
|
75
76
|
},
|
|
76
77
|
evidence: {
|
|
77
78
|
type: "array",
|
|
@@ -242,7 +243,7 @@ async function callOllamaGenerate(url, model, prompt, system, maxTokens, tempera
|
|
|
242
243
|
const text = (data.message?.content ?? "").trim();
|
|
243
244
|
if (!text)
|
|
244
245
|
return { ok: false, reason: "empty_response" };
|
|
245
|
-
return { ok: true, text, doneReason: data.done_reason };
|
|
246
|
+
return { ok: true, text, doneReason: data.done_reason, promptTokens: data.prompt_eval_count, completionTokens: data.eval_count };
|
|
246
247
|
}
|
|
247
248
|
catch (err) {
|
|
248
249
|
const name = err instanceof Error ? err.name : "Unknown";
|
|
@@ -300,6 +301,19 @@ async function callSynaluxInference(prompt, maxTokens, timeoutMs) {
|
|
|
300
301
|
export async function runInfer(args, deps) {
|
|
301
302
|
const t0 = Date.now();
|
|
302
303
|
const temperature = args.temperature ?? 0;
|
|
304
|
+
// ── L1 Safety — deterministic input interception ────────────
|
|
305
|
+
const safetyIntercept = checkInputSafety(args.prompt);
|
|
306
|
+
if (safetyIntercept) {
|
|
307
|
+
return {
|
|
308
|
+
output: safetyIntercept,
|
|
309
|
+
backend: "safety_gate",
|
|
310
|
+
model_picked: null,
|
|
311
|
+
ram_free_mb: Math.round(deps.freemem() / (1024 * 1024)),
|
|
312
|
+
latency_ms: Date.now() - t0,
|
|
313
|
+
used_cloud: false,
|
|
314
|
+
attempts: [{ tier: "l1_safety", reason: "crisis_or_medical_intercept" }],
|
|
315
|
+
};
|
|
316
|
+
}
|
|
303
317
|
// ── Entitlement enforcement ──────────────────────────────────
|
|
304
318
|
// Fetch user's plan limits (cached 1hr). Free users without auth
|
|
305
319
|
// get 4b ceiling, 50 calls/day, 512 max tokens.
|
|
@@ -392,7 +406,7 @@ export async function runInfer(args, deps) {
|
|
|
392
406
|
debugLog(`[prism_infer] quality gate FAIL (${gate.reason}) — escalating to cloud`);
|
|
393
407
|
attempts.push({ tier: tier.tag, reason: `quality_gate:${gate.reason}` });
|
|
394
408
|
if (gate.reason === "hard_truncation" || gate.reason === "loop_detected") {
|
|
395
|
-
localDraft = { output, tier: tier.tag };
|
|
409
|
+
localDraft = { output, tier: tier.tag, promptTokens: result.promptTokens, completionTokens: result.completionTokens };
|
|
396
410
|
}
|
|
397
411
|
break;
|
|
398
412
|
}
|
|
@@ -408,6 +422,8 @@ export async function runInfer(args, deps) {
|
|
|
408
422
|
used_cloud: false,
|
|
409
423
|
attempts,
|
|
410
424
|
plan: ent.plan,
|
|
425
|
+
prompt_tokens: result.promptTokens,
|
|
426
|
+
completion_tokens: result.completionTokens,
|
|
411
427
|
});
|
|
412
428
|
}
|
|
413
429
|
attempts.push({ tier: tier.tag, reason: result.reason });
|
|
@@ -431,6 +447,8 @@ export async function runInfer(args, deps) {
|
|
|
431
447
|
used_cloud: true,
|
|
432
448
|
attempts,
|
|
433
449
|
plan: ent.plan,
|
|
450
|
+
prompt_tokens: Math.ceil(args.prompt.length / 4),
|
|
451
|
+
completion_tokens: Math.ceil(cloud.output.length / 4),
|
|
434
452
|
});
|
|
435
453
|
}
|
|
436
454
|
attempts.push({ tier: "synalux", reason: cloud.reason ?? "unknown" });
|
|
@@ -449,6 +467,8 @@ export async function runInfer(args, deps) {
|
|
|
449
467
|
used_cloud: false,
|
|
450
468
|
attempts,
|
|
451
469
|
plan: ent.plan,
|
|
470
|
+
prompt_tokens: localDraft.promptTokens,
|
|
471
|
+
completion_tokens: localDraft.completionTokens,
|
|
452
472
|
quality_gate_failed: true,
|
|
453
473
|
});
|
|
454
474
|
}
|
|
@@ -464,9 +484,11 @@ export async function runInfer(args, deps) {
|
|
|
464
484
|
* field so callers can route refusals separately from successes.
|
|
465
485
|
*/
|
|
466
486
|
async function applyVerification(draft, args, deps, partial) {
|
|
487
|
+
// L1 output safety — intercept dangerous model-generated content
|
|
488
|
+
const safeDraft = checkOutputSafety(draft);
|
|
467
489
|
const shouldVerify = args.verify ?? (args.evidence !== undefined && args.evidence.length > 0);
|
|
468
490
|
if (!shouldVerify || !deps.callVerifier) {
|
|
469
|
-
return { ...partial, output: draft };
|
|
491
|
+
return { ...partial, output: safeDraft };
|
|
470
492
|
}
|
|
471
493
|
const verifier = deps.callVerifier;
|
|
472
494
|
const outcome = await verifier({
|
|
@@ -478,7 +500,7 @@ async function applyVerification(draft, args, deps, partial) {
|
|
|
478
500
|
});
|
|
479
501
|
return {
|
|
480
502
|
...partial,
|
|
481
|
-
output: outcome.finalText,
|
|
503
|
+
output: checkOutputSafety(outcome.finalText),
|
|
482
504
|
verification: {
|
|
483
505
|
action: outcome.action,
|
|
484
506
|
verifierChain: outcome.verifierChain,
|
|
@@ -503,12 +525,28 @@ export async function prismInferHandler(args) {
|
|
|
503
525
|
ollamaUrl: PRISM_LOCAL_LLM_URL,
|
|
504
526
|
});
|
|
505
527
|
debugLog(`[prism_infer] backend=${result.backend} model=${result.model_picked} latency=${result.latency_ms}ms free=${result.ram_free_mb}MB`);
|
|
528
|
+
// Forward per-call metrics to portal (thin-client pattern).
|
|
529
|
+
// safety_gate excluded — logging crisis filter triggers is a HIPAA concern.
|
|
530
|
+
if (result.backend !== "safety_gate") {
|
|
531
|
+
ddLog("info", "prism_infer.usage", {
|
|
532
|
+
backend: result.backend,
|
|
533
|
+
model: result.model_picked ?? result.backend,
|
|
534
|
+
used_cloud: result.used_cloud,
|
|
535
|
+
prompt_tokens: result.prompt_tokens ?? 0,
|
|
536
|
+
completion_tokens: result.completion_tokens ?? 0,
|
|
537
|
+
latency_ms: result.latency_ms,
|
|
538
|
+
});
|
|
539
|
+
}
|
|
540
|
+
const tokenStr = result.prompt_tokens != null || result.completion_tokens != null
|
|
541
|
+
? ` tokens=${result.prompt_tokens ?? "?"}in/${result.completion_tokens ?? "?"}out`
|
|
542
|
+
: "";
|
|
506
543
|
const header = `[prism_infer] backend=${result.backend}` +
|
|
507
544
|
` model=${result.model_picked ?? "n/a"}` +
|
|
508
545
|
` plan=${result.plan ?? "unknown"}` +
|
|
509
546
|
` free_ram=${result.ram_free_mb}MB` +
|
|
510
547
|
` latency=${result.latency_ms}ms` +
|
|
511
548
|
` used_cloud=${result.used_cloud}` +
|
|
549
|
+
tokenStr +
|
|
512
550
|
(result.quality_gate_failed ? ` quality_gate_failed=true` : "") +
|
|
513
551
|
(result.verification ? ` verify=${result.verification.action}` : "") +
|
|
514
552
|
(result.attempts.length ? ` attempts=${JSON.stringify(result.attempts)}` : "");
|
package/dist/tools/taskRouterHandler.js
CHANGED
|
@@ -317,7 +317,7 @@ export async function sessionTaskRouteHandler(args) {
|
|
|
317
317
|
delete result._rawComposite;
|
|
318
318
|
// ── v9.x: Local LLM second-opinion for low-confidence cases ──────────────
|
|
319
319
|
// When confidence is below the threshold AND local LLM is enabled,
|
|
320
|
-
// ask prism-coder:
|
|
320
|
+
// ask prism-coder:9b to break the tie. This is purely additive — if the
|
|
321
321
|
// LLM call fails or times out, the original heuristic result is returned.
|
|
322
322
|
if (PRISM_LOCAL_LLM_ENABLED &&
|
|
323
323
|
result.confidence < PRISM_TASK_ROUTER_CONFIDENCE_THRESHOLD) {
|
|
@@ -350,7 +350,7 @@ export async function sessionTaskRouteHandler(args) {
|
|
|
350
350
|
}
|
|
351
351
|
// ─── Local LLM Route Classifier ──────────────────────────────
|
|
352
352
|
/**
|
|
353
|
-
* Ask prism-coder:
|
|
353
|
+
* Ask prism-coder:9b to classify a task description as "claw" or "host".
|
|
354
354
|
* Returns the string or null if the model is unavailable / response unparseable.
|
|
355
355
|
* Called only when heuristic confidence is below the threshold.
|
|
356
356
|
*/
|
package/dist/utils/ddLogger.js
CHANGED
|
@@ -8,9 +8,17 @@
|
|
|
8
8
|
* Env: PRISM_SYNALUX_BASE_URL (default https://synalux.ai)
|
|
9
9
|
*/
|
|
10
10
|
const SYNALUX_BASE = process.env.PRISM_SYNALUX_BASE_URL || "https://synalux.ai";
|
|
11
|
+
const TELEMETRY_WRITE_TOKEN = process.env.TELEMETRY_WRITE_TOKEN || "";
|
|
11
12
|
const DD_API_KEY = process.env.DD_API_KEY || "";
|
|
12
13
|
const DD_SITE = process.env.DD_SITE || "datadoghq.com";
|
|
13
14
|
const SERVICE = "prism-mcp";
|
|
15
|
+
const CONTEXT_ALLOWLIST = new Set([
|
|
16
|
+
"backend", "model", "used_cloud", "prompt_tokens", "completion_tokens",
|
|
17
|
+
"latency_ms", "plan", "requested_ceiling", "effective_ceiling",
|
|
18
|
+
"ceiling_clamped", "requested_tokens", "effective_tokens", "tokens_clamped",
|
|
19
|
+
"cloud_requested", "cloud_allowed", "cloud_blocked",
|
|
20
|
+
"verify_requested", "verify_allowed", "verify_blocked",
|
|
21
|
+
]);
|
|
14
22
|
const queue = [];
|
|
15
23
|
let flushTimer = null;
|
|
16
24
|
const FLUSH_INTERVAL_MS = 5_000;
|
|
@@ -26,31 +34,61 @@ async function flush() {
|
|
|
26
34
|
return;
|
|
27
35
|
const batch = queue.splice(0, MAX_BATCH);
|
|
28
36
|
// Primary: Synalux portal → Supabase (always available)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
37
|
+
if (TELEMETRY_WRITE_TOKEN) {
|
|
38
|
+
try {
|
|
39
|
+
await fetch(`${SYNALUX_BASE}/api/v1/telemetry`, {
|
|
40
|
+
method: "POST",
|
|
41
|
+
headers: {
|
|
42
|
+
"Content-Type": "application/json",
|
|
43
|
+
"Authorization": `Bearer ${TELEMETRY_WRITE_TOKEN}`,
|
|
44
|
+
"X-Prism-Client": "prism-mcp",
|
|
45
|
+
},
|
|
46
|
+
body: JSON.stringify(batch.map(e => {
|
|
47
|
+
const ctx = {};
|
|
48
|
+
for (const [k, v] of Object.entries(e)) {
|
|
49
|
+
if (CONTEXT_ALLOWLIST.has(k))
|
|
50
|
+
ctx[k] = v;
|
|
51
|
+
}
|
|
52
|
+
return {
|
|
53
|
+
service: SERVICE,
|
|
54
|
+
event_type: e.status === "error" ? "error" : "action",
|
|
55
|
+
message: e.message,
|
|
56
|
+
context: ctx,
|
|
57
|
+
user_id: e.user_id,
|
|
58
|
+
user_plan: e.user_plan,
|
|
59
|
+
};
|
|
60
|
+
})),
|
|
61
|
+
signal: AbortSignal.timeout(5_000),
|
|
62
|
+
});
|
|
63
|
+
}
|
|
64
|
+
catch {
|
|
65
|
+
// Silent — don't crash the MCP server
|
|
66
|
+
}
|
|
46
67
|
}
|
|
47
68
|
// Secondary: Datadog Logs (if API key is set AND Logs product is enabled)
|
|
69
|
+
// Same allowlist applied — both sinks get identical filtered context.
|
|
48
70
|
if (DD_API_KEY) {
|
|
49
71
|
try {
|
|
50
72
|
await fetch(`https://http-intake.logs.${DD_SITE}/api/v2/logs`, {
|
|
51
73
|
method: "POST",
|
|
52
74
|
headers: { "Content-Type": "application/json", "DD-API-KEY": DD_API_KEY },
|
|
53
|
-
body: JSON.stringify(batch
|
|
75
|
+
body: JSON.stringify(batch.map(e => {
|
|
76
|
+
const ctx = {};
|
|
77
|
+
for (const [k, v] of Object.entries(e)) {
|
|
78
|
+
if (CONTEXT_ALLOWLIST.has(k))
|
|
79
|
+
ctx[k] = v;
|
|
80
|
+
}
|
|
81
|
+
return {
|
|
82
|
+
ddsource: "nodejs",
|
|
83
|
+
ddtags: e.ddtags,
|
|
84
|
+
hostname: e.hostname,
|
|
85
|
+
service: SERVICE,
|
|
86
|
+
status: e.status,
|
|
87
|
+
message: e.message,
|
|
88
|
+
...ctx,
|
|
89
|
+
timestamp: e.timestamp,
|
|
90
|
+
};
|
|
91
|
+
})),
|
|
54
92
|
signal: AbortSignal.timeout(5_000),
|
|
55
93
|
});
|
|
56
94
|
}
|
|
@@ -68,7 +106,7 @@ export function ddLog(level, message, context) {
|
|
|
68
106
|
hostname: process.env.HOSTNAME || "prism-mcp",
|
|
69
107
|
service: SERVICE,
|
|
70
108
|
status: level,
|
|
71
|
-
message,
|
|
109
|
+
message: message.slice(0, 200),
|
|
72
110
|
...context,
|
|
73
111
|
timestamp: new Date().toISOString(),
|
|
74
112
|
});
|
package/dist/utils/inferenceMetrics.js
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Inference metrics — thin-client fetch from Synalux portal.
|
|
3
|
+
*
|
|
4
|
+
* Prism forwards per-call metrics via ddLog("prism_infer.usage").
|
|
5
|
+
* The portal aggregates them in app_telemetry. This module fetches
|
|
6
|
+
* the aggregated summary on demand (session_save_ledger/handoff).
|
|
7
|
+
*/
|
|
8
|
+
import { getSynaluxJwt } from "./synaluxJwt.js";
|
|
9
|
+
import { PRISM_SYNALUX_BASE_URL } from "../config.js";
|
|
10
|
+
import { debugLog } from "./logger.js";
|
|
11
|
+
let sessionStartedAt = new Date().toISOString();
|
|
12
|
+
export function markSessionStart() {
|
|
13
|
+
sessionStartedAt = new Date().toISOString();
|
|
14
|
+
}
|
|
15
|
+
async function fetchMetrics() {
|
|
16
|
+
if (!PRISM_SYNALUX_BASE_URL)
|
|
17
|
+
return { metrics: null, error: "no_portal_url" };
|
|
18
|
+
const jwt = await getSynaluxJwt();
|
|
19
|
+
if (!jwt)
|
|
20
|
+
return { metrics: null, error: "jwt_unavailable" };
|
|
21
|
+
try {
|
|
22
|
+
const url = `${PRISM_SYNALUX_BASE_URL}/api/v1/telemetry/inference-metrics?since=${encodeURIComponent(sessionStartedAt)}`;
|
|
23
|
+
const res = await fetch(url, {
|
|
24
|
+
headers: { "Authorization": `Bearer ${jwt}` },
|
|
25
|
+
signal: AbortSignal.timeout(5_000),
|
|
26
|
+
});
|
|
27
|
+
if (!res.ok) {
|
|
28
|
+
debugLog(`[inference-metrics] portal returned ${res.status}`);
|
|
29
|
+
return { metrics: null, error: `portal_${res.status}` };
|
|
30
|
+
}
|
|
31
|
+
return { metrics: (await res.json()) };
|
|
32
|
+
}
|
|
33
|
+
catch (err) {
|
|
34
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
35
|
+
debugLog(`[inference-metrics] fetch failed: ${msg}`);
|
|
36
|
+
return { metrics: null, error: msg };
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
export async function fetchPortalInferenceMetrics() {
|
|
40
|
+
const { metrics, error } = await fetchMetrics();
|
|
41
|
+
if (!metrics) {
|
|
42
|
+
if (error)
|
|
43
|
+
debugLog(`[inference-metrics] unavailable: ${error}`);
|
|
44
|
+
return "";
|
|
45
|
+
}
|
|
46
|
+
if (metrics.total_calls === 0)
|
|
47
|
+
return "";
|
|
48
|
+
const lines = [
|
|
49
|
+
`\n📊 Inference Metrics (this session):`,
|
|
50
|
+
` Total calls: ${metrics.total_calls} — Local: ${metrics.local_calls} (${metrics.local_pct}%) | Cloud: ${metrics.cloud_calls} (${metrics.cloud_pct}%)`,
|
|
51
|
+
` Tokens: ${metrics.total_prompt_tokens.toLocaleString()} in + ${metrics.total_completion_tokens.toLocaleString()} out = ${metrics.total_tokens.toLocaleString()} total`,
|
|
52
|
+
` Avg latency: ${metrics.avg_latency_ms}ms`,
|
|
53
|
+
];
|
|
54
|
+
const models = Object.entries(metrics.by_model).sort((a, b) => b[1].calls - a[1].calls);
|
|
55
|
+
if (models.length > 1) {
|
|
56
|
+
lines.push(` By model:`);
|
|
57
|
+
for (const [name, stats] of models) {
|
|
58
|
+
const tokens = stats.prompt_tokens + stats.completion_tokens;
|
|
59
|
+
const avgMs = stats.calls > 0 ? Math.round(stats.total_latency_ms / stats.calls) : 0;
|
|
60
|
+
lines.push(` ${name}: ${stats.calls} calls, ${tokens.toLocaleString()} tokens, avg ${avgMs}ms`);
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
return lines.join("\n");
|
|
64
|
+
}
|
package/dist/utils/localLlm.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Local LLM Client — Ollama/prism-coder:
|
|
2
|
+
* Local LLM Client — Ollama/prism-coder:9b Integration (v1.0.0)
|
|
3
3
|
* ──────────────────────────────────────────────────────────────────
|
|
4
4
|
* Thin HTTP wrapper around the Ollama /api/chat endpoint.
|
|
5
5
|
*
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* - Silent fail: returning null instead of throwing ensures callers
|
|
10
10
|
* can fall back to Gemini without crashing the MCP server.
|
|
11
11
|
* - Fire-and-forget safe: wrapped in try/catch, never propagates.
|
|
12
|
-
* - Default model: prism-coder:
|
|
12
|
+
* - Default model: prism-coder:9b — fine-tuned on Prism tool schemas,
|
|
13
13
|
* 8192-token context, Q8_0 quantization, ~8.1GB RAM footprint.
|
|
14
14
|
*
|
|
15
15
|
* FEATURE FLAG:
|
package/dist/utils/nerExtractor.js
CHANGED
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
*
|
|
17
17
|
* Architecture:
|
|
18
18
|
* 1. Rule-based extraction (fast, zero-cost, always available)
|
|
19
|
-
* 2. Local LLM extraction (optional, higher quality, uses prism-coder:
|
|
19
|
+
* 2. Local LLM extraction (optional, higher quality, uses prism-coder:9b)
|
|
20
20
|
* 3. Merged + deduplicated results
|
|
21
21
|
*/
|
|
22
22
|
import { debugLog } from "./logger.js";
|
|
@@ -27,11 +27,20 @@ export function passesQualityGate(stripped, thinkOnly, finishReason) {
|
|
|
27
27
|
if (finishReason === "length") {
|
|
28
28
|
return { pass: false, reason: "hard_truncation" };
|
|
29
29
|
}
|
|
30
|
-
// Signal 4: Exact-loop
|
|
31
|
-
|
|
32
|
-
|
|
30
|
+
// Signal 4: Exact-loop detection (two passes).
|
|
31
|
+
//
|
|
32
|
+
// Pass A (prose-only, threshold ≥3): strip structural markdown that
|
|
33
|
+
// naturally repeats (code blocks, tables, headings, bold labels).
|
|
34
|
+
// Catches loops in explanatory text.
|
|
35
|
+
const proseOnly = stripped
|
|
36
|
+
.replace(/```[\s\S]*?```/g, "")
|
|
37
|
+
.replace(/^\|.*\|$/gm, "")
|
|
38
|
+
.replace(/^#{1,6}\s+.*$/gm, "")
|
|
39
|
+
.replace(/^[\s*-]*\*{1,2}[^*]+\*{1,2}:?\s*$/gm, "");
|
|
40
|
+
const proseSentences = proseOnly.split(/[.!?\n]+/).map(s => s.trim()).filter(s => s.length > 10);
|
|
41
|
+
if (proseSentences.length >= 6) {
|
|
33
42
|
const counts = new Map();
|
|
34
|
-
for (const s of
|
|
43
|
+
for (const s of proseSentences) {
|
|
35
44
|
const key = s.toLowerCase();
|
|
36
45
|
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
37
46
|
if ((counts.get(key) ?? 0) >= 3) {
|
|
@@ -39,5 +48,20 @@ export function passesQualityGate(stripped, thinkOnly, finishReason) {
|
|
|
39
48
|
}
|
|
40
49
|
}
|
|
41
50
|
}
|
|
51
|
+
// Pass B (full text, threshold ≥5): catches egregious loops hidden
|
|
52
|
+
// inside fake code blocks or other structural elements. Higher
|
|
53
|
+
// threshold avoids false positives on legitimate code patterns
|
|
54
|
+
// (e.g. `node = self.root` × 4 is fine, × 5 is suspicious).
|
|
55
|
+
const allSentences = stripped.split(/[.!?\n]+/).map(s => s.trim()).filter(s => s.length > 10);
|
|
56
|
+
if (allSentences.length >= 10) {
|
|
57
|
+
const counts = new Map();
|
|
58
|
+
for (const s of allSentences) {
|
|
59
|
+
const key = s.toLowerCase();
|
|
60
|
+
counts.set(key, (counts.get(key) ?? 0) + 1);
|
|
61
|
+
if ((counts.get(key) ?? 0) >= 5) {
|
|
62
|
+
return { pass: false, reason: "loop_detected" };
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
42
66
|
return { pass: true };
|
|
43
67
|
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
 * L1 Safety Gate — deterministic crisis/medical interception for prism_infer.
 *
 * Runs BEFORE model output reaches the caller. High precision, low recall:
 * only intercepts patterns that unambiguously indicate self-harm intent
 * (input) or dangerous instructions (output). Generic clinical/pharmacological
 * terms ("dose of", "milligrams", "lethal") are NOT intercepted — they appear
 * in routine BCBA/medical notes.
 *
 * This is a backstop, not a comprehensive filter. The model's own safety
 * training is the primary layer.
 *
 * All patterns are tested against the output of normalize(), i.e. text that
 * is already NFC-composed, lower-cased and whitespace-collapsed.
 */
// ── Input: first-person crisis expressions ───────────────────────────────────
const CRISIS_INPUT_RE = [
    // English — every pattern is anchored with \b so it cannot fire on a
    // substring of an unrelated word ("spend my life", "skill self-care",
    // "want to diet", "how to ... gradient/ingredients").
    /\bhurt(?:ing)?\s+(?:my)?self\b/i,
    /\bkill\s+(?:my)?self\b/i,
    /\bend\s+my\s+life\b/i,
    /\bwant\s+to\s+die\b/i,
    /\bwant\s+to\s+(?:commit\s+)?suicide\b/i,
    /\bcut(?:ting)?\s+(?:my)?self\b/i,
    /\b(?:hang|hanging)\s+(?:my)?self\b/i,
    /\bjump(?:ing)?\s+off\b/i,
    // Inflected forms of "overdose" are listed explicitly because the trailing
    // \b would otherwise drop "overdosed"/"overdosing".
    /\bhow\s+(?:many|much|to)\b.*\b(?:pills|overdos(?:e[sd]?|ing)|die)\b/i,
    // Spanish — the lookahead exempts hyperbole idioms only. Hunger/thirst/cold
    // are NOT exempted: they may be literal for a neglected child.
    /quiero\s+morir(?!\s+de\s+(?:risa|la\s+risa|vergüenza|ganas|envidia|aburrimiento)\b)/i,
    /(?:voy\s+a\s+)?matarme(?!\s+(?:estudiando|trabajando|riendo|de\s+(?:risa|la\s+risa))\b)/i,
    /hacerme\s+da[ñn]o/i,
    /(?:quiero\s+)?suicidarme/i,
    // French — hyperbole idioms exempted (NOT faim/soif/chaud/froid).
    // Both the ASCII and the typographic apostrophe are accepted in "d'ennui".
    /je\s+veux\s+mourir(?!\s+d[e'’]\s*(?:rire|honte|envie|ennui)\b)/i,
    /(?:vais\s+)?me\s+tuer(?!\s+[àa]\s+(?:rire|travailler|étudier)\b)/i,
    /me\s+suicider/i,
    // Portuguese — hyperbole idioms exempted (NOT fome/sede/calor/frio/sono).
    /quero\s+morrer(?!\s+de\s+(?:rir|vergonha|inveja|tédio)\b)/i,
    /(?:vou\s+)?me\s+matar(?!\s+(?:estudando|trabalhando|rindo|de\s+(?:rir|tanto))\b)/i,
    // Russian — hyperbole idioms exempted (смеха/стыда/зависти/скуки).
    // NOT exempted: голода/жары/холода (may be literal).
    // \b is ASCII-only in JavaScript, so the end of the exempted noun is
    // checked with a Unicode-aware "no letter/digit follows" lookahead. This
    // also keeps the exemption working when punctuation follows the noun
    // ("хочу умереть от смеха!").
    /хочу\s+умереть(?!\s+от\s+(?:смеха|стыда|зависти|скуки)(?![\p{L}\p{N}_]))/iu,
    /убить\s+себя/i,
    /покончить\s+с\s+собой/i,
    // Arabic — normalize() already folds alef variants to bare alef; the
    // character classes are kept so the patterns stay correct on their own.
    /[اأإآ]ر[يى]د\s+[اأإآ]ن\s+[اأإآ]موت/,
    /قتل\s+نفس[يى]/,
    /[اأإآ]نه[يى]\s+ح[يى][اأإآ]ت[يى]/,
    // German — hyperbole idioms exempted (Lachen/Scham/Neid/Langeweile).
    // NOT exempted: Hunger/Kälte (may be literal).
    /(?:ich\s+)?will\s+sterben(?!\s+vor\s+(?:lachen|scham|neid|langeweile)\b)/i,
    /mich\s+umbringen/i,
    // Ukrainian — hyperbole idioms exempted (сміху/сорому/заздрості/нудьги).
    // NOT exempted: голоду/холоду (may be literal).
    /хочу\s+померти(?!\s+від\s+(?:сміху|сорому|заздрості|нудьги)(?![\p{L}\p{N}_]))/iu,
    /вбити\s+себе/i,
];
// Input: first-person requests for a specific dose to inject/take/give.
const MEDICAL_INPUT_RE = [
    /(?:how\s+much|what\s+dose)\s+.*(?:should\s+I|do\s+I|can\s+I)\s+(?:inject|take|give)/i,
    /(?:dose|dosage)\s+(?:should\s+I|do\s+I|can\s+I)\s+(?:inject|take|give)/i,
];
// ── Output: model providing dangerous instructions ───────────────────────────
const CRISIS_OUTPUT_RE = [
    /\bhow many pills\s+to\b/i,
    /\boverdose amount\b/i,
    /\bnoose\b/i,
    /\bhanging yourself\b/i,
    /\bsuicide method\b/i,
    /\bmethod of suicide\b/i,
    /\bself[- ]?harm method\b/i,
    /\bhow to (?:kill|hang|poison|overdose)\s+(?:your|my|him|her|them|one)self\b/i,
    /\bhow to overdose\b/i,
];
const MEDICAL_OUTPUT_RE = [
    /\binject this (?:amount|dose|much)\b/i,
    /\btake this (?:amount|many|dose)\b/i,
    /\byou should (?:inject|take)\s+\d+\s+units\b/i,
];
// ── Responses ────────────────────────────────────────────────────────────────
export const CRISIS_RESPONSE = "I'm concerned about your safety. Please call or text 988 (Suicide & Crisis Lifeline) right now — available 24/7. If in immediate danger, call 911. International: findahelpline.com. You are not alone.";
export const MEDICAL_RESPONSE = "I cannot provide specific medical dosing advice. Please contact your doctor or pharmacist. For emergencies, call 911.";
// ── API ──────────────────────────────────────────────────────────────────────
/**
 * Canonicalise text before pattern matching.
 *
 * @param {string} text Raw input or model output.
 * @returns {string} NFC-composed, lower-cased text with format characters,
 *   stray combining marks and Arabic tatweel removed, alef variants folded to
 *   bare alef, and whitespace runs collapsed to one space. Non-string input
 *   yields "" so the gate never throws.
 */
function normalize(text) {
    if (typeof text !== "string")
        return "";
    return text
        // Compose first: in NFD input "й" is "и" + U+0306 and "ü" is "u" + U+0308,
        // so stripping marks without composing would corrupt the very letters
        // the patterns and exemption lists rely on.
        .normalize("NFC")
        .toLowerCase()
        .replace(/\p{Cf}/gu, "") // zero-width / bidi format characters
        .replace(/\p{Mn}/gu, "") // Arabic harakat + any remaining combining marks
        .replace(/ـ/g, "") // Arabic tatweel (kashida)
        .replace(/[أإآ]/g, "ا")
        .replace(/\s+/g, " ");
}
/**
 * Check caller-supplied text for crisis or medical-dosing requests.
 *
 * @param {string} text Text sent to prism_infer.
 * @returns {string|null} CRISIS_RESPONSE or MEDICAL_RESPONSE when a pattern
 *   matches (crisis takes precedence), otherwise null.
 */
export function checkInputSafety(text) {
    const t = normalize(text);
    if (CRISIS_INPUT_RE.some(p => p.test(t)))
        return CRISIS_RESPONSE;
    if (MEDICAL_INPUT_RE.some(p => p.test(t)))
        return MEDICAL_RESPONSE;
    return null;
}
/**
 * Check model output for dangerous instructions.
 *
 * @param {string} response Model output.
 * @returns {string} CRISIS_RESPONSE or MEDICAL_RESPONSE when a pattern
 *   matches (crisis takes precedence), otherwise the original, un-normalized
 *   response.
 */
export function checkOutputSafety(response) {
    const r = normalize(response);
    if (CRISIS_OUTPUT_RE.some(re => re.test(r)))
        return CRISIS_RESPONSE;
    if (MEDICAL_OUTPUT_RE.some(re => re.test(r)))
        return MEDICAL_RESPONSE;
    return response;
}
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "19.
|
|
3
|
+
"version": "19.2.0",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-coder",
|
|
5
|
-
"description": "Prism Coder
|
|
5
|
+
"description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 114 Agent Skills, PHI Guard, Tier Enforcement, Prompt-Based Skill Routing, Zero-Search HDC/HRR retrieval, HRR Semantic Drift Detection across BCBA/Coding/AAC domains, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder 1.7B–32B open-weights LLM fleet.",
|
|
6
6
|
"module": "index.ts",
|
|
7
7
|
"type": "module",
|
|
8
8
|
"main": "dist/server.js",
|