prism-mcp-server 15.4.0 → 15.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +82 -57
- package/dist/cli.js +65 -0
- package/dist/storage/index.js +4 -0
- package/dist/tools/prismInferHandler.js +9 -4
- package/dist/utils/modelPicker.js +39 -3
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
**Persistent memory + tool-calling intelligence for AI agents.** *(formerly Prism MCP)*
|
|
6
6
|
|
|
7
|
-
A Model Context Protocol server that gives Claude, Cursor, and other AI tools a Mind Palace — long-term memory that survives across sessions, with semantic search, cognitive routing, a visual dashboard, and the `prism-coder:1b7` / `prism-coder:14b` / `prism-coder:32b` LLM fleet for offline tool-calling. **[→ prism-mcp.com](https://prism-mcp.com)**
|
|
7
|
+
A Model Context Protocol server that gives Claude, Cursor, and other AI tools a Mind Palace — long-term memory that survives across sessions, with semantic search, cognitive routing, a visual dashboard, and the `prism-coder:1b7` / `prism-coder:8b` / `prism-coder:14b` / `prism-coder:32b` LLM fleet for offline tool-calling. **[→ prism-mcp.com](https://prism-mcp.com)**
|
|
8
8
|
|
|
9
9
|
[](https://www.npmjs.com/package/prism-mcp-server)
|
|
10
10
|
[](https://marketplace.visualstudio.com/items?itemName=synalux-ai.synalux)
|
|
@@ -13,7 +13,7 @@ A Model Context Protocol server that gives Claude, Cursor, and other AI tools a
|
|
|
13
13
|
[](https://smithery.ai/server/@dcostenco/prism-mcp)
|
|
14
14
|
[](LICENSE)
|
|
15
15
|
|
|
16
|
-
> **Renamed in v14.0.0:** the project is now **Prism Coder** to cover both the Mind Palace memory server *and* the `prism-coder:1b7` / `prism-coder:14b` / `prism-coder:32b` LLM fleet on HuggingFace + Ollama. The npm package stays `prism-mcp-server` so existing install URLs and `mcp.json` entries keep working — the `prism-coder` binary has been the canonical entry point since v12.
|
|
16
|
+
> **Renamed in v14.0.0:** the project is now **Prism Coder** to cover both the Mind Palace memory server *and* the `prism-coder:1b7` / `prism-coder:8b` / `prism-coder:14b` / `prism-coder:32b` LLM fleet on HuggingFace + Ollama. The npm package stays `prism-mcp-server` so existing install URLs and `mcp.json` entries keep working — the `prism-coder` binary has been the canonical entry point since v12.
|
|
17
17
|
|
|
18
18
|
---
|
|
19
19
|
|
|
@@ -61,19 +61,27 @@ Install in one command — no config, no keys, no vendor agreements:
|
|
|
61
61
|
ollama pull dcostenco/prism-coder:1b7 # 2.2 GB · ~1.6s · any machine
|
|
62
62
|
ollama pull dcostenco/prism-coder:8b # 4.7 GB · ~0.8s · Mac M1+ / iPhone 8GB
|
|
63
63
|
ollama pull dcostenco/prism-coder:14b # 8.4 GB · ~1.1s · Mac M2+ / iPad Pro 16GB
|
|
64
|
-
ollama pull dcostenco/prism-coder:32b #
|
|
64
|
+
ollama pull dcostenco/prism-coder:32b # 16 GB · ~0.8s · Mac M2 Ultra+ (30B-A3B MoE)
|
|
65
65
|
```
|
|
66
|
+
|
|
67
|
+
Prism MCP detects both the namespaced (`dcostenco/prism-coder:14b`) and bare (`prism-coder:14b`) Ollama tag forms automatically — nothing else to configure. If you want the bare tags as aliases for direct `ollama run prism-coder:14b` use, run:
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
prism register-models # aliases */prism-coder:* → prism-coder:* via `ollama cp`
|
|
71
|
+
prism register-models --dry-run # preview what would be aliased
|
|
72
|
+
```
|
|
73
|
+
|
|
66
74
|
### Cascade architecture
|
|
67
75
|
|
|
68
76
|
Two cascades operate independently depending on the deployment context:
|
|
69
77
|
|
|
70
78
|
**Desktop / server cascade** (quality-first, used in Prism MCP + Synalux portal):
|
|
71
79
|
```
|
|
72
|
-
prism-coder:14b ─── correct? ──YES──▶ serve (
|
|
80
|
+
prism-coder:14b ─── correct? ──YES──▶ serve (99% of traffic, ~1.1s)
|
|
73
81
|
│ NO
|
|
74
|
-
prism-coder:32b ─── correct? ──YES──▶ serve (
|
|
82
|
+
prism-coder:32b ─── correct? ──YES──▶ serve (~1% of traffic, ~0.8s)
|
|
75
83
|
│ NO
|
|
76
|
-
Claude Opus 4.7 ──────────────────────▶ serve (
|
|
84
|
+
Claude Opus 4.7 ──────────────────────▶ serve (0% in practice, cloud)
|
|
77
85
|
```
|
|
78
86
|
|
|
79
87
|
**Mobile / offline cascade** (availability-first, used in Prism AAC iOS):
|
|
@@ -82,27 +90,36 @@ prism-coder:14b (~1.1s) — iPad Pro 16GB → prism-coder:8b (~0.8s) — iPhon
|
|
|
82
90
|
→ prism-coder:1.7b (~1.6s) — any device, always fits
|
|
83
91
|
```
|
|
84
92
|
|
|
85
|
-
|
|
93
|
+
**Code generation cascade** (used in Prism Coder IDE + Agent Mode):
|
|
94
|
+
```
|
|
95
|
+
prism-ide:14b ─── quality OK? ──YES──▶ serve (~1.1s, 22/22 TypeScript eval)
|
|
96
|
+
│ NO (complex / multi-file)
|
|
97
|
+
prism-ide:32b ─── quality OK? ──YES──▶ serve (~0.8s MoE, deep reasoning)
|
|
98
|
+
│ NO
|
|
99
|
+
Claude Sonnet 4 ──────────────────────▶ serve (cloud fallback)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The routing cascade validates each response against the 6 known tool names and escalates on empty, truncated, or hallucinated tool calls. The code generation cascade escalates on incomplete or syntactically invalid output.
|
|
86
103
|
|
|
87
|
-
**Routing accuracy** ([102-case Prism eval](tests/benchmarks/prism-routing-100/README.md),
|
|
104
|
+
**Routing accuracy** ([102-case Prism eval](tests/benchmarks/prism-routing-100/README.md), v36/v7 system prompt, 3-seed mean, May 2026):
|
|
88
105
|
|
|
89
106
|
| Model | Accuracy | Cost/req | Latency | Runs on | AAC | Edge cases |
|
|
90
107
|
|---|---|---|---|---|---|---|
|
|
91
108
|
| Claude Sonnet 4 | **99%** | ~$0.01 | 3.2s | Cloud | 100% | 83% |
|
|
92
|
-
| **prism-coder:32b**
|
|
93
|
-
| **prism-coder:8b**
|
|
94
|
-
| **prism-coder:14b**
|
|
95
|
-
| Claude Opus 4.7 | **
|
|
96
|
-
| **prism-coder:1.7b**
|
|
97
|
-
| **14B→32B cascade** | **
|
|
109
|
+
| **prism-coder:32b** v7 | **100.0%** | **$0** | 0.8s | Mac 24GB+ (MoE) | **100%** | **100%** |
|
|
110
|
+
| **prism-coder:8b** v36 | **100.0%** | **$0** | **0.8s** | iPhone/iPad 8GB | **100%** | **100%** |
|
|
111
|
+
| **prism-coder:14b** v36 | **100.0%** | **$0** | **1.1s** | Mac 24GB+ / iPad Pro 16GB | **100%** | **100%** |
|
|
112
|
+
| Claude Opus 4.7 | **98.3%** | ~$0.05 | 3.0s | Cloud | 100% | 83% |
|
|
113
|
+
| **prism-coder:1.7b** v42 | **100.0%** | **$0** | 1.6s | Any device | **100%** | **100%** |
|
|
114
|
+
| **14B→32B cascade** | **100.0%** | **~$0** | ~1.1s¹ | Mac 24GB+ | **100%** | **100%** |
|
|
98
115
|
|
|
99
|
-
¹
|
|
116
|
+
¹ ~99% of requests served by 14B at 1.1s; 32B for the ~1% 14B misses.
|
|
100
117
|
|
|
101
|
-
**Why this matters for a life-critical AAC app**: a child in a hospital without WiFi, a nonverbal adult on an airplane, or a family on a budget gets Claude-grade routing accuracy
|
|
118
|
+
**Why this matters for a life-critical AAC app**: a child in a hospital without WiFi, a nonverbal adult on an airplane, or a family on a budget gets Claude-grade routing accuracy with zero cloud dependency — and the AAC path (expressing pain, asking for help) routes correctly **100% of the time across all tiers and all seeds tested**.
|
|
102
119
|
|
|
103
120
|
**What it does NOT mean**: these scores measure routing precision on a narrow 6-tool taxonomy, not general intelligence. Claude outperforms these models on everything outside this task. The value is **offline reliability at zero cost**, not replacing Claude.
|
|
104
121
|
|
|
105
|
-
> **The prompt engineering breakthrough**: Q4_K_M quantized models confuse semantically similar tool names when routing rules use plain keyword lists. Two structural fixes eliminated all confusion: (1) replacing `-> plain text` with `-> respond directly (no tool)`, and (2) adding category labels (`CONVERSATION RECALL:` / `SAVED KNOWLEDGE:`) as semantic anchors stronger than keyword matching. Combined effect: 14B went from 87% →
|
|
122
|
+
> **The prompt engineering breakthrough**: Q4_K_M quantized models confuse semantically similar tool names when routing rules use plain keyword lists. Two structural fixes eliminated all confusion: (1) replacing `-> plain text` with `-> respond directly (no tool)`, and (2) adding category labels (`CONVERSATION RECALL:` / `SAVED KNOWLEDGE:`) as semantic anchors stronger than keyword matching. Combined effect: 14B went from 87% → 100% on the 102-case Prism eval (v36/v7 system prompt, 3-seed mean).
|
|
106
123
|
|
|
107
124
|
### ⚡ Zero-search retrieval
|
|
108
125
|
Holographic Reduced Representations (HRR) for instant similarity lookups without an index. ~5ms over 100K memories.
|
|
@@ -180,30 +197,30 @@ Prism Coder inference cascades through fine-tuned models first, with Claude as a
|
|
|
180
197
|
|
|
181
198
|
| Model | Ollama tag | Where | Tier | Latency |
|
|
182
199
|
|---|---|---|---|---|
|
|
183
|
-
| **prism-coder:1.7b** | `prism-coder:1b7
|
|
184
|
-
| **prism-coder:
|
|
185
|
-
| **prism-coder:
|
|
200
|
+
| **prism-coder:1.7b** | `prism-coder:1b7` (v42) | On-device (Mac/local) · iOS via llama.cpp | Free | ~1.6s |
|
|
201
|
+
| **prism-coder:8b** | `prism-coder:8b` (v36) | On-device iPhone/iPad 8GB+ · local Mac | Free | ~0.8s |
|
|
202
|
+
| **prism-coder:14b** | `prism-coder:14b` (v36) | On-device Mac 24GB+ · iPad Pro · Cloud A100 | Standard+ | ~1.1s |
|
|
203
|
+
| **prism-coder:32b** | `prism-coder:32b` (v7 MoE) | Cloud (OpenRouter) A100 80GB via Synalux | Pro/Enterprise | ~0.8s |
|
|
186
204
|
|
|
187
|
-
Models use the Synalux SFT corpus (AAC + Prism MCP tool taxonomy + clinical workflows). **Internal quality gate: ≥ 90% on the Prism
|
|
205
|
+
Models use the Synalux SFT corpus (AAC + Prism MCP tool taxonomy + clinical workflows). **Internal quality gate: ≥ 90% on the Prism 102-case eval before production promotion.**
|
|
188
206
|
|
|
189
207
|
> **Training note**: Base Qwen3 models are strong tool-routers out of the box. Heavy fine-tuning regresses tool-vs-plain-text decisions; light-touch polish recipes (small corpus, balanced tool/plain-text split) are the published path. Production adapter selection and retrain methodology are managed in the Synalux portal.
|
|
190
208
|
|
|
191
|
-
**Per-category breakdown — [Prism 102-case eval](tests/benchmarks/prism-routing-100/README.md) (3-seed mean,
|
|
209
|
+
**Per-category breakdown — [Prism 102-case eval](tests/benchmarks/prism-routing-100/README.md) (3-seed mean, v36/v7 system prompt, May 2026):**
|
|
192
210
|
|
|
193
211
|
| Model | Overall | Load ctx | Save | Srch mem | Handoff | Compact | Know srch | AAC | Translate | No-tool | Info | Edge | Avg lat | Inv |
|
|
194
212
|
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
195
|
-
| **prism-coder:32b**
|
|
196
|
-
| **prism-coder:8b**
|
|
197
|
-
| **prism-coder:14b**
|
|
198
|
-
| **Claude Opus 4.7** | **
|
|
199
|
-
| **prism-coder:1.7b**
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
> **Methodology**: 102-case pool across 12 categories. Scores are 3-seed mean (seeds 2027/2028/2029, zero variance across all seeds). All fine-tuned models use the Qwen3 nothink template. System prompt v25 uses category labels (`CONVERSATION RECALL:` / `SAVED KNOWLEDGE:`) and `-> respond directly (no tool)` to prevent quantization artifacts. Full runner: [`tests/benchmarks/prism-routing-100/benchmark.py`](tests/benchmarks/prism-routing-100/benchmark.py) · Cascade runner: [`tests/benchmarks/cascade-14b-32b-opus/cascade_eval.py`](tests/benchmarks/cascade-14b-32b-opus/cascade_eval.py).
|
|
213
|
+
| **prism-coder:32b** v7 | **100.0%** | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | **100%** | 0.8s | 0 |
|
|
214
|
+
| **prism-coder:8b** v36 | **100.0%** | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | **100%** | 0.8s | 0 |
|
|
215
|
+
| **prism-coder:14b** v36 | **100.0%** | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | **100%** | 1.1s | 0 |
|
|
216
|
+
| **Claude Opus 4.7** | **98.3%** | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 83% | 3.0s | 0 |
|
|
217
|
+
| **prism-coder:1.7b** v42 | **100.0%** | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | 100% | **100%** | 1.6s | 0 |
|
|
218
|
+
|
|
219
|
+
> **Methodology**: 102-case pool across 12 categories. Scores are 3-seed mean (seeds 2027/2028/2029, zero variance across all seeds). All fine-tuned models use the Qwen3 nothink template with keyword-trigger routing prompts and `-> respond directly (no tool)` for the no-tool class. Full runner: [`tests/benchmarks/prism-routing-100/benchmark.py`](tests/benchmarks/prism-routing-100/benchmark.py) · Cascade runner: [`tests/benchmarks/cascade-14b-32b-opus/cascade_eval.py`](tests/benchmarks/cascade-14b-32b-opus/cascade_eval.py).
|
|
203
220
|
>
|
|
204
221
|
> **These are NOT general-purpose LLM benchmarks.** This eval measures routing precision on 6 specific MCP tools. The prism-coder models are specialists trained on this exact task — they match or exceed Claude on routing while Claude dominates on general reasoning, coding, and open-domain QA. The value is **offline reliability at zero cost**, not replacing cloud AI.
|
|
205
222
|
|
|
206
|
-
**iOS deployment:** On-device inference via **llama.cpp Swift SPM**. Auto-selects by device RAM: 14B on iPad Pro 16GB (
|
|
223
|
+
**iOS deployment:** On-device inference via **llama.cpp Swift SPM**. Auto-selects by device RAM: 14B on iPad Pro 16GB (100% routing), 8B on iPhone/iPad 8GB (100%, OOM fallback to 1.7B at 100%). CoreML not viable — coremltools doesn't support Qwen3 attention ops. Integration: `LLMEngine.swift` → `prismNativeBridge.askAI()` → token stream. WiFi fallback: Mac Ollama (`OLLAMA_HOST=0.0.0.0`).
|
|
207
224
|
|
|
208
225
|
### Benchmarks — run them yourself
|
|
209
226
|
|
|
@@ -234,10 +251,12 @@ python3 tests/benchmarks/cascade-14b-32b-opus/cascade_eval.py
|
|
|
234
251
|
|
|
235
252
|
| Model | HuggingFace | Solo BFCL | Cascade role | Size |
|
|
236
253
|
|---|---|---|---|---|
|
|
237
|
-
| prism-coder:32b | [dcostenco/prism-coder-32b](https://huggingface.co/dcostenco/prism-coder-32b) | **
|
|
238
|
-
| prism-coder:8b | [dcostenco/prism-coder-8b](https://huggingface.co/dcostenco/prism-coder-8b) | **
|
|
239
|
-
| prism-coder:14b | [dcostenco/prism-coder-14b](https://huggingface.co/dcostenco/prism-coder-14b) | **
|
|
240
|
-
| prism-coder:1.7b | [dcostenco/prism-coder-1.7b](https://huggingface.co/dcostenco/prism-coder-1.7b) | **
|
|
254
|
+
| prism-coder:32b | [dcostenco/prism-coder-32b](https://huggingface.co/dcostenco/prism-coder-32b) | **100.0%** routing (v7 MoE) | Tier 2 (catches ~1% 14B misses) | 16 GB |
|
|
255
|
+
| prism-coder:8b | [dcostenco/prism-coder-8b](https://huggingface.co/dcostenco/prism-coder-8b) | **100.0%** routing (v36) | Mobile tier | 4.7 GB |
|
|
256
|
+
| prism-coder:14b | [dcostenco/prism-coder-14b](https://huggingface.co/dcostenco/prism-coder-14b) | **100.0%** routing (v36) | Tier 1 (serves ~99% of traffic) | 8.4 GB |
|
|
257
|
+
| prism-coder:1.7b | [dcostenco/prism-coder-1.7b](https://huggingface.co/dcostenco/prism-coder-1.7b) | **100.0%** routing (v42) | On-device / always-fits fallback | 1.1 GB |
|
|
258
|
+
| prism-ide:14b | [dcostenco/prism-ide](https://huggingface.co/dcostenco/prism-ide) | **22/22** TypeScript eval (v1) | Code generation tier 1 (~1.1s) | 8.4 GB |
|
|
259
|
+
| prism-ide:32b | [dcostenco/prism-ide](https://huggingface.co/dcostenco/prism-ide) | Complex code + multi-file (v3) | Code generation tier 2 (~0.8s MoE) | 16 GB |
|
|
241
260
|
|
|
242
261
|
## Self-hosted / Local AI (Enterprise)
|
|
243
262
|
|
|
@@ -246,16 +265,16 @@ Run the full Prism model stack on your own hardware — zero cloud, zero latency
|
|
|
246
265
|
**Requirements:** Mac M2 Pro+ (48GB recommended) or Linux with NVIDIA GPU · [Ollama](https://ollama.com)
|
|
247
266
|
|
|
248
267
|
```bash
|
|
249
|
-
# On-device tier —
|
|
268
|
+
# On-device tier — 1.1 GB (any machine, iPhone) — 100% routing
|
|
250
269
|
ollama pull dcostenco/prism-coder:1b7
|
|
251
270
|
|
|
252
|
-
# Mobile tier — 4.7 GB (iPhone/iPad 8GB, Mac M1+)
|
|
271
|
+
# Mobile tier — 4.7 GB (iPhone/iPad 8GB, Mac M1+) — 100% routing
|
|
253
272
|
ollama pull dcostenco/prism-coder:8b
|
|
254
273
|
|
|
255
|
-
# Standard tier — 8.4 GB (Mac
|
|
274
|
+
# Standard tier — 8.4 GB (Mac 24GB+, iPad Pro 16GB) — 100% routing
|
|
256
275
|
ollama pull dcostenco/prism-coder:14b
|
|
257
276
|
|
|
258
|
-
# Reasoning tier —
|
|
277
|
+
# Reasoning tier — 16 GB (Mac M2 Ultra+, 30B-A3B MoE) — 100% routing
|
|
259
278
|
ollama pull dcostenco/prism-coder:32b
|
|
260
279
|
```
|
|
261
280
|
|
|
@@ -264,8 +283,8 @@ Set `LOCAL_LLM_URL=http://localhost:11434` in your portal config. Routing is aut
|
|
|
264
283
|
**Desktop/server**: 14B → 32B → Claude Opus fallback · **Mobile/offline**: 14B → 8B → 1.7B
|
|
265
284
|
|
|
266
285
|
iOS/mobile on same WiFi: `OLLAMA_HOST=0.0.0.0 ollama serve` on the Mac, then point `LOCAL_LLM_URL` at the Mac's IP.
|
|
267
|
-
Routing accuracy (May 2026, 3-seed mean):
|
|
268
|
-
Cascade (14B→32B): **
|
|
286
|
+
Routing accuracy (May 2026, v36/v7 system prompt, 3-seed mean): 32B v7 = **100.0%** · 8B v36 = **100.0%** · 14B v36 = **100.0%** · 1.7B v42 = **100.0%**
|
|
287
|
+
Cascade (14B→32B): **100.0%** · Opus solo: 98.3% · Opus engaged: **0% of requests** → [Full results](tests/benchmarks/cascade-14b-32b-opus/README.md)
|
|
269
288
|
|
|
270
289
|
---
|
|
271
290
|
|
|
@@ -273,7 +292,7 @@ Cascade (14B→32B): **99.0%** · Opus solo: 97.1% · Opus engaged: **1% of requ
|
|
|
273
292
|
|
|
274
293
|
| Plan | Cloud model | Daily limit | On-device |
|
|
275
294
|
|---|---|---|---|
|
|
276
|
-
| **Free** | — | unlimited local | prism-coder:1.7b (
|
|
295
|
+
| **Free** | — | unlimited local | prism-coder:1.7b (100%) + 8b (100%) + 14b (100%) |
|
|
277
296
|
| **Standard $19/mo** | Claude Sonnet 4 | 200 req | + cloud fallback |
|
|
278
297
|
| **Pro $49/mo** | prism-coder:32b | 2,000 req | + reasoning tier |
|
|
279
298
|
| **Enterprise $99/mo** | prism-coder:32b priority | unlimited | + HIPAA BAA + custom fine-tuning |
|
|
@@ -360,7 +379,7 @@ python3 tests/benchmarks/prism-routing-100/benchmark.py --models 1b7 14b 32b
|
|
|
360
379
|
- BCBA skill integration
|
|
361
380
|
- Deep storage tier
|
|
362
381
|
- Dashboard rendering
|
|
363
|
-
- Routing benchmarks (
|
|
382
|
+
- Routing benchmarks (102-case Prism eval) — see `tests/benchmarks/prism-routing-100/`
|
|
364
383
|
|
|
365
384
|
## Migration
|
|
366
385
|
|
|
@@ -413,14 +432,16 @@ node scripts/migrate-local-to-portal.mjs --include-scholar
|
|
|
413
432
|
└──────────┬───────────┘ └─────────────┬───────────────┘
|
|
414
433
|
│ │
|
|
415
434
|
▼ ▼
|
|
416
|
-
|
|
417
|
-
│ OPENROUTER / LOCAL
|
|
418
|
-
│
|
|
419
|
-
│ Cloud: Claude Sonnet │ │ knowledge graph │
|
|
420
|
-
│
|
|
421
|
-
│ :
|
|
422
|
-
│ :
|
|
423
|
-
|
|
435
|
+
┌──────────────────────────────┐ ┌─────────────────────────────┐
|
|
436
|
+
│ OPENROUTER / LOCAL │ │ SUPABASE │
|
|
437
|
+
│ │ │ session ledgers │
|
|
438
|
+
│ Cloud: Claude Sonnet 4 │ │ knowledge graph │
|
|
439
|
+
│ Routing: prism-coder │ │ handoffs & todos │
|
|
440
|
+
│ :32b(100%) :14b(100%) │ │ │
|
|
441
|
+
│ :8b(100%) :1b7(100%) │ │ source of truth │
|
|
442
|
+
│ Code: prism-ide │ │ │
|
|
443
|
+
│ :14b · :32b │ │ │
|
|
444
|
+
└──────────────────────────────┘ └─────────────────────────────┘
|
|
424
445
|
```
|
|
425
446
|
|
|
426
447
|
### Service Routing
|
|
@@ -439,8 +460,8 @@ node scripts/migrate-local-to-portal.mjs --include-scholar
|
|
|
439
460
|
| Surface | Primary | Fallback |
|
|
440
461
|
|---|---|---|
|
|
441
462
|
| AI Chat `@search` | Firecrawl | — |
|
|
442
|
-
| Prism MCP agents (cloud) | Firecrawl |
|
|
443
|
-
| Prism MCP server (local) |
|
|
463
|
+
| Prism MCP agents (cloud) | Firecrawl | — |
|
|
464
|
+
| Prism MCP server (local) | Firecrawl (via MCP tools) | — |
|
|
444
465
|
| Clinical research | PubMed + ERIC + Semantic Scholar | DuckDuckGo |
|
|
445
466
|
|
|
446
467
|
**TTS (Text-to-Speech)**
|
|
@@ -503,10 +524,14 @@ HuggingFace: dcostenco/prism-coder-{14b,8b,32b,1.7b} (public GGUF weights)
|
|
|
503
524
|
|
|
504
525
|
| Plan | Cloud model | Daily limit | On-device |
|
|
505
526
|
|---|---|---|---|
|
|
506
|
-
| Free | — | unlimited local | prism-coder:1.7b |
|
|
507
|
-
| Standard $19/mo |
|
|
508
|
-
| Pro $49/mo | prism-coder:32b | 2,000 req | + reasoning |
|
|
509
|
-
| Enterprise $99/mo | prism-coder:32b priority | unlimited |
|
|
527
|
+
| **Free** | — | unlimited local | prism-coder:1.7b (100%) + 8b (100%) + 14b (100%) |
|
|
528
|
+
| **Standard $19/mo** | Claude Sonnet 4 | 200 req | + cloud fallback |
|
|
529
|
+
| **Pro $49/mo** | prism-coder:32b | 2,000 req | + reasoning tier |
|
|
530
|
+
| **Enterprise $99/mo** | prism-coder:32b priority | unlimited | + HIPAA BAA + custom fine-tuning |
|
|
531
|
+
|
|
532
|
+
All on-device models are **free for every tier** — no subscription needed for local inference. Offline translation (1,261 phrases × 20 languages) included in all plans.
|
|
533
|
+
|
|
534
|
+
[Subscribe →](https://synalux.ai/pricing)
|
|
510
535
|
|
|
511
536
|
See [`docs/WOW_FEATURES.md`](docs/WOW_FEATURES.md) for the algorithm catalogue. Release notes in [`docs/releases/v14.0.0-prism-as-foundation.md`](docs/releases/v14.0.0-prism-as-foundation.md).
|
|
512
537
|
|
package/dist/cli.js
CHANGED
|
@@ -519,4 +519,69 @@ scmCmd
|
|
|
519
519
|
process.exit(1);
|
|
520
520
|
}
|
|
521
521
|
});
|
|
522
|
+
// ─── prism register-models ────────────────────────────────────
// Convenience: alias namespaced HF-style prism-coder tags
// (`dcostenco/prism-coder:14b`) to the bare tags (`prism-coder:14b`)
// some external tooling expects. The MCP picker handles both forms
// natively as of v15.5, so this command is OPTIONAL — useful only
// when a user wants to run `ollama run prism-coder:14b` directly,
// or for tools that pre-date the picker's namespace fallback.
//
// Flow: list installed models via GET {url}/api/tags, compute the
// namespaced → bare alias pairs that are still missing, print them,
// and (unless --dry-run) create each alias with `ollama cp`.
// Exits with status 1 when Ollama is unreachable, returns a non-2xx
// response, or at least one `ollama cp` invocation fails.
program
    .command('register-models')
    .description('Alias namespaced prism-coder Ollama tags to bare tags (optional convenience)')
    .option('-u, --url <url>', 'Ollama base URL', process.env.PRISM_LOCAL_LLM_URL || 'http://localhost:11434')
    .option('--dry-run', 'Print what would be aliased without running ollama cp')
    .action(async (options) => {
        let installed = [];
        try {
            const res = await fetch(`${options.url}/api/tags`, { signal: AbortSignal.timeout(3_000) });
            if (!res.ok) {
                console.error(`Ollama /api/tags returned HTTP ${res.status}. Is Ollama running at ${options.url}?`);
                process.exit(1);
            }
            const data = (await res.json());
            // Tolerate a payload without a `models` array instead of crashing later.
            installed = Array.isArray(data?.models) ? data.models : [];
        }
        catch (err) {
            console.error(`Cannot reach Ollama at ${options.url}: ${err instanceof Error ? err.message : String(err)}`);
            process.exit(1);
        }
        // Only string names are usable; skip malformed entries rather than throwing.
        const installedNames = new Set(installed
            .map((m) => m?.name)
            .filter((name) => typeof name === 'string'));
        // Bare tags already scheduled in this run. Two namespaced sources can
        // map to the same bare tag; only the first one is aliased so a later
        // `ollama cp` does not silently overwrite an alias created moments ago.
        const claimedTargets = new Set();
        const candidates = [];
        for (const from of installedNames) {
            if (!/\/prism-coder:/.test(from)) {
                continue;
            }
            // Strip the WHOLE namespace path, not just its first segment, so
            // multi-segment names (`hf.co/user/prism-coder:14b`) also resolve
            // to the bare `prism-coder:14b`.
            const to = from.slice(from.lastIndexOf('/') + 1);
            if (installedNames.has(to) || claimedTargets.has(to)) {
                continue;
            }
            claimedTargets.add(to);
            candidates.push({ from, to });
        }
        if (candidates.length === 0) {
            console.log('Nothing to do — no namespaced prism-coder tags need aliasing.');
            return;
        }
        console.log(`Found ${candidates.length} model(s) to alias:`);
        for (const { from, to } of candidates) {
            console.log(`  ${from} → ${to}`);
        }
        if (options.dryRun) {
            console.log('\n(dry-run — no changes made)');
            return;
        }
        const { execFile } = await import('node:child_process');
        const { promisify } = await import('node:util');
        const exec = promisify(execFile);
        // Point the Ollama CLI at the same server the model list came from;
        // otherwise `--url` would only affect the listing and `ollama cp`
        // would run against whatever host the CLI defaults to.
        const env = { ...process.env, OLLAMA_HOST: options.url };
        let ok = 0;
        let fail = 0;
        // Sequential on purpose: keeps output ordered and avoids hammering Ollama.
        for (const { from, to } of candidates) {
            try {
                // execFile (no shell) — model names are passed as plain argv entries.
                await exec('ollama', ['cp', from, to], { env });
                console.log(`  ✓ aliased ${to}`);
                ok++;
            }
            catch (err) {
                console.error(`  ✗ ${from} → ${to}: ${err instanceof Error ? err.message : String(err)}`);
                fail++;
            }
        }
        console.log(`\nDone. Aliased ${ok}, failed ${fail}.`);
        if (fail > 0) {
            process.exit(1);
        }
    });
|
|
522
587
|
program.parse(process.argv);
|
package/dist/storage/index.js
CHANGED
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
* directly — all cloud traffic goes via the synalux portal so billing,
|
|
20
20
|
* tier gating, and HIPAA audit are enforced in one place.
|
|
21
21
|
*/
|
|
22
|
-
import { pickLocalModel, fmtGb, MODEL_TIERS } from "../utils/modelPicker.js";
|
|
22
|
+
import { pickLocalModel, fmtGb, MODEL_TIERS, resolveOllamaName } from "../utils/modelPicker.js";
|
|
23
23
|
import { getSynaluxJwt, invalidateSynaluxJwt } from "../utils/synaluxJwt.js";
|
|
24
24
|
import { getAvailableMemoryBytes } from "../utils/availableMemory.js";
|
|
25
25
|
import { PRISM_SYNALUX_BASE_URL, PRISM_LOCAL_LLM_URL, } from "../config.js";
|
|
@@ -249,20 +249,25 @@ export async function runInfer(args, deps) {
|
|
|
249
249
|
let anyViable = false;
|
|
250
250
|
for (let i = ceilStart; i < MODEL_TIERS.length; i++) {
|
|
251
251
|
const tier = MODEL_TIERS[i];
|
|
252
|
-
|
|
252
|
+
// Accept the tier whether Ollama reports it as bare (`prism-coder:32b`)
|
|
253
|
+
// or namespaced (`dcostenco/prism-coder:32b`, the form `ollama pull`
|
|
254
|
+
// produces from a HF repo). resolveOllamaName returns the actual
|
|
255
|
+
// name Ollama knows so /api/generate finds the model.
|
|
256
|
+
const ollamaName = resolveOllamaName(tier.tag, installed);
|
|
257
|
+
if (!installed.has(ollamaName)) {
|
|
253
258
|
attempts.push({ tier: tier.tag, reason: "not_pulled" });
|
|
254
259
|
continue;
|
|
255
260
|
}
|
|
256
261
|
// RAM gate — but skip the check if the tier is already warm in
|
|
257
262
|
// Ollama. Reused models don't reallocate weight buffers.
|
|
258
|
-
const isWarm = loaded.has(
|
|
263
|
+
const isWarm = loaded.has(ollamaName);
|
|
259
264
|
if (!isWarm && freeBytes < tier.minFreeGb * (1024 ** 3)) {
|
|
260
265
|
attempts.push({ tier: tier.tag, reason: "ram_insufficient" });
|
|
261
266
|
continue;
|
|
262
267
|
}
|
|
263
268
|
anyViable = true;
|
|
264
269
|
const timeout = args.timeout_ms ?? DEFAULT_TIMEOUTS[tier.tag] ?? 60_000;
|
|
265
|
-
const result = await deps.callLocal(deps.ollamaUrl,
|
|
270
|
+
const result = await deps.callLocal(deps.ollamaUrl, ollamaName, args.prompt, args.system, maxTokens, temperature, timeout);
|
|
266
271
|
if (result.ok) {
|
|
267
272
|
return {
|
|
268
273
|
output: result.text,
|
|
@@ -31,13 +31,25 @@ export const MODEL_TIERS = [
|
|
|
31
31
|
{ tag: 'prism-coder:8b', weightsGb: 5, minFreeGb: 7, ctxTokens: 32_768 },
|
|
32
32
|
{ tag: 'prism-coder:1b7', weightsGb: 2, minFreeGb: 3, ctxTokens: 8_192 },
|
|
33
33
|
];
|
|
34
|
+
/**
 * True when `installed` matches `tierTag` either as a bare tag
 * (`prism-coder:32b`) or as a namespaced HuggingFace-style tag
 * (`dcostenco/prism-coder:32b`). The README documents `ollama pull
 * dcostenco/prism-coder:32b`, so Ollama's /api/tags returns the
 * namespaced form — without this matcher the picker would never
 * see those models and would silently fall through to cloud.
 *
 * The namespaced check requires a `/` directly before the tier tag, so
 * a name that merely ends with the same characters
 * (`my-prism-coder:32b`) does not match.
 *
 * @param installed Model name as reported by Ollama; non-string values never match
 * @param tierTag   Bare tier tag, e.g. `prism-coder:32b`
 * @returns true when `installed` names the given tier
 */
function tagMatches(installed, tierTag) {
    // Guard against malformed entries instead of throwing on `.endsWith`.
    if (typeof installed !== 'string') {
        return false;
    }
    return installed === tierTag || installed.endsWith(`/${tierTag}`);
}
|
|
34
45
|
/**
|
|
35
46
|
* Pick the largest viable tier for the given free RAM.
|
|
36
47
|
* Returns null when no tier fits (caller should go cloud-only).
|
|
37
48
|
*
|
|
38
49
|
* @param freeBytes Result of os.freemem() — binary bytes
|
|
39
50
|
* @param ceiling Optional cap (e.g. "14b" to forbid 32B even if RAM allows)
|
|
40
|
-
* @param available Optional whitelist — only consider tags in this set
|
|
51
|
+
* @param available Optional whitelist — only consider tags in this set. Accepts
|
|
52
|
+
* bare (`prism-coder:32b`) or namespaced (`dcostenco/prism-coder:32b`).
|
|
41
53
|
*/
|
|
42
54
|
export function pickLocalModel(freeBytes, ceiling, available) {
|
|
43
55
|
if (!Number.isFinite(freeBytes) || freeBytes <= 0)
|
|
@@ -50,12 +62,36 @@ export function pickLocalModel(freeBytes, ceiling, available) {
|
|
|
50
62
|
const tier = MODEL_TIERS[i];
|
|
51
63
|
if (freeBytes < tier.minFreeGb * GB)
|
|
52
64
|
continue;
|
|
53
|
-
if (available
|
|
54
|
-
|
|
65
|
+
if (available) {
|
|
66
|
+
let found = false;
|
|
67
|
+
for (const a of available) {
|
|
68
|
+
if (tagMatches(a, tier.tag)) {
|
|
69
|
+
found = true;
|
|
70
|
+
break;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
if (!found)
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
55
76
|
return tier;
|
|
56
77
|
}
|
|
57
78
|
return null;
|
|
58
79
|
}
|
|
80
|
+
/**
 * Resolve a tier tag to the actual Ollama name installed locally.
 *
 * Resolution order:
 *  1. the bare tag itself, when it is installed (bare form takes precedence);
 *  2. otherwise the first installed name ending in `/<tierTag>`
 *     (e.g. `dcostenco/prism-coder:32b`), so Ollama's /api/generate finds it;
 *  3. otherwise the bare tag unchanged — callers must still check
 *     membership to learn whether the model is installed at all.
 *
 * @param tierTag   Bare tier tag, e.g. `prism-coder:32b`
 * @param installed Installed model names: a Set, any other iterable of
 *                  strings, or null/undefined (treated as empty)
 * @returns The name to pass to Ollama for this tier
 */
export function resolveOllamaName(tierTag, installed) {
    // Normalise to a Set so array callers and missing input do not throw.
    const names = installed instanceof Set ? installed : new Set(installed ?? []);
    if (names.has(tierTag)) {
        return tierTag;
    }
    const namespacedSuffix = `/${tierTag}`;
    for (const name of names) {
        if (typeof name === 'string' && name.endsWith(namespacedSuffix)) {
            return name;
        }
    }
    return tierTag;
}
|
|
59
95
|
/**
|
|
60
96
|
* Format a byte count for logging. 12_884_901_888 → "12.0 GB".
|
|
61
97
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "prism-mcp-server",
|
|
3
|
-
"version": "15.
|
|
3
|
+
"version": "15.5.1",
|
|
4
4
|
"mcpName": "io.github.dcostenco/prism-coder",
|
|
5
5
|
"description": "Prism Coder — Cognitive memory + tool-calling intelligence for AI agents. Mind Palace persistent memory (BFCL Gold Certified, 100% Tool-Call Accuracy, 54 Agent Skills, Zero-Search HDC/HRR retrieval, HIPAA-hardened local-first storage, SLERP-optimized GRPO alignment) plus the prism-coder:7b / 14b open-weights LLM fleet.",
|
|
6
6
|
"module": "index.ts",
|