prism-mcp-server 7.8.4 → 7.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -445,6 +445,7 @@ return false;}
445
445
  let cursorId = undefined;
446
446
  let iterations = 0;
447
447
  const MAX_ITERATIONS = 100; // safety cap: 100 × 50 = 5000 entries max
448
+ let lastBackfillError = undefined;
448
449
  while (hasMore && iterations < MAX_ITERATIONS) {
449
450
  iterations++;
450
451
  const result = await backfillEmbeddingsHandler({ dry_run: false, limit: 50, _cursor_id: cursorId });
@@ -452,6 +453,8 @@ return false;}
452
453
  if (bStats) {
453
454
  repairedCount += bStats.repaired;
454
455
  failedCount += bStats.failed;
456
+ if (bStats.error)
457
+ lastBackfillError = bStats.error;
455
458
  if (bStats.last_id)
456
459
  cursorId = bStats.last_id;
457
460
  else
@@ -464,8 +467,10 @@ return false;}
464
467
  }
465
468
  }
466
469
  cleanupMessages.push(`Repaired ${repairedCount} embeddings`);
467
- if (failedCount > 0)
468
- cleanupMessages.push(`Failed to repair ${failedCount} embeddings`);
470
+ if (failedCount > 0) {
471
+ const errMsg = lastBackfillError ? ` (${lastBackfillError})` : '';
472
+ cleanupMessages.push(`Failed to repair ${failedCount} embeddings${errMsg}`);
473
+ }
469
474
  }
470
475
  catch (err) {
471
476
  console.error("[Dashboard] Failed to backfill embeddings:", err);
@@ -1442,8 +1442,43 @@ export class SupabaseStorage {
1442
1442
  }
1443
1443
  // ─── v7.5: Semantic Consolidation ────────────────────────────────
1444
1444
  async upsertSemanticKnowledge(data) {
1445
- // For now we just implement graceful degradation/no-op on Supabase until the SQL is deployed.
1446
- debugLog(`[SupabaseStorage] upsertSemanticKnowledge is not fully implemented in Supabase yet. Skipping for ${data.concept}.`);
1447
- return crypto.randomUUID();
1445
+ const userId = data.userId || PRISM_USER_ID;
1446
+ // Check if concept already exists
1447
+ const existing = await supabaseGet("semantic_knowledge", {
1448
+ project: `eq.${data.project}`,
1449
+ concept: `eq.${data.concept}`,
1450
+ select: "id,instances,confidence",
1451
+ limit: "1"
1452
+ });
1453
+ const rows = Array.isArray(existing) ? existing : [];
1454
+ if (rows.length > 0) {
1455
+ const row = rows[0];
1456
+ const newConfidence = Math.min(1.0, (row.confidence || 0) + 0.1);
1457
+ const newInstances = (row.instances || 0) + 1;
1458
+ await supabasePatch("semantic_knowledge", {
1459
+ instances: newInstances,
1460
+ confidence: newConfidence,
1461
+ updated_at: new Date().toISOString()
1462
+ }, {
1463
+ id: `eq.${row.id}`
1464
+ });
1465
+ return row.id;
1466
+ }
1467
+ else {
1468
+ const id = crypto.randomUUID();
1469
+ await supabasePost("semantic_knowledge", {
1470
+ id,
1471
+ project: data.project,
1472
+ user_id: userId,
1473
+ concept: data.concept,
1474
+ description: data.description,
1475
+ confidence: 0.5,
1476
+ instances: 1,
1477
+ related_entities: data.related_entities ? JSON.stringify(data.related_entities) : "[]",
1478
+ created_at: new Date().toISOString(),
1479
+ updated_at: new Date().toISOString()
1480
+ });
1481
+ return id;
1482
+ }
1448
1483
  }
1449
1484
  }
@@ -98,46 +98,74 @@ export async function backfillEmbeddingsHandler(args) {
98
98
  isError: false,
99
99
  };
100
100
  }
101
- // Generate embeddings for each entry
102
101
  let repaired = 0;
103
102
  let failed = 0;
104
- for (const entry of entries) {
103
+ let lastError = undefined;
104
+ const validEntries = entries.map(e => {
105
+ const entry = e;
106
+ const textToEmbed = [
107
+ entry.summary || "",
108
+ ...(entry.decisions || []),
109
+ ].filter(Boolean).join(" | ");
110
+ return { entry, textToEmbed };
111
+ }).filter(x => {
112
+ if (!x.textToEmbed.trim()) {
113
+ debugLog(`[backfill] Skipping entry ${x.entry.id}: no text content`);
114
+ failed++;
115
+ return false;
116
+ }
117
+ return true;
118
+ });
119
+ if (validEntries.length > 0) {
120
+ const provider = getLLMProvider();
105
121
  try {
106
- const e = entry;
107
- const textToEmbed = [
108
- e.summary || "",
109
- ...(e.decisions || []),
110
- ].filter(Boolean).join(" | ");
111
- if (!textToEmbed.trim()) {
112
- debugLog(`[backfill] Skipping entry ${e.id}: no text content`);
113
- failed++;
114
- continue;
122
+ let embeddings;
123
+ if (provider.generateEmbeddings) {
124
+ // Use batch API
125
+ embeddings = await provider.generateEmbeddings(validEntries.map(x => x.textToEmbed));
115
126
  }
116
- const embedding = await getLLMProvider().generateEmbedding(textToEmbed);
117
- // Build atomic patch float32 + TurboQuant in ONE DB update
118
- const patchData = {
119
- embedding: JSON.stringify(embedding),
120
- };
121
- // TurboQuant: compress alongside repair (non-fatal)
122
- try {
123
- const { getDefaultCompressor, serialize } = await import("../utils/turboquant.js");
124
- const compressor = getDefaultCompressor();
125
- const compressed = compressor.compress(embedding);
126
- const buf = serialize(compressed);
127
- patchData.embedding_compressed = buf.toString("base64");
128
- patchData.embedding_format = `turbo${compressor.bits}`;
129
- patchData.embedding_turbo_radius = compressed.radius;
127
+ else {
128
+ // Fallback to sequential if batching is not supported by the adapter
129
+ embeddings = [];
130
+ for (const { textToEmbed } of validEntries) {
131
+ embeddings.push(await provider.generateEmbedding(textToEmbed));
132
+ }
130
133
  }
131
- catch (turboErr) {
132
- debugLog(`[backfill] TurboQuant compression failed for ${e.id} (non-fatal): ${turboErr.message}`);
134
+ for (let i = 0; i < validEntries.length; i++) {
135
+ const { entry } = validEntries[i];
136
+ const embedding = embeddings[i];
137
+ try {
138
+ const patchData = {
139
+ embedding: JSON.stringify(embedding),
140
+ };
141
+ try {
142
+ const { getDefaultCompressor, serialize } = await import("../utils/turboquant.js");
143
+ const compressor = getDefaultCompressor();
144
+ const compressed = compressor.compress(embedding);
145
+ const buf = serialize(compressed);
146
+ patchData.embedding_compressed = buf.toString("base64");
147
+ patchData.embedding_format = `turbo${compressor.bits}`;
148
+ patchData.embedding_turbo_radius = compressed.radius;
149
+ }
150
+ catch (turboErr) {
151
+ debugLog(`[backfill] TurboQuant compression failed for ${entry.id} (non-fatal): ${turboErr.message}`);
152
+ }
153
+ await storage.patchLedger(entry.id, patchData);
154
+ repaired++;
155
+ debugLog(`[backfill] ✅ Repaired ${entry.id} (${entry.project})`);
156
+ }
157
+ catch (entryErr) {
158
+ failed++;
159
+ lastError = entryErr instanceof Error ? entryErr.message : String(entryErr);
160
+ console.error(`[backfill] ❌ Failed ${entry.id}: ${lastError}`);
161
+ }
133
162
  }
134
- await storage.patchLedger(e.id, patchData);
135
- repaired++;
136
- debugLog(`[backfill] ✅ Repaired ${e.id} (${e.project})`);
137
163
  }
138
164
  catch (err) {
139
- failed++;
140
- console.error(`[backfill] Failed ${entry.id}: ${err instanceof Error ? err.message : err}`);
165
+ // Embedding API call itself failed — entire batch is lost.
166
+ failed += validEntries.length;
167
+ lastError = err instanceof Error ? err.message : String(err);
168
+ console.error(`[backfill] ❌ Embedding API failed for batch of ${validEntries.length}: ${lastError}`);
141
169
  }
142
170
  }
143
171
  return {
@@ -152,7 +180,7 @@ export async function backfillEmbeddingsHandler(args) {
152
180
  : `All entries now have embeddings for semantic search.`),
153
181
  }],
154
182
  isError: false,
155
- _stats: { repaired, failed, last_id: entries[entries.length - 1]?.id },
183
+ _stats: { repaired, failed, error: lastError, last_id: entries[entries.length - 1]?.id },
156
184
  };
157
185
  }
158
186
  export async function sessionBackfillLinksHandler(args) {
@@ -46,6 +46,10 @@ import { getTracer } from "../../telemetry.js";
46
46
  export class TracingLLMProvider {
47
47
  inner;
48
48
  providerName;
49
+ /**
50
+ * Optional batch embeddings generation support.
51
+ */
52
+ generateEmbeddings;
49
53
  /**
50
54
  * The optional VLM method is declared here as a typed property so TypeScript
51
55
  * knows about it. It is assigned (or left undefined) in the constructor body
@@ -62,6 +66,37 @@ export class TracingLLMProvider {
62
66
  constructor(inner, providerName) {
63
67
  this.inner = inner;
64
68
  this.providerName = providerName;
69
+ // ── Batch Embeddings: conditional own-property assignment ───────────────
70
+ if (inner.generateEmbeddings) {
71
+ const innerEmbeds = inner.generateEmbeddings.bind(inner);
72
+ const providerName = this.providerName;
73
+ this.generateEmbeddings = async (texts) => {
74
+ const span = getTracer().startSpan("llm.generate_embeddings_batch", {
75
+ attributes: {
76
+ "llm.provider": providerName,
77
+ "llm.batch_size": texts.length,
78
+ },
79
+ });
80
+ return context.with(trace.setSpan(context.active(), span), async () => {
81
+ try {
82
+ const result = await innerEmbeds(texts);
83
+ span.setStatus({ code: SpanStatusCode.OK });
84
+ return result;
85
+ }
86
+ catch (err) {
87
+ span.recordException(err instanceof Error ? err : new Error(String(err)));
88
+ span.setStatus({
89
+ code: SpanStatusCode.ERROR,
90
+ message: err instanceof Error ? err.message : String(err),
91
+ });
92
+ throw err;
93
+ }
94
+ finally {
95
+ span.end();
96
+ }
97
+ });
98
+ };
99
+ }
65
100
  // ── VLM method: conditional own-property assignment ──────────────────
66
101
  // REVIEWER NOTE: TypeScript class methods always appear on the prototype,
67
102
  // which means `if (llm.generateImageDescription)` would always be truthy
@@ -78,53 +78,78 @@ export class VoyageAdapter {
78
78
  "Set text_provider to 'anthropic', 'openai', or 'gemini' in the dashboard.");
79
79
  }
80
80
  // ─── Embedding Generation ────────────────────────────────────────────────
81
- async generateEmbedding(text) {
82
- if (!text || !text.trim()) {
83
- throw new Error("[VoyageAdapter] generateEmbedding called with empty text");
84
- }
81
+ async generateEmbeddings(texts) {
82
+ if (!texts || texts.length === 0)
83
+ return [];
85
84
  // Truncate to character limit (consistent with other adapters)
86
- const truncated = text.length > MAX_EMBEDDING_CHARS
85
+ const truncatedTexts = texts.map(text => text.length > MAX_EMBEDDING_CHARS
87
86
  ? text.slice(0, MAX_EMBEDDING_CHARS).replace(/\s+\S*$/, "")
88
- : text;
87
+ : text);
89
88
  const model = getSettingSync("voyage_model", DEFAULT_MODEL);
90
- debugLog(`[VoyageAdapter] generateEmbedding — model=${model}, chars=${truncated.length}`);
89
+ debugLog(`[VoyageAdapter] generateEmbeddings batch — model=${model}, count=${texts.length}`);
91
90
  const requestBody = {
92
- input: [truncated],
91
+ input: truncatedTexts,
93
92
  model,
94
93
  // We do NOT send output_dimension here because Voyage's API explicitly
95
94
  // restricts it to [256, 512, 1024, 2048] for MRL models. We will
96
95
  // manually slice the 1024-dim result down to 768 client-side.
97
96
  };
98
- const response = await fetch(`${VOYAGE_API_BASE}/embeddings`, {
99
- method: "POST",
100
- headers: {
101
- "Authorization": `Bearer ${this.apiKey}`,
102
- "Content-Type": "application/json",
103
- },
104
- body: JSON.stringify(requestBody),
105
- });
106
- if (!response.ok) {
97
+ let response = null;
98
+ let retries = 0;
99
+ const maxRetries = 4;
100
+ const baseDelayMs = 15000; // 15 seconds base delay
101
+ while (true) {
102
+ response = await fetch(`${VOYAGE_API_BASE}/embeddings`, {
103
+ method: "POST",
104
+ headers: {
105
+ "Authorization": `Bearer ${this.apiKey}`,
106
+ "Content-Type": "application/json",
107
+ },
108
+ body: JSON.stringify(requestBody),
109
+ });
110
+ if (response.ok) {
111
+ break;
112
+ }
107
113
  const errorText = await response.text().catch(() => "unknown error");
114
+ if (response.status === 429 && retries < maxRetries) {
115
+ // Simple backoff: baseDelayMs * (retries + 1) -> 15s, 30s, 45s, 60s
116
+ const delay = baseDelayMs * (retries + 1);
117
+ retries++;
118
+ debugLog(`[VoyageAdapter] Rate limited (429). Retrying in ${delay}ms... (Attempt ${retries}/${maxRetries}): ${errorText.substring(0, 50)}...`);
119
+ await new Promise(resolve => setTimeout(resolve, delay));
120
+ continue;
121
+ }
108
122
  throw new Error(`[VoyageAdapter] API request failed — status=${response.status}: ${errorText}`);
109
123
  }
110
124
  const data = (await response.json());
111
- let embedding = data?.data?.[0]?.embedding;
112
- if (!Array.isArray(embedding)) {
113
- throw new Error("[VoyageAdapter] Unexpected response format — no embedding array found");
114
- }
115
- // Client-side MRL Truncation:
116
- // Voyage models returning 1024 dims can be safely sliced to 768 since they
117
- // are trained with Matryoshka Representation Learning.
118
- if (embedding.length > EMBEDDING_DIMS) {
119
- embedding = embedding.slice(0, EMBEDDING_DIMS);
120
- }
121
- // Dimension guard: Prism's DB schema requires exactly 768 dims.
122
- if (embedding.length !== EMBEDDING_DIMS) {
123
- throw new Error(`[VoyageAdapter] Embedding dimension mismatch: expected ${EMBEDDING_DIMS}, ` +
124
- `got ${embedding.length}. Make sure you are using a model that returns at least 768 dims.`);
125
+ const embeddings = data?.data?.map(d => d.embedding) || [];
126
+ if (embeddings.length !== texts.length) {
127
+ throw new Error(`[VoyageAdapter] Unexpected response length — expected ${texts.length}, got ${embeddings.length}`);
125
128
  }
126
- debugLog(`[VoyageAdapter] Embedding generated — dims=${embedding.length}, ` +
129
+ const processedEmbeddings = embeddings.map(emb => {
130
+ let embedding = emb;
131
+ // Client-side MRL Truncation:
132
+ // Voyage models returning 1024 dims can be safely sliced to 768 since they
133
+ // are trained with Matryoshka Representation Learning.
134
+ if (embedding.length > EMBEDDING_DIMS) {
135
+ embedding = embedding.slice(0, EMBEDDING_DIMS);
136
+ }
137
+ // Dimension guard: Prism's DB schema requires exactly 768 dims.
138
+ if (embedding.length !== EMBEDDING_DIMS) {
139
+ throw new Error(`[VoyageAdapter] Embedding dimension mismatch: expected ${EMBEDDING_DIMS}, ` +
140
+ `got ${embedding.length}. Make sure you are using a model that returns at least 768 dims.`);
141
+ }
142
+ return embedding;
143
+ });
144
+ debugLog(`[VoyageAdapter] Batch embeddings generated — count=${processedEmbeddings.length}, ` +
127
145
  `tokens_used=${data.usage?.total_tokens ?? "unknown"}`);
128
- return embedding;
146
+ return processedEmbeddings;
147
+ }
148
+ async generateEmbedding(text) {
149
+ if (!text || !text.trim()) {
150
+ throw new Error("[VoyageAdapter] generateEmbedding called with empty text");
151
+ }
152
+ const results = await this.generateEmbeddings([text]);
153
+ return results[0];
129
154
  }
130
155
  }
@@ -117,6 +117,10 @@ export function getLLMProvider() {
117
117
  generateText: textAdapter.generateText.bind(textAdapter),
118
118
  generateEmbedding: embedAdapter.generateEmbedding.bind(embedAdapter),
119
119
  };
120
+ // Wire batch embeddings if the embed adapter supports it (e.g. VoyageAdapter).
121
+ if (embedAdapter.generateEmbeddings) {
122
+ composed.generateEmbeddings = embedAdapter.generateEmbeddings.bind(embedAdapter);
123
+ }
120
124
  // Pass VLM support through from the text adapter if it exists.
121
125
  // generateImageDescription is a text-generation concern (it calls the
122
126
  // text/vision model, not the embedding model). The text adapter owns it.
@@ -141,6 +145,9 @@ export function getLLMProvider() {
141
145
  generateText: fallback.generateText.bind(fallback),
142
146
  generateEmbedding: fallback.generateEmbedding.bind(fallback),
143
147
  };
148
+ if (typeof fallback.generateEmbeddings === 'function') {
149
+ fallbackComposed.generateEmbeddings = fallback.generateEmbeddings.bind(fallback);
150
+ }
144
151
  if (fallback.generateImageDescription) {
145
152
  fallbackComposed.generateImageDescription = fallback.generateImageDescription.bind(fallback);
146
153
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "prism-mcp-server",
3
- "version": "7.8.4",
3
+ "version": "7.8.6",
4
4
  "mcpName": "io.github.dcostenco/prism-mcp",
5
5
  "description": "The Mind Palace for AI Agents — a true Cognitive Architecture with Hebbian learning (episodic→semantic consolidation), ACT-R spreading activation (multi-hop causal reasoning), uncertainty-aware rejection gates (agents that know when they don't know), adversarial evaluation (anti-sycophancy), fail-closed Dark Factory pipelines, persistent memory (SQLite/Supabase), multi-agent Hivemind, time travel & visual dashboard. Zero-config local mode.",
6
6
  "module": "index.ts",