prism-mcp-server 4.2.0 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +196 -67
- package/dist/dashboard/ui.js +333 -2
- package/dist/lifecycle.js +6 -0
- package/dist/server.js +229 -139
- package/dist/storage/sqlite.js +52 -0
- package/dist/storage/supabase.js +73 -14
- package/dist/storage/supabaseMigrations.js +42 -1
- package/dist/tools/compactionHandler.js +7 -14
- package/dist/tools/handlers.js +26 -3
- package/dist/tools/index.js +2 -2
- package/dist/tools/sessionMemoryDefinitions.js +93 -0
- package/dist/tools/sessionMemoryHandlers.js +384 -21
- package/dist/utils/briefing.js +9 -10
- package/dist/utils/factMerger.js +11 -16
- package/dist/utils/healthCheck.js +19 -22
- package/dist/utils/imageCaptioner.js +240 -0
- package/dist/utils/llm/adapters/anthropic.js +128 -0
- package/dist/utils/llm/adapters/gemini.js +152 -0
- package/dist/utils/llm/adapters/openai.js +183 -0
- package/dist/utils/llm/adapters/traced.js +190 -0
- package/dist/utils/llm/factory.js +143 -0
- package/dist/utils/llm/provider.js +25 -0
- package/dist/utils/telemetry.js +174 -0
- package/package.json +9 -2
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI Adapter (v4.5)
|
|
3
|
+
* ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
* PURPOSE:
|
|
5
|
+
* Implements LLMProvider using the official `openai` Node.js SDK.
|
|
6
|
+
* A single adapter that covers FOUR deployment scenarios:
|
|
7
|
+
*
|
|
8
|
+
* 1. Cloud OpenAI (default)
|
|
9
|
+
* apiKey = OPENAI_API_KEY env var or dashboard setting
|
|
10
|
+
* baseURL = https://api.openai.com/v1 (default)
|
|
11
|
+
* Models: gpt-4o-mini (default text), text-embedding-3-small (embedding)
|
|
12
|
+
*
|
|
13
|
+
* 2. Ollama (local, open-source)
|
|
14
|
+
* apiKey = (not needed — leave blank)
|
|
15
|
+
* baseURL = http://localhost:11434/v1
|
|
16
|
+
* Models: any model you've pulled (e.g. llama3.2, nomic-embed-text)
|
|
17
|
+
*
|
|
18
|
+
* 3. LM Studio (local GUI)
|
|
19
|
+
* apiKey = (not needed — leave blank)
|
|
20
|
+
* baseURL = http://localhost:1234/v1
|
|
21
|
+
*
|
|
22
|
+
* 4. vLLM / custom OpenAI-compatible server
|
|
23
|
+
* apiKey = (server-specific)
|
|
24
|
+
* baseURL = http://<host>:<port>/v1
|
|
25
|
+
*
|
|
26
|
+
* EMBEDDING DIMENSION PARITY (768 dims):
|
|
27
|
+
* Prism's SQLite (sqlite-vec) and Supabase (pgvector) schemas define
|
|
28
|
+
* embedding columns as EXACTLY 768 dimensions. This was chosen to match
|
|
29
|
+
* Gemini's native output size. All adapters MUST return 768-dim vectors.
|
|
30
|
+
*
|
|
31
|
+
* OpenAI solution: text-embedding-3-* models support the `dimensions`
|
|
32
|
+
* parameter via Matryoshka Representation Learning (MRL), which produces
|
|
33
|
+
* a shorter but still high-quality vector. text-embedding-3-small at 768
|
|
34
|
+
* dims outperforms text-embedding-ada-002 at its native 1536 dims.
|
|
35
|
+
*
|
|
36
|
+
* WARNING for local models (Ollama / LM Studio):
|
|
37
|
+
* Many locally-served models do NOT support the `dimensions` parameter.
|
|
38
|
+
* We log a warning but do NOT throw — the error will surface at the DB
|
|
39
|
+
* write boundary, which is the right place to enforce the constraint.
|
|
40
|
+
* Choose a local embedding model that natively outputs 768 dims
|
|
41
|
+
* (e.g. nomic-embed-text = 768, mxbai-embed-large = 1024 — avoid latter).
|
|
42
|
+
*
|
|
43
|
+
* CONFIG KEYS (Prism dashboard "AI Providers" tab OR environment variables):
|
|
44
|
+
* openai_api_key — API key (empty = localhost/Ollama mode)
|
|
45
|
+
* openai_base_url — Base URL (default: https://api.openai.com/v1)
|
|
46
|
+
* openai_model — Chat model (default: gpt-4o-mini)
|
|
47
|
+
* openai_embedding_model — Embedding model (default: text-embedding-3-small)
|
|
48
|
+
*/
|
|
49
|
+
import OpenAI from "openai";
|
|
50
|
+
import { getSettingSync } from "../../../storage/configStorage.js";
|
|
51
|
+
import { debugLog } from "../../logger.js";
|
|
52
|
+
// ─── Constants ────────────────────────────────────────────────────────────────
// Vector size requested from the embeddings endpoint. Must match Prism's DB
// schema (sqlite-vec and pgvector column sizes); changing it requires a DB
// migration — do not adjust casually.
const EMBEDDING_DIMS = 768;
// Conservative character-based cap applied before embedding so that no
// tokenizer is needed (text-embedding-3-small has an 8191-token window).
const MAX_EMBEDDING_CHARS = 8000;
// After the hard cut we only snap back to a word boundary when the last space
// lies in the final 10% of the kept text. Without this floor, input with one
// early space followed by a long unbroken run (base64, CJK, minified text)
// would be reduced to a handful of characters.
const WORD_SNAP_MIN_RATIO = 0.9;
// Hostnames treated as keyless local inference endpoints (Ollama, LM Studio).
const LOCAL_HOSTNAMES = new Set(["localhost", "127.0.0.1", "[::1]"]);
/**
 * Decide whether a base URL points at a local inference server.
 *
 * The hostname is parsed with the WHATWG URL API so that a remote host such as
 * `https://localhost.evil.example` or a query string containing "localhost" is
 * NOT mistaken for a local endpoint.
 *
 * @param {string} baseURL Configured OpenAI-compatible base URL.
 * @returns {boolean} true when the host is localhost, *.localhost or a loopback literal.
 */
function isLocalBaseURL(baseURL) {
    try {
        const { hostname } = new URL(baseURL);
        return LOCAL_HOSTNAMES.has(hostname) || hostname.endsWith(".localhost");
    }
    catch {
        // Not an absolute URL — keep the legacy substring heuristic so that
        // previously accepted shorthand values keep working.
        return baseURL.includes("localhost") || baseURL.includes("127.0.0.1");
    }
}
/**
 * Cap embedding input at `maxChars` characters.
 *
 * @param {string} text Non-empty input text.
 * @param {number} [maxChars] Maximum number of UTF-16 code units to keep.
 * @returns {string} `text` unchanged when short enough, otherwise a prefix that
 *   never ends on half of a surrogate pair and, where a space is close to the
 *   cut, ends on a word boundary.
 */
function truncateForEmbedding(text, maxChars = MAX_EMBEDDING_CHARS) {
    if (text.length <= maxChars) {
        return text;
    }
    let kept = text.substring(0, maxChars);
    // A hard cut can split a surrogate pair; drop the orphaned high surrogate.
    const lastUnit = kept.charCodeAt(kept.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        kept = kept.slice(0, -1);
    }
    const lastSpace = kept.lastIndexOf(" ");
    if (lastSpace >= maxChars * WORD_SNAP_MIN_RATIO) {
        kept = kept.substring(0, lastSpace);
    }
    return kept;
}
/**
 * LLM adapter built on the `openai` SDK client. Because the base URL is
 * configurable, the same adapter serves OpenAI itself and OpenAI-compatible
 * servers (Ollama, LM Studio, vLLM, ...).
 */
export class OpenAIAdapter {
    // SDK client holding the API key and base URL resolved in the constructor.
    client;
    /**
     * Resolve credentials/base URL from settings and create the SDK client.
     *
     * @throws {Error} when no API key is configured and the base URL is not a
     *   local endpoint.
     */
    constructor() {
        // The environment variable (or "") is handed to getSettingSync as the
        // fallback value for the "openai_api_key" setting.
        const apiKey = getSettingSync("openai_api_key", process.env.OPENAI_API_KEY ?? "");
        const baseURL = getSettingSync("openai_base_url", "https://api.openai.com/v1");
        // A missing key is only acceptable for local endpoints.
        if (!apiKey && !isLocalBaseURL(baseURL)) {
            throw new Error("OpenAI API key is not set and base URL is not a local endpoint. " +
                "Set OPENAI_API_KEY or configure a local base URL (e.g. http://localhost:11434/v1).");
        }
        this.client = new OpenAI({
            // Keyless local mode still sends a non-empty placeholder key.
            apiKey: apiKey || "ollama",
            baseURL,
        });
        debugLog(`[OpenAIAdapter] Initialized — baseURL=${baseURL}, keyless=${!apiKey}`);
    }
    // ─── Text Generation ─────────────────────────────────────────────────────
    /**
     * Run a single-turn chat completion.
     *
     * @param {string} prompt User message.
     * @param {string} [systemInstruction] Optional system message sent first.
     * @returns {Promise<string>} Content of the first choice, or "" when it is null/absent.
     */
    async generateText(prompt, systemInstruction) {
        // Read at call time so a changed setting is picked up without rebuilding the adapter.
        const model = getSettingSync("openai_model", "gpt-4o-mini");
        const messages = [];
        if (systemInstruction) {
            messages.push({ role: "system", content: systemInstruction });
        }
        messages.push({ role: "user", content: prompt });
        debugLog(`[OpenAIAdapter] generateText — model=${model}, messages=${messages.length}`);
        const response = await this.client.chat.completions.create({ model, messages });
        return response.choices[0]?.message?.content ?? "";
    }
    // ─── Embedding Generation ────────────────────────────────────────────────
    /**
     * Embed `text`, requesting EMBEDDING_DIMS dimensions.
     *
     * @param {string} text Text to embed; must contain non-whitespace characters.
     * @returns {Promise<number[]>} The embedding returned by the server. A size
     *   other than EMBEDDING_DIMS only produces a warning, so servers that ignore
     *   the `dimensions` parameter stay usable.
     * @throws {Error} on empty input or when the response carries no embedding.
     */
    async generateEmbedding(text) {
        if (!text || !text.trim()) {
            throw new Error("Cannot generate embedding for empty text.");
        }
        const model = getSettingSync("openai_embedding_model", "text-embedding-3-small");
        const inputText = truncateForEmbedding(text);
        if (inputText.length !== text.length) {
            debugLog(`[OpenAIAdapter] Embedding input truncated from ${text.length}` +
                ` to ~${MAX_EMBEDDING_CHARS} chars (word-safe)`);
        }
        debugLog(`[OpenAIAdapter] generateEmbedding — model=${model}, dims=${EMBEDDING_DIMS}`);
        const response = await this.client.embeddings.create({
            model,
            input: inputText,
            // Ask the server for a shortened vector. NOTE(review): support for
            // `dimensions` depends on the model/server — confirm for non-default models.
            dimensions: EMBEDDING_DIMS,
        });
        // `data` may be missing on non-conforming servers; surface that through
        // the explicit error below rather than a bare TypeError.
        const embedding = response.data?.[0]?.embedding;
        if (!Array.isArray(embedding) || embedding.length === 0) {
            throw new Error(`[OpenAIAdapter] Embedding response is empty for model "${model}"`);
        }
        if (embedding.length !== EMBEDDING_DIMS) {
            console.warn(`[OpenAIAdapter] Embedding dimension mismatch: expected ${EMBEDDING_DIMS}, ` +
                `got ${embedding.length}. ` +
                `If using a local model, use one that natively outputs ${EMBEDDING_DIMS} dims ` +
                `(e.g. nomic-embed-text) or supports the Matryoshka 'dimensions' parameter.`);
        }
        return embedding;
    }
    // ─── Image Description (VLM) ─────────────────────────────────────────────
    /**
     * Describe an image through the chat completions endpoint using the
     * configured chat model.
     *
     * @param {string} imageBase64 Base64-encoded image bytes (no data-URI prefix).
     * @param {string} mimeType MIME type placed in the data URI.
     * @param {string} [context] Optional user context woven into the prompt.
     * @returns {Promise<string>} Content of the first choice, or "" when it is null/absent.
     */
    async generateImageDescription(imageBase64, mimeType, context) {
        const model = getSettingSync("openai_model", "gpt-4o-mini");
        const prompt = context
            ? `Describe this image in rich detail for a developer knowledge base. User context: "${context}"`
            : "Describe this image in rich detail for a developer knowledge base. Include: UI elements, visible text, architectural components, and key observations.";
        const response = await this.client.chat.completions.create({
            model,
            max_tokens: 1024,
            messages: [{
                    role: "user",
                    content: [
                        {
                            type: "image_url",
                            // The image travels inline as a data URI.
                            image_url: { url: `data:${mimeType};base64,${imageBase64}` },
                        },
                        { type: "text", text: prompt },
                    ],
                }],
        });
        return response.choices[0]?.message?.content ?? "";
    }
}
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* TracingLLMProvider — OpenTelemetry Decorator (v4.6.0)
|
|
3
|
+
* ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
* PURPOSE:
|
|
5
|
+
* Wraps any LLMProvider with OTel span instrumentation without modifying
|
|
6
|
+
* any of the three existing adapters (gemini.ts, openai.ts, anthropic.ts).
|
|
7
|
+
*
|
|
8
|
+
* PATTERN: Decorator (Gang of Four)
|
|
9
|
+
* Implements LLMProvider and delegates every method call to the wrapped
|
|
10
|
+
* `inner` provider, bookending each call with an OTel span.
|
|
11
|
+
*
|
|
12
|
+
* WHY NOT INSTRUMENT INSIDE THE ADAPTERS?
|
|
13
|
+
* 1. Single Responsibility — each adapter has one job: talk to its API.
|
|
14
|
+
* 2. DRY — the span pattern is identical across all three adapters.
|
|
15
|
+
* 3. Testability — this class can be tested with a mock inner provider.
|
|
16
|
+
* 4. Composability — future decorators (rate-limiting, caching) layer on
|
|
17
|
+
* top without touching any adapter code.
|
|
18
|
+
*
|
|
19
|
+
* VLM METHOD OPTIONALITY:
|
|
20
|
+
* TypeScript class methods always exist on the prototype — even optional ones.
|
|
21
|
+
* To preserve the `generateImageDescription?` contract (so imageCaptioner.ts's
|
|
22
|
+
* `if (llm.generateImageDescription)` check works correctly), we assign the
|
|
23
|
+
* VLM method as an own-property in the constructor only when the inner
|
|
24
|
+
* adapter supports it. Otherwise the property stays `undefined`.
|
|
25
|
+
*
|
|
26
|
+
* GDPR NOTE ON SPAN ATTRIBUTES:
|
|
27
|
+
* We log character counts and dimensions — never the full prompt, embedding
|
|
28
|
+
* vector, or base64 image content. A full prompt stored in Jaeger/Datadog
|
|
29
|
+
* would be a GDPR compliance risk.
|
|
30
|
+
*
|
|
31
|
+
* SPAN HIERARCHY (example for session_search_memory):
|
|
32
|
+
* ▼ mcp.call_tool (session_search_memory) [root — server.ts]
|
|
33
|
+
* ▼ llm.generate_embedding [this decorator]
|
|
34
|
+
*
|
|
35
|
+
* CONTEXT PROPAGATION:
|
|
36
|
+
* AsyncLocalStorage (OTel's context mechanism) automatically parents these
|
|
37
|
+
* spans to the active root span from server.ts. No explicit ref-passing needed.
|
|
38
|
+
*
|
|
39
|
+
* FILE LOCATION: src/utils/llm/adapters/traced.ts
|
|
40
|
+
* IMPORTS FROM: ../provider.js (one level up, in src/utils/llm/)
|
|
41
|
+
* ../../telemetry.js (two levels up, in src/utils/)
|
|
42
|
+
* ─────────────────────────────────────────────────────────────────────────────
|
|
43
|
+
*/
|
|
44
|
+
import { SpanStatusCode, context, trace } from "@opentelemetry/api";
|
|
45
|
+
import { getTracer } from "../../telemetry.js";
|
|
46
|
+
/**
 * Length of a string/array-like value for use as a span attribute.
 * Returns 0 for null, undefined or anything without a numeric `length`, so
 * attribute bookkeeping can never throw on behalf of the wrapped provider.
 */
function lengthForSpan(value) {
    const size = value?.length;
    return typeof size === "number" ? size : 0;
}
/**
 * Run `invoke` inside a new span that is made active for the duration of the call.
 *
 * @param {string} spanName Span name.
 * @param {object} attributes Attributes set when the span starts.
 * @param {() => Promise<any>} invoke The wrapped provider call.
 * @param {(span: any, result: any) => void} [annotate] Adds result-derived
 *   attributes after a successful call.
 * @returns {Promise<any>} Whatever `invoke` resolves to; its rejection is
 *   recorded on the span and re-thrown unchanged.
 */
function runInSpan(spanName, attributes, invoke, annotate) {
    const span = getTracer().startSpan(spanName, { attributes });
    return context.with(trace.setSpan(context.active(), span), async () => {
        try {
            const result = await invoke();
            annotate?.(span, result);
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (err) {
            span.recordException(err instanceof Error ? err : new Error(String(err)));
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: err instanceof Error ? err.message : String(err),
            });
            throw err;
        }
        finally {
            // End the span on every path so it is never left open.
            span.end();
        }
    });
}
/**
 * Decorator that wraps an LLM provider and records one span per call.
 *
 * Only sizes (character counts, vector length, estimated image bytes) are
 * attached to spans — never prompt text, vectors or image data. Inputs are
 * forwarded to the inner provider untouched, so its own validation errors
 * reach the caller unchanged.
 */
export class TracingLLMProvider {
    inner;
    providerName;
    /**
     * Own property, assigned in the constructor only when the inner provider
     * has a `generateImageDescription` method; otherwise it stays `undefined`
     * so that `if (llm.generateImageDescription)` feature checks are falsy.
     */
    generateImageDescription;
    /**
     * @param inner The provider to delegate to.
     * @param providerName Label written to the `llm.provider` span attribute.
     */
    constructor(inner, providerName) {
        this.inner = inner;
        this.providerName = providerName;
        if (inner.generateImageDescription) {
            const innerVlm = inner.generateImageDescription.bind(inner);
            this.generateImageDescription = async (imageBase64, mimeType, ctx) => runInSpan("llm.generate_image_description", {
                "llm.provider": providerName,
                "llm.mime_type": mimeType,
                // Decoded size estimate: base64 encodes 3 bytes in 4 characters.
                "llm.image_size_bytes": Math.round(lengthForSpan(imageBase64) * 0.75),
            }, () => innerVlm(imageBase64, mimeType, ctx), (span, caption) => span.setAttribute("llm.caption_len", lengthForSpan(caption)));
        }
    }
    // ── generateText ──────────────────────────────────────────────────────────
    /**
     * Delegate to `inner.generateText` inside an `llm.generate_text` span.
     * `llm.text_len` is the prompt's character count.
     */
    async generateText(prompt, systemInstruction) {
        return runInSpan("llm.generate_text", {
            "llm.provider": this.providerName,
            "llm.text_len": lengthForSpan(prompt),
        }, () => this.inner.generateText(prompt, systemInstruction));
    }
    // ── generateEmbedding ─────────────────────────────────────────────────────
    /**
     * Delegate to `inner.generateEmbedding` inside an `llm.generate_embedding`
     * span. `llm.embed_len` is the input's character count and `llm.embed_dim`
     * the length of the returned vector, which makes dimension mismatches
     * visible in the trace.
     */
    async generateEmbedding(text) {
        return runInSpan("llm.generate_embedding", {
            "llm.provider": this.providerName,
            "llm.embed_len": lengthForSpan(text),
        }, () => this.inner.generateEmbedding(text), (span, vector) => span.setAttribute("llm.embed_dim", lengthForSpan(vector)));
    }
}
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Provider Factory (v4.4 — Split Provider Architecture)
|
|
3
|
+
* ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
* PURPOSE:
|
|
5
|
+
* Single point of resolution for the active LLMProvider.
|
|
6
|
+
* Composes a TEXT adapter and an EMBEDDING adapter independently, returning
|
|
7
|
+
* a single object that satisfies the LLMProvider interface. Consumers never
|
|
8
|
+
* know the difference — getLLMProvider() behavior is unchanged.
|
|
9
|
+
*
|
|
10
|
+
* SPLIT PROVIDER ARCHITECTURE:
|
|
11
|
+
* Two independent settings control text and embedding routing:
|
|
12
|
+
*
|
|
13
|
+
* text_provider — "gemini" (default) | "openai" | "anthropic"
|
|
14
|
+
* embedding_provider — "auto" (default) | "gemini" | "openai"
|
|
15
|
+
*
|
|
16
|
+
* When embedding_provider = "auto":
|
|
17
|
+
* * If text_provider is gemini or openai → use same provider for embeddings
|
|
18
|
+
* * If text_provider is anthropic → auto-fallback to gemini for embeddings
|
|
19
|
+
* (Anthropic has no native embedding API)
|
|
20
|
+
*
|
|
21
|
+
* EXAMPLE CONFIGURATIONS:
|
|
22
|
+
* text_provider=gemini, embedding_provider=auto → Gemini+Gemini (default)
|
|
23
|
+
* text_provider=openai, embedding_provider=auto → OpenAI+OpenAI
|
|
24
|
+
* text_provider=anthropic, embedding_provider=auto → Claude+Gemini (auto-bridge)
|
|
25
|
+
* text_provider=anthropic, embedding_provider=openai → Claude+Ollama (cost-optimized)
|
|
26
|
+
* text_provider=gemini, embedding_provider=openai → Gemini+Ollama (mixed)
|
|
27
|
+
*
|
|
28
|
+
* SINGLETON + GRACEFUL DEGRADATION:
|
|
29
|
+
* Same as before — instance cached per process, errors fall back to Gemini.
|
|
30
|
+
* Provider switches require an MCP server restart.
|
|
31
|
+
*
|
|
32
|
+
* TESTING:
|
|
33
|
+
* _resetLLMProvider() clears the singleton for test injection.
|
|
34
|
+
*
|
|
35
|
+
* ADDING NEW PROVIDERS:
|
|
36
|
+
* 1. Implement LLMProvider in src/utils/llm/adapters/<name>.ts
|
|
37
|
+
* 2. Add a case to buildTextAdapter() and/or buildEmbeddingAdapter() below
|
|
38
|
+
* 3. Add the option to the dashboard "AI Providers" tab
|
|
39
|
+
*/
|
|
40
|
+
import { getSettingSync } from "../../storage/configStorage.js";
|
|
41
|
+
import { GeminiAdapter } from "./adapters/gemini.js";
|
|
42
|
+
import { OpenAIAdapter } from "./adapters/openai.js";
|
|
43
|
+
import { AnthropicAdapter } from "./adapters/anthropic.js";
|
|
44
|
+
import { TracingLLMProvider } from "./adapters/traced.js";
|
|
45
|
+
// Module-level cache for the composed provider. getLLMProvider() assigns it on
// its first call and returns the cached value afterwards; _resetLLMProvider()
// sets it back to null.
|
|
46
|
+
let providerInstance = null;
|
|
47
|
+
// ─── Adapter Builders ─────────────────────────────────────────────────────────
|
|
48
|
+
// Separated from getLLMProvider() so they can be called independently for the
|
|
49
|
+
// text and embedding halves of the composite provider.
|
|
50
|
+
/**
 * Instantiate the adapter used for text generation.
 *
 * @param type Value of the text provider setting.
 * @returns An AnthropicAdapter for "anthropic", an OpenAIAdapter for "openai",
 *   and a GeminiAdapter for "gemini" or any other value.
 */
function buildTextAdapter(type) {
    if (type === "anthropic") {
        return new AnthropicAdapter();
    }
    if (type === "openai") {
        return new OpenAIAdapter();
    }
    // "gemini" and every unrecognised value share the Gemini default.
    return new GeminiAdapter();
}
|
|
58
|
+
/**
 * Instantiate the adapter used for embeddings.
 *
 * There is deliberately no "anthropic" branch: that value, like any other
 * unrecognised one, resolves to the Gemini adapter.
 *
 * @param type Resolved embedding provider name.
 * @returns An OpenAIAdapter for "openai", otherwise a GeminiAdapter.
 */
function buildEmbeddingAdapter(type) {
    return type === "openai" ? new OpenAIAdapter() : new GeminiAdapter();
}
|
|
68
|
+
// ─── Factory ─────────────────────────────────────────────────────────────────
|
|
69
|
+
/**
 * Combine a text adapter and an embedding adapter into one provider-shaped object.
 *
 * Methods are bound to their adapter so `this` resolves correctly when they
 * are called through the composite. `generateImageDescription` is copied from
 * the text adapter only when that adapter defines it, so feature checks on the
 * composite stay accurate.
 */
function composeAdapters(textAdapter, embedAdapter) {
    const composed = {
        generateText: textAdapter.generateText.bind(textAdapter),
        generateEmbedding: embedAdapter.generateEmbedding.bind(embedAdapter),
    };
    if (textAdapter.generateImageDescription) {
        composed.generateImageDescription = textAdapter.generateImageDescription.bind(textAdapter);
    }
    return composed;
}
/**
 * Returns the singleton LLM provider, initializing it on first call.
 *
 * Resolution:
 *   - text adapter      ← "text_provider" setting (default "gemini")
 *   - embedding adapter ← "embedding_provider" setting (default "auto");
 *     "auto" means "same as the text provider", except that "anthropic" is
 *     routed to "gemini".
 *
 * If building either adapter throws, the error is logged and a GeminiAdapter
 * is used for both halves. The result is wrapped in TracingLLMProvider and
 * cached in `providerInstance` for subsequent calls.
 *
 * Routing notices are written with console.warn (stderr), not console.info:
 * in Node, console.info writes to stdout, which must stay free of log output
 * if the server speaks MCP over stdio. NOTE(review): the transport is not
 * visible from this module — stderr is safe either way.
 *
 * @returns The cached, tracing-wrapped provider.
 */
export function getLLMProvider() {
    // Fast path: return cached composite instance.
    if (providerInstance) {
        return providerInstance;
    }
    const textType = getSettingSync("text_provider", "gemini");
    let embedType = getSettingSync("embedding_provider", "auto");
    if (embedType === "auto") {
        const bridgedFromAnthropic = textType === "anthropic";
        embedType = bridgedFromAnthropic ? "gemini" : textType;
        if (bridgedFromAnthropic) {
            console.warn("[LLMFactory] text_provider=anthropic with embedding_provider=auto: " +
                "routing embeddings to GeminiAdapter (Anthropic has no native embedding API). " +
                "Set embedding_provider=openai in dashboard to use Ollama/OpenAI instead.");
        }
    }
    try {
        const textAdapter = buildTextAdapter(textType);
        const embedAdapter = buildEmbeddingAdapter(embedType);
        // The text provider name is used as the span label for the wrapper.
        providerInstance = new TracingLLMProvider(composeAdapters(textAdapter, embedAdapter), textType);
        if (textType !== embedType) {
            console.warn(`[LLMFactory] Split provider: text=${textType}, embedding=${embedType}`);
        }
    }
    catch (err) {
        // Init failure (e.g. missing API key) → fall back to Gemini for both halves.
        console.error(`[LLMFactory] Failed to initialise providers (text=${textType}, embed=${embedType}): ${err}. ` +
            `Falling back to GeminiAdapter for both.`);
        const fallback = new GeminiAdapter();
        providerInstance = new TracingLLMProvider(composeAdapters(fallback, fallback), "gemini");
    }
    return providerInstance;
}
|
|
137
|
+
/**
 * Drop the cached provider so the next getLLMProvider() call rebuilds it
 * from the current settings.
 *
 * Intended for unit tests only — production code should not call this.
 */
export function _resetLLMProvider() {
    providerInstance = null;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM Provider Interface (v4.5)
|
|
3
|
+
* ─────────────────────────────────────────────────────────────────────────────
|
|
4
|
+
* PURPOSE:
|
|
5
|
+
* Defines the contract that ALL LLM adapters must satisfy.
|
|
6
|
+
* This is the single seam in Prism's AI layer — the only thing consumers
|
|
7
|
+
* (compaction, summarization, embedding, security scan, briefing) need to
|
|
8
|
+
* know about. They never reference a specific model or SDK.
|
|
9
|
+
*
|
|
10
|
+
* DESIGN PHILOSOPHY:
|
|
11
|
+
* Keep the interface intentionally minimal. Prism only needs two LLM
|
|
12
|
+
* capabilities for its own internal operations. Adding more methods here
|
|
13
|
+
* would force every future adapter to implement things it doesn't need.
|
|
14
|
+
*
|
|
15
|
+
* ADAPTER IMPLEMENTATIONS (src/utils/llm/adapters/):
|
|
16
|
+
* - gemini.ts → Google Gemini (default; all methods including VLM)
|
|
17
|
+
* - openai.ts → OpenAI Cloud + Ollama + LM Studio + vLLM
|
|
18
|
+
* - anthropic.ts → Anthropic Claude (VLM supported; embeddings unsupported)
|
|
19
|
+
*
|
|
20
|
+
* FACTORY RESOLUTION:
|
|
21
|
+
* Never instantiate adapters directly. Always call:
|
|
22
|
+
* import { getLLMProvider } from "../llm/factory.js";
|
|
23
|
+
* const llm = getLLMProvider();
|
|
24
|
+
*/
|
|
25
|
+
export {};
|