prism-mcp-server 4.2.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ /**
2
+ * OpenAI Adapter (v4.5)
3
+ * ─────────────────────────────────────────────────────────────────────────────
4
+ * PURPOSE:
5
+ * Implements LLMProvider using the official `openai` Node.js SDK.
6
+ * A single adapter that covers FOUR deployment scenarios:
7
+ *
8
+ * 1. Cloud OpenAI (default)
9
+ * apiKey = OPENAI_API_KEY env var or dashboard setting
10
+ * baseURL = https://api.openai.com/v1 (default)
11
+ * Models: gpt-4o-mini (default text), text-embedding-3-small (embedding)
12
+ *
13
+ * 2. Ollama (local, open-source)
14
+ * apiKey = (not needed — leave blank)
15
+ * baseURL = http://localhost:11434/v1
16
+ * Models: any model you've pulled (e.g. llama3.2, nomic-embed-text)
17
+ *
18
+ * 3. LM Studio (local GUI)
19
+ * apiKey = (not needed — leave blank)
20
+ * baseURL = http://localhost:1234/v1
21
+ *
22
+ * 4. vLLM / custom OpenAI-compatible server
23
+ * apiKey = (server-specific)
24
+ * baseURL = http://<host>:<port>/v1
25
+ *
26
+ * EMBEDDING DIMENSION PARITY (768 dims):
27
+ * Prism's SQLite (sqlite-vec) and Supabase (pgvector) schemas define
28
+ * embedding columns as EXACTLY 768 dimensions. This was chosen to match
29
+ * Gemini's native output size. All adapters MUST return 768-dim vectors.
30
+ *
31
+ * OpenAI solution: text-embedding-3-* models support the `dimensions`
32
+ * parameter via Matryoshka Representation Learning (MRL), which produces
33
+ * a shorter but still high-quality vector. text-embedding-3-small at 768
34
+ * dims outperforms text-embedding-ada-002 at its native 1536 dims.
35
+ *
36
+ * WARNING for local models (Ollama / LM Studio):
37
+ * Many locally-served models do NOT support the `dimensions` parameter.
38
+ * We log a warning but do NOT throw — the error will surface at the DB
39
+ * write boundary, which is the right place to enforce the constraint.
40
+ * Choose a local embedding model that natively outputs 768 dims
41
+ * (e.g. nomic-embed-text = 768; avoid mxbai-embed-large, which outputs 1024).
42
+ *
43
+ * CONFIG KEYS (Prism dashboard "AI Providers" tab OR environment variables):
44
+ * openai_api_key — API key (empty = localhost/Ollama mode)
45
+ * openai_base_url — Base URL (default: https://api.openai.com/v1)
46
+ * openai_model — Chat model (default: gpt-4o-mini)
47
+ * openai_embedding_model — Embedding model (default: text-embedding-3-small)
48
+ */
49
+ import OpenAI from "openai";
50
+ import { getSettingSync } from "../../../storage/configStorage.js";
51
+ import { debugLog } from "../../logger.js";
52
+ // ─── Constants ────────────────────────────────────────────────────────────────
53
+ // Vector length requested from the embeddings endpoint and checked against the response. Must match Prism's DB schema (sqlite-vec and pgvector column sizes).
54
+ // Changing this requires a DB migration — do not adjust casually.
55
+ const EMBEDDING_DIMS = 768;
56
+ // Upper bound (in characters) on embedding input; longer text is truncated before the request. text-embedding-3-small has an 8191-token context window.
57
+ // We use a conservative character-based cap to avoid needing a tokenizer.
58
+ // 8000 chars ≈ 1500-2000 tokens for typical session summaries. NOTE(review): token-dense input (e.g. CJK text or code) may tokenize higher — confirm the cap is still safe for such content.
59
+ const MAX_EMBEDDING_CHARS = 8000;
60
/**
 * Decide whether a configured base URL points at the local machine.
 *
 * The hostname is parsed out of the URL so that only the host part is
 * inspected. A plain substring test would also match remote URLs that merely
 * contain "localhost" somewhere in their path or query string, and would miss
 * the IPv6 loopback address.
 *
 * @param {string} baseURL Configured OpenAI-compatible base URL.
 * @returns {boolean} true for localhost, *.localhost, 127.x.x.x and ::1.
 */
function isLocalBaseURL(baseURL) {
    let hostname = "";
    try {
        hostname = new URL(baseURL).hostname;
    }
    catch {
        // Not an absolute URL — fall through to the legacy heuristic below.
    }
    if (!hostname) {
        // Scheme-less values such as "localhost:11434/v1" have no parsable
        // hostname; keep the original substring heuristic for those.
        return baseURL.includes("localhost") || baseURL.includes("127.0.0.1");
    }
    // URL#hostname wraps IPv6 literals in brackets ("[::1]").
    const host = hostname.toLowerCase().replace(/^\[|\]$/g, "");
    return host === "localhost"
        || host.endsWith(".localhost")
        || host === "::1"
        || /^127(?:\.\d{1,3}){3}$/.test(host);
}
/**
 * LLM adapter backed by the `openai` SDK client.
 *
 * Provides text generation, embedding generation and image description
 * against either the OpenAI cloud API or any OpenAI-compatible server
 * reachable through the configured base URL.
 */
export class OpenAIAdapter {
    // SDK client holding the API key and base URL resolved at construction.
    client;
    /**
     * Resolve the API key and base URL and create the SDK client.
     *
     * @throws {Error} when no API key is configured and the base URL is not a
     *   local endpoint. The provider factory catches this and falls back.
     */
    constructor() {
        // The environment variable is passed as the fallback value of the
        // stored setting; an empty string means "no key configured".
        const apiKey = getSettingSync("openai_api_key", process.env.OPENAI_API_KEY ?? "");
        const baseURL = getSettingSync("openai_base_url", "https://api.openai.com/v1");
        // Local inference servers are allowed to run without a real key.
        const isLocal = isLocalBaseURL(baseURL);
        if (!apiKey && !isLocal) {
            throw new Error("OpenAI API key is not set and base URL is not a local endpoint. " +
                "Set OPENAI_API_KEY or configure a local base URL (e.g. http://localhost:11434/v1).");
        }
        this.client = new OpenAI({
            // A non-empty placeholder is supplied when running keyless so the
            // client always receives a string.
            apiKey: apiKey || "ollama",
            baseURL,
        });
        debugLog(`[OpenAIAdapter] Initialized — baseURL=${baseURL}, keyless=${!apiKey}`);
    }
    // ─── Text Generation ─────────────────────────────────────────────────────
    /**
     * Generate a chat completion for a single user prompt.
     *
     * @param {string} prompt User message content.
     * @param {string} [systemInstruction] Optional system message, sent first.
     * @returns {Promise<string>} Completion text, or "" when the response
     *   carries no text content.
     */
    async generateText(prompt, systemInstruction) {
        // Read per call (not in the constructor) so a changed model setting
        // takes effect without rebuilding the adapter.
        const model = getSettingSync("openai_model", "gpt-4o-mini");
        const messages = [];
        if (systemInstruction) {
            messages.push({ role: "system", content: systemInstruction });
        }
        messages.push({ role: "user", content: prompt });
        debugLog(`[OpenAIAdapter] generateText — model=${model}, messages=${messages.length}`);
        const response = await this.client.chat.completions.create({ model, messages });
        // `choices` is guarded as well: a compatible server may omit it, and
        // the documented contract here is "empty string when there is no text".
        return response.choices?.[0]?.message?.content ?? "";
    }
    // ─── Embedding Generation ────────────────────────────────────────────────
    /**
     * Generate an embedding vector for `text`.
     *
     * Input longer than MAX_EMBEDDING_CHARS is truncated first. The request
     * asks for EMBEDDING_DIMS dimensions; a response of a different length is
     * reported with a warning but still returned.
     *
     * @param {string} text Text to embed; must contain non-whitespace.
     * @returns {Promise<number[]>} The embedding vector.
     * @throws {Error} when `text` is empty/blank or the response has no vector.
     */
    async generateEmbedding(text) {
        if (!text || !text.trim()) {
            throw new Error("Cannot generate embedding for empty text.");
        }
        // Read per call so a changed model setting takes effect immediately.
        const model = getSettingSync("openai_embedding_model", "text-embedding-3-small");
        let inputText = text;
        if (inputText.length > MAX_EMBEDDING_CHARS) {
            debugLog(`[OpenAIAdapter] Embedding input truncated from ${inputText.length}` +
                ` to ~${MAX_EMBEDDING_CHARS} chars (word-safe)`);
            inputText = inputText.substring(0, MAX_EMBEDDING_CHARS);
            const lastSpace = inputText.lastIndexOf(" ");
            // Snap back to a word boundary only when that boundary is close to
            // the cut. For text with few or no spaces (e.g. CJK, or a single
            // early space) snapping to the last space would throw away almost
            // the entire input.
            if (lastSpace >= Math.floor(MAX_EMBEDDING_CHARS * 0.9)) {
                inputText = inputText.substring(0, lastSpace);
            }
            else {
                // The hard cut may have split a surrogate pair; drop a dangling
                // high surrogate so the request never carries a lone half.
                const lastUnit = inputText.charCodeAt(inputText.length - 1);
                if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                    inputText = inputText.slice(0, -1);
                }
            }
        }
        debugLog(`[OpenAIAdapter] generateEmbedding — model=${model}, dims=${EMBEDDING_DIMS}`);
        const response = await this.client.embeddings.create({
            model,
            input: inputText,
            // Ask the server for a vector of exactly EMBEDDING_DIMS entries.
            // Supported by text-embedding-3-* models; older or locally served
            // models may not honour it, hence the length check below.
            dimensions: EMBEDDING_DIMS,
        });
        const embedding = response.data?.[0]?.embedding;
        if (!Array.isArray(embedding) || embedding.length === 0) {
            throw new Error(`[OpenAIAdapter] Embedding response is empty for model "${model}"`);
        }
        // Soft check only: the vector is returned either way, so a server that
        // does not honour `dimensions` is reported rather than made unusable.
        if (embedding.length !== EMBEDDING_DIMS) {
            console.warn(`[OpenAIAdapter] Embedding dimension mismatch: expected ${EMBEDDING_DIMS}, ` +
                `got ${embedding.length}. ` +
                `If using a local model, use one that natively outputs ${EMBEDDING_DIMS} dims ` +
                `(e.g. nomic-embed-text) or supports the Matryoshka 'dimensions' parameter.`);
        }
        return embedding;
    }
    // ─── Image Description (VLM) ─────────────────────────────────────────────
    /**
     * Describe an image through the chat completions endpoint.
     *
     * @param {string} imageBase64 Base64-encoded image bytes (no data-URI prefix).
     * @param {string} mimeType MIME type used to build the data URI.
     * @param {string} [context] Optional user context woven into the prompt.
     * @returns {Promise<string>} Description text, or "" when none is returned.
     */
    async generateImageDescription(imageBase64, mimeType, context) {
        const model = getSettingSync("openai_model", "gpt-4o-mini");
        const prompt = context
            ? `Describe this image in rich detail for a developer knowledge base. User context: "${context}"`
            : "Describe this image in rich detail for a developer knowledge base. Include: UI elements, visible text, architectural components, and key observations.";
        const response = await this.client.chat.completions.create({
            model,
            max_tokens: 1024,
            messages: [{
                    role: "user",
                    content: [
                        {
                            type: "image_url",
                            // The image is passed inline as a data URI.
                            image_url: { url: `data:${mimeType};base64,${imageBase64}` },
                        },
                        { type: "text", text: prompt },
                    ],
                }],
        });
        return response.choices?.[0]?.message?.content ?? "";
    }
}
@@ -0,0 +1,190 @@
1
+ /**
2
+ * TracingLLMProvider — OpenTelemetry Decorator (v4.6.0)
3
+ * ─────────────────────────────────────────────────────────────────────────────
4
+ * PURPOSE:
5
+ * Wraps any LLMProvider with OTel span instrumentation without modifying
6
+ * any of the three existing adapters (gemini.ts, openai.ts, anthropic.ts).
7
+ *
8
+ * PATTERN: Decorator (Gang of Four)
9
+ * Implements LLMProvider and delegates every method call to the wrapped
10
+ * `inner` provider, bookending each call with an OTel span.
11
+ *
12
+ * WHY NOT INSTRUMENT INSIDE THE ADAPTERS?
13
+ * 1. Single Responsibility — each adapter has one job: talk to its API.
14
+ * 2. DRY — the span pattern is identical across all three adapters.
15
+ * 3. Testability — this class can be tested with a mock inner provider.
16
+ * 4. Composability — future decorators (rate-limiting, caching) layer on
17
+ * top without touching any adapter code.
18
+ *
19
+ * VLM METHOD OPTIONALITY:
20
+ * TypeScript class methods always exist on the prototype — even optional ones.
21
+ * To preserve the `generateImageDescription?` contract (so imageCaptioner.ts's
22
+ * `if (llm.generateImageDescription)` check works correctly), we assign the
23
+ * VLM method as an own-property in the constructor only when the inner
24
+ * adapter supports it. Otherwise the property stays `undefined`.
25
+ *
26
+ * GDPR NOTE ON SPAN ATTRIBUTES:
27
+ * We log character counts and dimensions — never the full prompt, embedding
28
+ * vector, or base64 image content. A full prompt stored in Jaeger/Datadog
29
+ * would be a GDPR compliance risk.
30
+ *
31
+ * SPAN HIERARCHY (example for session_search_memory):
32
+ * ▼ mcp.call_tool (session_search_memory) [root — server.ts]
33
+ * ▼ llm.generate_embedding [this decorator]
34
+ *
35
+ * CONTEXT PROPAGATION:
36
+ * AsyncLocalStorage (OTel's context mechanism) automatically parents these
37
+ * spans to the active root span from server.ts. No explicit ref-passing needed.
38
+ *
39
+ * FILE LOCATION: src/utils/llm/adapters/traced.ts
40
+ * IMPORTS FROM: ../provider.js (one level up, in src/utils/llm/)
41
+ * ../../telemetry.js (two levels up, in src/utils/)
42
+ * ─────────────────────────────────────────────────────────────────────────────
43
+ */
44
+ import { SpanStatusCode, context, trace } from "@opentelemetry/api";
45
+ import { getTracer } from "../../telemetry.js";
46
/**
 * Length of a string/array-like value, or 0 when the value is null/undefined
 * or has no length. Used for span attributes so that the tracing layer itself
 * never throws on unexpected input or output.
 */
function lengthOrZero(value) {
    return value?.length ?? 0;
}
/**
 * Run `call` inside a new span that is made the active span for its duration.
 *
 * On success the optional `annotate` hook may add result-derived attributes
 * and the span status is set to OK. On failure the exception is recorded, the
 * status is set to ERROR and the original error is rethrown unchanged. The
 * span is ended in every case.
 *
 * @param {string} spanName Name of the span to start.
 * @param {object} attributes Attributes set when the span starts.
 * @param {() => Promise<any>} call The wrapped provider call.
 * @param {(span: any, result: any) => void} [annotate] Post-success hook.
 * @returns {Promise<any>} Whatever `call` resolves to.
 */
async function runInSpan(spanName, attributes, call, annotate) {
    const span = getTracer().startSpan(spanName, { attributes });
    return context.with(trace.setSpan(context.active(), span), async () => {
        try {
            const result = await call();
            annotate?.(span, result);
            span.setStatus({ code: SpanStatusCode.OK });
            return result;
        }
        catch (err) {
            span.recordException(err instanceof Error ? err : new Error(String(err)));
            span.setStatus({
                code: SpanStatusCode.ERROR,
                message: err instanceof Error ? err.message : String(err),
            });
            throw err;
        }
        finally {
            // Always end the span, including on error.
            span.end();
        }
    });
}
/**
 * Decorator that wraps an LLM provider and records one span per call.
 *
 * Only sizes are recorded as span attributes (character counts, vector
 * length, estimated image bytes) — never prompt text, embedding values or
 * image content.
 */
export class TracingLLMProvider {
    inner;
    providerName;
    /**
     * Own property rather than a prototype method: it is assigned in the
     * constructor only when the wrapped provider offers image description,
     * and stays `undefined` otherwise so that a truthiness check on it
     * reflects the wrapped provider's capability.
     */
    generateImageDescription;
    /**
     * @param inner        The provider to delegate to.
     * @param providerName Label written to the "llm.provider" span attribute.
     */
    constructor(inner, providerName) {
        this.inner = inner;
        this.providerName = providerName;
        if (inner.generateImageDescription) {
            const innerVlm = inner.generateImageDescription.bind(inner);
            // Captured once so the label does not depend on `this` later.
            const label = this.providerName;
            this.generateImageDescription = async (imageBase64, mimeType, ctx) => runInSpan("llm.generate_image_description", {
                "llm.provider": label,
                "llm.mime_type": mimeType,
                // Estimate decoded byte size from the base64 length (3 bytes per 4 chars).
                "llm.image_size_bytes": Math.round(lengthOrZero(imageBase64) * 0.75),
            }, () => innerVlm(imageBase64, mimeType, ctx), (span, caption) => span.setAttribute("llm.caption_len", lengthOrZero(caption)));
        }
    }
    // ── generateText ──────────────────────────────────────────────────────────
    /**
     * Delegate to the wrapped provider inside an "llm.generate_text" span.
     * Records the prompt's character count as "llm.text_len".
     */
    async generateText(prompt, systemInstruction) {
        return runInSpan("llm.generate_text", {
            "llm.provider": this.providerName,
            "llm.text_len": lengthOrZero(prompt),
        }, () => this.inner.generateText(prompt, systemInstruction));
    }
    // ── generateEmbedding ─────────────────────────────────────────────────────
    /**
     * Delegate to the wrapped provider inside an "llm.generate_embedding" span.
     * Records the input character count as "llm.embed_len" and, on success,
     * the returned vector length as "llm.embed_dim".
     */
    async generateEmbedding(text) {
        return runInSpan("llm.generate_embedding", {
            "llm.provider": this.providerName,
            "llm.embed_len": lengthOrZero(text),
        }, () => this.inner.generateEmbedding(text), (span, vector) => span.setAttribute("llm.embed_dim", lengthOrZero(vector)));
    }
}
@@ -0,0 +1,143 @@
1
+ /**
2
+ * LLM Provider Factory (v4.4 — Split Provider Architecture)
3
+ * ─────────────────────────────────────────────────────────────────────────────
4
+ * PURPOSE:
5
+ * Single point of resolution for the active LLMProvider.
6
+ * Composes a TEXT adapter and an EMBEDDING adapter independently, returning
7
+ * a single object that satisfies the LLMProvider interface. Consumers never
8
+ * know the difference — getLLMProvider() behavior is unchanged.
9
+ *
10
+ * SPLIT PROVIDER ARCHITECTURE:
11
+ * Two independent settings control text and embedding routing:
12
+ *
13
+ * text_provider — "gemini" (default) | "openai" | "anthropic"
14
+ * embedding_provider — "auto" (default) | "gemini" | "openai"
15
+ *
16
+ * When embedding_provider = "auto":
17
+ * * If text_provider is gemini or openai → use same provider for embeddings
18
+ * * If text_provider is anthropic → auto-fallback to gemini for embeddings
19
+ * (Anthropic has no native embedding API)
20
+ *
21
+ * EXAMPLE CONFIGURATIONS:
22
+ * text_provider=gemini, embedding_provider=auto → Gemini+Gemini (default)
23
+ * text_provider=openai, embedding_provider=auto → OpenAI+OpenAI
24
+ * text_provider=anthropic, embedding_provider=auto → Claude+Gemini (auto-bridge)
25
+ * text_provider=anthropic, embedding_provider=openai → Claude+Ollama (cost-optimized)
26
+ * text_provider=gemini, embedding_provider=openai → Gemini+Ollama (mixed)
27
+ *
28
+ * SINGLETON + GRACEFUL DEGRADATION:
29
+ * Same as before — instance cached per process, errors fall back to Gemini.
30
+ * Provider switches require an MCP server restart.
31
+ *
32
+ * TESTING:
33
+ * _resetLLMProvider() clears the singleton for test injection.
34
+ *
35
+ * ADDING NEW PROVIDERS:
36
+ * 1. Implement LLMProvider in src/utils/llm/adapters/<name>.ts
37
+ * 2. Add a case to buildTextAdapter() and/or buildEmbeddingAdapter() below
38
+ * 3. Add the option to the dashboard "AI Providers" tab
39
+ */
40
+ import { getSettingSync } from "../../storage/configStorage.js";
41
+ import { GeminiAdapter } from "./adapters/gemini.js";
42
+ import { OpenAIAdapter } from "./adapters/openai.js";
43
+ import { AnthropicAdapter } from "./adapters/anthropic.js";
44
+ import { TracingLLMProvider } from "./adapters/traced.js";
45
+ // Module-level singleton — one composed provider per MCP server process. Stays null until getLLMProvider() builds it; _resetLLMProvider() sets it back to null.
46
+ let providerInstance = null;
47
+ // ─── Adapter Builders ─────────────────────────────────────────────────────────
48
+ // Separated from getLLMProvider() so they can be called independently for the
49
+ // text and embedding halves of the composite provider.
50
/**
 * Instantiate the adapter that serves text generation.
 *
 * The provider name is matched case-insensitively and ignoring surrounding
 * whitespace. A missing/empty name selects Gemini silently (the default);
 * a non-empty name that is not recognised also selects Gemini but is reported
 * on stderr so a typo in the setting does not go unnoticed.
 *
 * @param {string} type Provider name: "gemini", "openai" or "anthropic".
 * @returns A new adapter instance. Adapter constructors may throw.
 */
function buildTextAdapter(type) {
    const requested = typeof type === "string" ? type.trim().toLowerCase() : "";
    switch (requested) {
        case "anthropic": return new AnthropicAdapter();
        case "openai": return new OpenAIAdapter();
        case "gemini": return new GeminiAdapter();
        default:
            if (requested) {
                console.error(`[LLMFactory] Unknown text_provider "${type}" — using GeminiAdapter.`);
            }
            return new GeminiAdapter();
    }
}
58
/**
 * Instantiate the adapter that serves embedding generation.
 *
 * Only "openai" and "gemini" are valid here. "anthropic" is deliberately not
 * an option (there is no Anthropic embedding adapter); if it is requested
 * explicitly, Gemini is used and the substitution is reported on stderr.
 * Any other unrecognised non-empty name is likewise reported. Names are
 * matched case-insensitively and ignoring surrounding whitespace.
 *
 * @param {string} type Provider name: "gemini" or "openai".
 * @returns A new adapter instance. Adapter constructors may throw.
 */
function buildEmbeddingAdapter(type) {
    const requested = typeof type === "string" ? type.trim().toLowerCase() : "";
    if (requested === "openai") {
        return new OpenAIAdapter();
    }
    if (requested === "anthropic") {
        console.error("[LLMFactory] embedding_provider=anthropic is not supported " +
            "(Anthropic has no native embedding API) — using GeminiAdapter.");
    }
    else if (requested && requested !== "gemini") {
        console.error(`[LLMFactory] Unknown embedding_provider "${type}" — using GeminiAdapter.`);
    }
    return new GeminiAdapter();
}
68
+ // ─── Factory ─────────────────────────────────────────────────────────────────
69
/**
 * Returns the singleton LLM provider, building it on the first call.
 *
 * The provider is composed from two adapters chosen independently:
 *   - generateText() / generateImageDescription() → text adapter
 *     (`text_provider` setting, default "gemini")
 *   - generateEmbedding() → embedding adapter
 *     (`embedding_provider` setting, default "auto")
 *
 * With `embedding_provider = "auto"` the embedding adapter follows the text
 * provider, except that "anthropic" is routed to "gemini".
 *
 * If building either adapter throws, a Gemini adapter is used for everything.
 * The fallback GeminiAdapter constructor is not guarded: if it throws too,
 * the error propagates to the caller.
 *
 * All diagnostics go to stderr (console.error). stdout is not used because an
 * MCP server on the stdio transport must keep stdout free of anything that is
 * not a protocol message.
 *
 * @returns The cached provider, wrapped in TracingLLMProvider.
 */
export function getLLMProvider() {
    // Fast path: already built.
    if (providerInstance)
        return providerInstance;
    const textType = getSettingSync("text_provider", "gemini");
    let embedType = getSettingSync("embedding_provider", "auto");
    if (embedType === "auto") {
        if (textType === "anthropic") {
            // No Anthropic embedding adapter exists — bridge to Gemini.
            embedType = "gemini";
            console.error("[LLMFactory] text_provider=anthropic with embedding_provider=auto: " +
                "routing embeddings to GeminiAdapter (Anthropic has no native embedding API). " +
                "Set embedding_provider=openai in dashboard to use Ollama/OpenAI instead.");
        }
        else {
            embedType = textType;
        }
    }
    try {
        const composed = composeAdapters(buildTextAdapter(textType), buildEmbeddingAdapter(embedType));
        // The text provider name labels every span, including embedding spans.
        // NOTE(review): whether tracing is a no-op when OTel is disabled depends
        // on getTracer() in telemetry.js, which is not visible here.
        providerInstance = new TracingLLMProvider(composed, textType);
        if (textType !== embedType) {
            console.error(`[LLMFactory] Split provider: text=${textType}, embedding=${embedType}`);
        }
    }
    catch (err) {
        // Init failure (e.g. missing API key) → use Gemini for both halves.
        console.error(`[LLMFactory] Failed to initialise providers (text=${textType}, embed=${embedType}): ${err}. ` +
            `Falling back to GeminiAdapter for both.`);
        const fallback = new GeminiAdapter();
        providerInstance = new TracingLLMProvider(composeAdapters(fallback, fallback), "gemini");
    }
    return providerInstance;
}
/**
 * Combine a text adapter and an embedding adapter into one provider object.
 *
 * Methods are bound to their source adapter so `this` resolves correctly when
 * they are invoked through the composite. `generateImageDescription` is taken
 * from the text adapter and only defined when that adapter has it.
 */
function composeAdapters(textAdapter, embedAdapter) {
    const composed = {
        generateText: textAdapter.generateText.bind(textAdapter),
        generateEmbedding: embedAdapter.generateEmbedding.bind(embedAdapter),
    };
    if (textAdapter.generateImageDescription) {
        composed.generateImageDescription = textAdapter.generateImageDescription.bind(textAdapter);
    }
    return composed;
}
137
/**
 * Drop the cached provider so the next getLLMProvider() call rebuilds it.
 *
 * Test-only hook — production code must never call this.
 */
export function _resetLLMProvider() {
    // Clearing the module-level cache is all that is needed.
    providerInstance = null;
}
@@ -0,0 +1,25 @@
1
+ /**
2
+ * LLM Provider Interface (v4.5)
3
+ * ─────────────────────────────────────────────────────────────────────────────
4
+ * PURPOSE:
5
+ * Defines the contract that ALL LLM adapters must satisfy.
6
+ * This is the single seam in Prism's AI layer — the only thing consumers
7
+ * (compaction, summarization, embedding, security scan, briefing) need to
8
+ * know about. They never reference a specific model or SDK.
9
+ *
10
+ * DESIGN PHILOSOPHY:
11
+ * Keep the interface intentionally minimal. Prism only needs two required LLM
12
+ * capabilities (text generation, embeddings) plus optional image description. More methods here
13
+ * would force every future adapter to implement things it doesn't need.
14
+ *
15
+ * ADAPTER IMPLEMENTATIONS (src/utils/llm/adapters/):
16
+ * - gemini.ts → Google Gemini (default; all methods including VLM)
17
+ * - openai.ts → OpenAI Cloud + Ollama + LM Studio + vLLM
18
+ * - anthropic.ts → Anthropic Claude (VLM supported; embeddings unsupported)
19
+ *
20
+ * FACTORY RESOLUTION:
21
+ * Never instantiate adapters directly. Always call:
22
+ * import { getLLMProvider } from "../llm/factory.js";
23
+ * const llm = getLLMProvider();
24
+ */
25
+ export {}; // No runtime exports: this compiled module only marks the file as an ES module (presumably the TypeScript interface was erased at build time — confirm against provider.ts).