nexus_dev-3.3.1-py3-none-any.whl
- nexus_dev/__init__.py +4 -0
- nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev/agents/__init__.py +20 -0
- nexus_dev/agents/agent_config.py +97 -0
- nexus_dev/agents/agent_executor.py +197 -0
- nexus_dev/agents/agent_manager.py +104 -0
- nexus_dev/agents/prompt_factory.py +91 -0
- nexus_dev/chunkers/__init__.py +168 -0
- nexus_dev/chunkers/base.py +202 -0
- nexus_dev/chunkers/docs_chunker.py +291 -0
- nexus_dev/chunkers/java_chunker.py +343 -0
- nexus_dev/chunkers/javascript_chunker.py +312 -0
- nexus_dev/chunkers/python_chunker.py +308 -0
- nexus_dev/cli.py +2017 -0
- nexus_dev/config.py +261 -0
- nexus_dev/database.py +569 -0
- nexus_dev/embeddings.py +703 -0
- nexus_dev/gateway/__init__.py +10 -0
- nexus_dev/gateway/connection_manager.py +348 -0
- nexus_dev/github_importer.py +247 -0
- nexus_dev/mcp_client.py +281 -0
- nexus_dev/mcp_config.py +184 -0
- nexus_dev/schemas/mcp_config_schema.json +166 -0
- nexus_dev/server.py +1866 -0
- nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.dist-info/METADATA +668 -0
- nexus_dev-3.3.1.dist-info/RECORD +48 -0
- nexus_dev-3.3.1.dist-info/WHEEL +4 -0
- nexus_dev-3.3.1.dist-info/entry_points.txt +14 -0
- nexus_dev-3.3.1.dist-info/licenses/LICENSE +21 -0
nexus_dev/embeddings.py
ADDED
@@ -0,0 +1,703 @@
"""Embedding providers for Nexus-Dev.

⚠️ IMPORTANT: Embedding Portability Warning

Embeddings are NOT portable between different models or providers:
- OpenAI text-embedding-3-small produces 1536-dimensional vectors
- Ollama nomic-embed-text produces 768-dimensional vectors
- Different models produce incompatible vector spaces

Once you choose an embedding provider for a project, you MUST keep
using the same provider and model. Changing providers requires
re-indexing ALL documents.

The embedding provider is configured ONCE at MCP server startup via
nexus_config.json and cannot be changed at runtime.
"""

from __future__ import annotations

import os
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import TYPE_CHECKING

import httpx

if TYPE_CHECKING:
    from .config import NexusConfig

class EmbeddingProvider(ABC):
    """Abstract base class for embedding providers."""

    @property
    @abstractmethod
    def model_name(self) -> str:
        """Name of the embedding model."""

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Number of dimensions in the embedding vectors."""

    @abstractmethod
    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text.

        Args:
            text: Text to embed.

        Returns:
            Embedding vector as list of floats.
        """

    @abstractmethod
    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embedding vectors.
        """

class OpenAIEmbedder(EmbeddingProvider):
    """OpenAI embedding provider using text-embedding-3-small by default."""

    DIMENSIONS_MAP = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    def __init__(
        self,
        model: str = "text-embedding-3-small",
        api_key: str | None = None,
    ) -> None:
        """Initialize OpenAI embedder.

        Args:
            model: OpenAI embedding model name.
            api_key: OpenAI API key. If None, uses OPENAI_API_KEY env var.
        """
        self._model = model
        self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
        if not self._api_key:
            raise ValueError(
                "OpenAI API key required. Set OPENAI_API_KEY environment variable "
                "or pass api_key parameter."
            )
        self._client: httpx.AsyncClient | None = None

    @property
    def model_name(self) -> str:
        return self._model

    @property
    def dimensions(self) -> int:
        return self.DIMENSIONS_MAP.get(self._model, 1536)

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create async HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                base_url="https://api.openai.com/v1",
                headers={
                    "Authorization": f"Bearer {self._api_key}",
                    "Content-Type": "application/json",
                },
                timeout=60.0,
            )
        return self._client

    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text using OpenAI API."""
        result = await self.embed_batch([text])
        return result[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts using OpenAI API.

        Args:
            texts: List of texts to embed.

        Returns:
            List of embedding vectors.

        Raises:
            httpx.HTTPStatusError: If API request fails.
        """
        if not texts:
            return []

        client = await self._get_client()

        # OpenAI has a limit of ~8000 tokens per request, batch if needed
        batch_size = 100
        all_embeddings: list[list[float]] = []

        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]

            response = await client.post(
                "/embeddings",
                json={
                    "model": self._model,
                    "input": batch,
                },
            )
            response.raise_for_status()

            data = response.json()
            # Sort by index to maintain order
            sorted_data = sorted(data["data"], key=lambda x: x["index"])
            batch_embeddings = [item["embedding"] for item in sorted_data]
            all_embeddings.extend(batch_embeddings)

        return all_embeddings

    async def close(self) -> None:
        """Close the HTTP client."""
        if self._client:
            await self._client.aclose()
            self._client = None

class OllamaEmbedder(EmbeddingProvider):
    """Local Ollama embedding provider with smart batching and text chunking.

    Handles large documents by:
    1. Splitting texts that exceed token limits
    2. Batching requests to avoid memory issues
    """

    DIMENSIONS_MAP = {
        "nomic-embed-text": 768,
        "mxbai-embed-large": 1024,
        "all-minilm": 384,
        "snowflake-arctic-embed": 1024,
    }

    def __init__(
        self,
        model: str = "nomic-embed-text",
        base_url: str = "http://localhost:11434",
        batch_size: int = 10,
        max_text_tokens: int = 1000,
    ) -> None:
        """Initialize Ollama embedder.

        Args:
            model: Ollama embedding model name.
            base_url: Ollama server URL.
            batch_size: Number of texts per API request (default: 10).
            max_text_tokens: Maximum tokens per text before splitting (default: 1000).
        """
        self._model = model
        self._base_url = base_url.rstrip("/")
        self._batch_size = batch_size
        self._max_text_tokens = max_text_tokens
        self._client: httpx.AsyncClient | None = None

    @property
    def model_name(self) -> str:
        return self._model

    @property
    def dimensions(self) -> int:
        return self.DIMENSIONS_MAP.get(self._model, 768)

    @staticmethod
    def _estimate_tokens(text: str) -> int:
        """Estimate token count for text.

        Uses rough approximation: ~4 characters = 1 token for English.
        This is conservative to avoid overshooting actual token limits.

        Args:
            text: Text to estimate tokens for.

        Returns:
            Approximate token count.
        """
        # Rough estimation: 1 token ≈ 4 characters for English
        # This is conservative; actual tokenization may be more efficient
        return max(1, len(text) // 4)

    def _split_text_by_tokens(self, text: str) -> list[str]:
        """Split text if it exceeds token limit.

        Args:
            text: Text to split if needed.

        Returns:
            List of text chunks, each under token limit.
        """
        estimated_tokens = self._estimate_tokens(text)

        # If text is under limit, return as-is
        if estimated_tokens <= self._max_text_tokens:
            return [text]

        # Calculate approximate characters per chunk
        chars_per_chunk = (len(text) // estimated_tokens) * self._max_text_tokens
        chunks = []
        start = 0

        while start < len(text):
            end = min(start + chars_per_chunk, len(text))

            # Try to break at a sentence boundary (period + space)
            if end < len(text):
                # Look backwards for a good break point
                sentence_break = text.rfind(". ", start, end)
                if sentence_break > start:
                    end = sentence_break + 2
                else:
                    # Try line break as fallback
                    newline_break = text.rfind("\n", start, end)
                    if newline_break > start:
                        end = newline_break + 1

            chunk = text[start:end].strip()
            if chunk:  # Only add non-empty chunks
                chunks.append(chunk)
            start = end

        return chunks if chunks else [text]  # Fallback to original if splitting failed

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create async HTTP client."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                base_url=self._base_url,
                timeout=120.0,  # Ollama can be slow on first request
            )
        return self._client

    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text using Ollama API."""
        result = await self.embed_batch([text])
        return result[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts using Ollama API.

        Handles large batches by:
        1. Pre-splitting texts that exceed token limits
        2. Batching requests to avoid memory issues

        Args:
            texts: List of texts to embed.

        Returns:
            List of embedding vectors (in same order as input texts).
        """
        if not texts:
            return []

        client = await self._get_client()

        # Pre-process: split any texts that exceed token limits
        processed_texts: list[str] = []
        text_chunk_mapping: list[list[int]] = []  # Maps original text index to chunk indices

        for text in texts:
            chunks = self._split_text_by_tokens(text)
            chunk_indices = list(range(len(processed_texts), len(processed_texts) + len(chunks)))
            text_chunk_mapping.append(chunk_indices)
            processed_texts.extend(chunks)

        # Process texts in batches to avoid memory issues
        all_embeddings: list[list[float]] = []

        for batch_start in range(0, len(processed_texts), self._batch_size):
            batch_end = min(batch_start + self._batch_size, len(processed_texts))
            batch = processed_texts[batch_start:batch_end]

            # For single text, use the simple endpoint
            if len(batch) == 1:
                response = await client.post(
                    "/api/embed",
                    json={
                        "model": self._model,
                        "input": batch[0],
                    },
                )
            else:
                # For multiple texts, use array input
                response = await client.post(
                    "/api/embed",
                    json={
                        "model": self._model,
                        "input": batch,
                    },
                )

            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                raise httpx.HTTPStatusError(
                    f"Ollama embedding request failed for batch of {len(batch)} texts: {e}",
                    request=e.request,
                    response=e.response,
                ) from e

            data = response.json()

            # Extract embeddings based on response format
            if "embeddings" in data:
                batch_embeddings = data["embeddings"]
            elif "embedding" in data:
                # Single embedding response
                batch_embeddings = [data["embedding"]]
            else:
                raise ValueError(f"Unexpected Ollama response format: {data.keys()}")

            all_embeddings.extend(batch_embeddings)

        # Map chunk embeddings back to original texts
        # For texts that were split, average their chunk embeddings
        result_embeddings: list[list[float]] = []
        embedding_dim = len(all_embeddings[0]) if all_embeddings else 0

        for chunk_indices in text_chunk_mapping:
            if len(chunk_indices) == 1:
                # Single chunk, use its embedding directly
                result_embeddings.append(all_embeddings[chunk_indices[0]])
            else:
                # Multiple chunks, average their embeddings
                chunk_embeddings = [all_embeddings[i] for i in chunk_indices]
                avg_embedding = [
                    sum(emb[j] for emb in chunk_embeddings) / len(chunk_embeddings)
                    for j in range(embedding_dim)
                ]
                result_embeddings.append(avg_embedding)

        return result_embeddings

    async def close(self) -> None:
        """Close the HTTP client."""
        if self._client:
            await self._client.aclose()
            self._client = None

class VertexAIEmbedder(EmbeddingProvider):
    """Google Vertex AI embedding provider."""

    def __init__(
        self,
        model: str = "text-embedding-004",
        project_id: str | None = None,
        location: str | None = None,
    ) -> None:
        """Initialize Vertex AI embedder.

        Args:
            model: Vertex AI embedding model name.
            project_id: Google Cloud project ID.
            location: Google Cloud region (e.g., "us-central1").
        """
        try:
            import vertexai
            from vertexai.language_models import TextEmbeddingModel
        except ImportError:
            raise ImportError(
                "Google Vertex AI dependencies not found. "
                "Please run `pip install nexus-dev[google]`."
            ) from None

        self._model_name = model

        # Initialize Vertex AI SDK if project/location provided or not already initialized
        # User can also rely on gcloud default auth and config
        if project_id or location:
            vertexai.init(project=project_id, location=location)

        try:
            self._model = TextEmbeddingModel.from_pretrained(model)
        except Exception as e:
            raise ValueError(f"Failed to load Vertex AI model '{model}': {e}") from e

    @property
    def model_name(self) -> str:
        return self._model_name

    @property
    def dimensions(self) -> int:
        # Default to 768 for most Vertex models if unknown
        return 768

    async def embed(self, text: str) -> list[float]:
        """Generate embedding for a single text."""
        result = await self.embed_batch([text])
        return result[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Generate embeddings for multiple texts.

        Vertex AI has a limit of 5 texts per request for Gecko models,
        but up to 250 for newer models like text-embedding-004.
        We'll use a conservative batch size of 5 for safety or 100 for newer ones.
        """
        if not texts:
            return []

        # Determine batch size based on model
        batch_size = 100 if "text-embedding-004" in self._model_name else 5
        all_embeddings: list[list[float]] = []

        # Process in batches
        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]
            embeddings = self._model.get_embeddings(list(batch))
            all_embeddings.extend([e.values for e in embeddings])

        return all_embeddings

class BedrockEmbedder(EmbeddingProvider):
    """AWS Bedrock embedding provider."""

    def __init__(
        self,
        model: str = "amazon.titan-embed-text-v1",
        region_name: str | None = None,
        aws_access_key_id: str | None = None,
        aws_secret_access_key: str | None = None,
    ) -> None:
        """Initialize AWS Bedrock embedder.

        Args:
            model: Bedrock model ID.
            region_name: AWS region.
            aws_access_key_id: AWS access key.
            aws_secret_access_key: AWS secret key.
        """
        try:
            import boto3
        except ImportError:
            raise ImportError(
                "AWS Bedrock dependencies not found. Please run `pip install nexus-dev[aws]`."
            ) from None

        self._model = model
        self._client = boto3.client(
            service_name="bedrock-runtime",
            region_name=region_name,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )

    @property
    def model_name(self) -> str:
        return self._model

    @property
    def dimensions(self) -> int:
        # Defaults
        if "titan-embed-text-v2" in self._model:
            return 1024
        if "titan" in self._model:
            return 1536
        return 1024

    async def embed(self, text: str) -> list[float]:
        import json

        # Bedrock API format varies by model provider (Amazon vs Cohere)
        if "cohere" in self._model:
            body = json.dumps({"texts": [text], "input_type": "search_query"})
        else:
            # Amazon Titan format
            body = json.dumps({"inputText": text})

        response = self._client.invoke_model(
            body=body,
            modelId=self._model,
            accept="application/json",
            contentType="application/json",
        )

        response_body = json.loads(response.get("body").read())

        if "cohere" in self._model:
            return response_body.get("embeddings")[0]
        else:
            return response_body.get("embedding")

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        # Bedrock invoke_model typically handles one string for Titan
        # Cohere models on Bedrock support batching
        if "cohere" in self._model:
            try:
                import json

                body = json.dumps({"texts": texts, "input_type": "search_query"})
                response = self._client.invoke_model(
                    body=body,
                    modelId=self._model,
                    accept="application/json",
                    contentType="application/json",
                )
                response_body = json.loads(response.get("body").read())
                return response_body.get("embeddings")
            except Exception:
                # Fallback to sequential if batch fails
                pass

        # Sequential fallback for Titan or if batching fails
        embeddings = []
        for text in texts:
            embeddings.append(await self.embed(text))
        return embeddings

class VoyageEmbedder(EmbeddingProvider):
    """Voyage AI embedding provider."""

    def __init__(
        self,
        model: str = "voyage-large-2",
        api_key: str | None = None,
    ) -> None:
        try:
            import voyageai
        except ImportError:
            raise ImportError(
                "Voyage AI dependencies not found. Please run `pip install nexus-dev[voyage]`."
            ) from None

        self._model = model
        self._client = voyageai.AsyncClient(api_key=api_key or os.environ.get("VOYAGE_API_KEY"))

    @property
    def model_name(self) -> str:
        return self._model

    @property
    def dimensions(self) -> int:
        return 1536  # Most Voyage models are 1536 (check specific docs if needed)

    async def embed(self, text: str) -> list[float]:
        result = await self.embed_batch([text])
        return result[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        if not texts:
            return []

        # Voyage handles batching internally, but we can respect a safe limit
        batch_size = 128
        all_embeddings: list[list[float]] = []

        for i in range(0, len(texts), batch_size):
            batch = texts[i : i + batch_size]
            response = await self._client.embed(
                batch,
                model=self._model,
                input_type="document",  # optimized for retrieval
            )
            all_embeddings.extend(list(response.embeddings))

        return all_embeddings

class CohereEmbedder(EmbeddingProvider):
    """Cohere embedding provider."""

    def __init__(
        self,
        model: str = "embed-multilingual-v3.0",
        api_key: str | None = None,
    ) -> None:
        try:
            import cohere
        except ImportError:
            raise ImportError(
                "Cohere dependencies not found. Please run `pip install nexus-dev[cohere]`."
            ) from None

        self._model = model
        self._client = cohere.AsyncClient(api_key=api_key or os.environ.get("CO_API_KEY"))

    @property
    def model_name(self) -> str:
        return self._model

    @property
    def dimensions(self) -> int:
        return 1024  # Default for v3 models

    async def embed(self, text: str) -> list[float]:
        result = await self.embed_batch([text])
        return result[0]

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        if not texts:
            return []

        response = await self._client.embed(
            texts=texts, model=self._model, input_type="search_document", embedding_types=["float"]
        )
        return response.embeddings.float

def create_embedder(config: NexusConfig) -> EmbeddingProvider:
    """Create an embedding provider based on configuration.

    Args:
        config: Nexus-Dev configuration.

    Returns:
        Configured embedding provider.

    Raises:
        ValueError: If provider is not supported.
    """
    if config.embedding_provider == "openai":
        return OpenAIEmbedder(model=config.embedding_model)
    elif config.embedding_provider == "ollama":
        return OllamaEmbedder(
            model=config.embedding_model,
            base_url=config.ollama_url,
            batch_size=config.ollama_batch_size,
            max_text_tokens=config.ollama_max_text_tokens,
        )
    elif config.embedding_provider == "google":
        return VertexAIEmbedder(
            model=config.embedding_model,
            project_id=config.google_project_id,
            location=config.google_location,
        )
    elif config.embedding_provider == "aws":
        return BedrockEmbedder(
            model=config.embedding_model,
            region_name=config.aws_region,
            aws_access_key_id=config.aws_access_key_id,
            aws_secret_access_key=config.aws_secret_access_key,
        )
    elif config.embedding_provider == "voyage":
        return VoyageEmbedder(
            model=config.embedding_model,
            api_key=config.voyage_api_key,
        )
    elif config.embedding_provider == "cohere":
        return CohereEmbedder(
            model=config.embedding_model,
            api_key=config.cohere_api_key,
        )
    else:
        raise ValueError(f"Unsupported embedding provider: {config.embedding_provider}")


# Simple LRU cache for recent embeddings (in-memory)
@lru_cache(maxsize=1000)
def _cached_embedding_key(text: str) -> str:
    """Generate a cache key for embeddings."""
    return text[:500]  # Truncate for cache key efficiency
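The module docstring above warns that vectors from different providers and models are incompatible, and each provider class reports its own size via the dimensions property. A minimal usage sketch of the classes shown above, assuming nexus-dev is installed and a local Ollama server with the default nomic-embed-text model is reachable at http://localhost:11434 (illustrative only, not shipped in the wheel):

import asyncio

from nexus_dev.embeddings import OllamaEmbedder


async def main() -> None:
    # Defaults from the class above: model="nomic-embed-text", base_url="http://localhost:11434".
    embedder = OllamaEmbedder()

    vectors = await embedder.embed_batch(
        ["def add(a, b): return a + b", "README intro text"]
    )

    # Each vector's length matches the provider's declared dimensionality (768 for
    # nomic-embed-text), which is why switching providers requires re-indexing.
    assert all(len(v) == embedder.dimensions for v in vectors)

    await embedder.close()


if __name__ == "__main__":
    asyncio.run(main())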
nexus_dev/gateway/__init__.py
ADDED
@@ -0,0 +1,10 @@
"""Gateway module for MCP connection management."""

from .connection_manager import (
    ConnectionManager,
    MCPConnection,
    MCPConnectionError,
    MCPTimeoutError,
)

__all__ = ["ConnectionManager", "MCPConnection", "MCPConnectionError", "MCPTimeoutError"]
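The gateway package re-exports its connection API through __all__, so callers can import the error types directly from nexus_dev.gateway rather than the private connection_manager module. A brief sketch of that import pattern; the describe_gateway_error helper is hypothetical and only illustrates the re-exported names:

from nexus_dev.gateway import MCPConnectionError, MCPTimeoutError


def describe_gateway_error(exc: Exception) -> str:
    # Hypothetical helper, not part of the package: check the more specific
    # timeout error first, then the general connection error.
    if isinstance(exc, MCPTimeoutError):
        return "MCP server timed out"
    if isinstance(exc, MCPConnectionError):
        return "MCP connection failed"
    return "unrelated error"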