nexus-dev 3.2.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of nexus-dev might be problematic.

Files changed (48)
  1. nexus_dev/__init__.py +4 -0
  2. nexus_dev/agent_templates/__init__.py +26 -0
  3. nexus_dev/agent_templates/api_designer.yaml +26 -0
  4. nexus_dev/agent_templates/code_reviewer.yaml +26 -0
  5. nexus_dev/agent_templates/debug_detective.yaml +26 -0
  6. nexus_dev/agent_templates/doc_writer.yaml +26 -0
  7. nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
  8. nexus_dev/agent_templates/refactor_architect.yaml +26 -0
  9. nexus_dev/agent_templates/security_auditor.yaml +26 -0
  10. nexus_dev/agent_templates/test_engineer.yaml +26 -0
  11. nexus_dev/agents/__init__.py +20 -0
  12. nexus_dev/agents/agent_config.py +97 -0
  13. nexus_dev/agents/agent_executor.py +197 -0
  14. nexus_dev/agents/agent_manager.py +104 -0
  15. nexus_dev/agents/prompt_factory.py +91 -0
  16. nexus_dev/chunkers/__init__.py +168 -0
  17. nexus_dev/chunkers/base.py +202 -0
  18. nexus_dev/chunkers/docs_chunker.py +291 -0
  19. nexus_dev/chunkers/java_chunker.py +343 -0
  20. nexus_dev/chunkers/javascript_chunker.py +312 -0
  21. nexus_dev/chunkers/python_chunker.py +308 -0
  22. nexus_dev/cli.py +1673 -0
  23. nexus_dev/config.py +253 -0
  24. nexus_dev/database.py +558 -0
  25. nexus_dev/embeddings.py +585 -0
  26. nexus_dev/gateway/__init__.py +10 -0
  27. nexus_dev/gateway/connection_manager.py +348 -0
  28. nexus_dev/github_importer.py +247 -0
  29. nexus_dev/mcp_client.py +281 -0
  30. nexus_dev/mcp_config.py +184 -0
  31. nexus_dev/schemas/mcp_config_schema.json +166 -0
  32. nexus_dev/server.py +1866 -0
  33. nexus_dev/templates/pre-commit-hook +33 -0
  34. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/__init__.py +26 -0
  35. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
  36. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
  37. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
  38. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
  39. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
  40. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
  41. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
  42. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
  43. nexus_dev-3.2.0.data/data/nexus_dev/templates/pre-commit-hook +33 -0
  44. nexus_dev-3.2.0.dist-info/METADATA +636 -0
  45. nexus_dev-3.2.0.dist-info/RECORD +48 -0
  46. nexus_dev-3.2.0.dist-info/WHEEL +4 -0
  47. nexus_dev-3.2.0.dist-info/entry_points.txt +12 -0
  48. nexus_dev-3.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,585 @@
+ """Embedding providers for Nexus-Dev.
+
+ ⚠️ IMPORTANT: Embedding Portability Warning
+
+ Embeddings are NOT portable between different models or providers:
+ - OpenAI text-embedding-3-small produces 1536-dimensional vectors
+ - Ollama nomic-embed-text produces 768-dimensional vectors
+ - Different models produce incompatible vector spaces
+
+ Once you choose an embedding provider for a project, you MUST keep
+ using the same provider and model. Changing providers requires
+ re-indexing ALL documents.
+
+ The embedding provider is configured ONCE at MCP server startup via
+ nexus_config.json and cannot be changed at runtime.
+ """
+ from __future__ import annotations
+
+ import os
+ from abc import ABC, abstractmethod
+ from functools import lru_cache
+ from typing import TYPE_CHECKING
+
+ import httpx
+
+ if TYPE_CHECKING:
+     from .config import NexusConfig
+
+
+ class EmbeddingProvider(ABC):
+     """Abstract base class for embedding providers."""
+
+     @property
+     @abstractmethod
+     def model_name(self) -> str:
+         """Name of the embedding model."""
+
+     @property
+     @abstractmethod
+     def dimensions(self) -> int:
+         """Number of dimensions in the embedding vectors."""
+
+     @abstractmethod
+     async def embed(self, text: str) -> list[float]:
+         """Generate embedding for a single text.
+
+         Args:
+             text: Text to embed.
+
+         Returns:
+             Embedding vector as list of floats.
+         """
+
+     @abstractmethod
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         """Generate embeddings for multiple texts.
+
+         Args:
+             texts: List of texts to embed.
+
+         Returns:
+             List of embedding vectors.
+         """
+
+
+ class OpenAIEmbedder(EmbeddingProvider):
+     """OpenAI embedding provider using text-embedding-3-small by default."""
+
+     DIMENSIONS_MAP = {
+         "text-embedding-3-small": 1536,
+         "text-embedding-3-large": 3072,
+         "text-embedding-ada-002": 1536,
+     }
+
+     def __init__(
+         self,
+         model: str = "text-embedding-3-small",
+         api_key: str | None = None,
+     ) -> None:
+         """Initialize OpenAI embedder.
+
+         Args:
+             model: OpenAI embedding model name.
+             api_key: OpenAI API key. If None, uses OPENAI_API_KEY env var.
+         """
+         self._model = model
+         self._api_key = api_key or os.environ.get("OPENAI_API_KEY")
+         if not self._api_key:
+             raise ValueError(
+                 "OpenAI API key required. Set OPENAI_API_KEY environment variable "
+                 "or pass api_key parameter."
+             )
+         self._client: httpx.AsyncClient | None = None
+
+     @property
+     def model_name(self) -> str:
+         return self._model
+
+     @property
+     def dimensions(self) -> int:
+         return self.DIMENSIONS_MAP.get(self._model, 1536)
+
+     async def _get_client(self) -> httpx.AsyncClient:
+         """Get or create async HTTP client."""
+         if self._client is None:
+             self._client = httpx.AsyncClient(
+                 base_url="https://api.openai.com/v1",
+                 headers={
+                     "Authorization": f"Bearer {self._api_key}",
+                     "Content-Type": "application/json",
+                 },
+                 timeout=60.0,
+             )
+         return self._client
+
+     async def embed(self, text: str) -> list[float]:
+         """Generate embedding for a single text using OpenAI API."""
+         result = await self.embed_batch([text])
+         return result[0]
+
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         """Generate embeddings for multiple texts using OpenAI API.
+
+         Args:
+             texts: List of texts to embed.
+
+         Returns:
+             List of embedding vectors.
+
+         Raises:
+             httpx.HTTPStatusError: If API request fails.
+         """
+         if not texts:
+             return []
+
+         client = await self._get_client()
+
+         # OpenAI caps tokens per input (~8k) and inputs per request, so send in chunks
+         batch_size = 100
+         all_embeddings: list[list[float]] = []
+
+         for i in range(0, len(texts), batch_size):
+             batch = texts[i : i + batch_size]
+
+             response = await client.post(
+                 "/embeddings",
+                 json={
+                     "model": self._model,
+                     "input": batch,
+                 },
+             )
+             response.raise_for_status()
+
+             data = response.json()
+             # Sort by index to maintain order
+             sorted_data = sorted(data["data"], key=lambda x: x["index"])
+             batch_embeddings = [item["embedding"] for item in sorted_data]
+             all_embeddings.extend(batch_embeddings)
+
+         return all_embeddings
+
+     async def close(self) -> None:
+         """Close the HTTP client."""
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+
+
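
A minimal usage sketch for this class (assumes OPENAI_API_KEY is exported; output shapes follow DIMENSIONS_MAP above):

    import asyncio

    async def main() -> None:
        embedder = OpenAIEmbedder()  # text-embedding-3-small by default
        vectors = await embedder.embed_batch(["def add(a, b):", "return a + b"])
        print(len(vectors), len(vectors[0]))  # 2 1536
        await embedder.close()

    asyncio.run(main())
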
+ class OllamaEmbedder(EmbeddingProvider):
+     """Local Ollama embedding provider."""
+
+     DIMENSIONS_MAP = {
+         "nomic-embed-text": 768,
+         "mxbai-embed-large": 1024,
+         "all-minilm": 384,
+         "snowflake-arctic-embed": 1024,
+     }
+
+     def __init__(
+         self,
+         model: str = "nomic-embed-text",
+         base_url: str = "http://localhost:11434",
+     ) -> None:
+         """Initialize Ollama embedder.
+
+         Args:
+             model: Ollama embedding model name.
+             base_url: Ollama server URL.
+         """
+         self._model = model
+         self._base_url = base_url.rstrip("/")
+         self._client: httpx.AsyncClient | None = None
+
+     @property
+     def model_name(self) -> str:
+         return self._model
+
+     @property
+     def dimensions(self) -> int:
+         return self.DIMENSIONS_MAP.get(self._model, 768)
+
+     async def _get_client(self) -> httpx.AsyncClient:
+         """Get or create async HTTP client."""
+         if self._client is None:
+             self._client = httpx.AsyncClient(
+                 base_url=self._base_url,
+                 timeout=120.0,  # Ollama can be slow on first request
+             )
+         return self._client
+
+     async def embed(self, text: str) -> list[float]:
+         """Generate embedding for a single text using Ollama API."""
+         client = await self._get_client()
+
+         response = await client.post(
+             "/api/embed",
+             json={
+                 "model": self._model,
+                 "input": text,
+             },
+         )
+         response.raise_for_status()
+
+         data = response.json()
+         # Ollama returns embeddings in different formats depending on version
+         if "embeddings" in data:
+             return data["embeddings"][0]
+         elif "embedding" in data:
+             return data["embedding"]
+         else:
+             raise ValueError(f"Unexpected Ollama response format: {data.keys()}")
+
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         """Generate embeddings for multiple texts using Ollama API.
+
+         Note: Ollama processes requests sequentially, so this is slower than OpenAI.
+
+         Args:
+             texts: List of texts to embed.
+
+         Returns:
+             List of embedding vectors.
+         """
+         if not texts:
+             return []
+
+         client = await self._get_client()
+
+         # Ollama supports batch embedding in newer versions
+         response = await client.post(
+             "/api/embed",
+             json={
+                 "model": self._model,
+                 "input": texts,
+             },
+         )
+         response.raise_for_status()
+
+         data = response.json()
+         if "embeddings" in data:
+             return data["embeddings"]
+
+         # Fallback: process one by one for older Ollama versions
+         return [await self.embed(text) for text in texts]
+
+     async def close(self) -> None:
+         """Close the HTTP client."""
+         if self._client:
+             await self._client.aclose()
+             self._client = None
+
+
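
Usage mirrors the OpenAI provider but needs no API key, only a running Ollama daemon with the model pulled (a sketch, assuming the default port):

    import asyncio

    async def main() -> None:
        # Prerequisite (shell): ollama pull nomic-embed-text
        embedder = OllamaEmbedder()
        vector = await embedder.embed("authentication middleware")
        print(len(vector))  # 768, per DIMENSIONS_MAP
        await embedder.close()

    asyncio.run(main())
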
+ class VertexAIEmbedder(EmbeddingProvider):
+     """Google Vertex AI embedding provider."""
+
+     def __init__(
+         self,
+         model: str = "text-embedding-004",
+         project_id: str | None = None,
+         location: str | None = None,
+     ) -> None:
+         """Initialize Vertex AI embedder.
+
+         Args:
+             model: Vertex AI embedding model name.
+             project_id: Google Cloud project ID.
+             location: Google Cloud region (e.g., "us-central1").
+         """
+         try:
+             import vertexai
+             from vertexai.language_models import TextEmbeddingModel
+         except ImportError:
+             raise ImportError(
+                 "Google Vertex AI dependencies not found. "
+                 "Please run `pip install nexus-dev[google]`."
+             ) from None
+
+         self._model_name = model
+
+         # Initialize the Vertex AI SDK when project/location are given;
+         # otherwise rely on gcloud default auth and config.
+         if project_id or location:
+             vertexai.init(project=project_id, location=location)
+
+         try:
+             self._model = TextEmbeddingModel.from_pretrained(model)
+         except Exception as e:
+             raise ValueError(f"Failed to load Vertex AI model '{model}': {e}") from e
+
+     @property
+     def model_name(self) -> str:
+         return self._model_name
+
+     @property
+     def dimensions(self) -> int:
+         # Default to 768 for most Vertex models if unknown
+         return 768
+
+     async def embed(self, text: str) -> list[float]:
+         """Generate embedding for a single text."""
+         result = await self.embed_batch([text])
+         return result[0]
+
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         """Generate embeddings for multiple texts.
+
+         Vertex AI allows only 5 texts per request for older Gecko models,
+         but up to 250 for newer models like text-embedding-004, so use a
+         batch size of 100 for text-embedding-004 and a conservative 5
+         otherwise.
+         """
+         if not texts:
+             return []
+
+         # Determine batch size based on model
+         batch_size = 100 if "text-embedding-004" in self._model_name else 5
+         all_embeddings: list[list[float]] = []
+
+         # Process in batches. Note: get_embeddings() is a blocking SDK call,
+         # so it holds the event loop while each batch runs.
+         for i in range(0, len(texts), batch_size):
+             batch = texts[i : i + batch_size]
+             embeddings = self._model.get_embeddings(list(batch))
+             all_embeddings.extend([e.values for e in embeddings])
+
+         return all_embeddings
+
+
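
A hedged usage sketch; `my-project` and the region are placeholders, and Application Default Credentials (gcloud auth application-default login) are assumed:

    import asyncio

    async def main() -> None:
        embedder = VertexAIEmbedder(project_id="my-project", location="us-central1")
        vectors = await embedder.embed_batch(["chunk one", "chunk two"])
        print(len(vectors), len(vectors[0]))

    asyncio.run(main())
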
+ class BedrockEmbedder(EmbeddingProvider):
+     """AWS Bedrock embedding provider."""
+
+     def __init__(
+         self,
+         model: str = "amazon.titan-embed-text-v1",
+         region_name: str | None = None,
+         aws_access_key_id: str | None = None,
+         aws_secret_access_key: str | None = None,
+     ) -> None:
+         """Initialize AWS Bedrock embedder.
+
+         Args:
+             model: Bedrock model ID.
+             region_name: AWS region.
+             aws_access_key_id: AWS access key.
+             aws_secret_access_key: AWS secret key.
+         """
+         try:
+             import boto3
+         except ImportError:
+             raise ImportError(
+                 "AWS Bedrock dependencies not found. Please run `pip install nexus-dev[aws]`."
+             ) from None
+
+         self._model = model
+         self._client = boto3.client(
+             service_name="bedrock-runtime",
+             region_name=region_name,
+             aws_access_key_id=aws_access_key_id,
+             aws_secret_access_key=aws_secret_access_key,
+         )
+
+     @property
+     def model_name(self) -> str:
+         return self._model
+
+     @property
+     def dimensions(self) -> int:
+         # Defaults
+         if "titan-embed-text-v2" in self._model:
+             return 1024
+         if "titan" in self._model:
+             return 1536
+         return 1024
+
+     async def embed(self, text: str) -> list[float]:
+         import json
+
+         # Bedrock API format varies by model provider (Amazon vs Cohere)
+         if "cohere" in self._model:
+             body = json.dumps({"texts": [text], "input_type": "search_query"})
+         else:
+             # Amazon Titan format
+             body = json.dumps({"inputText": text})
+
+         response = self._client.invoke_model(
+             body=body,
+             modelId=self._model,
+             accept="application/json",
+             contentType="application/json",
+         )
+
+         response_body = json.loads(response.get("body").read())
+
+         if "cohere" in self._model:
+             return response_body.get("embeddings")[0]
+         else:
+             return response_body.get("embedding")
+
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         # Bedrock invoke_model typically handles one string for Titan;
+         # Cohere models on Bedrock support batching
+         if "cohere" in self._model:
+             import json
+
+             try:
+                 body = json.dumps({"texts": texts, "input_type": "search_query"})
+                 response = self._client.invoke_model(
+                     body=body,
+                     modelId=self._model,
+                     accept="application/json",
+                     contentType="application/json",
+                 )
+                 response_body = json.loads(response.get("body").read())
+                 return response_body.get("embeddings")
+             except Exception:
+                 # Fallback to sequential if batch fails
+                 pass
+
+         # Sequential fallback for Titan or if batching fails
+         embeddings = []
+         for text in texts:
+             embeddings.append(await self.embed(text))
+         return embeddings
+
+
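
A usage sketch; credentials are resolved through boto3's usual chain (env vars, shared config, or an IAM role), and the region is a placeholder:

    import asyncio

    async def main() -> None:
        embedder = BedrockEmbedder(
            model="amazon.titan-embed-text-v1",  # the class default
            region_name="us-east-1",
        )
        vector = await embedder.embed("payment retry logic")
        print(len(vector))  # 1536 for Titan v1, per dimensions above

    asyncio.run(main())
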
+ class VoyageEmbedder(EmbeddingProvider):
+     """Voyage AI embedding provider."""
+
+     def __init__(
+         self,
+         model: str = "voyage-large-2",
+         api_key: str | None = None,
+     ) -> None:
+         try:
+             import voyageai
+         except ImportError:
+             raise ImportError(
+                 "Voyage AI dependencies not found. Please run `pip install nexus-dev[voyage]`."
+             ) from None
+
+         self._model = model
+         self._client = voyageai.AsyncClient(api_key=api_key or os.environ.get("VOYAGE_API_KEY"))
+
+     @property
+     def model_name(self) -> str:
+         return self._model
+
+     @property
+     def dimensions(self) -> int:
+         return 1536  # Most Voyage models are 1536 (check specific docs if needed)
+
+     async def embed(self, text: str) -> list[float]:
+         result = await self.embed_batch([text])
+         return result[0]
+
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         if not texts:
+             return []
+
+         # Voyage handles batching internally, but we can respect a safe limit
+         batch_size = 128
+         all_embeddings: list[list[float]] = []
+
+         for i in range(0, len(texts), batch_size):
+             batch = texts[i : i + batch_size]
+             response = await self._client.embed(
+                 batch,
+                 model=self._model,
+                 input_type="document",  # optimized for retrieval
+             )
+             all_embeddings.extend(list(response.embeddings))
+
+         return all_embeddings
+
+
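
Note that embed_batch hard-codes input_type="document", the indexing side of Voyage's retrieval pairing; queries would typically be embedded with input_type="query" through the same client. A usage sketch (VOYAGE_API_KEY assumed set):

    import asyncio

    async def main() -> None:
        embedder = VoyageEmbedder()  # voyage-large-2 by default
        vectors = await embedder.embed_batch(["doc chunk A", "doc chunk B"])
        print(len(vectors), len(vectors[0]))

    asyncio.run(main())
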
+ class CohereEmbedder(EmbeddingProvider):
+     """Cohere embedding provider."""
+
+     def __init__(
+         self,
+         model: str = "embed-multilingual-v3.0",
+         api_key: str | None = None,
+     ) -> None:
+         try:
+             import cohere
+         except ImportError:
+             raise ImportError(
+                 "Cohere dependencies not found. Please run `pip install nexus-dev[cohere]`."
+             ) from None
+
+         self._model = model
+         self._client = cohere.AsyncClient(api_key=api_key or os.environ.get("CO_API_KEY"))
+
+     @property
+     def model_name(self) -> str:
+         return self._model
+
+     @property
+     def dimensions(self) -> int:
+         return 1024  # Default for v3 models
+
+     async def embed(self, text: str) -> list[float]:
+         result = await self.embed_batch([text])
+         return result[0]
+
+     async def embed_batch(self, texts: list[str]) -> list[list[float]]:
+         if not texts:
+             return []
+
+         response = await self._client.embed(
+             texts=texts,
+             model=self._model,
+             input_type="search_document",
+             embedding_types=["float"],
+         )
+         return response.embeddings.float
+
+
+ def create_embedder(config: NexusConfig) -> EmbeddingProvider:
+     """Create an embedding provider based on configuration.
+
+     Args:
+         config: Nexus-Dev configuration.
+
+     Returns:
+         Configured embedding provider.
+
+     Raises:
+         ValueError: If provider is not supported.
+     """
+     if config.embedding_provider == "openai":
+         return OpenAIEmbedder(model=config.embedding_model)
+     elif config.embedding_provider == "ollama":
+         return OllamaEmbedder(
+             model=config.embedding_model,
+             base_url=config.ollama_url,
+         )
+     elif config.embedding_provider == "google":
+         return VertexAIEmbedder(
+             model=config.embedding_model,
+             project_id=config.google_project_id,
+             location=config.google_location,
+         )
+     elif config.embedding_provider == "aws":
+         return BedrockEmbedder(
+             model=config.embedding_model,
+             region_name=config.aws_region,
+             aws_access_key_id=config.aws_access_key_id,
+             aws_secret_access_key=config.aws_secret_access_key,
+         )
+     elif config.embedding_provider == "voyage":
+         return VoyageEmbedder(
+             model=config.embedding_model,
+             api_key=config.voyage_api_key,
+         )
+     elif config.embedding_provider == "cohere":
+         return CohereEmbedder(
+             model=config.embedding_model,
+             api_key=config.cohere_api_key,
+         )
+     else:
+         raise ValueError(f"Unsupported embedding provider: {config.embedding_provider}")
+
+
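
create_embedder dispatches purely on config.embedding_provider. A minimal sketch of the attributes it reads, using a stand-in object rather than a real NexusConfig (field names are taken from the branches above; the authoritative schema ships in nexus_dev/schemas/mcp_config_schema.json):

    from types import SimpleNamespace

    config = SimpleNamespace(
        embedding_provider="ollama",
        embedding_model="nomic-embed-text",
        ollama_url="http://localhost:11434",
    )
    embedder = create_embedder(config)  # NexusConfig duck-typed for illustration
    print(embedder.model_name, embedder.dimensions)  # nomic-embed-text 768
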
+ # LRU-cached helper for deriving embedding cache keys (in-memory). Note that
+ # this caches only the truncated key string, not embedding vectors.
+ @lru_cache(maxsize=1000)
+ def _cached_embedding_key(text: str) -> str:
+     """Generate a cache key for embeddings."""
+     return text[:500]  # Truncate for cache key efficiency
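
Since the helper above only derives a key, a sketch of what an actual in-memory embedding cache around a provider could look like (not part of the package; note that truncated keys can collide for texts sharing their first 500 characters):

    class CachedEmbedder:
        """Illustrative memoizing wrapper; not shipped with nexus-dev."""

        def __init__(self, inner: EmbeddingProvider) -> None:
            self._inner = inner
            self._cache: dict[str, list[float]] = {}

        async def embed(self, text: str) -> list[float]:
            key = _cached_embedding_key(text)
            if key not in self._cache:
                self._cache[key] = await self._inner.embed(text)
            return self._cache[key]
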
@@ -0,0 +1,10 @@
+ """Gateway module for MCP connection management."""
+
+ from .connection_manager import (
+     ConnectionManager,
+     MCPConnection,
+     MCPConnectionError,
+     MCPTimeoutError,
+ )
+
+ __all__ = ["ConnectionManager", "MCPConnection", "MCPConnectionError", "MCPTimeoutError"]