nexus-dev 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nexus-dev might be problematic. Click here for more details.

Files changed (48) hide show
  1. nexus_dev/__init__.py +4 -0
  2. nexus_dev/agent_templates/__init__.py +26 -0
  3. nexus_dev/agent_templates/api_designer.yaml +26 -0
  4. nexus_dev/agent_templates/code_reviewer.yaml +26 -0
  5. nexus_dev/agent_templates/debug_detective.yaml +26 -0
  6. nexus_dev/agent_templates/doc_writer.yaml +26 -0
  7. nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
  8. nexus_dev/agent_templates/refactor_architect.yaml +26 -0
  9. nexus_dev/agent_templates/security_auditor.yaml +26 -0
  10. nexus_dev/agent_templates/test_engineer.yaml +26 -0
  11. nexus_dev/agents/__init__.py +20 -0
  12. nexus_dev/agents/agent_config.py +97 -0
  13. nexus_dev/agents/agent_executor.py +197 -0
  14. nexus_dev/agents/agent_manager.py +104 -0
  15. nexus_dev/agents/prompt_factory.py +91 -0
  16. nexus_dev/chunkers/__init__.py +168 -0
  17. nexus_dev/chunkers/base.py +202 -0
  18. nexus_dev/chunkers/docs_chunker.py +291 -0
  19. nexus_dev/chunkers/java_chunker.py +343 -0
  20. nexus_dev/chunkers/javascript_chunker.py +312 -0
  21. nexus_dev/chunkers/python_chunker.py +308 -0
  22. nexus_dev/cli.py +1673 -0
  23. nexus_dev/config.py +253 -0
  24. nexus_dev/database.py +558 -0
  25. nexus_dev/embeddings.py +585 -0
  26. nexus_dev/gateway/__init__.py +10 -0
  27. nexus_dev/gateway/connection_manager.py +348 -0
  28. nexus_dev/github_importer.py +247 -0
  29. nexus_dev/mcp_client.py +281 -0
  30. nexus_dev/mcp_config.py +184 -0
  31. nexus_dev/schemas/mcp_config_schema.json +166 -0
  32. nexus_dev/server.py +1866 -0
  33. nexus_dev/templates/pre-commit-hook +33 -0
  34. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/__init__.py +26 -0
  35. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
  36. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
  37. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
  38. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
  39. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
  40. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
  41. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
  42. nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
  43. nexus_dev-3.2.0.data/data/nexus_dev/templates/pre-commit-hook +33 -0
  44. nexus_dev-3.2.0.dist-info/METADATA +636 -0
  45. nexus_dev-3.2.0.dist-info/RECORD +48 -0
  46. nexus_dev-3.2.0.dist-info/WHEEL +4 -0
  47. nexus_dev-3.2.0.dist-info/entry_points.txt +12 -0
  48. nexus_dev-3.2.0.dist-info/licenses/LICENSE +21 -0
nexus_dev/database.py ADDED
@@ -0,0 +1,558 @@
1
+ """LanceDB database manager for Nexus-Dev."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import uuid
7
+ from dataclasses import dataclass, field
8
+ from datetime import UTC, datetime
9
+ from enum import Enum
10
+ from typing import Any
11
+
12
+ import lancedb
13
+ import pyarrow as pa
14
+
15
+ from .config import NexusConfig
16
+ from .embeddings import EmbeddingProvider
17
+
18
+
19
class DocumentType(str, Enum):
    """Enumerates the kinds of documents stored in the index.

    Inherits from ``str`` so members compare equal to their raw string
    values and serialize cleanly into table rows.
    """

    CODE = "code"
    LESSON = "lesson"
    DOCUMENTATION = "documentation"
    TOOL = "tool"
    # LLM reasoning traces: mistakes, backtracking, etc.
    INSIGHT = "insight"
    # Plan summaries and design decisions.
    IMPLEMENTATION = "implementation"
    GITHUB_ISSUE = "github_issue"
    GITHUB_PR = "github_pr"
30
+
31
+
32
+ @dataclass
33
+ class Document:
34
+ """A document to be stored in the vector database.
35
+
36
+ Attributes:
37
+ id: Unique document identifier (UUID).
38
+ text: Document content.
39
+ vector: Embedding vector.
40
+ project_id: Project this document belongs to.
41
+ file_path: Source file path.
42
+ doc_type: Type of document (code, lesson, documentation, tool).
43
+ chunk_type: Type of code chunk (function, class, method, module).
44
+ language: Programming language or "markdown".
45
+ name: Name of the code element (function/class name).
46
+ start_line: Starting line number in source file.
47
+ end_line: Ending line number in source file.
48
+ timestamp: When the document was indexed.
49
+ server_name: For TOOL type: MCP server name.
50
+ parameters_schema: For TOOL type: JSON schema string.
51
+ """
52
+
53
+ id: str
54
+ text: str
55
+ vector: list[float]
56
+ project_id: str
57
+ file_path: str
58
+ doc_type: DocumentType
59
+ chunk_type: str = "module"
60
+ language: str = "unknown"
61
+ name: str = ""
62
+ start_line: int = 0
63
+ end_line: int = 0
64
+ timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
65
+ server_name: str = ""
66
+ parameters_schema: str = ""
67
+
68
+ def to_dict(self) -> dict[str, Any]:
69
+ """Convert to dictionary for LanceDB insertion."""
70
+ return {
71
+ "id": self.id,
72
+ "text": self.text,
73
+ "vector": self.vector,
74
+ "project_id": self.project_id,
75
+ "file_path": self.file_path,
76
+ "doc_type": self.doc_type.value,
77
+ "chunk_type": self.chunk_type,
78
+ "language": self.language,
79
+ "name": self.name,
80
+ "start_line": self.start_line,
81
+ "end_line": self.end_line,
82
+ "timestamp": self.timestamp.isoformat(),
83
+ "server_name": self.server_name,
84
+ "parameters_schema": self.parameters_schema,
85
+ }
86
+
87
+
88
+ @dataclass
89
+ class ToolDocument:
90
+ """An MCP tool document for indexing and search.
91
+
92
+ Attributes:
93
+ id: Unique identifier (server_name:tool_name)
94
+ server_name: Name of the MCP server (e.g., "github")
95
+ tool_name: Name of the tool (e.g., "create_pull_request")
96
+ description: Tool description/docstring
97
+ parameters: JSON schema dict for parameters
98
+ examples: Optional usage examples
99
+ vector: Embedding vector for semantic search
100
+ timestamp: When the tool was indexed
101
+ """
102
+
103
+ id: str
104
+ server_name: str
105
+ tool_name: str
106
+ description: str
107
+ parameters: dict[str, Any]
108
+ vector: list[float]
109
+ examples: list[str] = field(default_factory=list)
110
+ timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
111
+
112
+ def to_dict(self) -> dict[str, Any]:
113
+ """Convert to dictionary for LanceDB insertion."""
114
+ return {
115
+ "id": self.id,
116
+ "text": self.get_searchable_text(),
117
+ "vector": self.vector,
118
+ "project_id": "mcp_tools", # Special project for tools
119
+ "file_path": f"mcp://{self.server_name}/{self.tool_name}",
120
+ "doc_type": DocumentType.TOOL.value,
121
+ "chunk_type": "tool",
122
+ "language": "mcp",
123
+ "name": self.tool_name,
124
+ "start_line": 0,
125
+ "end_line": 0,
126
+ "timestamp": self.timestamp.isoformat(),
127
+ "server_name": self.server_name,
128
+ "parameters_schema": json.dumps(self.parameters),
129
+ }
130
+
131
+ def get_searchable_text(self) -> str:
132
+ """Get text for embedding generation."""
133
+ parts = [
134
+ f"MCP Tool: {self.server_name}.{self.tool_name}",
135
+ f"Description: {self.description}",
136
+ ]
137
+ if self.examples:
138
+ parts.append(f"Examples: {', '.join(self.examples)}")
139
+ return "\n".join(parts)
140
+
141
+
142
@dataclass
class SearchResult:
    """One hit returned from a similarity search over the documents table."""

    id: str  # document ID
    text: str  # document content
    score: float  # similarity score (lower = closer for L2 distance)
    project_id: str  # project the document belongs to
    file_path: str  # source file path
    doc_type: str  # document type (stored as a plain string)
    chunk_type: str  # code chunk kind
    language: str  # programming language
    name: str  # name of the code element
    start_line: int  # starting line number
    end_line: int  # ending line number
    server_name: str = ""  # TOOL results only: MCP server name
    parameters_schema: str = ""  # TOOL results only: JSON schema string
175
+
176
+
177
class NexusDatabase:
    """LanceDB wrapper for Nexus-Dev vector storage.

    Manages one on-disk LanceDB database containing a single "documents"
    table that holds every indexed chunk (code, lessons, documentation,
    MCP tools). Delete/scan helpers are deliberately best-effort: LanceDB
    errors are swallowed so indexing flows never hard-fail mid-run.
    """

    TABLE_NAME = "documents"

    def __init__(
        self,
        config: NexusConfig,
        embedder: EmbeddingProvider,
    ) -> None:
        """Initialize the database connection.

        Args:
            config: Nexus-Dev configuration.
            embedder: Embedding provider for vector generation.
        """
        self.config = config
        self.embedder = embedder
        # Both are created lazily on first use via connect().
        self._db: lancedb.DBConnection | None = None
        self._table: lancedb.table.Table | None = None

    @staticmethod
    def _quote(value: str) -> str:
        """Escape a string for safe interpolation into a LanceDB predicate.

        Filter strings are SQL-like, so an embedded single quote (e.g. in a
        file path) would otherwise terminate the literal and corrupt — or
        inject into — the predicate. Per SQL convention, quotes are escaped
        by doubling them.
        """
        return value.replace("'", "''")

    def _get_schema(self) -> pa.Schema:
        """Get the PyArrow schema for the documents table."""
        return pa.schema(
            [
                pa.field("id", pa.string()),
                pa.field("text", pa.string()),
                pa.field(
                    "vector",
                    # Fixed-width float32 list; width must match the embedder.
                    pa.list_(pa.float32(), self.config.get_embedding_dimensions()),
                ),
                pa.field("project_id", pa.string()),
                pa.field("file_path", pa.string()),
                pa.field("doc_type", pa.string()),
                pa.field("chunk_type", pa.string()),
                pa.field("language", pa.string()),
                pa.field("name", pa.string()),
                pa.field("start_line", pa.int32()),
                pa.field("end_line", pa.int32()),
                # ISO-8601 text keeps timestamps sortable with a string column.
                pa.field("timestamp", pa.string()),
                pa.field("server_name", pa.string()),
                pa.field("parameters_schema", pa.string()),
            ]
        )

    def reset(self) -> None:
        """Delete the entire table to force schema recreation."""
        if self._db is None:
            self.connect()
        assert self._db is not None

        if self.TABLE_NAME in self._db.table_names():
            self._db.drop_table(self.TABLE_NAME)
        self._table = None

    def connect(self) -> None:
        """Connect to the LanceDB database and ensure the table exists."""
        db_path = self.config.get_db_path()
        db_path.mkdir(parents=True, exist_ok=True)

        self._db = lancedb.connect(str(db_path))

        # Create the table on first use; otherwise open the existing one.
        if self.TABLE_NAME not in self._db.table_names():
            self._table = self._db.create_table(
                self.TABLE_NAME,
                schema=self._get_schema(),
            )
        else:
            self._table = self._db.open_table(self.TABLE_NAME)

    def _ensure_connected(self) -> lancedb.table.Table:
        """Ensure database is connected and return the table.

        The table is re-opened on every call so this process sees the
        latest updates written by other processes (e.g. the indexer).
        """
        if self._db is None:
            self.connect()
        assert self._db is not None

        try:
            self._table = self._db.open_table(self.TABLE_NAME)
        except Exception:
            # Table may not exist yet (or another transient issue); fall
            # back to connect(), which creates the table when missing.
            if self._table is None:
                self.connect()

        assert self._table is not None
        return self._table

    async def upsert_document(self, doc: Document) -> str:
        """Insert or update a document.

        Args:
            doc: Document to upsert.

        Returns:
            Document ID.
        """
        table = self._ensure_connected()

        # Upsert = delete-then-add; ignore failure when the ID is absent.
        try:
            table.delete(f"id = '{self._quote(doc.id)}'")
        except Exception:
            pass

        table.add([doc.to_dict()])
        return doc.id

    async def upsert_documents(self, docs: list[Document]) -> list[str]:
        """Insert or update multiple documents.

        Args:
            docs: Documents to upsert.

        Returns:
            List of document IDs (empty when ``docs`` is empty).
        """
        if not docs:
            return []

        table = self._ensure_connected()

        # Delete any existing rows with these IDs before re-adding.
        ids = [doc.id for doc in docs]
        for doc_id in ids:
            try:
                table.delete(f"id = '{self._quote(doc_id)}'")
            except Exception:
                pass

        table.add([doc.to_dict() for doc in docs])
        return ids

    async def search(
        self,
        query: str,
        project_id: str | None = None,
        doc_type: DocumentType | None = None,
        limit: int = 10,
    ) -> list[SearchResult]:
        """Perform semantic similarity search.

        Args:
            query: Search query text.
            project_id: Optional project filter.
            doc_type: Optional document type filter.
            limit: Maximum number of results.

        Returns:
            List of search results ordered by similarity (closest first).
        """
        table = self._ensure_connected()

        # Generate query embedding.
        query_vector = await self.embedder.embed(query)

        search_query = table.search(query_vector).limit(limit)

        # Apply optional filters.
        filters = []
        if project_id:
            filters.append(f"project_id = '{self._quote(project_id)}'")
        if doc_type:
            # Enum values are fixed identifiers; no quoting hazards.
            filters.append(f"doc_type = '{doc_type.value}'")
        if filters:
            search_query = search_query.where(" AND ".join(filters))

        results = search_query.to_pandas()
        return [
            self._row_to_result(row, score=row["_distance"])
            for _, row in results.iterrows()
        ]

    @staticmethod
    def _row_to_result(row: Any, score: float = 0.0) -> SearchResult:
        """Convert one pandas row from the documents table to a SearchResult."""
        return SearchResult(
            id=row["id"],
            text=row["text"],
            score=score,
            project_id=row["project_id"],
            file_path=row["file_path"],
            doc_type=row["doc_type"],
            chunk_type=row["chunk_type"],
            language=row["language"],
            name=row["name"],
            start_line=row["start_line"],
            end_line=row["end_line"],
            # Rows written before the tool columns existed default to "".
            server_name=row.get("server_name", ""),
            parameters_schema=row.get("parameters_schema", ""),
        )

    async def delete_by_file(self, file_path: str, project_id: str) -> int:
        """Delete all documents for a specific file.

        Args:
            file_path: Path to the file.
            project_id: Project ID.

        Returns:
            Number of documents deleted (0 when the pre-delete scan fails).
        """
        table = self._ensure_connected()
        predicate = (
            f"file_path = '{self._quote(file_path)}' "
            f"AND project_id = '{self._quote(project_id)}'"
        )

        # Count first: LanceDB's delete() does not report affected rows.
        try:
            count_before = len(table.search().where(predicate).to_pandas())
        except Exception:
            count_before = 0

        try:
            table.delete(predicate)
        except Exception:
            pass

        return count_before

    async def delete_by_project(self, project_id: str) -> int:
        """Delete all documents for a project.

        Args:
            project_id: Project ID.

        Returns:
            Number of documents deleted (0 when the pre-delete scan fails).
        """
        table = self._ensure_connected()
        predicate = f"project_id = '{self._quote(project_id)}'"

        # Count first: LanceDB's delete() does not report affected rows.
        try:
            count_before = len(table.search().where(predicate).to_pandas())
        except Exception:
            count_before = 0

        try:
            table.delete(predicate)
        except Exception:
            pass

        return count_before

    async def get_project_stats(self, project_id: str | None = None) -> dict[str, int]:
        """Get statistics for a project or all projects.

        Args:
            project_id: Project ID. If None, returns stats for all projects.

        Returns:
            Dictionary with counts keyed by document type, plus "total".
        """
        table = self._ensure_connected()

        try:
            query = table.search()
            if project_id:
                query = query.where(f"project_id = '{self._quote(project_id)}'")
            df = query.to_pandas()
            stats = df.groupby("doc_type").size().to_dict()
            stats["total"] = len(df)
            return stats
        except Exception:
            return {"total": 0}

    async def get_recent_lessons(
        self,
        project_id: str | None = None,
        limit: int = 10,
    ) -> list[SearchResult]:
        """Get recent lessons ordered by timestamp (newest first).

        Args:
            project_id: Optional project filter.
            limit: Maximum number of results.

        Returns:
            List of recent lessons; empty on any LanceDB error.
        """
        table = self._ensure_connected()

        try:
            filters = [f"doc_type = '{DocumentType.LESSON.value}'"]
            if project_id:
                filters.append(f"project_id = '{self._quote(project_id)}'")

            # Over-fetch (2x) because LanceDB applies its limit before our
            # timestamp sort; sort descending, then trim to `limit`.
            df = (
                table.search()
                .where(" AND ".join(filters))
                .limit(limit * 2)
                .to_pandas()
            )
            df = df.sort_values("timestamp", ascending=False).head(limit)

            return [self._row_to_result(row) for _, row in df.iterrows()]
        except Exception:
            return []
508
+
509
+
510
def generate_document_id(
    project_id: str,
    file_path: str,
    chunk_name: str,
    start_line: int,
) -> str:
    """Derive a stable, deterministic document ID.

    The same (project, file, chunk, line) tuple always maps to the same
    UUID, so re-indexing identical code updates rather than duplicates.

    Args:
        project_id: Project ID.
        file_path: File path.
        chunk_name: Name of the chunk (function/class name).
        start_line: Starting line number.

    Returns:
        Deterministic UUIDv5 string derived from the inputs.
    """
    seed = ":".join((project_id, file_path, chunk_name, str(start_line)))
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, seed))
532
+
533
+
534
def tool_document_from_schema(
    server_name: str,
    tool_name: str,
    schema: dict[str, Any],
    vector: list[float],
) -> ToolDocument:
    """Build a ToolDocument from a raw MCP tool schema.

    Args:
        server_name: Name of the MCP server.
        tool_name: Name of the tool.
        schema: MCP tool schema dictionary; the "description" and
            "inputSchema" keys are read, both defaulting to empty.
        vector: Embedding vector for the tool.

    Returns:
        ToolDocument instance keyed as "server_name:tool_name".
    """
    description = schema.get("description", "")
    parameters = schema.get("inputSchema", {})
    return ToolDocument(
        id=f"{server_name}:{tool_name}",
        server_name=server_name,
        tool_name=tool_name,
        description=description,
        parameters=parameters,
        vector=vector,
    )