nexus_dev-3.3.1-py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in those registries.
- nexus_dev/__init__.py +4 -0
- nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev/agents/__init__.py +20 -0
- nexus_dev/agents/agent_config.py +97 -0
- nexus_dev/agents/agent_executor.py +197 -0
- nexus_dev/agents/agent_manager.py +104 -0
- nexus_dev/agents/prompt_factory.py +91 -0
- nexus_dev/chunkers/__init__.py +168 -0
- nexus_dev/chunkers/base.py +202 -0
- nexus_dev/chunkers/docs_chunker.py +291 -0
- nexus_dev/chunkers/java_chunker.py +343 -0
- nexus_dev/chunkers/javascript_chunker.py +312 -0
- nexus_dev/chunkers/python_chunker.py +308 -0
- nexus_dev/cli.py +2017 -0
- nexus_dev/config.py +261 -0
- nexus_dev/database.py +569 -0
- nexus_dev/embeddings.py +703 -0
- nexus_dev/gateway/__init__.py +10 -0
- nexus_dev/gateway/connection_manager.py +348 -0
- nexus_dev/github_importer.py +247 -0
- nexus_dev/mcp_client.py +281 -0
- nexus_dev/mcp_config.py +184 -0
- nexus_dev/schemas/mcp_config_schema.json +166 -0
- nexus_dev/server.py +1866 -0
- nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.dist-info/METADATA +668 -0
- nexus_dev-3.3.1.dist-info/RECORD +48 -0
- nexus_dev-3.3.1.dist-info/WHEEL +4 -0
- nexus_dev-3.3.1.dist-info/entry_points.txt +14 -0
- nexus_dev-3.3.1.dist-info/licenses/LICENSE +21 -0
nexus_dev/database.py
ADDED
@@ -0,0 +1,569 @@

```python
"""LanceDB database manager for Nexus-Dev."""

from __future__ import annotations

import json
import uuid
from dataclasses import dataclass, field
from datetime import UTC, datetime
from enum import Enum
from typing import Any

import lancedb
import pyarrow as pa

from .config import NexusConfig
from .embeddings import EmbeddingProvider


class DocumentType(str, Enum):
    """Type of indexed document."""

    CODE = "code"
    LESSON = "lesson"
    DOCUMENTATION = "documentation"
    TOOL = "tool"
    INSIGHT = "insight"  # LLM reasoning, mistakes, backtracking
    IMPLEMENTATION = "implementation"  # Plan summaries, design decisions
    GITHUB_ISSUE = "github_issue"
    GITHUB_PR = "github_pr"


@dataclass
class Document:
    """A document to be stored in the vector database.

    Attributes:
        id: Unique document identifier (UUID).
        text: Document content.
        vector: Embedding vector.
        project_id: Project this document belongs to.
        file_path: Source file path.
        doc_type: Type of document (code, lesson, documentation, tool).
        chunk_type: Type of code chunk (function, class, method, module).
        language: Programming language or "markdown".
        name: Name of the code element (function/class name).
        start_line: Starting line number in source file.
        end_line: Ending line number in source file.
        timestamp: When the document was indexed.
        server_name: For TOOL type: MCP server name.
        parameters_schema: For TOOL type: JSON schema string.
    """

    id: str
    text: str
    vector: list[float]
    project_id: str
    file_path: str
    doc_type: DocumentType
    chunk_type: str = "module"
    language: str = "unknown"
    name: str = ""
    start_line: int = 0
    end_line: int = 0
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
    server_name: str = ""
    parameters_schema: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for LanceDB insertion."""
        return {
            "id": self.id,
            "text": self.text,
            "vector": self.vector,
            "project_id": self.project_id,
            "file_path": self.file_path,
            "doc_type": self.doc_type.value,
            "chunk_type": self.chunk_type,
            "language": self.language,
            "name": self.name,
            "start_line": self.start_line,
            "end_line": self.end_line,
            "timestamp": self.timestamp.isoformat(),
            "server_name": self.server_name,
            "parameters_schema": self.parameters_schema,
        }


@dataclass
class ToolDocument:
    """An MCP tool document for indexing and search.

    Attributes:
        id: Unique identifier (server_name:tool_name).
        server_name: Name of the MCP server (e.g., "github").
        tool_name: Name of the tool (e.g., "create_pull_request").
        description: Tool description/docstring.
        parameters: JSON schema dict for parameters.
        examples: Optional usage examples.
        vector: Embedding vector for semantic search.
        timestamp: When the tool was indexed.
    """

    id: str
    server_name: str
    tool_name: str
    description: str
    parameters: dict[str, Any]
    vector: list[float]
    examples: list[str] = field(default_factory=list)
    timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for LanceDB insertion."""
        return {
            "id": self.id,
            "text": self.get_searchable_text(),
            "vector": self.vector,
            "project_id": "mcp_tools",  # Special project for tools
            "file_path": f"mcp://{self.server_name}/{self.tool_name}",
            "doc_type": DocumentType.TOOL.value,
            "chunk_type": "tool",
            "language": "mcp",
            "name": self.tool_name,
            "start_line": 0,
            "end_line": 0,
            "timestamp": self.timestamp.isoformat(),
            "server_name": self.server_name,
            "parameters_schema": json.dumps(self.parameters),
        }

    def get_searchable_text(self) -> str:
        """Get text for embedding generation."""
        parts = [
            f"MCP Tool: {self.server_name}.{self.tool_name}",
            f"Description: {self.description}",
        ]
        if self.examples:
            parts.append(f"Examples: {', '.join(self.examples)}")
        return "\n".join(parts)


@dataclass
class SearchResult:
    """Result from a similarity search.

    Attributes:
        id: Document ID.
        text: Document content.
        score: Similarity score (lower is more similar for L2 distance).
        project_id: Project the document belongs to.
        file_path: Source file path.
        doc_type: Type of document.
        chunk_type: Type of code chunk.
        language: Programming language.
        name: Name of the code element.
        start_line: Starting line number.
        end_line: Ending line number.
        server_name: For TOOL type: MCP server name.
        parameters_schema: For TOOL type: JSON schema string.
    """

    id: str
    text: str
    score: float
    project_id: str
    file_path: str
    doc_type: str
    chunk_type: str
    language: str
    name: str
    start_line: int
    end_line: int
    server_name: str = ""
    parameters_schema: str = ""


class NexusDatabase:
    """LanceDB wrapper for Nexus-Dev vector storage."""

    TABLE_NAME = "documents"

    def __init__(
        self,
        config: NexusConfig,
        embedder: EmbeddingProvider,
    ) -> None:
        """Initialize the database connection.

        Args:
            config: Nexus-Dev configuration.
            embedder: Embedding provider for vector generation.
        """
        self.config = config
        self.embedder = embedder
        self._db: lancedb.DBConnection | None = None
        self._table: lancedb.table.Table | None = None

    def _get_schema(self) -> pa.Schema:
        """Get the PyArrow schema for the documents table."""
        return pa.schema(
            [
                pa.field("id", pa.string()),
                pa.field("text", pa.string()),
                pa.field(
                    "vector",
                    pa.list_(pa.float32(), self.config.get_embedding_dimensions()),
                ),
                pa.field("project_id", pa.string()),
                pa.field("file_path", pa.string()),
                pa.field("doc_type", pa.string()),
                pa.field("chunk_type", pa.string()),
                pa.field("language", pa.string()),
                pa.field("name", pa.string()),
                pa.field("start_line", pa.int32()),
                pa.field("end_line", pa.int32()),
                pa.field("timestamp", pa.string()),
                pa.field("server_name", pa.string()),
                pa.field("parameters_schema", pa.string()),
            ]
        )

    def reset(self) -> None:
        """Delete the entire table to force schema recreation."""
        if self._db is None:
            self.connect()
        assert self._db is not None

        if self.TABLE_NAME in self._db.table_names():
            self._db.drop_table(self.TABLE_NAME)
        self._table = None

    def connect(self) -> None:
        """Connect to the LanceDB database and ensure the table exists."""
        db_path = self.config.get_db_path()
        db_path.mkdir(parents=True, exist_ok=True)

        self._db = lancedb.connect(str(db_path))

        # Create table if it doesn't exist
        if self.TABLE_NAME not in self._db.table_names():
            self._table = self._db.create_table(
                self.TABLE_NAME,
                schema=self._get_schema(),
            )
        else:
            self._table = self._db.open_table(self.TABLE_NAME)

    def _ensure_connected(self) -> lancedb.table.Table:
        """Ensure the database is connected and return the table.

        The table is re-opened on every call so we see the latest updates
        from other processes.
        """
        if self._db is None:
            self.connect()
        assert self._db is not None

        # Always re-open table to pick up external updates (e.g. from indexer)
        try:
            self._table = self._db.open_table(self.TABLE_NAME)
        except Exception:
            # Table might not exist yet if created but not committed, or other issue.
            # If so, rely on connect()'s creation logic.
            if self._table is None:
                self.connect()

        assert self._table is not None
        return self._table

    async def upsert_document(self, doc: Document) -> str:
        """Insert or update a document.

        Args:
            doc: Document to upsert.

        Returns:
            Document ID.
        """
        table = self._ensure_connected()

        # Delete existing document with the same ID if it exists
        try:
            table.delete(f"id = '{doc.id}'")
        except Exception:
            pass  # Ignore if document doesn't exist

        # Insert new document
        table.add([doc.to_dict()])

        return doc.id

    async def upsert_documents(self, docs: list[Document]) -> list[str]:
        """Insert or update multiple documents.

        Args:
            docs: Documents to upsert.

        Returns:
            List of document IDs.
        """
        if not docs:
            return []

        table = self._ensure_connected()

        # Delete existing documents
        ids = [doc.id for doc in docs]
        for doc_id in ids:
            try:
                table.delete(f"id = '{doc_id}'")
            except Exception:
                pass

        # Insert all documents
        table.add([doc.to_dict() for doc in docs])

        return ids

    async def search(
        self,
        query: str,
        project_id: str | None = None,
        doc_type: DocumentType | None = None,
        limit: int = 10,
    ) -> list[SearchResult]:
        """Perform semantic similarity search.

        Args:
            query: Search query text.
            project_id: Optional project filter.
            doc_type: Optional document type filter.
            limit: Maximum number of results.

        Returns:
            List of search results ordered by similarity.
        """
        table = self._ensure_connected()

        # Generate query embedding
        query_vector = await self.embedder.embed(query)

        # Build search query
        search_query = table.search(query_vector).limit(limit)

        # Apply filters
        filters = []
        if project_id:
            filters.append(f"project_id = '{project_id}'")
        if doc_type:
            filters.append(f"doc_type = '{doc_type.value}'")

        if filters:
            search_query = search_query.where(" AND ".join(filters))

        # Execute search
        results = search_query.to_pandas()

        # Convert to SearchResult objects
        search_results = []
        for _, row in results.iterrows():
            search_results.append(
                SearchResult(
                    id=row["id"],
                    text=row["text"],
                    score=row["_distance"],
                    project_id=row["project_id"],
                    file_path=row["file_path"],
                    doc_type=row["doc_type"],
                    chunk_type=row["chunk_type"],
                    language=row["language"],
                    name=row["name"],
                    start_line=row["start_line"],
                    end_line=row["end_line"],
                    server_name=row.get("server_name", ""),
                    parameters_schema=row.get("parameters_schema", ""),
                )
            )

        return search_results

    async def delete_by_file(self, file_path: str, project_id: str) -> int:
        """Delete all documents for a specific file.

        Args:
            file_path: Path to the file.
            project_id: Project ID.

        Returns:
            Number of documents deleted.
        """
        table = self._ensure_connected()

        # Get count before deletion
        try:
            count_before = len(
                table.search()
                .where(f"file_path = '{file_path}' AND project_id = '{project_id}'")
                .to_pandas()
            )
        except Exception:
            count_before = 0

        # Delete documents
        try:
            table.delete(f"file_path = '{file_path}' AND project_id = '{project_id}'")
        except Exception:
            pass

        return count_before

    async def delete_by_project(self, project_id: str) -> int:
        """Delete all documents for a project.

        Args:
            project_id: Project ID.

        Returns:
            Number of documents deleted.
        """
        table = self._ensure_connected()

        # Get count before deletion
        try:
            count_before = len(table.search().where(f"project_id = '{project_id}'").to_pandas())
        except Exception:
            count_before = 0

        # Delete documents
        try:
            table.delete(f"project_id = '{project_id}'")
        except Exception:
            pass

        return count_before

    async def get_project_stats(self, project_id: str | None = None) -> dict[str, int]:
        """Get statistics for a project or all projects.

        Args:
            project_id: Project ID. If None, returns stats for all projects.

        Returns:
            Dictionary with counts by document type.
        """
        table = self._ensure_connected()

        try:
            # Get all data as pandas DataFrame
            df = table.to_pandas()

            # Filter by project_id if specified
            if project_id:
                df = df[df["project_id"] == project_id]

            # Group by document type
            if len(df) == 0:
                return {"total": 0}

            stats = df.groupby("doc_type").size().to_dict()
            stats["total"] = len(df)
            return stats
        except Exception as e:
            # Log error details for debugging
            import logging

            logging.error(f"Failed to get project stats: {e}", exc_info=True)
            return {"total": 0}

    async def get_recent_lessons(
        self,
        project_id: str | None = None,
        limit: int = 10,
    ) -> list[SearchResult]:
        """Get recent lessons ordered by timestamp.

        Args:
            project_id: Optional project filter.
            limit: Maximum number of results.

        Returns:
            List of recent lessons.
        """
        table = self._ensure_connected()

        try:
            query = table.search()
            filters = [f"doc_type = '{DocumentType.LESSON.value}'"]

            if project_id:
                filters.append(f"project_id = '{project_id}'")

            df = query.where(" AND ".join(filters)).limit(limit * 2).to_pandas()

            # Sort by timestamp (descending) and limit
            df = df.sort_values("timestamp", ascending=False).head(limit)

            results = []
            for _, row in df.iterrows():
                results.append(
                    SearchResult(
                        id=row["id"],
                        text=row["text"],
                        score=0.0,
                        project_id=row["project_id"],
                        file_path=row["file_path"],
                        doc_type=row["doc_type"],
                        chunk_type=row["chunk_type"],
                        language=row["language"],
                        name=row["name"],
                        start_line=row["start_line"],
                        end_line=row["end_line"],
                        server_name=row.get("server_name", ""),
                        parameters_schema=row.get("parameters_schema", ""),
                    )
                )
            return results
        except Exception:
            return []


def generate_document_id(
    project_id: str,
    file_path: str,
    chunk_name: str,
    start_line: int,
) -> str:
    """Generate a deterministic document ID.

    This allows for idempotent updates when re-indexing the same code.

    Args:
        project_id: Project ID.
        file_path: File path.
        chunk_name: Name of the chunk (function/class name).
        start_line: Starting line number.

    Returns:
        Deterministic UUID based on input parameters.
    """
    # Create a deterministic ID from the combination
    key = f"{project_id}:{file_path}:{chunk_name}:{start_line}"
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, key))


def tool_document_from_schema(
    server_name: str,
    tool_name: str,
    schema: dict[str, Any],
    vector: list[float],
) -> ToolDocument:
    """Create a ToolDocument from an MCP tool schema.

    Args:
        server_name: Name of the MCP server.
        tool_name: Name of the tool.
        schema: MCP tool schema dictionary.
        vector: Embedding vector for the tool.

    Returns:
        ToolDocument instance.
    """
    return ToolDocument(
        id=f"{server_name}:{tool_name}",
        server_name=server_name,
        tool_name=tool_name,
        description=schema.get("description", ""),
        parameters=schema.get("inputSchema", {}),
        vector=vector,
    )
```
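For orientation, here is a minimal usage sketch of the API above. It assumes an already-constructed `NexusConfig` and `EmbeddingProvider` (their constructors live in `nexus_dev/config.py` and `nexus_dev/embeddings.py`, outside this file), and the `"demo"` project and `src/math.py` path are hypothetical examples:

```python
# Minimal sketch, assuming a configured NexusConfig and EmbeddingProvider;
# the "demo" project and src/math.py path are illustrative only.
from nexus_dev.config import NexusConfig
from nexus_dev.database import Document, DocumentType, NexusDatabase, generate_document_id
from nexus_dev.embeddings import EmbeddingProvider


async def index_and_search(config: NexusConfig, embedder: EmbeddingProvider) -> None:
    db = NexusDatabase(config, embedder)
    db.connect()  # creates the "documents" table on first use

    text = "def add(a, b):\n    return a + b"
    doc = Document(
        # Deterministic uuid5 ID, so re-indexing the same chunk replaces it
        id=generate_document_id("demo", "src/math.py", "add", 1),
        text=text,
        vector=await embedder.embed(text),
        project_id="demo",
        file_path="src/math.py",
        doc_type=DocumentType.CODE,
        chunk_type="function",
        language="python",
        name="add",
        start_line=1,
        end_line=2,
    )
    await db.upsert_document(doc)

    # Vector search scoped to one project; score is an L2 distance
    # (lower means more similar)
    for hit in await db.search("function that adds two numbers", project_id="demo", limit=5):
        print(hit.score, hit.file_path, hit.name)
```

Note that `upsert_document` is implemented as delete-then-add keyed on the document ID, so the deterministic IDs from `generate_document_id` are what make re-indexing idempotent rather than duplicative.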