mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only.
Note: this version of mcp-vector-search has been flagged as a potentially problematic release.
- mcp_vector_search/__init__.py +3 -2
- mcp_vector_search/cli/commands/auto_index.py +397 -0
- mcp_vector_search/cli/commands/config.py +88 -40
- mcp_vector_search/cli/commands/index.py +198 -52
- mcp_vector_search/cli/commands/init.py +472 -58
- mcp_vector_search/cli/commands/install.py +284 -0
- mcp_vector_search/cli/commands/mcp.py +495 -0
- mcp_vector_search/cli/commands/search.py +241 -87
- mcp_vector_search/cli/commands/status.py +184 -58
- mcp_vector_search/cli/commands/watch.py +34 -35
- mcp_vector_search/cli/didyoumean.py +184 -0
- mcp_vector_search/cli/export.py +320 -0
- mcp_vector_search/cli/history.py +292 -0
- mcp_vector_search/cli/interactive.py +342 -0
- mcp_vector_search/cli/main.py +163 -26
- mcp_vector_search/cli/output.py +63 -45
- mcp_vector_search/config/defaults.py +50 -36
- mcp_vector_search/config/settings.py +49 -35
- mcp_vector_search/core/auto_indexer.py +298 -0
- mcp_vector_search/core/connection_pool.py +322 -0
- mcp_vector_search/core/database.py +335 -25
- mcp_vector_search/core/embeddings.py +73 -29
- mcp_vector_search/core/exceptions.py +19 -2
- mcp_vector_search/core/factory.py +310 -0
- mcp_vector_search/core/git_hooks.py +345 -0
- mcp_vector_search/core/indexer.py +237 -73
- mcp_vector_search/core/models.py +21 -19
- mcp_vector_search/core/project.py +73 -58
- mcp_vector_search/core/scheduler.py +330 -0
- mcp_vector_search/core/search.py +574 -86
- mcp_vector_search/core/watcher.py +48 -46
- mcp_vector_search/mcp/__init__.py +4 -0
- mcp_vector_search/mcp/__main__.py +25 -0
- mcp_vector_search/mcp/server.py +701 -0
- mcp_vector_search/parsers/base.py +30 -31
- mcp_vector_search/parsers/javascript.py +74 -48
- mcp_vector_search/parsers/python.py +57 -49
- mcp_vector_search/parsers/registry.py +47 -32
- mcp_vector_search/parsers/text.py +179 -0
- mcp_vector_search/utils/__init__.py +40 -0
- mcp_vector_search/utils/gitignore.py +229 -0
- mcp_vector_search/utils/timing.py +334 -0
- mcp_vector_search/utils/version.py +47 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
- mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
- mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/database.py

```diff
@@ -1,12 +1,12 @@
 """Database abstraction and ChromaDB implementation for MCP Vector Search."""
 
-import asyncio
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any,
+from typing import Any, Protocol, runtime_checkable
 
 from loguru import logger
 
+from .connection_pool import ChromaConnectionPool
 from .exceptions import (
     DatabaseError,
     DatabaseInitializationError,
@@ -21,7 +21,7 @@ from .models import CodeChunk, IndexStats, SearchResult
 class EmbeddingFunction(Protocol):
     """Protocol for embedding functions."""
 
-    def __call__(self, texts:
+    def __call__(self, texts: list[str]) -> list[list[float]]:
         """Generate embeddings for input texts."""
         ...
 
```
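The second hunk tightens the `EmbeddingFunction` protocol: any callable taking `list[str]` and returning `list[list[float]]` satisfies it, and the new `runtime_checkable` import suggests the decorator sits just above this hunk's visible context. A minimal sketch of a conforming embedder; the class and its toy 4-dimensional output are illustrative, not part of the package:

```python
from typing import Protocol, runtime_checkable


@runtime_checkable
class EmbeddingFunction(Protocol):
    """Mirror of the protocol declared in the diff."""

    def __call__(self, texts: list[str]) -> list[list[float]]: ...


class ToyEmbedder:
    """Hypothetical embedder; any object with a matching __call__ conforms."""

    def __call__(self, texts: list[str]) -> list[list[float]]:
        # Deterministic 4-dimensional "embedding" derived from character codes.
        return [[(sum(map(ord, t)) % (i + 2)) / (i + 2) for i in range(4)] for t in texts]


assert isinstance(ToyEmbedder(), EmbeddingFunction)  # passes thanks to @runtime_checkable
```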
```diff
@@ -40,9 +40,9 @@ class VectorDatabase(ABC):
         ...
 
     @abstractmethod
-    async def add_chunks(self, chunks:
+    async def add_chunks(self, chunks: list[CodeChunk]) -> None:
         """Add code chunks to the database.
-
+
         Args:
             chunks: List of code chunks to add
         """
@@ -53,17 +53,17 @@ class VectorDatabase(ABC):
         self,
         query: str,
         limit: int = 10,
-        filters:
+        filters: dict[str, Any] | None = None,
         similarity_threshold: float = 0.7,
-    ) ->
+    ) -> list[SearchResult]:
         """Search for similar code chunks.
-
+
         Args:
             query: Search query
             limit: Maximum number of results
             filters: Optional filters to apply
             similarity_threshold: Minimum similarity score
-
+
         Returns:
             List of search results
         """
@@ -72,10 +72,10 @@ class VectorDatabase(ABC):
     @abstractmethod
     async def delete_by_file(self, file_path: Path) -> int:
         """Delete all chunks for a specific file.
-
+
         Args:
             file_path: Path to the file
-
+
         Returns:
             Number of deleted chunks
         """
@@ -84,7 +84,7 @@ class VectorDatabase(ABC):
     @abstractmethod
     async def get_stats(self) -> IndexStats:
         """Get database statistics.
-
+
         Returns:
             Index statistics
         """
@@ -115,7 +115,7 @@ class ChromaVectorDatabase(VectorDatabase):
         collection_name: str = "code_search",
     ) -> None:
         """Initialize ChromaDB vector database.
-
+
         Args:
             persist_directory: Directory to persist database
             embedding_function: Function to generate embeddings
@@ -141,7 +141,7 @@ class ChromaVectorDatabase(VectorDatabase):
                 settings=chromadb.Settings(
                     anonymized_telemetry=False,
                     allow_reset=True,
-                )
+                ),
             )
 
             # Create or get collection
@@ -153,11 +153,13 @@ class ChromaVectorDatabase(VectorDatabase):
                 },
             )
 
-            logger.
+            logger.debug(f"ChromaDB initialized at {self.persist_directory}")
 
         except Exception as e:
             logger.error(f"Failed to initialize ChromaDB: {e}")
-            raise DatabaseInitializationError(
+            raise DatabaseInitializationError(
+                f"ChromaDB initialization failed: {e}"
+            ) from e
 
     async def remove_file_chunks(self, file_path: str) -> int:
         """Remove all chunks for a specific file.
```
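The reworked `raise DatabaseInitializationError(...) from e` chains the wrapper onto the original ChromaDB error, so tracebacks show both failures. A minimal sketch of the pattern (the triggering error is a stand-in):

```python
class DatabaseInitializationError(Exception):
    """Stand-in for the package's exception type."""


try:
    raise RuntimeError("chroma failed to open the persist directory")  # stand-in failure
except Exception as e:
    # "from e" sets __cause__ explicitly; the printed traceback shows the original
    # RuntimeError followed by "The above exception was the direct cause of ...".
    raise DatabaseInitializationError(f"ChromaDB initialization failed: {e}") from e
```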
```diff
@@ -173,9 +175,7 @@ class ChromaVectorDatabase(VectorDatabase):
 
         try:
             # Get all chunks for this file
-            results = self._collection.get(
-                where={"file_path": file_path}
-            )
+            results = self._collection.get(where={"file_path": file_path})
 
             if not results["ids"]:
                 return 0
@@ -199,7 +199,7 @@ class ChromaVectorDatabase(VectorDatabase):
         self._collection = None
         logger.debug("ChromaDB connections closed")
 
-    async def add_chunks(self, chunks:
+    async def add_chunks(self, chunks: list[CodeChunk]) -> None:
         """Add code chunks to the database."""
         if not self._collection:
             raise DatabaseNotInitializedError("Database not initialized")
@@ -251,9 +251,9 @@ class ChromaVectorDatabase(VectorDatabase):
         self,
         query: str,
         limit: int = 10,
-        filters:
+        filters: dict[str, Any] | None = None,
         similarity_threshold: float = 0.7,
-    ) ->
+    ) -> list[SearchResult]:
         """Search for similar code chunks."""
         if not self._collection:
             raise DatabaseNotInitializedError("Database not initialized")
@@ -279,10 +279,13 @@ class ChromaVectorDatabase(VectorDatabase):
                 results["documents"][0],
                 results["metadatas"][0],
                 results["distances"][0],
+                strict=False,
             )
         ):
             # Convert distance to similarity (ChromaDB uses cosine distance)
-
+            # For cosine distance, use a more permissive conversion that handles distances > 1.0
+            # Convert to a 0-1 similarity score where lower distances = higher similarity
+            similarity = max(0.0, 1.0 / (1.0 + distance))
 
             if similarity >= similarity_threshold:
                 result = SearchResult(
```
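The replacement similarity maps any non-negative cosine distance into (0, 1]: `1 / (1 + d)` gives 1.0 at d = 0, 0.5 at d = 1, and stays positive for distances above 1.0, which the new comments say the previous conversion mishandled. One knock-on effect: under this mapping the default `similarity_threshold` of 0.7 only accepts distances below roughly 0.43. (`strict=False` on `zip` merely spells out the pre-3.10 default.) A quick check of the formula:

```python
def to_similarity(distance: float) -> float:
    # Mirrors the conversion introduced in this hunk.
    return max(0.0, 1.0 / (1.0 + distance))


for d in (0.0, 0.5, 1.0, 3.0):
    print(f"distance={d:.1f} -> similarity={to_similarity(d):.3f}")
# distance=0.0 -> similarity=1.000
# distance=0.5 -> similarity=0.667
# distance=1.0 -> similarity=0.500
# distance=3.0 -> similarity=0.250
```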
```diff
@@ -363,7 +366,12 @@ class ChromaVectorDatabase(VectorDatabase):
             index_size_mb = count * 0.001  # Rough estimate
 
             return IndexStats(
-                total_files=len(
+                total_files=len(
+                    {
+                        m.get("file_path", "")
+                        for m in sample_results.get("metadatas", [])
+                    }
+                ),
                 total_chunks=count,
                 languages=languages,
                 file_types=file_types,
```
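`total_files` is now the number of distinct `file_path` values in the sampled metadata rather than a raw record count; the set comprehension deduplicates, and a row missing `file_path` contributes the empty string once. A small worked example with invented rows:

```python
metadatas = [
    {"file_path": "src/a.py"},
    {"file_path": "src/a.py"},  # duplicate chunk from the same file
    {"file_path": "src/b.py"},
    {},  # no file_path -> falls back to ""
]
total_files = len({m.get("file_path", "") for m in metadatas})
print(total_files)  # 3, i.e. {"src/a.py", "src/b.py", ""}
```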
```diff
@@ -416,7 +424,7 @@ class ChromaVectorDatabase(VectorDatabase):
 
         return "\n".join(parts)
 
-    def _build_where_clause(self, filters:
+    def _build_where_clause(self, filters: dict[str, Any]) -> dict[str, Any]:
         """Build ChromaDB where clause from filters."""
         where = {}
 
```
```diff
@@ -429,3 +437,305 @@ class ChromaVectorDatabase(VectorDatabase):
             where[key] = value
 
         return where
+
+
+class PooledChromaVectorDatabase(VectorDatabase):
+    """ChromaDB implementation with connection pooling for improved performance."""
+
+    def __init__(
+        self,
+        persist_directory: Path,
+        embedding_function: EmbeddingFunction,
+        collection_name: str = "code_search",
+        max_connections: int = 10,
+        min_connections: int = 2,
+        max_idle_time: float = 300.0,
+        max_connection_age: float = 3600.0,
+    ) -> None:
+        """Initialize pooled ChromaDB vector database.
+
+        Args:
+            persist_directory: Directory to persist database
+            embedding_function: Function to generate embeddings
+            collection_name: Name of the collection
+            max_connections: Maximum number of connections in pool
+            min_connections: Minimum number of connections to maintain
+            max_idle_time: Maximum time a connection can be idle (seconds)
+            max_connection_age: Maximum age of a connection (seconds)
+        """
+        self.persist_directory = persist_directory
+        self.embedding_function = embedding_function
+        self.collection_name = collection_name
+
+        self._pool = ChromaConnectionPool(
+            persist_directory=persist_directory,
+            embedding_function=embedding_function,
+            collection_name=collection_name,
+            max_connections=max_connections,
+            min_connections=min_connections,
+            max_idle_time=max_idle_time,
+            max_connection_age=max_connection_age,
+        )
+
+    async def initialize(self) -> None:
+        """Initialize the connection pool."""
+        await self._pool.initialize()
+        logger.debug(f"Pooled ChromaDB initialized at {self.persist_directory}")
+
+    async def close(self) -> None:
+        """Close the connection pool."""
+        await self._pool.close()
+        logger.debug("Pooled ChromaDB connections closed")
+
+    async def add_chunks(self, chunks: list[CodeChunk]) -> None:
+        """Add code chunks to the database using pooled connection."""
+        if not chunks:
+            return
+
+        # Ensure pool is initialized
+        if not self._pool._initialized:
+            await self._pool.initialize()
+
+        try:
+            async with self._pool.get_connection() as conn:
+                # Prepare data for ChromaDB
+                documents = []
+                metadatas = []
+                ids = []
+
+                for chunk in chunks:
+                    documents.append(chunk.content)
+                    metadatas.append(
+                        {
+                            "file_path": str(chunk.file_path),
+                            "start_line": chunk.start_line,
+                            "end_line": chunk.end_line,
+                            "language": chunk.language,
+                            "chunk_type": chunk.chunk_type,
+                            "function_name": chunk.function_name or "",
+                            "class_name": chunk.class_name or "",
+                        }
+                    )
+                    ids.append(chunk.id)
+
+                # Add to collection
+                conn.collection.add(documents=documents, metadatas=metadatas, ids=ids)
+
+                logger.debug(f"Added {len(chunks)} chunks to database")
+
+        except Exception as e:
+            logger.error(f"Failed to add chunks: {e}")
+            raise DocumentAdditionError(f"Failed to add chunks: {e}") from e
+
+    async def search(
+        self,
+        query: str,
+        limit: int = 10,
+        filters: dict[str, Any] | None = None,
+        similarity_threshold: float = 0.7,
+    ) -> list[SearchResult]:
+        """Search for similar code chunks using pooled connection."""
+        # Ensure pool is initialized
+        if not self._pool._initialized:
+            await self._pool.initialize()
+
+        try:
+            async with self._pool.get_connection() as conn:
+                # Build where clause
+                where_clause = self._build_where_clause(filters) if filters else None
+
+                # Perform search
+                results = conn.collection.query(
+                    query_texts=[query],
+                    n_results=limit,
+                    where=where_clause,
+                    include=["documents", "metadatas", "distances"],
+                )
+
+                # Process results
+                search_results = []
+
+                if results["documents"] and results["documents"][0]:
+                    for i, (doc, metadata, distance) in enumerate(
+                        zip(
+                            results["documents"][0],
+                            results["metadatas"][0],
+                            results["distances"][0],
+                            strict=False,
+                        )
+                    ):
+                        # Convert distance to similarity (ChromaDB uses cosine distance)
+                        # For cosine distance, use a more permissive conversion that handles distances > 1.0
+                        # Convert to a 0-1 similarity score where lower distances = higher similarity
+                        similarity = max(0.0, 1.0 / (1.0 + distance))
+
+                        if similarity >= similarity_threshold:
+                            result = SearchResult(
+                                content=doc,
+                                file_path=Path(metadata["file_path"]),
+                                start_line=metadata["start_line"],
+                                end_line=metadata["end_line"],
+                                language=metadata["language"],
+                                similarity_score=similarity,
+                                rank=i + 1,
+                                chunk_type=metadata.get("chunk_type", "code"),
+                                function_name=metadata.get("function_name") or None,
+                                class_name=metadata.get("class_name") or None,
+                            )
+                            search_results.append(result)
+
+                logger.debug(f"Found {len(search_results)} results for query: {query}")
+                return search_results
+
+        except Exception as e:
+            logger.error(f"Search failed: {e}")
+            raise SearchError(f"Search failed: {e}") from e
+
+    async def delete_by_file(self, file_path: Path) -> int:
+        """Delete all chunks for a specific file using pooled connection."""
+        try:
+            async with self._pool.get_connection() as conn:
+                # Get all chunks for this file
+                results = conn.collection.get(
+                    where={"file_path": str(file_path)}, include=["metadatas"]
+                )
+
+                if not results["ids"]:
+                    return 0
+
+                # Delete the chunks
+                conn.collection.delete(ids=results["ids"])
+
+                deleted_count = len(results["ids"])
+                logger.debug(f"Deleted {deleted_count} chunks for file: {file_path}")
+                return deleted_count
+
+        except Exception as e:
+            logger.error(f"Failed to delete chunks for file {file_path}: {e}")
+            raise DatabaseError(f"Failed to delete chunks: {e}") from e
+
+    async def get_stats(self) -> IndexStats:
+        """Get database statistics using pooled connection."""
+        try:
+            async with self._pool.get_connection() as conn:
+                # Get total count
+                count = conn.collection.count()
+
+                # Get all metadata to analyze
+                results = conn.collection.get(include=["metadatas"])
+
+                # Analyze languages and files
+                languages = set()
+                files = set()
+
+                for metadata in results["metadatas"]:
+                    if "language" in metadata:
+                        languages.add(metadata["language"])
+                    if "file_path" in metadata:
+                        files.add(metadata["file_path"])
+
+                # Count languages and file types
+                language_counts = {}
+                file_type_counts = {}
+
+                for metadata in results["metadatas"]:
+                    # Count languages
+                    lang = metadata.get("language", "unknown")
+                    language_counts[lang] = language_counts.get(lang, 0) + 1
+
+                    # Count file types
+                    file_path = metadata.get("file_path", "")
+                    if file_path:
+                        ext = Path(file_path).suffix or "no_extension"
+                        file_type_counts[ext] = file_type_counts.get(ext, 0) + 1
+
+                # Estimate index size (rough approximation)
+                index_size_mb = count * 0.001  # Rough estimate
+
+                return IndexStats(
+                    total_chunks=count,
+                    total_files=len(files),
+                    languages=language_counts,
+                    file_types=file_type_counts,
+                    index_size_mb=index_size_mb,
+                    last_updated="unknown",  # ChromaDB doesn't track this
+                    embedding_model="unknown"  # TODO: Track this in metadata
+                )
+
+        except Exception as e:
+            logger.error(f"Failed to get database stats: {e}")
+            raise DatabaseError(f"Failed to get stats: {e}") from e
+
+    async def remove_file_chunks(self, file_path: str) -> int:
+        """Remove all chunks for a specific file using pooled connection."""
+        try:
+            async with self._pool.get_connection() as conn:
+                # Get all chunks for this file
+                results = conn.collection.get(where={"file_path": file_path})
+
+                if not results["ids"]:
+                    return 0
+
+                # Delete the chunks
+                conn.collection.delete(ids=results["ids"])
+
+                return len(results["ids"])
+
+        except Exception as e:
+            logger.error(f"Failed to remove chunks for file {file_path}: {e}")
+            return 0
+
+    async def reset(self) -> None:
+        """Reset the database using pooled connection."""
+        try:
+            async with self._pool.get_connection() as conn:
+                conn.client.reset()
+            # Reinitialize the pool after reset
+            await self._pool.close()
+            await self._pool.initialize()
+            logger.info("Database reset successfully")
+        except Exception as e:
+            logger.error(f"Failed to reset database: {e}")
+            raise DatabaseError(f"Failed to reset database: {e}") from e
+
+    def _build_where_clause(self, filters: dict[str, Any]) -> dict[str, Any] | None:
+        """Build ChromaDB where clause from filters."""
+        if not filters:
+            return None
+
+        conditions = []
+
+        for key, value in filters.items():
+            if key == "language" and value:
+                conditions.append({"language": {"$eq": value}})
+            elif key == "file_path" and value:
+                if isinstance(value, list):
+                    conditions.append({"file_path": {"$in": [str(p) for p in value]}})
+                else:
+                    conditions.append({"file_path": {"$eq": str(value)}})
+            elif key == "chunk_type" and value:
+                conditions.append({"chunk_type": {"$eq": value}})
+
+        if not conditions:
+            return None
+        elif len(conditions) > 1:
+            return {"$and": conditions}
+        else:
+            return conditions[0]
+
+    def get_pool_stats(self) -> dict[str, Any]:
+        """Get connection pool statistics."""
+        return self._pool.get_stats()
+
+    async def health_check(self) -> bool:
+        """Perform a health check on the database and connection pool."""
+        return await self._pool.health_check()
+
+    async def __aenter__(self):
+        """Async context manager entry."""
+        await self.initialize()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        await self.close()
```