agent_brain_rag-1.1.0-py3-none-any.whl

@@ -0,0 +1,414 @@
+"""Query service for executing semantic search queries."""
+
+import fnmatch
+import logging
+import time
+from typing import Any, Optional
+
+from llama_index.core.retrievers import BaseRetriever
+from llama_index.core.schema import NodeWithScore, QueryBundle, TextNode
+
+from doc_serve_server.indexing import EmbeddingGenerator, get_embedding_generator
+from doc_serve_server.indexing.bm25_index import BM25IndexManager, get_bm25_manager
+from doc_serve_server.models import QueryMode, QueryRequest, QueryResponse, QueryResult
+from doc_serve_server.storage import VectorStoreManager, get_vector_store
+
+logger = logging.getLogger(__name__)
+
+
+class VectorManagerRetriever(BaseRetriever):
+    """LlamaIndex retriever wrapper for VectorStoreManager."""
+
+    def __init__(
+        self,
+        service: "QueryService",
+        top_k: int,
+        threshold: float,
+    ):
+        super().__init__()
+        self.service = service
+        self.top_k = top_k
+        self.threshold = threshold
+
+    def _retrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
+        # Synchronous retrieve not supported, use aretrieve
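+        # (LlamaIndex's BaseRetriever.aretrieve dispatches to _aretrieve below,
+        # so only this synchronous path, which this module never calls, returns
+        # an empty result.)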
+        return []
+
+    async def _aretrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
+        query_embedding = await self.service.embedding_generator.embed_query(
+            query_bundle.query_str
+        )
+        results = await self.service.vector_store.similarity_search(
+            query_embedding=query_embedding,
+            top_k=self.top_k,
+            similarity_threshold=self.threshold,
+        )
+        return [
+            NodeWithScore(
+                node=TextNode(text=res.text, id_=res.chunk_id, metadata=res.metadata),
+                score=res.score,
+            )
+            for res in results
+        ]
+
+
+class QueryService:
+    """
+    Executes semantic, keyword, and hybrid search queries.
+
+    Coordinates embedding generation, vector similarity search,
+    and BM25 retrieval with result fusion.
+    """
+
+    def __init__(
+        self,
+        vector_store: Optional[VectorStoreManager] = None,
+        embedding_generator: Optional[EmbeddingGenerator] = None,
+        bm25_manager: Optional[BM25IndexManager] = None,
+    ):
+        """
+        Initialize the query service.
+
+        Args:
+            vector_store: Vector store manager instance.
+            embedding_generator: Embedding generator instance.
+            bm25_manager: BM25 index manager instance.
+        """
+        self.vector_store = vector_store or get_vector_store()
+        self.embedding_generator = embedding_generator or get_embedding_generator()
+        self.bm25_manager = bm25_manager or get_bm25_manager()
+
+    def is_ready(self) -> bool:
+        """
+        Check if the service is ready to process queries.
+
+        Returns:
+            True if the vector store is initialized.
+        """
+        return self.vector_store.is_initialized
+
+    async def execute_query(self, request: QueryRequest) -> QueryResponse:
+        """
+        Execute a search query based on the requested mode.
+
+        Args:
+            request: QueryRequest with query text and parameters.
+
+        Returns:
+            QueryResponse with ranked results.
+
+        Raises:
+            RuntimeError: If the service is not ready.
+        """
+        if not self.is_ready():
+            raise RuntimeError(
+                "Query service not ready. Please wait for indexing to complete."
+            )
+
+        start_time = time.time()
+
+        if request.mode == QueryMode.BM25:
+            results = await self._execute_bm25_query(request)
+        elif request.mode == QueryMode.VECTOR:
+            results = await self._execute_vector_query(request)
+        else:  # HYBRID
+            results = await self._execute_hybrid_query(request)
+
+        # Apply content filters if specified
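+        # The vector and hybrid paths already push source_types/languages into
+        # the underlying searches; this post-filter enforces file_paths and
+        # covers the plain BM25 path, which applies no filters of its own.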
+        if any([request.source_types, request.languages, request.file_paths]):
+            results = self._filter_results(results, request)
+
+        query_time_ms = (time.time() - start_time) * 1000
+
+        logger.debug(
+            f"Query ({request.mode}) '{request.query[:50]}...' returned "
+            f"{len(results)} results in {query_time_ms:.2f}ms"
+        )
+
+        return QueryResponse(
+            results=results,
+            query_time_ms=query_time_ms,
+            total_results=len(results),
+        )
+
+    async def _execute_vector_query(self, request: QueryRequest) -> list[QueryResult]:
+        """Execute pure semantic search."""
+        query_embedding = await self.embedding_generator.embed_query(request.query)
+        where_clause = self._build_where_clause(request.source_types, request.languages)
+        search_results = await self.vector_store.similarity_search(
+            query_embedding=query_embedding,
+            top_k=request.top_k,
+            similarity_threshold=request.similarity_threshold,
+            where=where_clause,
+        )
+
+        return [
+            QueryResult(
+                text=res.text,
+                source=res.metadata.get(
+                    "source", res.metadata.get("file_path", "unknown")
+                ),
+                score=res.score,
+                vector_score=res.score,
+                chunk_id=res.chunk_id,
+                source_type=res.metadata.get("source_type", "doc"),
+                language=res.metadata.get("language"),
+                metadata={
+                    k: v
+                    for k, v in res.metadata.items()
+                    if k not in ("source", "file_path", "source_type", "language")
+                },
+            )
+            for res in search_results
+        ]
+
+    async def _execute_bm25_query(self, request: QueryRequest) -> list[QueryResult]:
+        """Execute pure keyword search."""
+        if not self.bm25_manager.is_initialized:
+            raise RuntimeError("BM25 index not initialized")
+
+        retriever = self.bm25_manager.get_retriever(top_k=request.top_k)
+        nodes = await retriever.aretrieve(request.query)
+
+        return [
+            QueryResult(
+                text=node.node.get_content(),
+                source=node.node.metadata.get(
+                    "source", node.node.metadata.get("file_path", "unknown")
+                ),
+                score=node.score or 0.0,
+                bm25_score=node.score,
+                chunk_id=node.node.node_id,
+                source_type=node.node.metadata.get("source_type", "doc"),
+                language=node.node.metadata.get("language"),
+                metadata={
+                    k: v
+                    for k, v in node.node.metadata.items()
+                    if k not in ("source", "file_path", "source_type", "language")
+                },
+            )
+            for node in nodes
+        ]
+
+    async def _execute_hybrid_query(self, request: QueryRequest) -> list[QueryResult]:
+        """Execute hybrid search using Relative Score Fusion."""
+        # For US5, we want to provide individual scores.
+        # We'll perform the individual searches first to get the scores.
+
+        # Get corpus size to avoid requesting more than available
+        corpus_size = await self.vector_store.get_count()
+        effective_top_k = min(request.top_k, corpus_size)
+
+        # Build ChromaDB where clause for filtering
+        where_clause = self._build_where_clause(request.source_types, request.languages)
+
+        # 1. Vector Search
+        query_embedding = await self.embedding_generator.embed_query(request.query)
+        vector_results = await self.vector_store.similarity_search(
+            query_embedding=query_embedding,
+            top_k=effective_top_k,
+            similarity_threshold=request.similarity_threshold,
+            where=where_clause,
+        )
+
+        # 2. BM25 Search
+        bm25_results = []
+        if self.bm25_manager.is_initialized:
+            # Use the new filtered search method
+            bm25_results = await self.bm25_manager.search_with_filters(
+                query=request.query,
+                top_k=effective_top_k,
+                source_types=request.source_types,
+                languages=request.languages,
+                max_results=corpus_size,
+            )
+        # Convert BM25 results to same format as vector results
+        bm25_query_results = []
+        for node in bm25_results:
+            bm25_query_results.append(
+                QueryResult(
+                    text=node.node.get_content(),
+                    source=node.node.metadata.get(
+                        "source", node.node.metadata.get("file_path", "unknown")
+                    ),
+                    score=node.score or 0.0,
+                    bm25_score=node.score,
+                    chunk_id=node.node.node_id,
+                    source_type=node.node.metadata.get("source_type", "doc"),
+                    language=node.node.metadata.get("language"),
+                    metadata={
+                        k: v
+                        for k, v in node.node.metadata.items()
+                        if k not in ("source", "file_path", "source_type", "language")
+                    },
+                )
+            )
+
+        # 3. Simple hybrid fusion for small corpora
+        # Combine vector and BM25 results manually to avoid retriever complexity
+
+        # Score normalization: bring both to 0-1 range
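+        # Relative Score Fusion as implemented below:
+        #   final = alpha * (vector_score / max_vector_score)
+        #         + (1 - alpha) * (bm25_score / max_bm25_score)
+        # so request.alpha weights the semantic side and (1 - alpha) the keyword side.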
+        max_vector_score = max((r.score for r in vector_results), default=1.0) or 1.0
+        max_bm25_score = (
+            max((r.bm25_score or 0.0 for r in bm25_query_results), default=1.0) or 1.0
+        )
+
+        # Create combined results map
+        combined_results: dict[str, dict[str, Any]] = {}
+
+        # Add vector results (convert SearchResult to QueryResult)
+        for res in vector_results:
+            query_result = QueryResult(
+                text=res.text,
+                source=res.metadata.get(
+                    "source", res.metadata.get("file_path", "unknown")
+                ),
+                score=res.score,
+                vector_score=res.score,
+                chunk_id=res.chunk_id,
+                source_type=res.metadata.get("source_type", "doc"),
+                language=res.metadata.get("language"),
+                metadata={
+                    k: v
+                    for k, v in res.metadata.items()
+                    if k not in ("source", "file_path", "source_type", "language")
+                },
+            )
+            combined_results[res.chunk_id] = {
+                "result": query_result,
+                "vector_score": res.score / max_vector_score,
+                "bm25_score": 0.0,
+                "total_score": request.alpha * (res.score / max_vector_score),
+            }
+
+        # Add/merge BM25 results
+        for bm25_res in bm25_query_results:
+            chunk_id = bm25_res.chunk_id
+            bm25_normalized = (bm25_res.bm25_score or 0.0) / max_bm25_score
+            bm25_weighted = (1.0 - request.alpha) * bm25_normalized
+
+            if chunk_id in combined_results:
+                combined_results[chunk_id]["bm25_score"] = bm25_normalized
+                combined_results[chunk_id]["total_score"] += bm25_weighted
+                # Update BM25 score on existing result
+                combined_results[chunk_id]["result"].bm25_score = bm25_res.bm25_score
+            else:
+                combined_results[chunk_id] = {
+                    "result": bm25_res,
+                    "vector_score": 0.0,
+                    "bm25_score": bm25_normalized,
+                    "total_score": bm25_weighted,
+                }
+
+        # Convert to final results
+        fused_results = []
+        for _chunk_id, data in combined_results.items():
+            result = data["result"]
+            # Update score with combined score
+            result.score = data["total_score"]
+            fused_results.append(result)
+
+        # Sort by combined score and take top_k
+        fused_results.sort(key=lambda x: x.score, reverse=True)
+        return fused_results[: request.top_k]
+
+    async def get_document_count(self) -> int:
+        """
+        Get the total number of indexed documents.
+
+        Returns:
+            Number of documents in the vector store.
+        """
+        if not self.is_ready():
+            return 0
+        return await self.vector_store.get_count()
+
+    def _filter_results(
+        self, results: list[QueryResult], request: QueryRequest
+    ) -> list[QueryResult]:
+        """
+        Filter query results based on request parameters.
+
+        Args:
+            results: List of query results to filter.
+            request: Query request with filter parameters.
+
+        Returns:
+            Filtered list of results.
+        """
+        filtered_results = results
+
+        # Filter by source types
+        if request.source_types:
+            filtered_results = [
+                r for r in filtered_results if r.source_type in request.source_types
+            ]
+
+        # Filter by languages
+        if request.languages:
+            filtered_results = [
+                r
+                for r in filtered_results
+                if r.language and r.language in request.languages
+            ]
+
+        # Filter by file paths (with wildcard support)
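+        # fnmatch gives shell-style matching, e.g. the pattern "docs/*.md"
+        # matches a result whose source is "docs/guide.md".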
+        if request.file_paths:
+            filtered_results = [
+                r
+                for r in filtered_results
+                if any(
+                    fnmatch.fnmatch(r.source, pattern) for pattern in request.file_paths
+                )
+            ]
+
+        return filtered_results
+
+    def _build_where_clause(
+        self, source_types: list[str] | None, languages: list[str] | None
+    ) -> dict[str, Any] | None:
+        """
+        Build ChromaDB where clause from filter parameters.
+
+        Args:
+            source_types: List of source types to filter by.
+            languages: List of languages to filter by.
+
+        Returns:
+            ChromaDB where clause dict or None.
+        """
+        conditions: list[dict[str, Any]] = []
+
+        if source_types:
+            if len(source_types) == 1:
+                conditions.append({"source_type": source_types[0]})
+            else:
+                conditions.append({"source_type": {"$in": source_types}})
+
+        if languages:
+            if len(languages) == 1:
+                conditions.append({"language": languages[0]})
+            else:
+                conditions.append({"language": {"$in": languages}})
+
+        if not conditions:
+            return None
+        elif len(conditions) == 1:
+            return conditions[0]
+        else:
+            return {"$and": conditions}
+
+
+# Singleton instance
+_query_service: Optional[QueryService] = None
+
+
+def get_query_service() -> QueryService:
+    """Get the global query service instance."""
+    global _query_service
+    if _query_service is None:
+        _query_service = QueryService()
+    return _query_service
@@ -0,0 +1,5 @@
+"""Storage layer for vector database operations."""
+
+from .vector_store import VectorStoreManager, get_vector_store, initialize_vector_store
+
+__all__ = ["VectorStoreManager", "get_vector_store", "initialize_vector_store"]
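
For orientation, a minimal usage sketch of the query service. The import path doc_serve_server.query for the module above is hypothetical (the diff does not show its location), and the QueryRequest fields are inferred from the attributes the service reads, not from the model definition:

    import asyncio

    from doc_serve_server.models import QueryMode, QueryRequest
    from doc_serve_server.query import get_query_service  # hypothetical module path


    async def main() -> None:
        service = get_query_service()
        if not service.is_ready():
            return  # vector store not yet indexed
        response = await service.execute_query(
            QueryRequest(
                query="how is the vector store initialized?",
                mode=QueryMode.HYBRID,
                top_k=5,
                similarity_threshold=0.2,  # assumed value; field inferred from the code
                alpha=0.5,  # equal weight for vector and BM25 scores in fusion
            )
        )
        for result in response.results:
            print(f"{result.score:.3f}  {result.source}")


    asyncio.run(main())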