agent_brain_rag-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_brain_rag-1.1.0.dist-info/METADATA +202 -0
- agent_brain_rag-1.1.0.dist-info/RECORD +31 -0
- agent_brain_rag-1.1.0.dist-info/WHEEL +4 -0
- agent_brain_rag-1.1.0.dist-info/entry_points.txt +3 -0
- doc_serve_server/__init__.py +3 -0
- doc_serve_server/api/__init__.py +5 -0
- doc_serve_server/api/main.py +332 -0
- doc_serve_server/api/routers/__init__.py +11 -0
- doc_serve_server/api/routers/health.py +100 -0
- doc_serve_server/api/routers/index.py +208 -0
- doc_serve_server/api/routers/query.py +96 -0
- doc_serve_server/config/__init__.py +5 -0
- doc_serve_server/config/settings.py +92 -0
- doc_serve_server/indexing/__init__.py +19 -0
- doc_serve_server/indexing/bm25_index.py +166 -0
- doc_serve_server/indexing/chunking.py +831 -0
- doc_serve_server/indexing/document_loader.py +506 -0
- doc_serve_server/indexing/embedding.py +274 -0
- doc_serve_server/locking.py +133 -0
- doc_serve_server/models/__init__.py +18 -0
- doc_serve_server/models/health.py +126 -0
- doc_serve_server/models/index.py +157 -0
- doc_serve_server/models/query.py +191 -0
- doc_serve_server/project_root.py +85 -0
- doc_serve_server/runtime.py +112 -0
- doc_serve_server/services/__init__.py +11 -0
- doc_serve_server/services/indexing_service.py +476 -0
- doc_serve_server/services/query_service.py +414 -0
- doc_serve_server/storage/__init__.py +5 -0
- doc_serve_server/storage/vector_store.py +320 -0
- doc_serve_server/storage_paths.py +72 -0
doc_serve_server/services/query_service.py
@@ -0,0 +1,414 @@
"""Query service for executing semantic search queries."""

import logging
import time
from typing import Any, Optional

from llama_index.core.retrievers import BaseRetriever
from llama_index.core.schema import NodeWithScore, QueryBundle, TextNode

from doc_serve_server.indexing import EmbeddingGenerator, get_embedding_generator
from doc_serve_server.indexing.bm25_index import BM25IndexManager, get_bm25_manager
from doc_serve_server.models import QueryMode, QueryRequest, QueryResponse, QueryResult
from doc_serve_server.storage import VectorStoreManager, get_vector_store

logger = logging.getLogger(__name__)


class VectorManagerRetriever(BaseRetriever):
    """LlamaIndex retriever wrapper for VectorStoreManager."""

    def __init__(
        self,
        service: "QueryService",
        top_k: int,
        threshold: float,
    ):
        super().__init__()
        self.service = service
        self.top_k = top_k
        self.threshold = threshold

    def _retrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        # Synchronous retrieve not supported, use aretrieve
        return []

    async def _aretrieve(self, query_bundle: QueryBundle) -> list[NodeWithScore]:
        query_embedding = await self.service.embedding_generator.embed_query(
            query_bundle.query_str
        )
        results = await self.service.vector_store.similarity_search(
            query_embedding=query_embedding,
            top_k=self.top_k,
            similarity_threshold=self.threshold,
        )
        return [
            NodeWithScore(
                node=TextNode(text=res.text, id_=res.chunk_id, metadata=res.metadata),
                score=res.score,
            )
            for res in results
        ]

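# Usage sketch (illustrative only): the wrapper lets the vector store stand in
# wherever LlamaIndex expects a BaseRetriever, e.g. in a query engine or a
# fusion retriever. Note that the hybrid path below fuses scores manually
# rather than going through this class.
#
#     retriever = VectorManagerRetriever(service=query_service, top_k=5, threshold=0.3)
#     nodes = await retriever.aretrieve("how are documents chunked?")
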
class QueryService:
    """
    Executes semantic, keyword, and hybrid search queries.

    Coordinates embedding generation, vector similarity search,
    and BM25 retrieval with result fusion.
    """

    def __init__(
        self,
        vector_store: Optional[VectorStoreManager] = None,
        embedding_generator: Optional[EmbeddingGenerator] = None,
        bm25_manager: Optional[BM25IndexManager] = None,
    ):
        """
        Initialize the query service.

        Args:
            vector_store: Vector store manager instance.
            embedding_generator: Embedding generator instance.
            bm25_manager: BM25 index manager instance.
        """
        self.vector_store = vector_store or get_vector_store()
        self.embedding_generator = embedding_generator or get_embedding_generator()
        self.bm25_manager = bm25_manager or get_bm25_manager()

    def is_ready(self) -> bool:
        """
        Check if the service is ready to process queries.

        Returns:
            True if the underlying vector store is initialized.
        """
        return self.vector_store.is_initialized

    async def execute_query(self, request: QueryRequest) -> QueryResponse:
        """
        Execute a search query based on the requested mode.

        Args:
            request: QueryRequest with query text and parameters.

        Returns:
            QueryResponse with ranked results.

        Raises:
            RuntimeError: If the service is not ready.
        """
        if not self.is_ready():
            raise RuntimeError(
                "Query service not ready. Please wait for indexing to complete."
            )

        start_time = time.time()

        if request.mode == QueryMode.BM25:
            results = await self._execute_bm25_query(request)
        elif request.mode == QueryMode.VECTOR:
            results = await self._execute_vector_query(request)
        else:  # HYBRID
            results = await self._execute_hybrid_query(request)

        # Apply content filters if specified
        if any([request.source_types, request.languages, request.file_paths]):
            results = self._filter_results(results, request)

        query_time_ms = (time.time() - start_time) * 1000

        logger.debug(
            f"Query ({request.mode}) '{request.query[:50]}...' returned "
            f"{len(results)} results in {query_time_ms:.2f}ms"
        )

        return QueryResponse(
            results=results,
            query_time_ms=query_time_ms,
            total_results=len(results),
        )

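    # Example call (sketch; QueryRequest and QueryMode come from
    # doc_serve_server.models, and the fields shown are the ones this service
    # reads):
    #
    #     request = QueryRequest(query="vector databases", mode=QueryMode.HYBRID, top_k=5)
    #     response = await get_query_service().execute_query(request)
    #     for result in response.results:
    #         print(f"{result.score:.3f}  {result.source}")
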
    async def _execute_vector_query(self, request: QueryRequest) -> list[QueryResult]:
        """Execute pure semantic search."""
        query_embedding = await self.embedding_generator.embed_query(request.query)
        where_clause = self._build_where_clause(request.source_types, request.languages)
        search_results = await self.vector_store.similarity_search(
            query_embedding=query_embedding,
            top_k=request.top_k,
            similarity_threshold=request.similarity_threshold,
            where=where_clause,
        )

        return [
            QueryResult(
                text=res.text,
                source=res.metadata.get(
                    "source", res.metadata.get("file_path", "unknown")
                ),
                score=res.score,
                vector_score=res.score,
                chunk_id=res.chunk_id,
                source_type=res.metadata.get("source_type", "doc"),
                language=res.metadata.get("language"),
                metadata={
                    k: v
                    for k, v in res.metadata.items()
                    if k not in ("source", "file_path", "source_type", "language")
                },
            )
            for res in search_results
        ]

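    # A note on the mapping above: the four well-known metadata keys
    # ("source", "file_path", "source_type", "language") are promoted to
    # top-level QueryResult fields, and everything else is kept in the
    # result's metadata dict. The BM25 and hybrid paths below follow the
    # same convention.
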
    async def _execute_bm25_query(self, request: QueryRequest) -> list[QueryResult]:
        """Execute pure keyword search."""
        if not self.bm25_manager.is_initialized:
            raise RuntimeError("BM25 index not initialized")

        retriever = self.bm25_manager.get_retriever(top_k=request.top_k)
        nodes = await retriever.aretrieve(request.query)

        return [
            QueryResult(
                text=node.node.get_content(),
                source=node.node.metadata.get(
                    "source", node.node.metadata.get("file_path", "unknown")
                ),
                score=node.score or 0.0,
                bm25_score=node.score,
                chunk_id=node.node.node_id,
                source_type=node.node.metadata.get("source_type", "doc"),
                language=node.node.metadata.get("language"),
                metadata={
                    k: v
                    for k, v in node.node.metadata.items()
                    if k not in ("source", "file_path", "source_type", "language")
                },
            )
            for node in nodes
        ]

    async def _execute_hybrid_query(self, request: QueryRequest) -> list[QueryResult]:
        """Execute hybrid search using Relative Score Fusion."""
        # For US5, we want to provide individual scores.
        # We'll perform the individual searches first to get the scores.

        # Get corpus size to avoid requesting more than available
        corpus_size = await self.vector_store.get_count()
        effective_top_k = min(request.top_k, corpus_size)

        # Build ChromaDB where clause for filtering
        where_clause = self._build_where_clause(request.source_types, request.languages)

        # 1. Vector Search
        query_embedding = await self.embedding_generator.embed_query(request.query)
        vector_results = await self.vector_store.similarity_search(
            query_embedding=query_embedding,
            top_k=effective_top_k,
            similarity_threshold=request.similarity_threshold,
            where=where_clause,
        )

        # 2. BM25 Search
        bm25_results = []
        if self.bm25_manager.is_initialized:
            # Use the filter-aware BM25 search method
            bm25_results = await self.bm25_manager.search_with_filters(
                query=request.query,
                top_k=effective_top_k,
                source_types=request.source_types,
                languages=request.languages,
                max_results=corpus_size,
            )
        # Convert BM25 results to the same format as vector results
        bm25_query_results = []
        for node in bm25_results:
            bm25_query_results.append(
                QueryResult(
                    text=node.node.get_content(),
                    source=node.node.metadata.get(
                        "source", node.node.metadata.get("file_path", "unknown")
                    ),
                    score=node.score or 0.0,
                    bm25_score=node.score,
                    chunk_id=node.node.node_id,
                    source_type=node.node.metadata.get("source_type", "doc"),
                    language=node.node.metadata.get("language"),
                    metadata={
                        k: v
                        for k, v in node.node.metadata.items()
                        if k not in ("source", "file_path", "source_type", "language")
                    },
                )
            )

        # 3. Simple hybrid fusion for small corpora
        # Combine vector and BM25 results manually to avoid retriever complexity

        # Score normalization: bring both to 0-1 range
        max_vector_score = max((r.score for r in vector_results), default=1.0) or 1.0
        max_bm25_score = (
            max((r.bm25_score or 0.0 for r in bm25_query_results), default=1.0) or 1.0
        )

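        # The weighting applied below, restated as a formula (this mirrors
        # the code rather than adding behavior):
        #
        #     fused = alpha * (vector_score / max_vector_score)
        #             + (1 - alpha) * (bm25_score / max_bm25_score)
        #
        # e.g. with alpha = 0.5, a chunk at vector score 0.9 (max 0.9) and
        # BM25 score 3.2 (max 6.4) fuses to 0.5 * 1.0 + 0.5 * 0.5 = 0.75.
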
        # Create combined results map
        combined_results: dict[str, dict[str, Any]] = {}

        # Add vector results (convert SearchResult to QueryResult)
        for res in vector_results:
            query_result = QueryResult(
                text=res.text,
                source=res.metadata.get(
                    "source", res.metadata.get("file_path", "unknown")
                ),
                score=res.score,
                vector_score=res.score,
                chunk_id=res.chunk_id,
                source_type=res.metadata.get("source_type", "doc"),
                language=res.metadata.get("language"),
                metadata={
                    k: v
                    for k, v in res.metadata.items()
                    if k not in ("source", "file_path", "source_type", "language")
                },
            )
            combined_results[res.chunk_id] = {
                "result": query_result,
                "vector_score": res.score / max_vector_score,
                "bm25_score": 0.0,
                "total_score": request.alpha * (res.score / max_vector_score),
            }

        # Add/merge BM25 results
        for bm25_res in bm25_query_results:
            chunk_id = bm25_res.chunk_id
            bm25_normalized = (bm25_res.bm25_score or 0.0) / max_bm25_score
            bm25_weighted = (1.0 - request.alpha) * bm25_normalized

            if chunk_id in combined_results:
                combined_results[chunk_id]["bm25_score"] = bm25_normalized
                combined_results[chunk_id]["total_score"] += bm25_weighted
                # Update BM25 score on existing result
                combined_results[chunk_id]["result"].bm25_score = bm25_res.bm25_score
            else:
                combined_results[chunk_id] = {
                    "result": bm25_res,
                    "vector_score": 0.0,
                    "bm25_score": bm25_normalized,
                    "total_score": bm25_weighted,
                }

        # Convert to final results
        fused_nodes = []
        for _chunk_id, data in combined_results.items():
            result = data["result"]
            # Update score with combined score
            result.score = data["total_score"]
            fused_nodes.append(result)

        # Sort by combined score and take top_k
        fused_nodes.sort(key=lambda x: x.score, reverse=True)
        fused_nodes = fused_nodes[: request.top_k]

        return fused_nodes

    async def get_document_count(self) -> int:
        """
        Get the total number of indexed documents.

        Returns:
            Number of documents in the vector store.
        """
        if not self.is_ready():
            return 0
        return await self.vector_store.get_count()

    def _filter_results(
        self, results: list[QueryResult], request: QueryRequest
    ) -> list[QueryResult]:
        """
        Filter query results based on request parameters.

        Args:
            results: List of query results to filter.
            request: Query request with filter parameters.

        Returns:
            Filtered list of results.
        """
        filtered_results = results

        # Filter by source types
        if request.source_types:
            filtered_results = [
                r for r in filtered_results if r.source_type in request.source_types
            ]

        # Filter by languages
        if request.languages:
            filtered_results = [
                r
                for r in filtered_results
                if r.language and r.language in request.languages
            ]

        # Filter by file paths (with wildcard support)
        if request.file_paths:
            import fnmatch

            filtered_results = [
                r
                for r in filtered_results
                if any(
                    fnmatch.fnmatch(r.source, pattern) for pattern in request.file_paths
                )
            ]

        return filtered_results

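    # Note on the wildcard matching above: fnmatch's "*" also matches path
    # separators, so a pattern like "docs/*.md" matches "docs/intro.md" as
    # well as "docs/api/intro.md". Quick check:
    #
    #     import fnmatch
    #     fnmatch.fnmatch("docs/api/intro.md", "docs/*.md")  # True
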
    def _build_where_clause(
        self, source_types: list[str] | None, languages: list[str] | None
    ) -> dict[str, Any] | None:
        """
        Build ChromaDB where clause from filter parameters.

        Args:
            source_types: List of source types to filter by.
            languages: List of languages to filter by.

        Returns:
            ChromaDB where clause dict or None.
        """
        conditions: list[dict[str, Any]] = []

        if source_types:
            if len(source_types) == 1:
                conditions.append({"source_type": source_types[0]})
            else:
                conditions.append({"source_type": {"$in": source_types}})

        if languages:
            if len(languages) == 1:
                conditions.append({"language": languages[0]})
            else:
                conditions.append({"language": {"$in": languages}})

        if not conditions:
            return None
        elif len(conditions) == 1:
            return conditions[0]
        else:
            return {"$and": conditions}


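# Example where clauses from the branches above: single-value filters collapse
# to equality, multi-value filters use "$in", and multiple fields are joined
# with "$and":
#
#     _build_where_clause(["code"], None)
#         -> {"source_type": "code"}
#     _build_where_clause(["code", "doc"], ["python"])
#         -> {"$and": [{"source_type": {"$in": ["code", "doc"]}},
#                      {"language": "python"}]}
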
# Singleton instance
_query_service: Optional[QueryService] = None


def get_query_service() -> QueryService:
    """Get the global query service instance."""
    global _query_service
    if _query_service is None:
        _query_service = QueryService()
    return _query_service
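
# Usage sketch, assuming QueryRequest's remaining fields carry defaults and an
# index has already been built (so is_ready() returns True):
#
#     service = get_query_service()
#     assert service is get_query_service()  # same instance on repeat calls
#     response = await service.execute_query(QueryRequest(query="chunk sizes"))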