langroid 0.56.8__py3-none-any.whl → 0.56.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/special/doc_chat_agent.py +24 -25
- langroid/vector_store/qdrantdb.py +19 -1
- {langroid-0.56.8.dist-info → langroid-0.56.10.dist-info}/METADATA +1 -1
- {langroid-0.56.8.dist-info → langroid-0.56.10.dist-info}/RECORD +6 -6
- {langroid-0.56.8.dist-info → langroid-0.56.10.dist-info}/WHEEL +0 -0
- {langroid-0.56.8.dist-info → langroid-0.56.10.dist-info}/licenses/LICENSE +0 -0
@@ -149,8 +149,8 @@ class DocChatAgentConfig(ChatAgentConfig):
|
|
149
149
|
n_fuzzy_neighbor_words: int = 100 # num neighbor words to retrieve for fuzzy match
|
150
150
|
use_fuzzy_match: bool = True
|
151
151
|
use_bm25_search: bool = True
|
152
|
-
use_reciprocal_rank_fusion: bool =
|
153
|
-
cross_encoder_reranking_model: str = (
|
152
|
+
use_reciprocal_rank_fusion: bool = False
|
153
|
+
cross_encoder_reranking_model: str = ( # ignored if use_reciprocal_rank_fusion=True
|
154
154
|
"cross-encoder/ms-marco-MiniLM-L-6-v2" if has_sentence_transformers else ""
|
155
155
|
)
|
156
156
|
rerank_diversity: bool = True # rerank to maximize diversity?
|
@@ -249,11 +249,10 @@ class DocChatAgent(ChatAgent):
|
|
249
249
|
):
|
250
250
|
logger.warning(
|
251
251
|
"""
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
`cross_encoder_reranking_model` to an empty string.
|
252
|
+
Ignoring `cross_encoder_reranking_model` since you have set
|
253
|
+
`use_reciprocal_rank_fusion` to True.
|
254
|
+
To use cross-encoder reranking, set
|
255
|
+
`use_reciprocal_rank_fusion` to False.
|
257
256
|
"""
|
258
257
|
)
|
259
258
|
|
@@ -315,6 +314,10 @@ class DocChatAgent(ChatAgent):
|
|
315
314
|
# Note we may have used a vecdb with a config.collection_name
|
316
315
|
# different from the agent's config.vecdb.collection_name!!
|
317
316
|
self.vecdb.delete_collection(collection_name)
|
317
|
+
# Close the old vecdb before creating a new one
|
318
|
+
old_vecdb = self.vecdb
|
319
|
+
if old_vecdb and hasattr(old_vecdb, "close"):
|
320
|
+
old_vecdb.close()
|
318
321
|
self.vecdb = VectorStore.create(self.vecdb.config)
|
319
322
|
except Exception as e:
|
320
323
|
logger.warning(
|
@@ -1113,7 +1116,7 @@ class DocChatAgent(ChatAgent):
|
|
1113
1116
|
key=lambda x: x[0],
|
1114
1117
|
reverse=True,
|
1115
1118
|
)
|
1116
|
-
passages = [d for _, d in sorted_pairs
|
1119
|
+
passages = [d for _, d in sorted_pairs]
|
1117
1120
|
return passages
|
1118
1121
|
|
1119
1122
|
def rerank_with_diversity(self, passages: List[Document]) -> List[Document]:
|
@@ -1320,10 +1323,7 @@ class DocChatAgent(ChatAgent):
|
|
1320
1323
|
# TODO: Add score threshold in config
|
1321
1324
|
docs_scores = self.get_similar_chunks_bm25(query, retrieval_multiple)
|
1322
1325
|
id2doc.update({d.id(): d for d, _ in docs_scores})
|
1323
|
-
if
|
1324
|
-
self.config.cross_encoder_reranking_model == ""
|
1325
|
-
and self.config.use_reciprocal_rank_fusion
|
1326
|
-
):
|
1326
|
+
if self.config.use_reciprocal_rank_fusion:
|
1327
1327
|
# if we're not re-ranking with a cross-encoder, and have RRF enabled,
|
1328
1328
|
# instead of accumulating the bm25 results into passages,
|
1329
1329
|
# we collect these ranks for Reciprocal Rank Fusion down below.
|
@@ -1338,10 +1338,7 @@ class DocChatAgent(ChatAgent):
|
|
1338
1338
|
if self.config.use_fuzzy_match:
|
1339
1339
|
# TODO: Add score threshold in config
|
1340
1340
|
fuzzy_match_doc_scores = self.get_fuzzy_matches(query, retrieval_multiple)
|
1341
|
-
if
|
1342
|
-
self.config.cross_encoder_reranking_model == ""
|
1343
|
-
and self.config.use_reciprocal_rank_fusion
|
1344
|
-
):
|
1341
|
+
if self.config.use_reciprocal_rank_fusion:
|
1345
1342
|
# if we're not re-ranking with a cross-encoder,
|
1346
1343
|
# instead of accumulating the fuzzy match results into passages,
|
1347
1344
|
# we collect these ranks for Reciprocal Rank Fusion down below.
|
@@ -1357,10 +1354,8 @@ class DocChatAgent(ChatAgent):
|
|
1357
1354
|
# eliminate duplicate ids
|
1358
1355
|
passages = [id2doc[id] for id in id2doc.keys()]
|
1359
1356
|
|
1360
|
-
if (
|
1361
|
-
self.config.
|
1362
|
-
and self.config.use_reciprocal_rank_fusion
|
1363
|
-
and (self.config.use_bm25_search or self.config.use_fuzzy_match)
|
1357
|
+
if self.config.use_reciprocal_rank_fusion and (
|
1358
|
+
self.config.use_bm25_search or self.config.use_fuzzy_match
|
1364
1359
|
):
|
1365
1360
|
# Since we're not using cross-encoder re-ranking,
|
1366
1361
|
# we need to re-order the retrieved chunks from potentially three
|
@@ -1382,9 +1377,9 @@ class DocChatAgent(ChatAgent):
|
|
1382
1377
|
# Use max_rank instead of infinity to avoid bias against
|
1383
1378
|
# single-method docs
|
1384
1379
|
max_rank = self.config.n_similar_chunks * retrieval_multiple
|
1385
|
-
rank_semantic = id2_rank_semantic.get(id_, max_rank)
|
1386
|
-
rank_bm25 = id2_rank_bm25.get(id_, max_rank)
|
1387
|
-
rank_fuzzy = id2_rank_fuzzy.get(id_, max_rank)
|
1380
|
+
rank_semantic = id2_rank_semantic.get(id_, max_rank + 1)
|
1381
|
+
rank_bm25 = id2_rank_bm25.get(id_, max_rank + 1)
|
1382
|
+
rank_fuzzy = id2_rank_fuzzy.get(id_, max_rank + 1)
|
1388
1383
|
c = self.config.reciprocal_rank_fusion_constant
|
1389
1384
|
reciprocal_fusion_score = (
|
1390
1385
|
1 / (rank_semantic + c) + 1 / (rank_bm25 + c) + 1 / (rank_fuzzy + c)
|
@@ -1421,10 +1416,14 @@ class DocChatAgent(ChatAgent):
|
|
1421
1416
|
passages_scores = self.add_context_window(passages_scores)
|
1422
1417
|
passages = [p for p, _ in passages_scores]
|
1423
1418
|
# now passages can potentially have a lot of doc chunks,
|
1424
|
-
# so we re-rank them using a cross-encoder scoring model
|
1419
|
+
# so we re-rank them using a cross-encoder scoring model
|
1420
|
+
# (provided that `use_reciprocal_rank_fusion` is not enabled),
|
1425
1421
|
# and pick top k where k = config.n_similar_chunks
|
1426
1422
|
# https://www.sbert.net/examples/applications/retrieve_rerank
|
1427
|
-
if
|
1423
|
+
if (
|
1424
|
+
self.config.cross_encoder_reranking_model != ""
|
1425
|
+
and not self.config.use_reciprocal_rank_fusion
|
1426
|
+
):
|
1428
1427
|
passages = self.rerank_with_cross_encoder(query, passages)
|
1429
1428
|
|
1430
1429
|
if self.config.rerank_diversity:
|
@@ -4,7 +4,7 @@ import logging
|
|
4
4
|
import os
|
5
5
|
import time
|
6
6
|
import uuid
|
7
|
-
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Tuple, TypeVar
|
7
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, TypeVar
|
8
8
|
|
9
9
|
from dotenv import load_dotenv
|
10
10
|
|
@@ -143,6 +143,24 @@ class QdrantDB(VectorStore):
|
|
143
143
|
config.collection_name, replace=config.replace_collection
|
144
144
|
)
|
145
145
|
|
146
|
+
def close(self) -> None:
|
147
|
+
"""
|
148
|
+
Close the QdrantDB client and release any resources (e.g., file locks).
|
149
|
+
This is especially important for local storage to release the .lock file.
|
150
|
+
"""
|
151
|
+
if hasattr(self.client, "close"):
|
152
|
+
# QdrantLocal has a close method that releases the lock
|
153
|
+
self.client.close()
|
154
|
+
logger.info(f"Closed QdrantDB connection for {self.config.storage_path}")
|
155
|
+
|
156
|
+
def __enter__(self) -> "QdrantDB":
|
157
|
+
"""Context manager entry."""
|
158
|
+
return self
|
159
|
+
|
160
|
+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
|
161
|
+
"""Context manager exit - ensure cleanup even if an exception occurred."""
|
162
|
+
self.close()
|
163
|
+
|
146
164
|
def clear_empty_collections(self) -> int:
|
147
165
|
coll_names = self.list_collections()
|
148
166
|
n_deletes = 0
|
@@ -15,7 +15,7 @@ langroid/agent/xml_tool_message.py,sha256=oeBKnJNoGaKdtz39XoWGMTNlVyXew2MWH5lgtY
|
|
15
15
|
langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
langroid/agent/callbacks/chainlit.py,sha256=4rJw07NIIVTIVvksVY08h5PdLE_kRoJItjbQM0UjRn0,20962
|
17
17
|
langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
|
18
|
-
langroid/agent/special/doc_chat_agent.py,sha256=
|
18
|
+
langroid/agent/special/doc_chat_agent.py,sha256=PysF6K_rj_xe1yHVZyaldQkGacdHv905zmNBqOdrzYU,68815
|
19
19
|
langroid/agent/special/doc_chat_task.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
20
20
|
langroid/agent/special/lance_doc_chat_agent.py,sha256=6pIqi2DF-MvYYN3-blsdUgulYnOBTl7I21T7wPAt1zM,10413
|
21
21
|
langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
|
@@ -135,9 +135,9 @@ langroid/vector_store/lancedb.py,sha256=Qd20gKjWozPWfW5-D66J6U8dSrJo1yl-maj6s1lb
|
|
135
135
|
langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
|
136
136
|
langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZXpEY9M,14994
|
137
137
|
langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
|
138
|
-
langroid/vector_store/qdrantdb.py,sha256=
|
138
|
+
langroid/vector_store/qdrantdb.py,sha256=ZYrT9mxoUCx_67Qzb5xnkWuFG12rfe30yAg4NgG2ueA,19168
|
139
139
|
langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
|
140
|
-
langroid-0.56.
|
141
|
-
langroid-0.56.
|
142
|
-
langroid-0.56.
|
143
|
-
langroid-0.56.
|
140
|
+
langroid-0.56.10.dist-info/METADATA,sha256=QYPsEwh24uWqM4OwExH1tSmsWlnj-cyFRkTAXd2Rl64,65745
|
141
|
+
langroid-0.56.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
142
|
+
langroid-0.56.10.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
143
|
+
langroid-0.56.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|