aiagents4pharma 1.40.1__py3-none-any.whl → 1.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/METADATA +27 -115
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/RECORD +45 -23
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,156 @@
|
|
1
|
+
"""Tests for singleton_manager: manages vector store connections and event loops."""
|
2
|
+
|
3
|
+
from unittest.mock import MagicMock, patch
|
4
|
+
import pytest
|
5
|
+
from pymilvus.exceptions import MilvusException
|
6
|
+
|
7
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore import (
|
8
|
+
get_vectorstore,
|
9
|
+
)
|
10
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager import (
|
11
|
+
VectorstoreSingleton,
|
12
|
+
)
|
13
|
+
|
14
|
+
|
15
|
+
def test_singleton_instance_identity():
    """Two constructor calls must hand back the very same object."""
    first, second = VectorstoreSingleton(), VectorstoreSingleton()
    assert first is second
|
20
|
+
|
21
|
+
|
22
|
+
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.detect_nvidia_gpu"
)
def test_detect_gpu_once(mock_detect):
    """The patched detector is consulted once; the second call reuses the answer."""
    mock_detect.return_value = True
    manager = VectorstoreSingleton()
    # Reset the stored flag so the first call below has to invoke the detector.
    setattr(manager, "_gpu_detected", None)

    outcomes = [manager.detect_gpu_once() for _ in range(2)]

    assert outcomes[0] is True
    assert outcomes[1] is True
    mock_detect.assert_called_once()
|
38
|
+
|
39
|
+
|
40
|
+
def test_get_event_loop_reuses_existing():
    """Back-to-back get_event_loop calls must yield the identical loop object."""
    manager = VectorstoreSingleton()
    initial_loop = manager.get_event_loop()
    repeated_loop = manager.get_event_loop()
    assert initial_loop is repeated_loop
|
46
|
+
|
47
|
+
|
48
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.utility")
def test_get_connection_creates_connection(_utility, db_mock, connections_mock):
    """get_connection should drop an existing connection, reconnect, and create the database.

    The mocks report that a connection already exists and that no databases
    are listed, so removal, connect, create and select are all expected.
    """
    connections_mock.has_connection.return_value = True
    db_mock.list_database.return_value = []
    manager = VectorstoreSingleton()

    returned_key = manager.get_connection("localhost", 19530, "test_db")

    assert returned_key == "default"
    connections_mock.remove_connection.assert_called_once()
    connections_mock.connect.assert_called_once()
    db_mock.create_database.assert_called_once_with("test_db")
    db_mock.using_database.assert_called_once_with("test_db")
|
63
|
+
|
64
|
+
|
65
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.Milvus")
def test_get_vector_store_creates_if_missing(mock_milvus):
    """get_vector_store should build and register a store for an unseen collection."""
    manager = VectorstoreSingleton()
    # Start from empty registries so "collection1" is guaranteed to be absent.
    for attr_name in ("_vector_stores", "_event_loops"):
        setattr(manager, attr_name, {})

    embedding = MagicMock()
    store = manager.get_vector_store(
        "collection1",
        embedding,
        {"host": "localhost", "port": 19530},
    )

    registry = getattr(manager, "_vector_stores")
    assert store is registry["collection1"]
    assert "collection1" in registry
    mock_milvus.assert_called_once()
|
81
|
+
|
82
|
+
|
83
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore.Vectorstore")
def test_get_vectorstore_factory(mock_vectorstore_cls):
    """A forced build returns the patched instance; the next plain call returns it again."""
    config = MagicMock()
    config.configure_mock(
        **{
            "milvus.collection_name": "demo",
            "milvus.embedding_dim": 768,
        }
    )
    embedding = MagicMock()

    created = get_vectorstore(embedding, config, force_new=True)
    assert created == mock_vectorstore_cls.return_value

    reused = get_vectorstore(embedding, config)
    assert reused == created
|
96
|
+
|
97
|
+
|
98
|
+
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore.Vectorstore")
def test_get_vectorstore_force_new(mock_vectorstore_cls):
    """With force_new=True a second, distinct Vectorstore must be returned."""
    first_store = MagicMock(name="Vectorstore1")
    second_store = MagicMock(name="Vectorstore2")
    # Each construction of the patched class yields the next mock in order.
    mock_vectorstore_cls.side_effect = [first_store, second_store]

    config = MagicMock()
    config.milvus.collection_name = "my_test_collection"
    config.milvus.embedding_dim = 768

    # The store mocks are also passed as the embedding-model argument.
    plain_result = get_vectorstore(first_store, config)
    forced_result = get_vectorstore(second_store, config, force_new=True)

    assert plain_result is first_store
    assert forced_result is second_store
    assert plain_result != forced_result
|
115
|
+
|
116
|
+
|
117
|
+
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.connect"
)
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.has_connection"
)
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
def test_get_connection_milvus_error(_, mock_has_connection, mock_connect):
    """get_connection should propagate a MilvusException raised while connecting.

    The patched ``connections.connect`` raises, and the test expects that
    exception to surface from ``get_connection`` with its text intact.
    """
    manager = VectorstoreSingleton()
    # Empty the connection registry so no previously stored entry is reused.
    setattr(manager, "_connections", {})

    mock_has_connection.return_value = False
    # MilvusException's first positional parameter is the error code, so the
    # text is passed by keyword to populate the message field.
    mock_connect.side_effect = MilvusException(message="Connection failed")

    with pytest.raises(MilvusException, match="Connection failed"):
        manager.get_connection("localhost", 19530, "test_db")
|
134
|
+
|
135
|
+
|
136
|
+
def test_get_event_loop_creates_new_loop_on_closed():
    """A closed current loop should make get_event_loop create and install a new one."""
    manager = VectorstoreSingleton()
    setattr(manager, "_event_loops", {})

    closed_loop = MagicMock()
    closed_loop.is_closed.return_value = True
    replacement_loop = MagicMock()

    with patch("asyncio.get_event_loop", return_value=closed_loop):
        with patch("asyncio.new_event_loop") as mock_new_loop:
            with patch("asyncio.set_event_loop") as mock_set_loop:
                mock_new_loop.return_value = replacement_loop
                result_loop = manager.get_event_loop()

    mock_new_loop.assert_called_once()
    mock_set_loop.assert_called_once_with(replacement_loop)
    assert result_loop == replacement_loop
|
@@ -0,0 +1,121 @@
|
|
1
|
+
"""Unit tests for vector normalization utilities for GPU COSINE support."""
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
import pytest
|
6
|
+
from langchain_core.embeddings import Embeddings
|
7
|
+
|
8
|
+
from aiagents4pharma.talk2scholars.tools.pdf.utils import vector_normalization as vn
|
9
|
+
|
10
|
+
|
11
|
+
def test_normalize_vector_nonzero():
    """A 3-4 vector should come back scaled to [0.6, 0.8]."""
    normalized = vn.normalize_vector([3.0, 4.0])
    assert pytest.approx(normalized) == [0.6, 0.8]
|
17
|
+
|
18
|
+
|
19
|
+
def test_normalize_vector_zero_logs_warning(caplog):
    """A zero vector is returned unchanged and a warning is logged."""
    zero_vector = [0.0, 0.0]
    with caplog.at_level(logging.WARNING):
        normalized = vn.normalize_vector(zero_vector)

    assert normalized == [0.0, 0.0]
    assert "Zero vector encountered" in caplog.text
|
25
|
+
|
26
|
+
|
27
|
+
def test_normalize_vectors_batch_empty():
    """Normalizing an empty batch should yield an empty result."""
    assert vn.normalize_vectors_batch([]) == []
|
31
|
+
|
32
|
+
|
33
|
+
def test_normalize_vectors_batch_normal_case():
    """Batch normalization of equal-length vectors returns one unit vector per input."""
    vectors = [[3, 4], [6, 8]]
    result = vn.normalize_vectors_batch(vectors)
    expected = [
        [0.6, 0.8],
        [0.6, 0.8],
    ]
    # zip() stops at the shorter sequence, so without this check a truncated
    # or empty result would skip the per-vector assertions and still pass.
    assert len(result) == len(expected)
    for actual_vec, expected_vec in zip(result, expected):
        assert pytest.approx(actual_vec) == expected_vec
|
43
|
+
|
44
|
+
|
45
|
+
def test_normalize_vectors_batch_with_zero_vector(caplog):
    """A batch containing a zero vector keeps its length and logs a warning."""
    batch = [[0.0, 0.0], [1.0, 0.0]]
    with caplog.at_level(logging.WARNING):
        normalized = vn.normalize_vectors_batch(batch)

    assert len(normalized) == 2
    assert "zero vectors during batch normalization" in caplog.text
    # Only the non-zero entry's value is pinned here.
    assert pytest.approx(normalized[1]) == [1.0, 0.0]
|
53
|
+
|
54
|
+
|
55
|
+
class DummyEmbedding(Embeddings):
    """Minimal Embeddings stub that always emits the vector [3.0, 4.0]."""

    def __init__(self):
        # Plain attribute used to check attribute delegation through a wrapper.
        self.test_attr = "test"

    def embed_documents(self, texts):
        """Return one fresh [3.0, 4.0] vector per entry in ``texts``."""
        vectors = []
        for _ in texts:
            vectors.append([3.0, 4.0])
        return vectors

    def embed_query(self, text):
        """Return [3.0, 4.0] regardless of the query text."""
        return [3.0, 4.0]
|
66
|
+
|
67
|
+
|
68
|
+
def test_normalizing_embeddings_embed_documents():
    """Document embeddings produced through the wrapper come back normalized."""
    wrapper = vn.NormalizingEmbeddings(DummyEmbedding())
    embedded = wrapper.embed_documents(["doc1", "doc2"])

    assert len(embedded) == 2
    assert pytest.approx(embedded[0]) == [0.6, 0.8]
|
74
|
+
|
75
|
+
|
76
|
+
def test_normalizing_embeddings_embed_query():
    """A query embedding produced through the wrapper comes back normalized."""
    wrapper = vn.NormalizingEmbeddings(DummyEmbedding())
    embedded_query = wrapper.embed_query("query")
    assert pytest.approx(embedded_query) == [0.6, 0.8]
|
81
|
+
|
82
|
+
|
83
|
+
def test_normalizing_embeddings_passthrough():
    """Attributes of the wrapped model are reachable through the wrapper."""
    wrapper = vn.NormalizingEmbeddings(DummyEmbedding())
    assert wrapper.test_attr == "test"
|
88
|
+
|
89
|
+
|
90
|
+
@pytest.mark.parametrize(
    "has_gpu,use_cosine,expected_log",
    [
        (True, True, "ENABLED"),
        (False, True, "DISABLED"),
        (True, False, "DISABLED"),
        (False, False, "DISABLED"),
    ],
)
def test_should_normalize_vectors_logging(has_gpu, use_cosine, expected_log, caplog):
    """Normalization is on only for GPU plus COSINE, and the decision is logged."""
    with caplog.at_level(logging.INFO):
        decision = vn.should_normalize_vectors(has_gpu, use_cosine)

    # Both inputs are plain bools, so the conjunction is exactly True or False.
    should_be_enabled = has_gpu and use_cosine
    assert decision is should_be_enabled
    assert expected_log in caplog.text
|
108
|
+
|
109
|
+
|
110
|
+
def test_wrap_embedding_model_if_needed_enabled():
    """With GPU and COSINE both set, the model is wrapped in NormalizingEmbeddings."""
    result = vn.wrap_embedding_model_if_needed(
        DummyEmbedding(),
        has_gpu=True,
        use_cosine=True,
    )
    assert isinstance(result, vn.NormalizingEmbeddings)
|
115
|
+
|
116
|
+
|
117
|
+
def test_wrap_embedding_model_if_needed_disabled():
    """Without a GPU the very same model object is handed back unwrapped."""
    original_model = DummyEmbedding()
    result = vn.wrap_embedding_model_if_needed(
        original_model,
        has_gpu=False,
        use_cosine=True,
    )
    assert result is original_model
|