aiagents4pharma 1.40.1__py3-none-any.whl → 1.41.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
  2. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
  3. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
  4. aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
  5. aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
  6. aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
  7. aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
  8. aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
  9. aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
  10. aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
  11. aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
  12. aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
  13. aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
  14. aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
  15. aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
  16. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
  17. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
  18. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
  19. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
  20. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
  21. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
  22. aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
  23. aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
  24. aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
  25. aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
  32. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
  33. aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
  34. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
  35. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
  36. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
  37. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
  38. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
  39. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
  40. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
  41. {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/METADATA +27 -115
  42. {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/RECORD +45 -23
  43. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
  44. {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/WHEEL +0 -0
  45. {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/licenses/LICENSE +0 -0
  46. {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.41.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,156 @@
1
+ """Tests for singleton_manager: manages vector store connections and event loops."""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+ import pytest
5
+ from pymilvus.exceptions import MilvusException
6
+
7
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore import (
8
+ get_vectorstore,
9
+ )
10
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager import (
11
+ VectorstoreSingleton,
12
+ )
13
+
14
+
15
def test_singleton_instance_identity():
    """Constructing VectorstoreSingleton twice must yield one shared object."""
    assert VectorstoreSingleton() is VectorstoreSingleton()
20
+
21
+
22
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.detect_nvidia_gpu"
)
def test_detect_gpu_once(mock_detect):
    """Two detect_gpu_once calls must invoke the patched detector only once."""
    mock_detect.return_value = True
    manager = VectorstoreSingleton()
    # Clear any previously stored result so the first call reaches the detector.
    setattr(manager, "_gpu_detected", None)

    for _ in range(2):
        assert manager.detect_gpu_once() is True

    mock_detect.assert_called_once()
38
+
39
+
40
def test_get_event_loop_reuses_existing():
    """Back-to-back get_event_loop calls must hand back the same loop object."""
    manager = VectorstoreSingleton()
    assert manager.get_event_loop() is manager.get_event_loop()
46
+
47
+
48
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.utility")
def test_get_connection_creates_connection(_, mock_db, mock_conns):
    """get_connection should reconnect and create the database when it is absent.

    The mocks report an existing connection and an empty database list, so the
    call is expected to remove the old connection, connect once, create
    ``test_db`` and switch to it.
    """
    manager = VectorstoreSingleton()
    mock_db.list_database.return_value = []
    mock_conns.has_connection.return_value = True

    key = manager.get_connection("localhost", 19530, "test_db")

    assert key == "default"
    mock_conns.remove_connection.assert_called_once()
    mock_conns.connect.assert_called_once()
    # Both database calls must receive exactly the requested name.
    for db_call in (mock_db.create_database, mock_db.using_database):
        db_call.assert_called_once_with("test_db")
63
+
64
+
65
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.Milvus")
def test_get_vector_store_creates_if_missing(mock_milvus):
    """get_vector_store should build and cache a store for an unseen collection.

    The singleton's ``_vector_stores`` and ``_event_loops`` maps are emptied
    first so the call has nothing to reuse; the patched ``Milvus`` class must
    then be instantiated exactly once and the result stored under the
    collection name.
    """
    singleton = VectorstoreSingleton()
    setattr(singleton, "_vector_stores", {})
    setattr(singleton, "_event_loops", {})

    mock_embed = MagicMock()
    connection_args = {"host": "localhost", "port": 19530}

    vs = singleton.get_vector_store("collection1", mock_embed, connection_args)

    vector_stores = getattr(singleton, "_vector_stores")
    # Check membership before subscripting so a missing entry is reported as
    # an assertion failure instead of an uncaught KeyError.
    assert "collection1" in vector_stores
    assert vs is vector_stores["collection1"]
    mock_milvus.assert_called_once()
81
+
82
+
83
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore.Vectorstore")
def test_get_vectorstore_factory(mock_vectorstore_cls):
    """get_vectorstore should build a Vectorstore, then return an equal one again."""
    embedding = MagicMock()
    config = MagicMock()
    config.milvus.collection_name = "demo"
    config.milvus.embedding_dim = 768

    # Force creation first so the result comes from the patched class.
    created = get_vectorstore(embedding, config, force_new=True)
    assert created == mock_vectorstore_cls.return_value

    # A follow-up call without force_new must compare equal to the first result.
    fetched_again = get_vectorstore(embedding, config)
    assert fetched_again == created
96
+
97
+
98
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore.Vectorstore")
def test_get_vectorstore_force_new(mock_vectorstore_cls):
    """force_new=True must produce a second, distinct Vectorstore instance."""
    first_store = MagicMock(name="Vectorstore1")
    second_store = MagicMock(name="Vectorstore2")
    # Each construction of the patched class yields the next prepared mock.
    mock_vectorstore_cls.side_effect = [first_store, second_store]

    config = MagicMock()
    config.milvus.collection_name = "my_test_collection"
    config.milvus.embedding_dim = 768

    initial = get_vectorstore(first_store, config)
    forced = get_vectorstore(second_store, config, force_new=True)

    assert initial is first_store
    assert forced is second_store
    assert initial != forced
115
+
116
+
117
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.connect"
)
@patch(
    "aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.has_connection"
)
@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
def test_get_connection_milvus_error(_, mock_has_connection, mock_connect):
    """A MilvusException raised by connect must propagate out of get_connection."""
    singleton = VectorstoreSingleton()
    # Start from an empty connection map so nothing cached is returned.
    setattr(singleton, "_connections", {})

    mock_connect.side_effect = MilvusException("Connection failed")
    mock_has_connection.return_value = False

    with pytest.raises(MilvusException, match="Connection failed"):
        singleton.get_connection("localhost", 19530, "test_db")
134
+
135
+
136
def test_get_event_loop_creates_new_loop_on_closed():
    """get_event_loop must swap in a fresh loop when the current one is closed."""
    singleton = VectorstoreSingleton()
    setattr(singleton, "_event_loops", {})

    closed_loop = MagicMock()
    closed_loop.is_closed.return_value = True
    replacement_loop = MagicMock()

    with (
        patch("asyncio.get_event_loop", return_value=closed_loop),
        patch(
            "asyncio.new_event_loop", return_value=replacement_loop
        ) as new_loop_factory,
        patch("asyncio.set_event_loop") as set_loop,
    ):
        returned_loop = singleton.get_event_loop()

    # The replacement must be created once, installed, and returned.
    new_loop_factory.assert_called_once()
    set_loop.assert_called_once_with(replacement_loop)
    assert returned_loop == replacement_loop
@@ -0,0 +1,121 @@
1
+ """Unit tests for vector normalization utilities for GPU COSINE support."""
2
+
3
+ import logging
4
+
5
+ import pytest
6
+ from langchain_core.embeddings import Embeddings
7
+
8
+ from aiagents4pharma.talk2scholars.tools.pdf.utils import vector_normalization as vn
9
+
10
+
11
def test_normalize_vector_nonzero():
    """A 3-4 vector (length 5) must normalize to 0.6, 0.8."""
    normalized = vn.normalize_vector([3.0, 4.0])
    assert pytest.approx(normalized) == [0.6, 0.8]
17
+
18
+
19
def test_normalize_vector_zero_logs_warning(caplog):
    """A zero vector must come back unchanged and emit a warning."""
    with caplog.at_level(logging.WARNING):
        normalized = vn.normalize_vector([0.0, 0.0])

    assert normalized == [0.0, 0.0]
    assert "Zero vector encountered" in caplog.text
25
+
26
+
27
def test_normalize_vectors_batch_empty():
    """Normalizing an empty batch must give back an empty list."""
    assert vn.normalize_vectors_batch([]) == []
31
+
32
+
33
def test_normalize_vectors_batch_normal_case():
    """Batch normalization must scale every equal-dimension row to unit length."""
    vectors = [[3, 4], [6, 8]]
    expected = [
        [0.6, 0.8],
        [0.6, 0.8],
    ]

    result = vn.normalize_vectors_batch(vectors)

    # zip() stops at the shorter input, so pin the row count first; otherwise
    # an empty or truncated result would pass the loop below vacuously.
    assert len(result) == len(expected)
    for actual_row, expected_row in zip(result, expected):
        assert pytest.approx(actual_row) == expected_row
43
+
44
+
45
def test_normalize_vectors_batch_with_zero_vector(caplog):
    """A batch containing a zero row must still normalize and log a warning."""
    batch = [[0.0, 0.0], [1.0, 0.0]]

    with caplog.at_level(logging.WARNING):
        normalized = vn.normalize_vectors_batch(batch)

    assert len(normalized) == 2
    assert "zero vectors during batch normalization" in caplog.text
    # The non-zero row is already unit length and must be preserved.
    assert pytest.approx(normalized[1]) == [1.0, 0.0]
53
+
54
+
55
class DummyEmbedding(Embeddings):
    """Stub embedding model that always produces the vector [3.0, 4.0]."""

    def __init__(self):
        # Marker attribute read by the attribute pass-through test.
        self.test_attr = "test"

    def embed_query(self, text):
        """Return the fixed vector regardless of the query text."""
        return [3.0, 4.0]

    def embed_documents(self, texts):
        """Return one fixed vector per entry in ``texts``."""
        return [self.embed_query(entry) for entry in texts]
66
+
67
+
68
def test_normalizing_embeddings_embed_documents():
    """Document vectors from the wrapper must be unit-normalized."""
    wrapper = vn.NormalizingEmbeddings(DummyEmbedding())

    vectors = wrapper.embed_documents(["doc1", "doc2"])

    assert len(vectors) == 2
    assert pytest.approx(vectors[0]) == [0.6, 0.8]
74
+
75
+
76
def test_normalizing_embeddings_embed_query():
    """The query vector from the wrapper must be unit-normalized."""
    wrapper = vn.NormalizingEmbeddings(DummyEmbedding())
    query_vector = wrapper.embed_query("query")
    assert pytest.approx(query_vector) == [0.6, 0.8]
81
+
82
+
83
def test_normalizing_embeddings_passthrough():
    """Attributes of the wrapped model must be readable through the wrapper."""
    wrapper = vn.NormalizingEmbeddings(DummyEmbedding())
    assert wrapper.test_attr == "test"
88
+
89
+
90
@pytest.mark.parametrize(
    "has_gpu,use_cosine,expected_log",
    [
        (True, True, "ENABLED"),
        (False, True, "DISABLED"),
        (True, False, "DISABLED"),
        (False, False, "DISABLED"),
    ],
)
def test_should_normalize_vectors_logging(has_gpu, use_cosine, expected_log, caplog):
    """should_normalize_vectors must be True only for GPU plus COSINE, and log it."""
    with caplog.at_level(logging.INFO):
        decision = vn.should_normalize_vectors(has_gpu, use_cosine)

    # Both flags are plain bools, so their conjunction is the exact expected value.
    assert decision is (has_gpu and use_cosine)
    assert expected_log in caplog.text
108
+
109
+
110
def test_wrap_embedding_model_if_needed_enabled():
    """With GPU and COSINE both on, the model must come back wrapped."""
    result = vn.wrap_embedding_model_if_needed(
        DummyEmbedding(), has_gpu=True, use_cosine=True
    )
    assert isinstance(result, vn.NormalizingEmbeddings)
115
+
116
+
117
def test_wrap_embedding_model_if_needed_disabled():
    """Without a GPU, the very same model object must be returned unwrapped."""
    original = DummyEmbedding()
    result = vn.wrap_embedding_model_if_needed(
        original, has_gpu=False, use_cosine=True
    )
    assert result is original