aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
  2. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
  3. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
  7. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
  8. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
  9. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
  10. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
  11. aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
  12. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
  13. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
  14. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
  15. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
  16. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
  17. aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
  18. aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
  19. aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
  20. aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
  21. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
  23. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
  24. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
  25. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
  32. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
  33. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
  34. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
  35. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
  36. aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
  37. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
  38. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
  39. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
  40. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
  41. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
  42. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
  43. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
  44. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
  45. aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
  46. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
  47. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
  48. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,140 @@
1
+ """
2
+ Unit tests for QAToolHelper routines in tool_helper.py
3
+ """
4
+
5
+ import unittest
6
+ from types import SimpleNamespace
7
+ from unittest.mock import MagicMock, patch
8
+
9
+ from aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper import QAToolHelper
10
+
11
+
12
+ class TestQAToolHelper(unittest.TestCase):
13
+ """tests for QAToolHelper routines in tool_helper.py"""
14
+
15
+ def setUp(self):
16
+ """set up test case"""
17
+ self.helper = QAToolHelper()
18
+
19
+ def test_start_call_sets_config_and_call_id(self):
20
+ """test start_call sets config and call_id"""
21
+ cfg = SimpleNamespace(foo="bar")
22
+ self.helper.start_call(cfg, "call123")
23
+ self.assertIs(self.helper.config, cfg)
24
+ self.assertEqual(self.helper.call_id, "call123")
25
+
26
+ def test_init_vector_store_reuse(self):
27
+ """test init_vector_store reuses existing instance"""
28
+ emb_model = MagicMock()
29
+ first = self.helper.init_vector_store(emb_model)
30
+ second = self.helper.init_vector_store(emb_model)
31
+ self.assertIs(second, first)
32
+
33
+ def test_get_state_models_and_data_success(self):
34
+ """test get_state_models_and_data returns models and data"""
35
+ emb = MagicMock()
36
+ llm = MagicMock()
37
+ articles = {"p": {}}
38
+ state = {
39
+ "text_embedding_model": emb,
40
+ "llm_model": llm,
41
+ "article_data": articles,
42
+ }
43
+ ret_emb, ret_llm, ret_articles = self.helper.get_state_models_and_data(state)
44
+ self.assertIs(ret_emb, emb)
45
+ self.assertIs(ret_llm, llm)
46
+ self.assertIs(ret_articles, articles)
47
+
48
+ def test_get_state_models_and_data_missing_text_embedding(self):
49
+ """test get_state_models_and_data raises ValueError if missing text embedding"""
50
+ state = {"llm_model": MagicMock(), "article_data": {"p": {}}}
51
+ with self.assertRaises(ValueError) as cm:
52
+ self.helper.get_state_models_and_data(state)
53
+ self.assertEqual(str(cm.exception), "No text embedding model found in state.")
54
+
55
+ def test_get_state_models_and_data_missing_llm(self):
56
+ """test get_state_models_and_data raises ValueError if missing LLM"""
57
+ state = {"text_embedding_model": MagicMock(), "article_data": {"p": {}}}
58
+ with self.assertRaises(ValueError) as cm:
59
+ self.helper.get_state_models_and_data(state)
60
+ self.assertEqual(str(cm.exception), "No LLM model found in state.")
61
+
62
+ def test_get_state_models_and_data_missing_article_data(self):
63
+ """test get_state_models_and_data raises ValueError if missing article data"""
64
+ state = {"text_embedding_model": MagicMock(), "llm_model": MagicMock()}
65
+ with self.assertRaises(ValueError) as cm:
66
+ self.helper.get_state_models_and_data(state)
67
+ self.assertEqual(str(cm.exception), "No article_data found in state.")
68
+
69
+ def test_load_candidate_papers_calls_add_paper_only_for_valid(self):
70
+ """test load_candidate_papers calls add_paper only for valid candidates"""
71
+ vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
72
+ articles = {"p1": {"pdf_url": "url1"}, "p2": {}, "p3": {"pdf_url": None}}
73
+ candidates = ["p1", "p2", "p3"]
74
+ self.helper.load_candidate_papers(vs, articles, candidates)
75
+ vs.add_paper.assert_called_once_with("p1", "url1", articles["p1"])
76
+
77
+ def test_load_candidate_papers_handles_add_paper_exception(self):
78
+ """test load_candidate_papers handles add_paper exception"""
79
+ # If add_paper raises, it should be caught and not propagate
80
+ vs = SimpleNamespace(
81
+ loaded_papers=set(), add_paper=MagicMock(side_effect=ValueError("oops"))
82
+ )
83
+ articles = {"p1": {"pdf_url": "url1"}}
84
+ # Start call to set call_id (used in logging)
85
+ self.helper.start_call(SimpleNamespace(), "call001")
86
+ # Should not raise despite exception
87
+ self.helper.load_candidate_papers(vs, articles, ["p1"])
88
+ vs.add_paper.assert_called_once_with("p1", "url1", articles["p1"])
89
+
90
+ def test_run_reranker_success_and_filtering(self):
91
+ """test run_reranker success and filtering"""
92
+ # Successful rerank returns filtered candidates
93
+ cfg = SimpleNamespace(top_k_papers=2)
94
+ self.helper.config = cfg
95
+ vs = MagicMock()
96
+ with patch(
97
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.rank_papers_by_query",
98
+ return_value=["a", "c"],
99
+ ):
100
+ out = self.helper.run_reranker(vs, "q", ["a", "b"])
101
+ self.assertEqual(out, ["a"])
102
+
103
+ def test_run_reranker_exception_fallback(self):
104
+ """test run_reranker exception fallback"""
105
+ # On reranker failure, should return original candidates
106
+ cfg = SimpleNamespace(top_k_papers=5)
107
+ self.helper.config = cfg
108
+ vs = MagicMock()
109
+
110
+ def fail(*args, **kwargs):
111
+ raise RuntimeError("fail")
112
+
113
+ with patch(
114
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.rank_papers_by_query",
115
+ side_effect=fail,
116
+ ):
117
+ candidates = ["x", "y"]
118
+ out = self.helper.run_reranker(vs, "q", candidates)
119
+ self.assertEqual(out, candidates)
120
+
121
+ def test_format_answer_with_and_without_sources(self):
122
+ """test format_answer with and without sources"""
123
+ articles = {"p1": {"Title": "T1"}, "p2": {"Title": "T2"}}
124
+ # With sources
125
+ with patch(
126
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.generate_answer",
127
+ return_value={"output_text": "ans", "papers_used": ["p1", "p2"]},
128
+ ):
129
+ res = self.helper.format_answer("q", [], MagicMock(), articles)
130
+ self.assertIn("ans", res)
131
+ self.assertIn("Sources:", res)
132
+ self.assertIn("- T1", res)
133
+ self.assertIn("- T2", res)
134
+ # Without sources
135
+ with patch(
136
+ "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.generate_answer",
137
+ return_value={"output_text": "ans", "papers_used": []},
138
+ ):
139
+ res2 = self.helper.format_answer("q", [], MagicMock(), {})
140
+ self.assertEqual(res2, "ans")
@@ -2,7 +2,6 @@
2
2
  Updated Unit Tests for the Zotero agent (Zotero Library Management sub-agent).
3
3
  """
4
4
 
5
- # pylint: disable=redefined-outer-name
6
5
  from unittest import mock
7
6
  import pytest
8
7
  from langchain_core.messages import HumanMessage, AIMessage
@@ -17,8 +17,6 @@ from aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_pdf_downloader impo
17
17
  )
18
18
  from aiagents4pharma.talk2scholars.tools.zotero.zotero_read import zotero_read
19
19
 
20
- # pylint: disable=protected-access
21
- # pylint: disable=protected-access, too-many-arguments, too-many-positional-arguments
22
20
 
23
21
  # Dummy Hydra configuration to be used in tests
24
22
  dummy_zotero_read_config = SimpleNamespace(
@@ -211,15 +209,15 @@ class TestZoteroSearchTool(unittest.TestCase):
211
209
  @patch(
212
210
  "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.download_pdfs_in_parallel"
213
211
  )
214
- def test_filtering_no_matching_papers(
215
- self,
216
- mock_batch_download,
217
- mock_hydra_init,
218
- mock_hydra_compose,
219
- mock_zotero_class,
220
- mock_get_item_collections,
221
- ):
212
+ def test_filtering_no_matching_papers(self, *mocks):
222
213
  """Test filtering when no papers match"""
214
+ (
215
+ mock_batch_download,
216
+ mock_hydra_init,
217
+ mock_hydra_compose,
218
+ mock_zotero_class,
219
+ mock_get_item_collections,
220
+ ) = mocks
223
221
  mock_hydra_compose.return_value = dummy_cfg
224
222
  mock_hydra_init.return_value.__enter__.return_value = None
225
223
 
@@ -460,15 +458,15 @@ class TestZoteroSearchTool(unittest.TestCase):
460
458
  @patch(
461
459
  "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.requests.Session.get"
462
460
  )
463
- def test_pdf_attachment_success(
464
- self,
465
- mock_session_get,
466
- mock_hydra_init,
467
- mock_hydra_compose,
468
- mock_zotero_class,
469
- mock_get_item_collections,
470
- ):
461
+ def test_pdf_attachment_success(self, *mocks):
471
462
  """Test for pdf attachment success"""
463
+ (
464
+ mock_session_get,
465
+ mock_hydra_init,
466
+ mock_hydra_compose,
467
+ mock_zotero_class,
468
+ mock_get_item_collections,
469
+ ) = mocks
472
470
  mock_hydra_compose.return_value = dummy_cfg
473
471
  mock_hydra_init.return_value.__enter__.return_value = None
474
472
 
@@ -5,7 +5,7 @@ Tool for downloading arXiv paper metadata and retrieving the PDF URL.
5
5
 
6
6
  import logging
7
7
  import xml.etree.ElementTree as ET
8
- from typing import Annotated, Any
8
+ from typing import Annotated, Any, List
9
9
 
10
10
  import hydra
11
11
  import requests
@@ -23,12 +23,22 @@ logger = logging.getLogger(__name__)
23
23
  class DownloadArxivPaperInput(BaseModel):
24
24
  """Input schema for the arXiv paper download tool."""
25
25
 
26
- arxiv_id: str = Field(
27
- description="The arXiv paper ID used to retrieve the paper details and PDF URL."
26
+ arxiv_ids: List[str] = Field(
27
+ description="List of arXiv paper IDs used to retrieve paper details and PDF URLs."
28
28
  )
29
29
  tool_call_id: Annotated[str, InjectedToolCallId]
30
30
 
31
31
 
32
+ # Helper to load arXiv download configuration
33
+ def _get_arxiv_config() -> Any:
34
+ """Load arXiv download configuration."""
35
+ with hydra.initialize(version_base=None, config_path="../../configs"):
36
+ cfg = hydra.compose(
37
+ config_name="config", overrides=["tools/download_arxiv_paper=default"]
38
+ )
39
+ return cfg.tools.download_arxiv_paper
40
+
41
+
32
42
  def fetch_arxiv_metadata(
33
43
  api_url: str, arxiv_id: str, request_timeout: int
34
44
  ) -> ET.Element:
@@ -42,19 +52,21 @@ def fetch_arxiv_metadata(
42
52
  def extract_metadata(entry: ET.Element, ns: dict, arxiv_id: str) -> dict:
43
53
  """Extract metadata from the XML entry."""
44
54
  title_elem = entry.find("atom:title", ns)
45
- title = title_elem.text.strip() if title_elem is not None else "N/A"
55
+ title = (title_elem.text or "").strip() if title_elem is not None else "N/A"
46
56
 
47
- authors = [
48
- author_elem.find("atom:name", ns).text.strip()
49
- for author_elem in entry.findall("atom:author", ns)
50
- if author_elem.find("atom:name", ns) is not None
51
- ]
57
+ authors = []
58
+ for author_elem in entry.findall("atom:author", ns):
59
+ name_elem = author_elem.find("atom:name", ns)
60
+ if name_elem is not None and name_elem.text:
61
+ authors.append(name_elem.text.strip())
52
62
 
53
63
  summary_elem = entry.find("atom:summary", ns)
54
- abstract = summary_elem.text.strip() if summary_elem is not None else "N/A"
64
+ abstract = (summary_elem.text or "").strip() if summary_elem is not None else "N/A"
55
65
 
56
66
  published_elem = entry.find("atom:published", ns)
57
- pub_date = published_elem.text.strip() if published_elem is not None else "N/A"
67
+ pub_date = (
68
+ (published_elem.text or "").strip() if published_elem is not None else "N/A"
69
+ )
58
70
 
59
71
  pdf_url = next(
60
72
  (
@@ -80,43 +92,86 @@ def extract_metadata(entry: ET.Element, ns: dict, arxiv_id: str) -> dict:
80
92
  }
81
93
 
82
94
 
83
- @tool(args_schema=DownloadArxivPaperInput, parse_docstring=True)
95
+ def _get_snippet(abstract: str) -> str:
96
+ """Extract the first one or two sentences from an abstract."""
97
+ if not abstract or abstract == "N/A":
98
+ return ""
99
+ sentences = abstract.split(". ")
100
+ snippet_sentences = sentences[:2]
101
+ snippet = ". ".join(snippet_sentences)
102
+ if not snippet.endswith("."):
103
+ snippet += "."
104
+ return snippet
105
+
106
+
107
+ def _build_summary(article_data: dict[str, Any]) -> str:
108
+ """Build a summary string for up to three papers with snippets."""
109
+ top = list(article_data.values())[:3]
110
+ lines: list[str] = []
111
+ for idx, paper in enumerate(top):
112
+ title = paper.get("Title", "N/A")
113
+ pub_date = paper.get("Publication Date", "N/A")
114
+ url = paper.get("URL", "")
115
+ snippet = _get_snippet(paper.get("Abstract", ""))
116
+ line = f"{idx+1}. {title} ({pub_date})"
117
+ if url:
118
+ line += f"\n View PDF: {url}"
119
+ if snippet:
120
+ line += f"\n Abstract snippet: {snippet}"
121
+ lines.append(line)
122
+ summary = "\n".join(lines)
123
+ return (
124
+ "Download was successful. Papers metadata are attached as an artifact. "
125
+ "Here is a summary of the results:\n"
126
+ f"Number of papers found: {len(article_data)}\n"
127
+ "Top 3 papers:\n" + summary
128
+ )
129
+
130
+
131
+ @tool(
132
+ args_schema=DownloadArxivPaperInput,
133
+ parse_docstring=True,
134
+ )
84
135
  def download_arxiv_paper(
85
- arxiv_id: str,
136
+ arxiv_ids: List[str],
86
137
  tool_call_id: Annotated[str, InjectedToolCallId],
87
138
  ) -> Command[Any]:
88
139
  """
89
- Get metadata and PDF URL for an arXiv paper using its unique arXiv ID.
140
+ Get metadata and PDF URLs for one or more arXiv papers using their unique arXiv IDs.
90
141
  """
91
- logger.info("Fetching metadata from arXiv for paper ID: %s", arxiv_id)
142
+ logger.info("Fetching metadata from arXiv for paper IDs: %s", arxiv_ids)
92
143
 
93
144
  # Load configuration
94
- with hydra.initialize(version_base=None, config_path="../../configs"):
95
- cfg = hydra.compose(
96
- config_name="config", overrides=["tools/download_arxiv_paper=default"]
145
+ cfg = _get_arxiv_config()
146
+ api_url = cfg.api_url
147
+ request_timeout = cfg.request_timeout
148
+
149
+ # Aggregate results
150
+ article_data: dict[str, Any] = {}
151
+ for aid in arxiv_ids:
152
+ logger.info("Processing arXiv ID: %s", aid)
153
+ # Fetch and parse metadata
154
+ entry = fetch_arxiv_metadata(api_url, aid, request_timeout).find(
155
+ "atom:entry", {"atom": "http://www.w3.org/2005/Atom"}
156
+ )
157
+ if entry is None:
158
+ logger.warning("No entry found for arXiv ID %s", aid)
159
+ continue
160
+ article_data[aid] = extract_metadata(
161
+ entry, {"atom": "http://www.w3.org/2005/Atom"}, aid
97
162
  )
98
- api_url = cfg.tools.download_arxiv_paper.api_url
99
- request_timeout = cfg.tools.download_arxiv_paper.request_timeout
100
-
101
- # Fetch and parse metadata
102
- root = fetch_arxiv_metadata(api_url, arxiv_id, request_timeout)
103
- ns = {"atom": "http://www.w3.org/2005/Atom"}
104
-
105
- entry = root.find("atom:entry", ns)
106
- if entry is None:
107
- raise ValueError(f"No entry found for arXiv ID {arxiv_id}")
108
-
109
- # Extract metadata
110
- metadata = extract_metadata(entry, ns, arxiv_id)
111
-
112
- # Create article_data entry with the paper ID as the key
113
- article_data = {arxiv_id: metadata}
114
-
115
- content = f"Successfully retrieved metadata and PDF URL for arXiv ID {arxiv_id}"
116
163
 
164
+ # Build and return summary
165
+ content = _build_summary(article_data)
117
166
  return Command(
118
167
  update={
119
168
  "article_data": article_data,
120
- "messages": [ToolMessage(content=content, tool_call_id=tool_call_id)],
169
+ "messages": [
170
+ ToolMessage(
171
+ content=content,
172
+ tool_call_id=tool_call_id,
173
+ artifact=article_data,
174
+ )
175
+ ],
121
176
  }
122
177
  )