PyPI - aiagents4pharma - Versions diffs - 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl - Mend

aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48)

aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""
+Unit tests for QAToolHelper routines in tool_helper.py
+"""
+import unittest
+from types import SimpleNamespace
+from unittest.mock import MagicMock, patch
+from aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper import QAToolHelper
+class TestQAToolHelper(unittest.TestCase):
+    """tests for QAToolHelper routines in tool_helper.py"""
+    def setUp(self):
+        """set up test case"""
+        self.helper = QAToolHelper()
+    def test_start_call_sets_config_and_call_id(self):
+        """test start_call sets config and call_id"""
+        cfg = SimpleNamespace(foo="bar")
+        self.helper.start_call(cfg, "call123")
+        self.assertIs(self.helper.config, cfg)
+        self.assertEqual(self.helper.call_id, "call123")
+    def test_init_vector_store_reuse(self):
+        """test init_vector_store reuses existing instance"""
+        emb_model = MagicMock()
+        first = self.helper.init_vector_store(emb_model)
+        second = self.helper.init_vector_store(emb_model)
+        self.assertIs(second, first)
+    def test_get_state_models_and_data_success(self):
+        """test get_state_models_and_data returns models and data"""
+        emb = MagicMock()
+        llm = MagicMock()
+        articles = {"p": {}}
+        state = {
+            "text_embedding_model": emb,
+            "llm_model": llm,
+            "article_data": articles,
+        }
+        ret_emb, ret_llm, ret_articles = self.helper.get_state_models_and_data(state)
+        self.assertIs(ret_emb, emb)
+        self.assertIs(ret_llm, llm)
+        self.assertIs(ret_articles, articles)
+    def test_get_state_models_and_data_missing_text_embedding(self):
+        """test get_state_models_and_data raises ValueError if missing text embedding"""
+        state = {"llm_model": MagicMock(), "article_data": {"p": {}}}
+        with self.assertRaises(ValueError) as cm:
+            self.helper.get_state_models_and_data(state)
+        self.assertEqual(str(cm.exception), "No text embedding model found in state.")
+    def test_get_state_models_and_data_missing_llm(self):
+        """test get_state_models_and_data raises ValueError if missing LLM"""
+        state = {"text_embedding_model": MagicMock(), "article_data": {"p": {}}}
+        with self.assertRaises(ValueError) as cm:
+            self.helper.get_state_models_and_data(state)
+        self.assertEqual(str(cm.exception), "No LLM model found in state.")
+    def test_get_state_models_and_data_missing_article_data(self):
+        """test get_state_models_and_data raises ValueError if missing article data"""
+        state = {"text_embedding_model": MagicMock(), "llm_model": MagicMock()}
+        with self.assertRaises(ValueError) as cm:
+            self.helper.get_state_models_and_data(state)
+        self.assertEqual(str(cm.exception), "No article_data found in state.")
+    def test_load_candidate_papers_calls_add_paper_only_for_valid(self):
+        """test load_candidate_papers calls add_paper only for valid candidates"""
+        vs = SimpleNamespace(loaded_papers=set(), add_paper=MagicMock())
+        articles = {"p1": {"pdf_url": "url1"}, "p2": {}, "p3": {"pdf_url": None}}
+        candidates = ["p1", "p2", "p3"]
+        self.helper.load_candidate_papers(vs, articles, candidates)
+        vs.add_paper.assert_called_once_with("p1", "url1", articles["p1"])
+    def test_load_candidate_papers_handles_add_paper_exception(self):
+        """test load_candidate_papers handles add_paper exception"""
+        # If add_paper raises, it should be caught and not propagate
+        vs = SimpleNamespace(
+            loaded_papers=set(), add_paper=MagicMock(side_effect=ValueError("oops"))
+        )
+        articles = {"p1": {"pdf_url": "url1"}}
+        # Start call to set call_id (used in logging)
+        self.helper.start_call(SimpleNamespace(), "call001")
+        # Should not raise despite exception
+        self.helper.load_candidate_papers(vs, articles, ["p1"])
+        vs.add_paper.assert_called_once_with("p1", "url1", articles["p1"])
+    def test_run_reranker_success_and_filtering(self):
+        """test run_reranker success and filtering"""
+        # Successful rerank returns filtered candidates
+        cfg = SimpleNamespace(top_k_papers=2)
+        self.helper.config = cfg
+        vs = MagicMock()
+        with patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.rank_papers_by_query",
+            return_value=["a", "c"],
+        ):
+            out = self.helper.run_reranker(vs, "q", ["a", "b"])
+        self.assertEqual(out, ["a"])
+    def test_run_reranker_exception_fallback(self):
+        """test run_reranker exception fallback"""
+        # On reranker failure, should return original candidates
+        cfg = SimpleNamespace(top_k_papers=5)
+        self.helper.config = cfg
+        vs = MagicMock()
+        def fail(*args, **kwargs):
+            raise RuntimeError("fail")
+        with patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.rank_papers_by_query",
+            side_effect=fail,
+        ):
+            candidates = ["x", "y"]
+            out = self.helper.run_reranker(vs, "q", candidates)
+        self.assertEqual(out, candidates)
+    def test_format_answer_with_and_without_sources(self):
+        """test format_answer with and without sources"""
+        articles = {"p1": {"Title": "T1"}, "p2": {"Title": "T2"}}
+        # With sources
+        with patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.generate_answer",
+            return_value={"output_text": "ans", "papers_used": ["p1", "p2"]},
+        ):
+            res = self.helper.format_answer("q", [], MagicMock(), articles)
+            self.assertIn("ans", res)
+            self.assertIn("Sources:", res)
+            self.assertIn("- T1", res)
+            self.assertIn("- T2", res)
+        # Without sources
+        with patch(
+            "aiagents4pharma.talk2scholars.tools.pdf.utils.tool_helper.generate_answer",
+            return_value={"output_text": "ans", "papers_used": []},
+        ):
+            res2 = self.helper.format_answer("q", [], MagicMock(), {})
+            self.assertEqual(res2, "ans")

aiagents4pharma/talk2scholars/tests/test_zotero_agent.py CHANGED Viewed

@@ -2,7 +2,6 @@
 Updated Unit Tests for the Zotero agent (Zotero Library Managent sub-agent).
 """
-# pylint: disable=redefined-outer-name
 from unittest import mock
 import pytest
 from langchain_core.messages import HumanMessage, AIMessage

aiagents4pharma/talk2scholars/tests/test_zotero_read.py CHANGED Viewed

@@ -17,8 +17,6 @@ from aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_pdf_downloader impo
 )
 from aiagents4pharma.talk2scholars.tools.zotero.zotero_read import zotero_read
-# pylint: disable=protected-access
-# pylint: disable=protected-access, too-many-arguments, too-many-positional-arguments
 # Dummy Hydra configuration to be used in tests
 dummy_zotero_read_config = SimpleNamespace(
@@ -211,15 +209,15 @@ class TestZoteroSearchTool(unittest.TestCase):
     @patch(
         "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.download_pdfs_in_parallel"
     )
-    def test_filtering_no_matching_papers(
-        self,
-        mock_batch_download,
-        mock_hydra_init,
-        mock_hydra_compose,
-        mock_zotero_class,
-        mock_get_item_collections,
-    ):
+    def test_filtering_no_matching_papers(self, *mocks):
         """Testing filtering when no paper matching"""
+        (
+            mock_batch_download,
+            mock_hydra_init,
+            mock_hydra_compose,
+            mock_zotero_class,
+            mock_get_item_collections,
+        ) = mocks
         mock_hydra_compose.return_value = dummy_cfg
         mock_hydra_init.return_value.__enter__.return_value = None
@@ -460,15 +458,15 @@ class TestZoteroSearchTool(unittest.TestCase):
     @patch(
         "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.requests.Session.get"
     )
-    def test_pdf_attachment_success(
-        self,
-        mock_session_get,
-        mock_hydra_init,
-        mock_hydra_compose,
-        mock_zotero_class,
-        mock_get_item_collections,
-    ):
+    def test_pdf_attachment_success(self, *mocks):
         """Test for pdf attachment success"""
+        (
+            mock_session_get,
+            mock_hydra_init,
+            mock_hydra_compose,
+            mock_zotero_class,
+            mock_get_item_collections,
+        ) = mocks
         mock_hydra_compose.return_value = dummy_cfg
         mock_hydra_init.return_value.__enter__.return_value = None

aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py CHANGED Viewed

@@ -5,7 +5,7 @@ Tool for downloading arXiv paper metadata and retrieving the PDF URL.
 import logging
 import xml.etree.ElementTree as ET
-from typing import Annotated, Any
+from typing import Annotated, Any, List
 import hydra
 import requests
@@ -23,12 +23,22 @@ logger = logging.getLogger(__name__)
 class DownloadArxivPaperInput(BaseModel):
     """Input schema for the arXiv paper download tool."""
-    arxiv_id: str = Field(
-        description="The arXiv paper ID used to retrieve the paper details and PDF URL."
+    arxiv_ids: List[str] = Field(
+        description="List of arXiv paper IDs used to retrieve paper details and PDF URLs."
     )
     tool_call_id: Annotated[str, InjectedToolCallId]
+# Helper to load arXiv download configuration
+def _get_arxiv_config() -> Any:
+    """Load arXiv download configuration."""
+    with hydra.initialize(version_base=None, config_path="../../configs"):
+        cfg = hydra.compose(
+            config_name="config", overrides=["tools/download_arxiv_paper=default"]
+        )
+    return cfg.tools.download_arxiv_paper
 def fetch_arxiv_metadata(
     api_url: str, arxiv_id: str, request_timeout: int
 ) -> ET.Element:
@@ -42,19 +52,21 @@ def fetch_arxiv_metadata(
 def extract_metadata(entry: ET.Element, ns: dict, arxiv_id: str) -> dict:
     """Extract metadata from the XML entry."""
     title_elem = entry.find("atom:title", ns)
-    title = title_elem.text.strip() if title_elem is not None else "N/A"
+    title = (title_elem.text or "").strip() if title_elem is not None else "N/A"
-    authors = [
-        author_elem.find("atom:name", ns).text.strip()
-        for author_elem in entry.findall("atom:author", ns)
-        if author_elem.find("atom:name", ns) is not None
-    ]
+    authors = []
+    for author_elem in entry.findall("atom:author", ns):
+        name_elem = author_elem.find("atom:name", ns)
+        if name_elem is not None and name_elem.text:
+            authors.append(name_elem.text.strip())
     summary_elem = entry.find("atom:summary", ns)
-    abstract = summary_elem.text.strip() if summary_elem is not None else "N/A"
+    abstract = (summary_elem.text or "").strip() if summary_elem is not None else "N/A"
     published_elem = entry.find("atom:published", ns)
-    pub_date = published_elem.text.strip() if published_elem is not None else "N/A"
+    pub_date = (
+        (published_elem.text or "").strip() if published_elem is not None else "N/A"
+    )
     pdf_url = next(
         (
@@ -80,43 +92,86 @@ def extract_metadata(entry: ET.Element, ns: dict, arxiv_id: str) -> dict:
     }
-@tool(args_schema=DownloadArxivPaperInput, parse_docstring=True)
+def _get_snippet(abstract: str) -> str:
+    """Extract the first one or two sentences from an abstract."""
+    if not abstract or abstract == "N/A":
+        return ""
+    sentences = abstract.split(". ")
+    snippet_sentences = sentences[:2]
+    snippet = ". ".join(snippet_sentences)
+    if not snippet.endswith("."):
+        snippet += "."
+    return snippet
+def _build_summary(article_data: dict[str, Any]) -> str:
+    """Build a summary string for up to three papers with snippets."""
+    top = list(article_data.values())[:3]
+    lines: list[str] = []
+    for idx, paper in enumerate(top):
+        title = paper.get("Title", "N/A")
+        pub_date = paper.get("Publication Date", "N/A")
+        url = paper.get("URL", "")
+        snippet = _get_snippet(paper.get("Abstract", ""))
+        line = f"{idx+1}. {title} ({pub_date})"
+        if url:
+            line += f"\n   View PDF: {url}"
+        if snippet:
+            line += f"\n   Abstract snippet: {snippet}"
+        lines.append(line)
+    summary = "\n".join(lines)
+    return (
+        "Download was successful. Papers metadata are attached as an artifact. "
+        "Here is a summary of the results:\n"
+        f"Number of papers found: {len(article_data)}\n"
+        "Top 3 papers:\n" + summary
+    )
+@tool(
+    args_schema=DownloadArxivPaperInput,
+    parse_docstring=True,
+)
 def download_arxiv_paper(
-    arxiv_id: str,
+    arxiv_ids: List[str],
     tool_call_id: Annotated[str, InjectedToolCallId],
 ) -> Command[Any]:
     """
-    Get metadata and PDF URL for an arXiv paper using its unique arXiv ID.
+    Get metadata and PDF URLs for one or more arXiv papers using their unique arXiv IDs.
     """
-    logger.info("Fetching metadata from arXiv for paper ID: %s", arxiv_id)
+    logger.info("Fetching metadata from arXiv for paper IDs: %s", arxiv_ids)
     # Load configuration
-    with hydra.initialize(version_base=None, config_path="../../configs"):
-        cfg = hydra.compose(
-            config_name="config", overrides=["tools/download_arxiv_paper=default"]
+    cfg = _get_arxiv_config()
+    api_url = cfg.api_url
+    request_timeout = cfg.request_timeout
+    # Aggregate results
+    article_data: dict[str, Any] = {}
+    for aid in arxiv_ids:
+        logger.info("Processing arXiv ID: %s", aid)
+        # Fetch and parse metadata
+        entry = fetch_arxiv_metadata(api_url, aid, request_timeout).find(
+            "atom:entry", {"atom": "http://www.w3.org/2005/Atom"}
+        )
+        if entry is None:
+            logger.warning("No entry found for arXiv ID %s", aid)
+            continue
+        article_data[aid] = extract_metadata(
+            entry, {"atom": "http://www.w3.org/2005/Atom"}, aid
         )
-        api_url = cfg.tools.download_arxiv_paper.api_url
-        request_timeout = cfg.tools.download_arxiv_paper.request_timeout
-    # Fetch and parse metadata
-    root = fetch_arxiv_metadata(api_url, arxiv_id, request_timeout)
-    ns = {"atom": "http://www.w3.org/2005/Atom"}
-    entry = root.find("atom:entry", ns)
-    if entry is None:
-        raise ValueError(f"No entry found for arXiv ID {arxiv_id}")
-    # Extract metadata
-    metadata = extract_metadata(entry, ns, arxiv_id)
-    # Create article_data entry with the paper ID as the key
-    article_data = {arxiv_id: metadata}
-    content = f"Successfully retrieved metadata and PDF URL for arXiv ID {arxiv_id}"
+    # Build and return summary
+    content = _build_summary(article_data)
     return Command(
         update={
             "article_data": article_data,
-            "messages": [ToolMessage(content=content, tool_call_id=tool_call_id)],
+            "messages": [
+                ToolMessage(
+                    content=content,
+                    tool_call_id=tool_call_id,
+                    artifact=article_data,
+                )
+            ],
         }
     )

aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl