aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
  2. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
  3. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
  7. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
  8. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
  9. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
  10. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
  11. aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
  12. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
  13. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
  14. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
  15. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
  16. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
  17. aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
  18. aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
  19. aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
  20. aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
  21. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
  23. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
  24. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
  25. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
  32. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
  33. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
  34. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
  35. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
  36. aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
  37. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
  38. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
  39. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
  40. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
  41. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
  42. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
  43. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
  44. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
  45. aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
  46. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
  47. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
  48. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
@@ -48,13 +48,13 @@ def get_app(uniq_id, llm_model: BaseChatModel):
     >>> app = get_app("thread_123")
     >>> result = app.invoke(initial_state)
     """
-    if hasattr(llm_model, "model_name"):
-        if llm_model.model_name == "gpt-4o-mini":
-            llm_model = ChatOpenAI(
-                model="gpt-4o-mini",
-                temperature=0,
-                model_kwargs={"parallel_tool_calls": False},
-            )
+    # Replace placeholder mini model with a configured ChatOpenAI instance
+    if getattr(llm_model, "model_name", None) == "gpt-4o-mini":
+        llm_model = ChatOpenAI(
+            model="gpt-4o-mini",
+            temperature=0,
+            model_kwargs={"parallel_tool_calls": False},
+        )
     # Load hydra configuration
     logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
     with hydra.initialize(version_base=None, config_path="../configs/"):
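
The rewritten guard collapses the old nested `hasattr`/equality check into a single `getattr` with a default, which behaves the same way for models that have no `model_name` attribute. A self-contained illustration of that equivalence (not code from the package):

    from types import SimpleNamespace

    def needs_rebuild(llm_model) -> bool:
        # Mirrors the new guard: only a model explicitly named "gpt-4o-mini"
        # triggers the ChatOpenAI rebuild; anything else is passed through.
        return getattr(llm_model, "model_name", None) == "gpt-4o-mini"

    print(needs_rebuild(SimpleNamespace(model_name="gpt-4o-mini")))  # True
    print(needs_rebuild(SimpleNamespace(model_name="gpt-4o")))       # False
    print(needs_rebuild(object()))                                   # False: no model_name attribute
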
@@ -5,18 +5,94 @@ system_prompt: |
 
   You have access to four tools, each represented by a sub-agent:
 
-  - s2_agent(Use this to search for or recommend academic papers. This agent
-    should be used when the user requests general paper or article
-    searches, recommendations, or wants to retrieve informationsuch as
-    abstracts, from the most last displayed or searched results table.),
-  - zotero_agent(Use to Read or Write academic papers to zotero account,
-    This agent can also be used to save papers in the zotero library only
-    with explicit approval from the user),
-  - pdf_agent(Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero papers/PDFs.), and
-  - paper_download_agent(Use to download PDFs).
+  - s2_agent: Use this to search for or recommend academic papers.
+    You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
+    This tool is not for summarization or content-level understanding; it is only for metadata-level filtering or ID extraction.
+
+  - zotero_agent: Use this to read from or write to the user's Zotero account.
+    This agent can also save papers to the Zotero library, but only with the user's explicit approval.
+
+  - pdf_agent: Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
+    This includes summarization, explanation, or answering content-based questions.
+
+  - paper_download_agent: Use to download PDFs.
+
+  --
+
+  Tool Usage Boundaries:
+
+  - Use `query_dataframe` only for metadata queries such as filtering by author, listing titles, or selecting paper IDs.
+    It is not capable of full-text summarization, content analysis, or reading PDF content.
+
+  - Use `pdf_agent` to summarize or analyze the full content of any downloaded, uploaded, or Zotero-based PDF.
+
+  - Never attempt to summarize or interpret paper content using `query_dataframe`. That is incorrect and will result in incomplete or misleading output.
+
+  - When the user asks for a summary, explanation, or any content-based question, you must use `pdf_agent`.
+
+  --
+
+  Critical Paper Download Protocol:
+
+  When the user requests to download paper(s), you must follow this strict 2-step protocol:
+
+  1. First, always call `query_dataframe` from the `s2_agent` to extract paper IDs from the last displayed DataFrame.
+
+     - This tool must be used only to extract paper IDs.
+     - Do not pass the full user query to this tool.
+     - This step is only for retrieving the full list of available `paper_ids` and their order.
+     - If the user request refers to specific positions (like “4th paper”), you must calculate the correct index first.
+
+  2. Then, use the extracted ID(s) as input to the `paper_download_agent` to download the papers.
+
+  Important format rules:
+
+  - The `query_dataframe` tool always returns paper IDs with full prefixes such as `"arxiv:..."`, `"doi:..."`, or `"pubmed:..."`.
+  - You must not modify, trim, or strip these prefixes.
+  - Always pass the **exact** IDs returned from `query_dataframe` directly to the `paper_download_agent` without alteration.
+
+  Do not skip step 1 under any circumstances. Even if you believe you already know the IDs or if the user repeats the request, you must still call `query_dataframe` first. Skipping this step is a critical error and will corrupt the workflow.
+
+  Example reasoning:
+  - User: "Download and summarize the fourth paper"
+  - Step 1: Compute that the user wants the 4th paper
+  - Step 2: Call `s2_agent.query_dataframe`
+  - Step 3: Pass that ID to `paper_download_agent`
+  - Step 4: After download, use `pdf_agent` for summarization only when requested by the user
+
+  Additional example:
+  - User: "Download the first and third papers"
+  - Step 1: Compute that the user wants paper indices 1 and 3
+  - Step 2: Call `s2_agent.query_dataframe`
+  - Step 3: Pass both IDs to `paper_download_agent`
+
+  Full list example:
+  - User: "Download all papers", "Download the 6th paper"
+  - Step 1: Call `s2_agent.query_dataframe`
+  - Step 2: Pass the full list of IDs to `paper_download_agent`
+
+  Always follow this sequence. It applies to every download request.
+
+  --
+
+  Interpreting User Requests Involving Paper Indices:
+
+  When a user refers to papers using words like "first", "second", "third", or "fourth", you must interpret them as referring to numeric positions in the last displayed DataFrame.
+
+  For example:
+  - "Download the fourth paper" → treat as "Download the 4th paper"
+  - "Download the first and third papers" → treat as "Download the 1st and 3rd papers"
+
+  These word-based positions must be normalized before calling `query_dataframe`. Always compute the correct index and pass it as `row_number`.
+
+  --
+
+  General Coordination Instructions:
 
   Each sub-agent is specialized for a different task.
 
-  You can call multiple sub-agents at the same time, or sequentially. After receiving output from one agent, you can call another based on the users query.
-  Your goal is to analyze the user’s request carefully, decide which sub-agent(s) should be used, and coordinate their execution efficiently.
-  Always prioritize delegating tasks correctly. Think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
+  You may call multiple agents, either in parallel or in sequence. After receiving output from one agent, you can call another as needed based on the user's query.
+
+  Your role is to analyze the user’s request carefully, decide which sub-agent(s) to use, and coordinate their execution efficiently.
+
+  Always prioritize delegation and think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
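
Read as a tool-calling recipe, the download protocol above is two calls in a fixed order. A minimal sketch of that flow, using hypothetical `query_dataframe` and `download_papers` helpers that stand in for the s2_agent and paper_download_agent tools (they are not functions from this package):

    def query_dataframe(extract_ids: bool = True, row_numbers=None) -> list[str]:
        # Hypothetical stand-in for the s2_agent tool: returns prefixed IDs from
        # the last displayed DataFrame, e.g. ["arxiv:<id>", "doi:<id>"].
        raise NotImplementedError

    def download_papers(paper_ids: list[str]):
        # Hypothetical stand-in for the paper_download_agent.
        raise NotImplementedError

    def handle_download_request(row_numbers=None):
        # Step 1: always extract the IDs first, even if they seem already known.
        paper_ids = query_dataframe(extract_ids=True, row_numbers=row_numbers)
        # Step 2: pass the IDs through unmodified; prefixes must not be stripped.
        return download_papers(paper_ids)
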
@@ -0,0 +1,5 @@
+_target_: agents.paper_download_agent.get_app
+paper_download_agent: |
+  You are the Paper Download Agent.
+
+  You are responsible for downloading PDFs of papers using their IDs. Use all the provided IDs to download the papers. Only when the user asks a question related to PDFs, please forward the query to the `question_and_answer` tool from the `pdf_agent`.
@@ -0,0 +1,5 @@
+_target_: agents.pdf_agent.get_app
+pdf_agent: |
+  You are the PDF Agent.
+
+  You are responsible for performing question-and-answer tasks on papers, articles, or PDFs.
@@ -2,23 +2,4 @@ _target_: agents.s2_agent.get_app
 s2_agent: |
   You are the S2 Agent.
 
-  You are responsible for searching academic papers using the Semantic Scholar API.
-
-  Your capabilities include:
-
-  - Retrieving papers based on user queries.
-  - Recommending papers based on a single paper or multiple papers provided by the user.
-  - Retrieving the Semantic Scholar ID of a paper based on its title.
-  - This ID can later be used by other tools (search or recommend) based on the user’s needs.
-  - Always respond accurately based on Semantic Scholar search and recommendation features.
-  - Use `query_dataframe` tool query over the last displayed papers or the search table.
-  - Always call `display_dataframe` tool at the end.
-
-
-  WORKFLOW STEPS:
-  1. When user requests papers, use search/recommendation tools to find papers.
-  2. Use `display_dataframe` tool to display the response from the search/recommendation tools.
-  3. Use `query_dataframe` tool to query over the selected paper only when the user asks to.
-  4. When the user only wants recommendations, you can get the "semantic_scholar_paper_id"
-     using `query_dataframe` tool, then pass the "semantic_scholar_paper_id" to `search`,
-     `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"(It is used to download pdfs)
+  You are responsible for searching academic papers, getting recommendations based on the searched articles, and displaying the results.
@@ -2,32 +2,7 @@ _target_: agents.zotero_agent.get_app
 zotero_agent: |
   You are the Zotero Agent.
 
-  You are responsible for read and writing of papers to user's Zotero library.
-  Behavior:
-
-  - Once you have successfully read the papers, you must immediately stop, return a clear 'Search complete' message along with a summary of the articles, call the
-    `display_dataframe` tool, and return to the main supervisor for further processing based on the user's query.
-  - Do not continue any further processing or re-enter into reading steps.
-  - You can write papers to user's library but only after explicit user confirmation.
-  - Do not attempt to answer any scientific or content-related questions yourself.
-  - You can retrieve all articles or search based on the user's query, inferring whether to return the full collection or filter by title, keywords, or other details.
-  - Never call `query_dataframe` tool regarding any question or any information retrival only if the user explicitly asks for metadata.
-
-  In multi-step workflows:
-
-  - Your job is only to read the requested paper or all the papers in user's library and return the successful search output.
-  - After that, the Main Supervisor Agent will decide the next step (such as passing the paper to the pdf_agent).
-  - Always call `display_dataframe` tool at the end before transfering to Main Supervisor Agent.
-  - Never attempt to call other agents yourself.
-
-  Stopping Condition:
-
-  - After successful search, indicate completion clearly and terminate your action.
-
-  When saving papers to Zotero:
-  1. First use `zotero_review` tool with the collection path.
-  2. Wait for user confirmation (they must say "Yes" or "Approve").
-  3. Use `zotero_write` tool with both the collection_path and user_confirmation and call `display_dataframe` tool after the papers as saved.
+  You are responsible for reading from and writing to the user's Zotero library, and for displaying the results.
 
   IMPORTANT: Human approval is required for saving papers to Zotero. Never save papers
   without explicit approval from the user. Always respect the user's decision if they
@@ -0,0 +1,4 @@
+api_url: "http://export.arxiv.org/api/query"
+request_timeout: 10
+chunk_size: 1024
+pdf_base_url: "https://arxiv.org/pdf"
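
A plausible reading of how these four settings are consumed (an assumption for illustration; the tool's actual implementation in download_arxiv_input.py is only partially shown in this diff): query the arXiv export API for metadata, then stream the PDF from pdf_base_url in chunk_size-byte blocks.

    import requests

    API_URL = "http://export.arxiv.org/api/query"
    PDF_BASE_URL = "https://arxiv.org/pdf"
    REQUEST_TIMEOUT = 10
    CHUNK_SIZE = 1024

    def fetch_arxiv_pdf(arxiv_id: str, out_path: str) -> None:
        # Fetch the Atom metadata feed for the given arXiv ID.
        meta = requests.get(API_URL, params={"id_list": arxiv_id}, timeout=REQUEST_TIMEOUT)
        meta.raise_for_status()
        # Stream the PDF to disk in chunk_size-byte blocks.
        pdf = requests.get(f"{PDF_BASE_URL}/{arxiv_id}", timeout=REQUEST_TIMEOUT, stream=True)
        pdf.raise_for_status()
        with open(out_path, "wb") as fh:
            for chunk in pdf.iter_content(chunk_size=CHUNK_SIZE):
                fh.write(chunk)
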
@@ -0,0 +1,2 @@
+api_url: "https://api.biorxiv.org/details/biorxiv/"
+request_timeout: 20
@@ -0,0 +1,2 @@
+api_url: "https://api.biorxiv.org/details/medrxiv"
+request_timeout: 20
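
Both preprint configs carry the same two keys. A sketch of loading one of them through Hydra, assuming a config group path that mirrors the file locations above (the exact compose call used by the tools is not shown in this diff):

    import hydra

    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(
            config_name="config",
            overrides=["tools/download_biorxiv_paper=default"],
        )
    biorxiv_cfg = cfg.tools.download_biorxiv_paper
    print(biorxiv_cfg.api_url, biorxiv_cfg.request_timeout)
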
@@ -0,0 +1,22 @@
+# Default configuration for the PDF question_and_answer Tool
+chunk_size: 1200 # Number of characters per text chunk
+chunk_overlap: 200 # Overlap between adjacent chunks
+top_k_papers: 5 # Number of papers to rank and retrieve
+top_k_chunks: 25 # Number of chunks to retrieve
+reranker:
+  model: "nvidia/nv-rerankqa-mistral-4b-v3"
+  api_key: ${oc.env:NVIDIA_API_KEY}
+prompt_template: |
+  You are a scientific research assistant specialized in reading and extracting information from research papers.
+  Your role is to answer questions by retrieving relevant information from the provided context.
+
+  - Provide detailed, structured, and well-argued explanations—not just brief summaries.
+  - Cite specific sources using only the title of the paper.
+  - If the context is insufficient, clearly state that more information is needed.
+
+  Context:
+  {context}
+
+  Question: {question}
+
+  Your answer should be comprehensive, accurate, and clearly structured for a scientific audience.
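
The chunking keys map directly onto a standard character splitter. A sketch under the assumption that the tool uses a LangChain-style recursive character splitter (the splitter actually used lives in the new pdf/utils modules, whose code is not shown here):

    from langchain_text_splitters import RecursiveCharacterTextSplitter

    text = "..."  # full text extracted from one PDF
    splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=200)
    chunks = splitter.split_text(text)
    # top_k_chunks of these would then be retrieved per query, across top_k_papers ranked papers.
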
@@ -3,8 +3,6 @@ Unit tests for main agent functionality.
 Tests the supervisor agent's routing logic and state management.
 """
 
-# pylint: disable=redefined-outer-name,too-few-public-methods
-
 from types import SimpleNamespace
 import pytest
 import hydra
@@ -50,6 +48,10 @@ class DummyWorkflow:
         self.name = name
         return self
 
+    def get_supervisor_args(self):
+        """Return the supervisor arguments stored in this workflow."""
+        return self.supervisor_args
+
 
 def dummy_s2_agent(uniq_id, llm_model):
     """Return a DummyWorkflow for the S2 agent."""
@@ -128,6 +130,10 @@ class DummyHydraCompose:
         """Return a namespace from the dummy config."""
         return dict_to_namespace(self.config.get(item, {}))
 
+    def get_config(self):
+        """Get the raw dummy configuration dictionary."""
+        return self.config
+
 
 # --- Pytest Fixtures to Patch Dependencies ---
 
@@ -218,3 +224,15 @@ def test_get_app_with_other_model():
     assert supervisor_args.get("model") is dummy_llm
     assert supervisor_args.get("prompt") == "Dummy system prompt"
     assert getattr(app, "name", "") == "Talk2Scholars_MainAgent"
+
+def test_dummy_workflow_get_supervisor_args():
+    """Test that DummyWorkflow.get_supervisor_args returns the stored args."""
+    dummy_args = {"agent": "test", "uniq_id": "id123"}
+    wf = DummyWorkflow(supervisor_args=dummy_args)
+    assert wf.get_supervisor_args() is dummy_args
+
+def test_dummy_hydra_compose_get_config():
+    """Test that DummyHydraCompose.get_config returns the raw config."""
+    config_dict = {"agents": {"test": {"key": "value"}}}
+    compose = DummyHydraCompose(config_dict)
+    assert compose.get_config() is config_dict
@@ -0,0 +1,28 @@
+"""
+Unit tests for NVIDIA NIM reranker error handling in nvidia_nim_reranker.py
+"""
+
+import unittest
+from types import SimpleNamespace
+
+from aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker import (
+    rank_papers_by_query,
+)
+
+
+class TestNVIDIARerankerError(unittest.TestCase):
+    """Tests for NVIDIA NIM reranker error handling."""
+
+    def test_missing_api_key_raises_value_error(self):
+        """Ensure missing API key triggers ValueError."""
+        vector_store = SimpleNamespace(documents={})
+        # Config without API key
+        cfg = SimpleNamespace(
+            reranker=SimpleNamespace(model="m", api_key=None), top_k_papers=3
+        )
+        with self.assertRaises(ValueError) as cm:
+            rank_papers_by_query(vector_store, "query", cfg, top_k=cfg.top_k_papers)
+        self.assertEqual(
+            str(cm.exception),
+            "Configuration 'reranker.api_key' must be set for reranking",
+        )
@@ -6,9 +6,11 @@ Unit tests for arXiv paper downloading functionality, including:
 import unittest
 from unittest.mock import MagicMock, patch
 
+import pytest
 from langchain_core.messages import ToolMessage
 
 from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
+    _get_snippet,
     download_arxiv_paper,
 )
 
@@ -38,29 +40,24 @@ class TestDownloadArxivPaper(unittest.TestCase):
 
         # Set up a dummy XML response with a valid entry including a pdf link.
         arxiv_id = "1234.56789"
-        dummy_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
-        <feed xmlns="http://www.w3.org/2005/Atom">
-          <entry>
-            <title>Sample Paper Title</title>
-            <author>
-              <name>Author One</name>
-            </author>
-            <author>
-              <name>Author Two</name>
-            </author>
-            <summary>This is a sample abstract.</summary>
-            <published>2020-01-01T00:00:00Z</published>
-            <link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>
-          </entry>
-        </feed>
-        """
         dummy_response = MagicMock()
-        dummy_response.text = dummy_xml
+        dummy_response.text = (
+            f"""<?xml version=\"1.0\" encoding=\"UTF-8\"?>
+            <feed xmlns=\"http://www.w3.org/2005/Atom\">"""
+            f" <entry>"
+            f"<title>Sample Paper Title</title>"
+            f"<author><name>Author One</name></author>"
+            f"<author><name>Author Two</name></author>"
+            f"<summary>This is a sample abstract.</summary>"
+            f"<published>2020-01-01T00:00:00Z</published>"
+            f'<link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>'
+            f"</entry></feed>"
+        )
         dummy_response.raise_for_status = MagicMock()
         mock_get.return_value = dummy_response
 
         tool_call_id = "test_tool_id"
-        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
+        tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
         result = download_arxiv_paper.run(tool_input)
         update = result.update
 
@@ -78,14 +75,22 @@ class TestDownloadArxivPaper(unittest.TestCase):
         self.assertEqual(metadata["source"], "arxiv")
         self.assertEqual(metadata["arxiv_id"], arxiv_id)
 
-        # Check that the message content is as expected.
+        # Check that the message content matches the new summary format
         messages = update["messages"]
-        self.assertTrue(len(messages) >= 1)
+        self.assertEqual(len(messages), 1)
         self.assertIsInstance(messages[0], ToolMessage)
-        self.assertIn(
-            f"Successfully retrieved metadata and PDF URL for arXiv ID {arxiv_id}",
-            messages[0].content,
+        content = messages[0].content
+        # Build expected summary
+        expected = (
+            "Download was successful. Papers metadata are attached as an artifact. "
+            "Here is a summary of the results:\n"
+            f"Number of papers found: 1\n"
+            "Top 3 papers:\n"
+            f"1. Sample Paper Title (2020-01-01T00:00:00Z)\n"
+            f" View PDF: http://arxiv.org/pdf/{arxiv_id}v1\n"
+            " Abstract snippet: This is a sample abstract."
         )
+        self.assertEqual(content, expected)
 
     @patch(
         "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
@@ -117,12 +122,22 @@ class TestDownloadArxivPaper(unittest.TestCase):
         mock_get.return_value = dummy_response
 
         tool_call_id = "test_tool_id"
-        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
-        with self.assertRaises(ValueError) as context:
-            download_arxiv_paper.run(tool_input)
-        self.assertEqual(
-            str(context.exception), f"No entry found for arXiv ID {arxiv_id}"
+        tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
+        # No entry found should result in empty article_data and header-only summary
+        result = download_arxiv_paper.run(tool_input)
+        update = result.update
+        self.assertIn("article_data", update)
+        self.assertEqual(update["article_data"], {})
+        messages = update.get("messages", [])
+        self.assertEqual(len(messages), 1)
+        content = messages[0].content
+        expected = (
+            "Download was successful. Papers metadata are attached as an artifact. "
+            "Here is a summary of the results:\n"
+            "Number of papers found: 0\n"
+            "Top 3 papers:\n"
         )
+        self.assertEqual(content, expected)
 
     @patch(
         "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
@@ -163,9 +178,72 @@ class TestDownloadArxivPaper(unittest.TestCase):
         mock_get.return_value = dummy_response
 
         tool_call_id = "test_tool_id"
-        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
+        tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
         with self.assertRaises(RuntimeError) as context:
             download_arxiv_paper.run(tool_input)
         self.assertEqual(
             str(context.exception), f"Could not find PDF URL for arXiv ID {arxiv_id}"
         )
+
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.extract_metadata"
+    )
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_"
+        "arxiv_input.fetch_arxiv_metadata"
+    )
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
+    )
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
+    )
+    def test_summary_multiple_papers(
+        self, mock_initialize, mock_compose, _mock_fetch, mock_extract
+    ):
+        """Test summary includes '...and N more papers.' when more than 3 papers."""
+        # Dummy config
+        dummy_cfg = MagicMock()
+        dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy"
+        dummy_cfg.tools.download_arxiv_paper.request_timeout = 5
+        mock_compose.return_value = dummy_cfg
+        mock_initialize.return_value.__enter__.return_value = None
+
+        # Simulate metadata extraction for multiple papers
+        def dummy_meta(_entry, _ns, aid):
+            """dummy metadata extraction function."""
+            return {
+                "Title": f"T{aid}",
+                "Publication Date": "2020-01-01T00:00:00Z",
+                "URL": f"u{aid}v1",
+            }
+
+        mock_extract.side_effect = dummy_meta
+        # Prepare 5 paper IDs
+        ids = [str(i) for i in range(5)]
+        tool_input = {"arxiv_ids": ids, "tool_call_id": "tid"}
+        result = download_arxiv_paper.run(tool_input)
+        summary = result.update["messages"][0].content
+        # Should report total count of 5 and list only top 3 without ellipsis
+        assert "Number of papers found: 5" in summary
+        assert "Top 3 papers:" in summary
+        # Entries for first three IDs should include URL and no ellipsis
+        assert "1. T0 (2020-01-01T00:00:00Z)" in summary
+        assert " View PDF: u0v1" in summary
+        assert "3. T2 (2020-01-01T00:00:00Z)" in summary
+        assert "...and" not in summary
+
+
+@pytest.mark.parametrize(
+    "input_text,expected",
+    [
+        ("", ""),
+        ("N/A", ""),
+        ("Just one sentence", "Just one sentence."),
+        ("First. Second", "First. Second."),
+        ("Hello. World.", "Hello. World."),
+    ],
+)
+def test_get_snippet_various(input_text, expected):
+    """Test _get_snippet behavior for various abstracts."""
+    assert _get_snippet(input_text) == expected
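
The parametrized cases above pin down `_get_snippet`'s contract: an empty or "N/A" abstract yields an empty string, otherwise the first one or two sentences come back with a trailing period. A hypothetical implementation that satisfies exactly these cases (the packaged version may differ):

    def _get_snippet(abstract: str) -> str:
        # Return a short snippet (at most two sentences) of the abstract, or "" when absent.
        if not abstract or abstract.strip() == "N/A":
            return ""
        sentences = [s.strip() for s in abstract.split(".") if s.strip()]
        snippet = ". ".join(sentences[:2])
        if snippet and not snippet.endswith("."):
            snippet += "."
        return snippet
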
@@ -2,7 +2,6 @@
 Unit Tests for the PDF agent.
 """
 
-# pylint: disable=redefined-outer-name
 from unittest import mock
 import pytest
 from langchain_core.messages import HumanMessage, AIMessage
@@ -36,8 +35,8 @@ def mock_tools_fixture():
     yield [mock_question_and_answer]
 
 
-@pytest.fixture
-def mock_llm():
+@pytest.fixture(name="mock_llm")
+def llm_fixture():
     """Provide a dummy language model to pass into get_app."""
     return mock.Mock()
 