aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
  2. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
  3. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
  7. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
  8. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
  9. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
  10. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
  11. aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
  12. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
  13. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
  14. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
  15. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
  16. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
  17. aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
  18. aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
  19. aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
  20. aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
  21. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
  23. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
  24. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
  25. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
  32. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
  33. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
  34. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
  35. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
  36. aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
  37. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
  38. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
  39. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
  40. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
  41. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
  42. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
  43. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
  44. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
  45. aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
  46. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
  47. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
  48. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
@@ -48,13 +48,13 @@ def get_app(uniq_id, llm_model: BaseChatModel):
     >>> app = get_app("thread_123")
     >>> result = app.invoke(initial_state)
     """
-    if hasattr(llm_model, "model_name"):
-        if llm_model.model_name == "gpt-4o-mini":
-            llm_model = ChatOpenAI(
-                model="gpt-4o-mini",
-                temperature=0,
-                model_kwargs={"parallel_tool_calls": False},
-            )
+    # Replace placeholder mini model with a configured ChatOpenAI instance
+    if getattr(llm_model, "model_name", None) == "gpt-4o-mini":
+        llm_model = ChatOpenAI(
+            model="gpt-4o-mini",
+            temperature=0,
+            model_kwargs={"parallel_tool_calls": False},
+        )
     # Load hydra configuration
     logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
     with hydra.initialize(version_base=None, config_path="../configs/"):
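
The rewritten guard collapses the old nested `hasattr`/equality check into a single `getattr` with a default, which behaves the same way for models that have no `model_name` attribute. A self-contained illustration of that equivalence (not code from the package):

    from types import SimpleNamespace

    def needs_rebuild(llm_model) -> bool:
        # Mirrors the new guard: only a model explicitly named "gpt-4o-mini"
        # triggers the ChatOpenAI rebuild; anything else is passed through.
        return getattr(llm_model, "model_name", None) == "gpt-4o-mini"

    print(needs_rebuild(SimpleNamespace(model_name="gpt-4o-mini")))  # True
    print(needs_rebuild(SimpleNamespace(model_name="gpt-4o")))       # False
    print(needs_rebuild(object()))                                   # False: no model_name attribute
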
@@ -5,18 +5,94 @@ system_prompt: |
 
   You have access to four tools, each represented by a sub-agent:
 
-  - s2_agent(Use this to search for or recommend academic papers. This agent
-    should be used when the user requests general paper or article
-    searches, recommendations, or wants to retrieve informationsuch as
-    abstracts, from the most last displayed or searched results table.),
-  - zotero_agent(Use to Read or Write academic papers to zotero account,
-    This agent can also be used to save papers in the zotero library only
-    with explicit approval from the user),
-  - pdf_agent(Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero papers/PDFs.), and
-  - paper_download_agent(Use to download PDFs).
+  - s2_agent: Use this to search for or recommend academic papers.
+    You can also use its `query_dataframe` tool to extract metadata from the last displayed papers.
+    This tool is not for summarization or content-level understanding; it is only for metadata-level filtering or ID extraction.
+
+  - zotero_agent: Use this to read from or write to the user's Zotero account.
+    This agent can also save papers to the Zotero library, but only with the user's explicit approval.
+
+  - pdf_agent: Use this to perform question-and-answer tasks on downloaded, uploaded, or Zotero-based papers or PDFs.
+    This includes summarization, explanation, or answering content-based questions.
+
+  - paper_download_agent: Use to download PDFs.
+
+  --
+
+  Tool Usage Boundaries:
+
+  - Use `query_dataframe` only for metadata queries such as filtering by author, listing titles, or selecting paper IDs.
+    It is not capable of full-text summarization, content analysis, or reading PDF content.
+
+  - Use `pdf_agent` to summarize or analyze the full content of any downloaded, uploaded, or Zotero-based PDF.
+
+  - Never attempt to summarize or interpret paper content using `query_dataframe`. That is incorrect and will result in incomplete or misleading output.
+
+  - When the user asks for a summary, explanation, or any content-based question, you must use `pdf_agent`.
+
+  --
+
+  Critical Paper Download Protocol:
+
+  When the user requests to download paper(s), you must follow this strict 2-step protocol:
+
+  1. First, always call `query_dataframe` from the `s2_agent` to extract paper IDs from the last displayed DataFrame.
+
+     - This tool must be used only to extract paper IDs.
+     - Do not pass the full user query to this tool.
+     - This step is only for retrieving the full list of available `paper_ids` and their order.
+     - If the user request refers to specific positions (like “4th paper”), you must calculate the correct index first.
+
+  2. Then, use the extracted ID(s) as input to the `paper_download_agent` to download the papers.
+
+  Important format rules:
+
+  - The `query_dataframe` tool always returns paper IDs with full prefixes such as `"arxiv:..."`, `"doi:..."`, or `"pubmed:..."`.
+  - You must not modify, trim, or strip these prefixes.
+  - Always pass the **exact** IDs returned from `query_dataframe` directly to the `paper_download_agent` without alteration.
+
+  Do not skip step 1 under any circumstances. Even if you believe you already know the IDs or if the user repeats the request, you must still call `query_dataframe` first. Skipping this step is a critical error and will corrupt the workflow.
+
+  Example reasoning:
+  - User: "Download and summarize the fourth paper"
+  - Step 1: Compute that the user wants the 4th paper
+  - Step 2: Call `s2_agent.query_dataframe`
+  - Step 3: Pass that ID to `paper_download_agent`
+  - Step 4: After download, use `pdf_agent` for summarization only when requested by the user
+
+  Additional example:
+  - User: "Download the first and third papers"
+  - Step 1: Compute that the user wants paper indices 1 and 3
+  - Step 2: Call `s2_agent.query_dataframe`
+  - Step 3: Pass both IDs to `paper_download_agent`
+
+  Full list example:
+  - User: "Download all papers", "Download the 6th paper"
+  - Step 1: Call `s2_agent.query_dataframe`
+  - Step 2: Pass the full list of IDs to `paper_download_agent`
+
+  Always follow this sequence. It applies to every download request.
+
+  --
+
+  Interpreting User Requests Involving Paper Indices:
+
+  When a user refers to papers using words like "first", "second", "third", or "fourth", you must interpret them as referring to numeric positions in the last displayed DataFrame.
+
+  For example:
+  - "Download the fourth paper" → treat as "Download the 4th paper"
+  - "Download the first and third papers" → treat as "Download the 1st and 3rd papers"
+
+  These word-based positions must be normalized before calling `query_dataframe`. Always compute the correct index and pass it as `row_number`.
+
+  --
+
+  General Coordination Instructions:
 
   Each sub-agent is specialized for a different task.
 
-  You can call multiple sub-agents at the same time, or sequentially. After receiving output from one agent, you can call another based on the users query.
-  Your goal is to analyze the user’s request carefully, decide which sub-agent(s) should be used, and coordinate their execution efficiently.
-  Always prioritize delegating tasks correctly. Think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
+  You may call multiple agents, either in parallel or in sequence. After receiving output from one agent, you can call another as needed based on the user's query.
+
+  Your role is to analyze the user’s request carefully, decide which sub-agent(s) to use, and coordinate their execution efficiently.
+
+  Always prioritize delegation and think step-by-step before acting. Avoid answering by yourself unless explicitly necessary.
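
Read as a tool-calling recipe, the download protocol above is two calls in a fixed order. A minimal sketch of that flow, using hypothetical `query_dataframe` and `download_papers` helpers that stand in for the s2_agent and paper_download_agent tools (they are not functions from this package):

    def query_dataframe(extract_ids: bool = True, row_numbers=None) -> list[str]:
        # Hypothetical stand-in for the s2_agent tool: returns prefixed IDs from
        # the last displayed DataFrame, e.g. ["arxiv:<id>", "doi:<id>"].
        raise NotImplementedError

    def download_papers(paper_ids: list[str]):
        # Hypothetical stand-in for the paper_download_agent.
        raise NotImplementedError

    def handle_download_request(row_numbers=None):
        # Step 1: always extract the IDs first, even if they seem already known.
        paper_ids = query_dataframe(extract_ids=True, row_numbers=row_numbers)
        # Step 2: pass the IDs through unmodified; prefixes must not be stripped.
        return download_papers(paper_ids)
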
@@ -0,0 +1,5 @@
+_target_: agents.paper_download_agent.get_app
+paper_download_agent: |
+  You are the Paper Download Agent.
+
+  You are responsible for downloading PDFs of papers using their IDs. Use all the provided IDs to download the papers. Only when the user asks a question related to PDFs, please forward the query to the `question_and_answer` tool from the `pdf_agent`.
@@ -0,0 +1,5 @@
+_target_: agents.pdf_agent.get_app
+pdf_agent: |
+  You are the PDF Agent.
+
+  You are responsible for performing question-and-answer tasks on papers, articles, or PDFs.
@@ -2,23 +2,4 @@ _target_: agents.s2_agent.get_app
 s2_agent: |
   You are the S2 Agent.
 
-  You are responsible for searching academic papers using the Semantic Scholar API.
-
-  Your capabilities include:
-
-  - Retrieving papers based on user queries.
-  - Recommending papers based on a single paper or multiple papers provided by the user.
-  - Retrieving the Semantic Scholar ID of a paper based on its title.
-  - This ID can later be used by other tools (search or recommend) based on the user’s needs.
-  - Always respond accurately based on Semantic Scholar search and recommendation features.
-  - Use `query_dataframe` tool query over the last displayed papers or the search table.
-  - Always call `display_dataframe` tool at the end.
-
-
-  WORKFLOW STEPS:
-  1. When user requests papers, use search/recommendation tools to find papers.
-  2. Use `display_dataframe` tool to display the response from the search/recommendation tools.
-  3. Use `query_dataframe` tool to query over the selected paper only when the user asks to.
-  4. When the user only wants recommendations, you can get the "semantic_scholar_paper_id"
-     using `query_dataframe` tool, then pass the "semantic_scholar_paper_id" to `search`,
-     `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"(It is used to download pdfs)
+  You are responsible for searching academic papers, getting recommendations based on the searched articles, and displaying the results.
@@ -2,32 +2,7 @@ _target_: agents.zotero_agent.get_app
 zotero_agent: |
   You are the Zotero Agent.
 
-  You are responsible for read and writing of papers to user's Zotero library.
-  Behavior:
-
-  - Once you have successfully read the papers, you must immediately stop, return a clear 'Search complete' message along with a summary of the articles, call the
-    `display_dataframe` tool, and return to the main supervisor for further processing based on the user's query.
-  - Do not continue any further processing or re-enter into reading steps.
-  - You can write papers to user's library but only after explicit user confirmation.
-  - Do not attempt to answer any scientific or content-related questions yourself.
-  - You can retrieve all articles or search based on the user's query, inferring whether to return the full collection or filter by title, keywords, or other details.
-  - Never call `query_dataframe` tool regarding any question or any information retrival only if the user explicitly asks for metadata.
-
-  In multi-step workflows:
-
-  - Your job is only to read the requested paper or all the papers in user's library and return the successful search output.
-  - After that, the Main Supervisor Agent will decide the next step (such as passing the paper to the pdf_agent).
-  - Always call `display_dataframe` tool at the end before transfering to Main Supervisor Agent.
-  - Never attempt to call other agents yourself.
-
-  Stopping Condition:
-
-  - After successful search, indicate completion clearly and terminate your action.
-
-  When saving papers to Zotero:
-  1. First use `zotero_review` tool with the collection path.
-  2. Wait for user confirmation (they must say "Yes" or "Approve").
-  3. Use `zotero_write` tool with both the collection_path and user_confirmation and call `display_dataframe` tool after the papers as saved.
+  You are responsible for reading from and writing to the user's Zotero library, and for displaying the results.
 
   IMPORTANT: Human approval is required for saving papers to Zotero. Never save papers
   without explicit approval from the user. Always respect the user's decision if they
@@ -0,0 +1,4 @@
+api_url: "http://export.arxiv.org/api/query"
+request_timeout: 10
+chunk_size: 1024
+pdf_base_url: "https://arxiv.org/pdf"
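
A plausible reading of how these four settings are consumed (an assumption for illustration; the tool's actual implementation in download_arxiv_input.py is only partially shown in this diff): query the arXiv export API for metadata, then stream the PDF from pdf_base_url in chunk_size-byte blocks.

    import requests

    API_URL = "http://export.arxiv.org/api/query"
    PDF_BASE_URL = "https://arxiv.org/pdf"
    REQUEST_TIMEOUT = 10
    CHUNK_SIZE = 1024

    def fetch_arxiv_pdf(arxiv_id: str, out_path: str) -> None:
        # Fetch the Atom metadata feed for the given arXiv ID.
        meta = requests.get(API_URL, params={"id_list": arxiv_id}, timeout=REQUEST_TIMEOUT)
        meta.raise_for_status()
        # Stream the PDF to disk in chunk_size-byte blocks.
        pdf = requests.get(f"{PDF_BASE_URL}/{arxiv_id}", timeout=REQUEST_TIMEOUT, stream=True)
        pdf.raise_for_status()
        with open(out_path, "wb") as fh:
            for chunk in pdf.iter_content(chunk_size=CHUNK_SIZE):
                fh.write(chunk)
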
@@ -0,0 +1,2 @@
+api_url: "https://api.biorxiv.org/details/biorxiv/"
+request_timeout: 20
@@ -0,0 +1,2 @@
+api_url: "https://api.biorxiv.org/details/medrxiv"
+request_timeout: 20
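
Both preprint configs carry the same two keys. A sketch of loading one of them through Hydra, assuming a config group path that mirrors the file locations above (the exact compose call used by the tools is not shown in this diff):

    import hydra

    with hydra.initialize(version_base=None, config_path="../configs"):
        cfg = hydra.compose(
            config_name="config",
            overrides=["tools/download_biorxiv_paper=default"],
        )
    biorxiv_cfg = cfg.tools.download_biorxiv_paper
    print(biorxiv_cfg.api_url, biorxiv_cfg.request_timeout)
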
@@ -0,0 +1,22 @@
+# Default configuration for the PDF question_and_answer Tool
+chunk_size: 1200 # Number of characters per text chunk
+chunk_overlap: 200 # Overlap between adjacent chunks
+top_k_papers: 5 # Number of papers to rank and retrieve
+top_k_chunks: 25 # Number of chunks to retrieve
+reranker:
+  model: "nvidia/nv-rerankqa-mistral-4b-v3"
+  api_key: ${oc.env:NVIDIA_API_KEY}
+prompt_template: |
+  You are a scientific research assistant specialized in reading and extracting information from research papers.
+  Your role is to answer questions by retrieving relevant information from the provided context.
+
+  - Provide detailed, structured, and well-argued explanations—not just brief summaries.
+  - Cite specific sources using only the title of the paper.
+  - If the context is insufficient, clearly state that more information is needed.
+
+  Context:
+  {context}
+
+  Question: {question}
+
+  Your answer should be comprehensive, accurate, and clearly structured for a scientific audience.
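
The chunking keys map directly onto a standard character splitter. A sketch under the assumption that the tool uses a LangChain-style recursive character splitter (the splitter actually used lives in the new pdf/utils modules, whose code is not shown here):

    from langchain_text_splitters import RecursiveCharacterTextSplitter

    text = "..."  # full text extracted from one PDF
    splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=200)
    chunks = splitter.split_text(text)
    # top_k_chunks of these would then be retrieved per query, across top_k_papers ranked papers.
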
@@ -3,8 +3,6 @@ Unit tests for main agent functionality.
 Tests the supervisor agent's routing logic and state management.
 """
 
-# pylint: disable=redefined-outer-name,too-few-public-methods
-
 from types import SimpleNamespace
 import pytest
 import hydra
@@ -50,6 +48,10 @@ class DummyWorkflow:
         self.name = name
         return self
 
+    def get_supervisor_args(self):
+        """Return the supervisor arguments stored in this workflow."""
+        return self.supervisor_args
+
 
 def dummy_s2_agent(uniq_id, llm_model):
     """Return a DummyWorkflow for the S2 agent."""
@@ -128,6 +130,10 @@ class DummyHydraCompose:
         """Return a namespace from the dummy config."""
         return dict_to_namespace(self.config.get(item, {}))
 
+    def get_config(self):
+        """Get the raw dummy configuration dictionary."""
+        return self.config
+
 
 # --- Pytest Fixtures to Patch Dependencies ---
 
@@ -218,3 +224,15 @@ def test_get_app_with_other_model():
     assert supervisor_args.get("model") is dummy_llm
     assert supervisor_args.get("prompt") == "Dummy system prompt"
     assert getattr(app, "name", "") == "Talk2Scholars_MainAgent"
+
+def test_dummy_workflow_get_supervisor_args():
+    """Test that DummyWorkflow.get_supervisor_args returns the stored args."""
+    dummy_args = {"agent": "test", "uniq_id": "id123"}
+    wf = DummyWorkflow(supervisor_args=dummy_args)
+    assert wf.get_supervisor_args() is dummy_args
+
+def test_dummy_hydra_compose_get_config():
+    """Test that DummyHydraCompose.get_config returns the raw config."""
+    config_dict = {"agents": {"test": {"key": "value"}}}
+    compose = DummyHydraCompose(config_dict)
+    assert compose.get_config() is config_dict
@@ -0,0 +1,28 @@
+"""
+Unit tests for NVIDIA NIM reranker error handling in nvidia_nim_reranker.py
+"""
+
+import unittest
+from types import SimpleNamespace
+
+from aiagents4pharma.talk2scholars.tools.pdf.utils.nvidia_nim_reranker import (
+    rank_papers_by_query,
+)
+
+
+class TestNVIDIARerankerError(unittest.TestCase):
+    """Tests for NVIDIA NIM reranker error handling."""
+
+    def test_missing_api_key_raises_value_error(self):
+        """Ensure missing API key triggers ValueError."""
+        vector_store = SimpleNamespace(documents={})
+        # Config without API key
+        cfg = SimpleNamespace(
+            reranker=SimpleNamespace(model="m", api_key=None), top_k_papers=3
+        )
+        with self.assertRaises(ValueError) as cm:
+            rank_papers_by_query(vector_store, "query", cfg, top_k=cfg.top_k_papers)
+        self.assertEqual(
+            str(cm.exception),
+            "Configuration 'reranker.api_key' must be set for reranking",
+        )
@@ -6,9 +6,11 @@ Unit tests for arXiv paper downloading functionality, including:
 import unittest
 from unittest.mock import MagicMock, patch
 
+import pytest
 from langchain_core.messages import ToolMessage
 
 from aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input import (
+    _get_snippet,
     download_arxiv_paper,
 )
 
@@ -38,29 +40,24 @@ class TestDownloadArxivPaper(unittest.TestCase):
 
         # Set up a dummy XML response with a valid entry including a pdf link.
         arxiv_id = "1234.56789"
-        dummy_xml = f"""<?xml version="1.0" encoding="UTF-8"?>
-        <feed xmlns="http://www.w3.org/2005/Atom">
-          <entry>
-            <title>Sample Paper Title</title>
-            <author>
-              <name>Author One</name>
-            </author>
-            <author>
-              <name>Author Two</name>
-            </author>
-            <summary>This is a sample abstract.</summary>
-            <published>2020-01-01T00:00:00Z</published>
-            <link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>
-          </entry>
-        </feed>
-        """
         dummy_response = MagicMock()
-        dummy_response.text = dummy_xml
+        dummy_response.text = (
+            f"""<?xml version=\"1.0\" encoding=\"UTF-8\"?>
+            <feed xmlns=\"http://www.w3.org/2005/Atom\">"""
+            f" <entry>"
+            f"<title>Sample Paper Title</title>"
+            f"<author><name>Author One</name></author>"
+            f"<author><name>Author Two</name></author>"
+            f"<summary>This is a sample abstract.</summary>"
+            f"<published>2020-01-01T00:00:00Z</published>"
+            f'<link title="pdf" href="http://arxiv.org/pdf/{arxiv_id}v1"/>'
+            f"</entry></feed>"
+        )
         dummy_response.raise_for_status = MagicMock()
         mock_get.return_value = dummy_response
 
         tool_call_id = "test_tool_id"
-        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
+        tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
         result = download_arxiv_paper.run(tool_input)
         update = result.update
 
@@ -78,14 +75,22 @@ class TestDownloadArxivPaper(unittest.TestCase):
         self.assertEqual(metadata["source"], "arxiv")
         self.assertEqual(metadata["arxiv_id"], arxiv_id)
 
-        # Check that the message content is as expected.
+        # Check that the message content matches the new summary format
         messages = update["messages"]
-        self.assertTrue(len(messages) >= 1)
+        self.assertEqual(len(messages), 1)
         self.assertIsInstance(messages[0], ToolMessage)
-        self.assertIn(
-            f"Successfully retrieved metadata and PDF URL for arXiv ID {arxiv_id}",
-            messages[0].content,
+        content = messages[0].content
+        # Build expected summary
+        expected = (
+            "Download was successful. Papers metadata are attached as an artifact. "
+            "Here is a summary of the results:\n"
+            f"Number of papers found: 1\n"
+            "Top 3 papers:\n"
+            f"1. Sample Paper Title (2020-01-01T00:00:00Z)\n"
+            f" View PDF: http://arxiv.org/pdf/{arxiv_id}v1\n"
+            " Abstract snippet: This is a sample abstract."
         )
+        self.assertEqual(content, expected)
 
     @patch(
         "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
@@ -117,12 +122,22 @@ class TestDownloadArxivPaper(unittest.TestCase):
         mock_get.return_value = dummy_response
 
         tool_call_id = "test_tool_id"
-        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
-        with self.assertRaises(ValueError) as context:
-            download_arxiv_paper.run(tool_input)
-        self.assertEqual(
-            str(context.exception), f"No entry found for arXiv ID {arxiv_id}"
+        tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
+        # No entry found should result in empty article_data and header-only summary
+        result = download_arxiv_paper.run(tool_input)
+        update = result.update
+        self.assertIn("article_data", update)
+        self.assertEqual(update["article_data"], {})
+        messages = update.get("messages", [])
+        self.assertEqual(len(messages), 1)
+        content = messages[0].content
+        expected = (
+            "Download was successful. Papers metadata are attached as an artifact. "
+            "Here is a summary of the results:\n"
+            "Number of papers found: 0\n"
+            "Top 3 papers:\n"
         )
+        self.assertEqual(content, expected)
 
     @patch(
         "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
@@ -163,9 +178,72 @@ class TestDownloadArxivPaper(unittest.TestCase):
         mock_get.return_value = dummy_response
 
         tool_call_id = "test_tool_id"
-        tool_input = {"arxiv_id": arxiv_id, "tool_call_id": tool_call_id}
+        tool_input = {"arxiv_ids": [arxiv_id], "tool_call_id": tool_call_id}
         with self.assertRaises(RuntimeError) as context:
             download_arxiv_paper.run(tool_input)
         self.assertEqual(
             str(context.exception), f"Could not find PDF URL for arXiv ID {arxiv_id}"
         )
+
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.extract_metadata"
+    )
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_"
+        "arxiv_input.fetch_arxiv_metadata"
+    )
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.compose"
+    )
+    @patch(
+        "aiagents4pharma.talk2scholars.tools.paper_download.download_arxiv_input.hydra.initialize"
+    )
+    def test_summary_multiple_papers(
+        self, mock_initialize, mock_compose, _mock_fetch, mock_extract
+    ):
+        """Test summary includes '...and N more papers.' when more than 3 papers."""
+        # Dummy config
+        dummy_cfg = MagicMock()
+        dummy_cfg.tools.download_arxiv_paper.api_url = "http://dummy"
+        dummy_cfg.tools.download_arxiv_paper.request_timeout = 5
+        mock_compose.return_value = dummy_cfg
+        mock_initialize.return_value.__enter__.return_value = None
+
+        # Simulate metadata extraction for multiple papers
+        def dummy_meta(_entry, _ns, aid):
+            """dummy metadata extraction function."""
+            return {
+                "Title": f"T{aid}",
+                "Publication Date": "2020-01-01T00:00:00Z",
+                "URL": f"u{aid}v1",
+            }
+
+        mock_extract.side_effect = dummy_meta
+        # Prepare 5 paper IDs
+        ids = [str(i) for i in range(5)]
+        tool_input = {"arxiv_ids": ids, "tool_call_id": "tid"}
+        result = download_arxiv_paper.run(tool_input)
+        summary = result.update["messages"][0].content
+        # Should report total count of 5 and list only top 3 without ellipsis
+        assert "Number of papers found: 5" in summary
+        assert "Top 3 papers:" in summary
+        # Entries for first three IDs should include URL and no ellipsis
+        assert "1. T0 (2020-01-01T00:00:00Z)" in summary
+        assert " View PDF: u0v1" in summary
+        assert "3. T2 (2020-01-01T00:00:00Z)" in summary
+        assert "...and" not in summary
+
+
+@pytest.mark.parametrize(
+    "input_text,expected",
+    [
+        ("", ""),
+        ("N/A", ""),
+        ("Just one sentence", "Just one sentence."),
+        ("First. Second", "First. Second."),
+        ("Hello. World.", "Hello. World."),
+    ],
+)
+def test_get_snippet_various(input_text, expected):
+    """Test _get_snippet behavior for various abstracts."""
+    assert _get_snippet(input_text) == expected
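
The parametrized cases above pin down `_get_snippet`'s contract: an empty or "N/A" abstract yields an empty string, otherwise the first one or two sentences come back with a trailing period. A hypothetical implementation that satisfies exactly these cases (the packaged version may differ):

    def _get_snippet(abstract: str) -> str:
        # Return a short snippet (at most two sentences) of the abstract, or "" when absent.
        if not abstract or abstract.strip() == "N/A":
            return ""
        sentences = [s.strip() for s in abstract.split(".") if s.strip()]
        snippet = ". ".join(sentences[:2])
        if snippet and not snippet.endswith("."):
            snippet += "."
        return snippet
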
@@ -2,7 +2,6 @@
 Unit Tests for the PDF agent.
 """
 
-# pylint: disable=redefined-outer-name
 from unittest import mock
 import pytest
 from langchain_core.messages import HumanMessage, AIMessage
@@ -36,8 +35,8 @@ def mock_tools_fixture():
     yield [mock_question_and_answer]
 
 
-@pytest.fixture
-def mock_llm():
+@pytest.fixture(name="mock_llm")
+def llm_fixture():
     """Provide a dummy language model to pass into get_app."""
     return mock.Mock()
 