aiagents4pharma 1.37.0__py3-none-any.whl → 1.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +12 -4
  2. aiagents4pharma/talk2scholars/configs/config.yaml +2 -0
  3. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/__init__.py +3 -0
  4. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/__init__.py +3 -0
  5. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -0
  6. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +33 -7
  7. aiagents4pharma/talk2scholars/tests/test_paper_download_biorxiv.py +151 -0
  8. aiagents4pharma/talk2scholars/tests/test_paper_download_medrxiv.py +151 -0
  9. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +59 -3
  10. aiagents4pharma/talk2scholars/tests/test_read_helper_utils.py +110 -0
  11. aiagents4pharma/talk2scholars/tests/test_s2_display.py +20 -1
  12. aiagents4pharma/talk2scholars/tests/test_s2_query.py +17 -0
  13. aiagents4pharma/talk2scholars/tests/test_state.py +25 -1
  14. aiagents4pharma/talk2scholars/tests/test_zotero_pdf_downloader_utils.py +46 -0
  15. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +35 -40
  16. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +4 -1
  17. aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +112 -0
  18. aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +112 -0
  19. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +82 -41
  20. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +6 -2
  21. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +2 -1
  22. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +7 -3
  23. aiagents4pharma/talk2scholars/tools/s2/search.py +2 -1
  24. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +2 -1
  25. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +2 -0
  26. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +2 -0
  27. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +2 -0
  28. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +79 -136
  29. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +147 -0
  30. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +42 -9
  31. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/METADATA +1 -1
  32. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/RECORD +35 -26
  33. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/WHEEL +1 -1
  34. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/licenses/LICENSE +0 -0
  35. {aiagents4pharma-1.37.0.dist-info → aiagents4pharma-1.39.0.dist-info}/top_level.txt +0 -0
@@ -14,6 +14,8 @@ from langgraph.prebuilt.tool_node import ToolNode
14
14
  from langgraph.checkpoint.memory import MemorySaver
15
15
  from ..state.state_talk2scholars import Talk2Scholars
16
16
  from ..tools.paper_download.download_arxiv_input import download_arxiv_paper
17
+ from ..tools.paper_download.download_medrxiv_input import download_medrxiv_paper
18
+ from ..tools.paper_download.download_biorxiv_input import download_biorxiv_paper
17
19
 
18
20
  # Initialize logger
19
21
  logging.basicConfig(level=logging.INFO)
@@ -24,14 +26,20 @@ def get_app(uniq_id, llm_model: BaseChatModel):
24
26
  """
25
27
  Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
26
28
 
29
+ This agent supports downloading scientific papers from multiple preprint servers, including
30
+ arXiv, BioRxiv, and MedRxiv. It can intelligently handle user queries by extracting or resolving
31
+ necessary identifiers (e.g., arXiv ID or DOI) from the paper title and routing the request to
32
+ the appropriate download tool.
33
+
27
34
  Args:
28
35
  uniq_id (str): A unique identifier for tracking the current session.
29
36
  llm_model (BaseChatModel, optional): The language model to be used by the agent.
30
- Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
37
+ Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0.5).
31
38
 
32
39
  Returns:
33
40
  StateGraph: A compiled LangGraph application that enables the paper download agent to
34
- process user queries and retrieve arXiv papers.
41
+ process user queries and retrieve research papers from arXiv (using arXiv ID),
42
+ BioRxiv and MedRxiv (using DOI resolved from the paper title or provided directly).
35
43
  """
36
44
 
37
45
  # Load Hydra configuration
@@ -44,7 +52,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
44
52
  cfg = cfg.agents.talk2scholars.paper_download_agent
45
53
 
46
54
  # Define tools properly
47
- tools = ToolNode([download_arxiv_paper])
55
+ tools = ToolNode([download_arxiv_paper, download_medrxiv_paper, download_biorxiv_paper])
48
56
 
49
57
  # Define the model
50
58
  logger.info("Using OpenAI model %s", llm_model)
@@ -58,7 +66,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
58
66
 
59
67
  def paper_download_agent_node(state: Talk2Scholars) -> Dict[str, Any]:
60
68
  """
61
- Processes the current state to fetch the arXiv paper.
69
+ Processes the current state to fetch the research paper from arXiv, BioRxiv, or MedRxiv.
62
70
  """
63
71
  logger.info("Creating paper download agent node with thread_id: %s", uniq_id)
64
72
  result = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
@@ -8,6 +8,8 @@ defaults:
8
8
  - agents/talk2scholars/pdf_agent: default
9
9
  - tools/search: default
10
10
  - tools/download_arxiv_paper: default
11
+ - tools/download_biorxiv_paper: default
12
+ - tools/download_medrxiv_paper: default
11
13
  - tools/single_paper_recommendation: default
12
14
  - tools/multi_paper_recommendation: default
13
15
  - tools/retrieve_semantic_scholar_paper_id: default
@@ -0,0 +1,3 @@
1
+ """
2
+ Import all the modules in the package
3
+ """
@@ -0,0 +1,3 @@
1
+ """
2
+ Import all the modules in the package
3
+ """
@@ -2,6 +2,7 @@
2
2
  library_type: "user" # Type of library ('user' or 'group')
3
3
  default_limit: 2
4
4
  request_timeout: 10
5
+ chunk_size: 16384 # Size (in bytes) for streaming PDF download chunks
5
6
  user_id: ${oc.env:ZOTERO_USER_ID} # Load from environment variable
6
7
  api_key: ${oc.env:ZOTERO_API_KEY} # Load from environment variable
7
8
 
@@ -7,6 +7,7 @@ across agent interactions.
7
7
  """
8
8
 
9
9
  import logging
10
+ from collections.abc import Mapping
10
11
  from typing import Annotated, Any, Dict
11
12
 
12
13
  from langchain_core.embeddings import Embeddings
@@ -18,7 +19,24 @@ logging.basicConfig(level=logging.INFO)
18
19
  logger = logging.getLogger(__name__)
19
20
 
20
21
 
21
- def replace_dict(existing: Dict[str, Any], new: Dict[str, Any]) -> Dict[str, Any]:
22
+ def merge_dict(existing: Dict[str, Any], new: Dict[str, Any]) -> Dict[str, Any]:
23
+ """
24
+ Merges the existing dictionary with a new dictionary.
25
+
26
+ This function builds a new dictionary from the existing state and adds the new
27
+ entries to it; keys present in both take the new value, other entries are kept.
28
+ Args:
29
+ existing (Dict[str, Any]): The current dictionary state.
30
+ new (Dict[str, Any]): The new dictionary state to merge.
31
+ Returns:
32
+ Dict[str, Any]: The merged dictionary state.
33
+ """
34
+ merged = dict(existing) if existing else {}
35
+ merged.update(new or {})
36
+ return merged
37
+
38
+
39
+ def replace_dict(existing: Dict[str, Any], new: Any) -> Any:
22
40
  """
23
41
  Replaces the existing dictionary with a new dictionary.
24
42
 
@@ -39,9 +57,13 @@ def replace_dict(existing: Dict[str, Any], new: Dict[str, Any]) -> Dict[str, Any
39
57
  >>> print(updated_state)
40
58
  {"papers": {"id2": "Paper 2"}}
41
59
  """
42
- # No-op operation to use the 'existing' variable
43
- _ = len(existing)
44
- return new
60
+ # If new is not a mapping, just replace existing value outright
61
+ if not isinstance(new, Mapping):
62
+ return new
63
+ # In-place replace: clear existing mapping and update with new entries
64
+ existing.clear()
65
+ existing.update(new)
66
+ return existing
45
67
 
46
68
 
47
69
  class Talk2Scholars(AgentState):
@@ -63,10 +85,14 @@ class Talk2Scholars(AgentState):
63
85
  """
64
86
 
65
87
  # Agent state fields
88
+ # Key controlling UI display: always replace to reference latest output
89
+ # Stores the most recently displayed papers metadata
66
90
  last_displayed_papers: Annotated[Dict[str, Any], replace_dict]
67
- papers: Annotated[Dict[str, Any], replace_dict]
68
- multi_papers: Annotated[Dict[str, Any], replace_dict]
69
- article_data: Annotated[Dict[str, Any], replace_dict]
91
+ # Accumulative keys: merge new entries into existing state
92
+ papers: Annotated[Dict[str, Any], merge_dict]
93
+ multi_papers: Annotated[Dict[str, Any], merge_dict]
94
+ article_data: Annotated[Dict[str, Any], merge_dict]
95
+ # Approval status: always replace to reflect latest operation
70
96
  zotero_write_approval_status: Annotated[Dict[str, Any], replace_dict]
71
97
  llm_model: BaseChatModel
72
98
  text_embedding_model: Embeddings
@@ -0,0 +1,151 @@
1
+ """
2
+ Unit tests for bioRxiv paper downloading functionality, including:
3
+ - download_biorxiv_paper tool function.
4
+ """
5
+
6
+ import unittest
7
+ from unittest.mock import MagicMock, patch
8
+ from langchain_core.messages import ToolMessage
9
+
10
+ from aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input import (
11
+ download_biorxiv_paper,
12
+ )
13
+
14
+
15
+ class TestDownloadBiorxivPaper(unittest.TestCase):
16
+ """Tests for the download_biorxiv_paper tool."""
17
+
18
+ @patch(
19
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
20
+ )
21
+ @patch(
22
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
23
+ )
24
+ @patch(
25
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
26
+ )
27
+ def test_download_biorxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
28
+ """Test successful metadata and PDF URL retrieval."""
29
+ dummy_cfg = MagicMock()
30
+ dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
31
+ dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
32
+ mock_compose.return_value = dummy_cfg
33
+ mock_initialize.return_value.__enter__.return_value = None
34
+
35
+ doi = "10.1101/2025.05.13.653102"
36
+
37
+ dummy_response = MagicMock()
38
+ dummy_response.status_code = 200
39
+ dummy_response.raise_for_status = MagicMock()
40
+ dummy_response.json.return_value = {
41
+ "collection": [
42
+ {
43
+ "title": "Sample BioRxiv Paper",
44
+ "authors": "Author One; Author Two",
45
+ "abstract": "This is a bioRxiv abstract.",
46
+ "date": "2025-04-25",
47
+ "doi": doi,
48
+ "link": f"https://www.biorxiv.org/content/{doi}.full.pdf"
49
+ }
50
+ ]
51
+ }
52
+ mock_get.return_value = dummy_response
53
+
54
+ tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
55
+ result = download_biorxiv_paper.run(tool_input)
56
+ update = result.update
57
+
58
+ self.assertIn("article_data", update)
59
+ self.assertIn(doi, update["article_data"])
60
+ metadata = update["article_data"][doi]
61
+ self.assertEqual(metadata["Title"], "Sample BioRxiv Paper")
62
+ self.assertEqual(metadata["Authors"], "Author One; Author Two")
63
+ self.assertEqual(metadata["Abstract"], "This is a bioRxiv abstract.")
64
+ self.assertEqual(metadata["Publication Date"], "2025-04-25")
65
+ self.assertEqual(metadata["URL"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
66
+ self.assertEqual(metadata["pdf_url"], f"https://www.biorxiv.org/content/{doi}.full.pdf")
67
+ self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
68
+ self.assertEqual(metadata["source"], "biorxiv")
69
+ self.assertEqual(metadata["biorxiv_id"], doi)
70
+
71
+ self.assertTrue(len(update["messages"]) >= 1)
72
+ self.assertIsInstance(update["messages"][0], ToolMessage)
73
+ self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
74
+
75
+ @patch(
76
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
77
+ )
78
+ @patch(
79
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
80
+ )
81
+ @patch(
82
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
83
+ )
84
+ def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
85
+ """Test that a ValueError is raised when the response has no 'collection' entries."""
86
+ dummy_cfg = MagicMock()
87
+ dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
88
+ dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
89
+ mock_compose.return_value = dummy_cfg
90
+ mock_initialize.return_value.__enter__.return_value = None
91
+
92
+ dummy_response = MagicMock()
93
+ dummy_response.status_code = 200
94
+ dummy_response.raise_for_status = MagicMock()
95
+ dummy_response.json.return_value = {} # No entry
96
+ mock_get.return_value = dummy_response
97
+
98
+ doi = "10.1101/2025.05.13.653102"
99
+ tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
100
+
101
+ with self.assertRaises(ValueError) as context:
102
+ download_biorxiv_paper.run(tool_input)
103
+
104
+ self.assertEqual(str(context.exception), f"No metadata found for DOI: {doi}")
105
+
106
+ @patch(
107
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.initialize"
108
+ )
109
+ @patch(
110
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.hydra.compose"
111
+ )
112
+ @patch(
113
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_biorxiv_input.requests.get"
114
+ )
115
+ def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
116
+ """Test fallback to DOI-based PDF URL construction when 'link' is missing."""
117
+ dummy_cfg = MagicMock()
118
+ dummy_cfg.tools.download_biorxiv_paper.api_url = "http://dummy.biorxiv.org/api"
119
+ dummy_cfg.tools.download_biorxiv_paper.request_timeout = 10
120
+ mock_compose.return_value = dummy_cfg
121
+ mock_initialize.return_value.__enter__.return_value = None
122
+
123
+ doi = "10.1101/2025.05.13.653102"
124
+
125
+ dummy_response = MagicMock()
126
+ dummy_response.status_code = 200
127
+ dummy_response.raise_for_status = MagicMock()
128
+ dummy_response.json.return_value = {
129
+ "collection": [
130
+ {
131
+ "title": "Sample Biorxiv Paper",
132
+ "authors": "Author One; Author Two",
133
+ "abstract": "This is a BioRxiv abstract.",
134
+ "date": "2025-04-25",
135
+ "doi": doi
136
+ # 'link' is intentionally omitted
137
+ }
138
+ ]
139
+ }
140
+ mock_get.return_value = dummy_response
141
+
142
+ tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
143
+ result = download_biorxiv_paper.run(tool_input)
144
+ update = result.update
145
+ metadata = update["article_data"][doi]
146
+
147
+ # Assert that the PDF URL was constructed from DOI
148
+ expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
149
+ expected_url = f"https://www.biorxiv.org/content/10.1101/{expected_suffix}.full.pdf"
150
+
151
+ self.assertEqual(metadata["pdf_url"], expected_url)
@@ -0,0 +1,151 @@
1
+ """
2
+ Unit tests for medRxiv paper downloading functionality, including:
3
+ - download_medrxiv_paper tool function.
4
+ """
5
+
6
+ import unittest
7
+ from unittest.mock import MagicMock, patch
8
+ from langchain_core.messages import ToolMessage
9
+
10
+ from aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input import (
11
+ download_medrxiv_paper,
12
+ )
13
+
14
+
15
+ class TestDownloadMedrxivPaper(unittest.TestCase):
16
+ """Tests for the download_medrxiv_paper tool."""
17
+
18
+ @patch(
19
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
20
+ )
21
+ @patch(
22
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
23
+ )
24
+ @patch(
25
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
26
+ )
27
+ def test_download_medrxiv_paper_success(self, mock_get, mock_compose, mock_initialize):
28
+ """Test successful metadata and PDF URL retrieval."""
29
+ dummy_cfg = MagicMock()
30
+ dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
31
+ dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
32
+ mock_compose.return_value = dummy_cfg
33
+ mock_initialize.return_value.__enter__.return_value = None
34
+
35
+ doi = "10.1101/2025.04.25.25326432"
36
+
37
+ dummy_response = MagicMock()
38
+ dummy_response.status_code = 200
39
+ dummy_response.raise_for_status = MagicMock()
40
+ dummy_response.json.return_value = {
41
+ "collection": [
42
+ {
43
+ "title": "Sample Medrxiv Paper",
44
+ "authors": "Author One; Author Two",
45
+ "abstract": "This is a medRxiv abstract.",
46
+ "date": "2025-04-25",
47
+ "doi": doi,
48
+ "link": f"https://www.medrxiv.org/content/{doi}.full.pdf"
49
+ }
50
+ ]
51
+ }
52
+ mock_get.return_value = dummy_response
53
+
54
+ tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
55
+ result = download_medrxiv_paper.run(tool_input)
56
+ update = result.update
57
+
58
+ self.assertIn("article_data", update)
59
+ self.assertIn(doi, update["article_data"])
60
+ metadata = update["article_data"][doi]
61
+ self.assertEqual(metadata["Title"], "Sample Medrxiv Paper")
62
+ self.assertEqual(metadata["Authors"], "Author One; Author Two")
63
+ self.assertEqual(metadata["Abstract"], "This is a medRxiv abstract.")
64
+ self.assertEqual(metadata["Publication Date"], "2025-04-25")
65
+ self.assertEqual(metadata["URL"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
66
+ self.assertEqual(metadata["pdf_url"], f"https://www.medrxiv.org/content/{doi}.full.pdf")
67
+ self.assertEqual(metadata["filename"], f"{doi.rsplit('/', maxsplit=1)[-1]}.pdf")
68
+ self.assertEqual(metadata["source"], "medrxiv")
69
+ self.assertEqual(metadata["medrxiv_id"], doi)
70
+
71
+ self.assertTrue(len(update["messages"]) >= 1)
72
+ self.assertIsInstance(update["messages"][0], ToolMessage)
73
+ self.assertIn("Successfully retrieved metadata and PDF URL", update["messages"][0].content)
74
+
75
+ @patch(
76
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
77
+ )
78
+ @patch(
79
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
80
+ )
81
+ @patch(
82
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
83
+ )
84
+ def test_no_entry_found(self, mock_get, mock_compose, mock_initialize):
85
+ """Test that a ValueError is raised when the response has no 'collection' entries."""
86
+ dummy_cfg = MagicMock()
87
+ dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
88
+ dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
89
+ mock_compose.return_value = dummy_cfg
90
+ mock_initialize.return_value.__enter__.return_value = None
91
+
92
+ dummy_response = MagicMock()
93
+ dummy_response.status_code = 200
94
+ dummy_response.raise_for_status = MagicMock()
95
+ dummy_response.json.return_value = {} # No entry
96
+ mock_get.return_value = dummy_response
97
+
98
+ doi = "10.1101/2025.04.25.25326432"
99
+ tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
100
+
101
+ with self.assertRaises(ValueError) as context:
102
+ download_medrxiv_paper.run(tool_input)
103
+
104
+ self.assertEqual(str(context.exception), f"No entry found for medRxiv ID {doi}")
105
+
106
+ @patch(
107
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.initialize"
108
+ )
109
+ @patch(
110
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.hydra.compose"
111
+ )
112
+ @patch(
113
+ "aiagents4pharma.talk2scholars.tools.paper_download.download_medrxiv_input.requests.get"
114
+ )
115
+ def test_no_pdf_url_found(self, mock_get, mock_compose, mock_initialize):
116
+ """Test fallback to DOI-based PDF URL construction when 'link' is missing."""
117
+ dummy_cfg = MagicMock()
118
+ dummy_cfg.tools.download_medrxiv_paper.api_url = "http://dummy.medrxiv.org/api"
119
+ dummy_cfg.tools.download_medrxiv_paper.request_timeout = 10
120
+ mock_compose.return_value = dummy_cfg
121
+ mock_initialize.return_value.__enter__.return_value = None
122
+
123
+ doi = "10.1101/2025.04.25.25326432"
124
+
125
+ dummy_response = MagicMock()
126
+ dummy_response.status_code = 200
127
+ dummy_response.raise_for_status = MagicMock()
128
+ dummy_response.json.return_value = {
129
+ "collection": [
130
+ {
131
+ "title": "Sample Medrxiv Paper",
132
+ "authors": "Author One; Author Two",
133
+ "abstract": "This is a medRxiv abstract.",
134
+ "date": "2025-04-25",
135
+ "doi": doi
136
+ # 'link' is intentionally omitted
137
+ }
138
+ ]
139
+ }
140
+ mock_get.return_value = dummy_response
141
+
142
+ tool_input = {"doi": doi, "tool_call_id": "test_tool_id"}
143
+ result = download_medrxiv_paper.run(tool_input)
144
+ update = result.update
145
+ metadata = update["article_data"][doi]
146
+
147
+ # Assert that the PDF URL was constructed from DOI
148
+ expected_suffix = doi.rsplit('/', maxsplit=1)[-1]
149
+ expected_url = f"https://www.medrxiv.org/content/10.1101/{expected_suffix}.full.pdf"
150
+
151
+ self.assertEqual(metadata["pdf_url"], expected_url)
@@ -3,11 +3,14 @@ Unit tests for question_and_answer tool functionality.
3
3
  """
4
4
 
5
5
  import unittest
6
+ from types import SimpleNamespace
6
7
  from unittest.mock import MagicMock, patch
7
8
 
8
9
  from langchain_core.documents import Document
9
10
  from langchain_core.embeddings import Embeddings
11
+ from langchain_core.messages import ToolMessage
10
12
 
13
+ import aiagents4pharma.talk2scholars.tools.pdf.question_and_answer as qa_module
11
14
  from aiagents4pharma.talk2scholars.tools.pdf.question_and_answer import (
12
15
  Vectorstore,
13
16
  generate_answer,
@@ -145,8 +148,9 @@ class TestQuestionAndAnswerTool(unittest.TestCase):
145
148
 
146
149
  vector_store = Vectorstore(embedding_model=mock_embedding_model)
147
150
  vector_store.vector_store = True
151
+ # Add a document chunk with required metadata including chunk_id
148
152
  vector_store.documents["test_doc"] = Document(
149
- page_content="Test content", metadata={"paper_id": "test_paper"}
153
+ page_content="Test content", metadata={"paper_id": "test_paper", "chunk_id": 0}
150
154
  )
151
155
 
152
156
  results = vector_store.retrieve_relevant_chunks(query="test query")
@@ -793,8 +797,9 @@ class TestMissingState(unittest.TestCase):
793
797
 
794
798
  vector_store = Vectorstore(embedding_model=mock_embedding_model)
795
799
  vector_store.vector_store = True
796
- doc1 = Document(page_content="Doc 1", metadata={"paper_id": "paper1"})
797
- doc2 = Document(page_content="Doc 2", metadata={"paper_id": "paper2"})
800
+ # Add document chunks with necessary metadata including chunk_ids
801
+ doc1 = Document(page_content="Doc 1", metadata={"paper_id": "paper1", "chunk_id": 0})
802
+ doc2 = Document(page_content="Doc 2", metadata={"paper_id": "paper2", "chunk_id": 1})
798
803
  vector_store.documents = {"doc1": doc1, "doc2": doc2}
799
804
 
800
805
  results = vector_store.retrieve_relevant_chunks(
@@ -820,3 +825,54 @@ class TestMissingState(unittest.TestCase):
820
825
  query="test", paper_ids=["nonexistent_id"]
821
826
  )
822
827
  assert results == []
828
+
829
+ @patch(
830
+ "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.load_hydra_config"
831
+ )
832
+ @patch(
833
+ "aiagents4pharma.talk2scholars.tools.pdf.question_and_answer.generate_answer"
834
+ )
835
+ def test_prebuilt_vector_store_branch(self, mock_generate, mock_load_config):
836
+ """Test question_and_answer tool with a shared pre-built vector store branch."""
837
+ # Mock configuration for tool-level thresholds
838
+ config = SimpleNamespace(top_k_papers=1, top_k_chunks=1)
839
+ mock_load_config.return_value = config
840
+ # Mock generate_answer to return a simple response
841
+ mock_generate.return_value = {"output_text": "Answer", "papers_used": ["p1"]}
842
+
843
+ # Prepare a dummy pre-built vector store
844
+ dummy_vs = SimpleNamespace(
845
+ loaded_papers=set(),
846
+ vector_store=True,
847
+ retrieve_relevant_chunks=lambda *_args, **_kwargs: [
848
+ Document(page_content="chunk", metadata={"paper_id": "p1"})
849
+ ],
850
+ )
851
+ # Override the module-level prebuilt_vector_store
852
+ qa_module.prebuilt_vector_store = dummy_vs
853
+
854
+ # Prepare state with required models and article_data
855
+ state = {
856
+ "text_embedding_model": MagicMock(),
857
+ "llm_model": MagicMock(),
858
+ "article_data": {"p1": {"source": "upload"}},
859
+ }
860
+
861
+ # Invoke the tool-level function via .run with appropriate input schema
862
+ input_data = {
863
+ "question": "What?",
864
+ "paper_ids": None,
865
+ "use_all_papers": False,
866
+ "tool_call_id": "testid",
867
+ "state": state,
868
+ }
869
+ result = qa_module.question_and_answer.run(input_data)
870
+
871
+ # Ensure the prebuilt branch was used and a Command is returned
872
+ self.assertTrue(hasattr(result, "update"))
873
+ messages = result.update.get("messages", [])
874
+ self.assertEqual(len(messages), 1)
875
+ self.assertIsInstance(messages[0], ToolMessage)
876
+
877
+ # Clean up global override
878
+ qa_module.prebuilt_vector_store = None
@@ -0,0 +1,110 @@
1
+ """
2
+ Unit tests for Zotero read helper download branches.
3
+ """
4
+
5
+ import unittest
6
+ from types import SimpleNamespace
7
+ from unittest.mock import MagicMock, patch
8
+
9
+ from aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper import (
10
+ ZoteroSearchData,
11
+ )
12
+
13
+ # Dummy Hydra configuration for tests
14
+ dummy_zotero_read_config = SimpleNamespace(
15
+ user_id="dummy_user",
16
+ library_type="user",
17
+ api_key="dummy_api_key",
18
+ zotero=SimpleNamespace(
19
+ max_limit=5,
20
+ filter_item_types=["journalArticle", "conferencePaper"],
21
+ filter_excluded_types=["attachment", "note"],
22
+ ),
23
+ )
24
+ dummy_cfg = SimpleNamespace(tools=SimpleNamespace(zotero_read=dummy_zotero_read_config))
25
+
26
+
27
+ class TestReadHelperDownloadsFalse(unittest.TestCase):
28
+ """Tests for read_helper download_pdfs=False branches."""
29
+
30
+ @patch(
31
+ "aiagents4pharma.talk2scholars.tools.zotero.utils.zotero_path.get_item_collections"
32
+ )
33
+ @patch("aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.zotero.Zotero")
34
+ @patch("aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.hydra.compose")
35
+ @patch(
36
+ "aiagents4pharma.talk2scholars.tools.zotero.utils.read_helper.hydra.initialize"
37
+ )
38
+ def test_download_pdfs_false_branches(
39
+ self,
40
+ mock_hydra_init,
41
+ mock_hydra_compose,
42
+ mock_zotero_class,
43
+ mock_get_item_collections,
44
+ ):
45
+ """Ensure attachment_key and filename are set when download_pdfs=False."""
46
+ # Setup Hydra mocks
47
+ mock_hydra_compose.return_value = dummy_cfg
48
+ mock_hydra_init.return_value.__enter__.return_value = None
49
+
50
+ # Fake Zotero items: one paper with child PDF, one orphaned PDF
51
+ fake_zot = MagicMock()
52
+ fake_items = [
53
+ {
54
+ "data": {
55
+ "key": "paper1",
56
+ "title": "P1",
57
+ "abstractNote": "A1",
58
+ "date": "2021",
59
+ "url": "u1",
60
+ "itemType": "journalArticle",
61
+ }
62
+ },
63
+ {
64
+ "data": {
65
+ "key": "attach2",
66
+ "itemType": "attachment",
67
+ "contentType": "application/pdf",
68
+ "filename": "file2.pdf",
69
+ }
70
+ },
71
+ ]
72
+ fake_zot.items.return_value = fake_items
73
+ # children for paper1
74
+ fake_child = {
75
+ "data": {
76
+ "key": "attach1",
77
+ "filename": "file1.pdf",
78
+ "contentType": "application/pdf",
79
+ }
80
+ }
81
+
82
+ def children_side_effect(key):
83
+ return [fake_child] if key == "paper1" else []
84
+
85
+ fake_zot.children.side_effect = children_side_effect
86
+ mock_zotero_class.return_value = fake_zot
87
+ mock_get_item_collections.return_value = {"paper1": ["/C1"], "attach2": ["/C2"]}
88
+
89
+ # Instantiate with download_pdfs=False
90
+ search = ZoteroSearchData(
91
+ query="test",
92
+ only_articles=False,
93
+ limit=2,
94
+ tool_call_id="id",
95
+ download_pdfs=False,
96
+ )
97
+ search.process_search()
98
+ data = search.get_search_results()["article_data"]
99
+
100
+ # Regular paper1 should have attachment_key and filename, no pdf_url
101
+ self.assertIn("paper1", data)
102
+ self.assertEqual(data["paper1"]["attachment_key"], "attach1")
103
+ self.assertEqual(data["paper1"]["filename"], "file1.pdf")
104
+ self.assertNotIn("pdf_url", data["paper1"])
105
+
106
+ # Orphan attach2 should have attachment_key and filename, no pdf_url
107
+ self.assertIn("attach2", data)
108
+ self.assertEqual(data["attach2"]["attachment_key"], "attach2")
109
+ self.assertEqual(data["attach2"]["filename"], "file2.pdf")
110
+ self.assertNotIn("pdf_url", data["attach2"])