aiagents4pharma 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +18 -10
  2. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -6
  3. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -10
  4. aiagents4pharma/talk2scholars/agents/zotero_agent.py +9 -7
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +18 -9
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +2 -2
  7. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +9 -15
  8. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +1 -0
  9. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +6 -1
  10. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +7 -1
  11. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +6 -1
  12. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -1
  13. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  14. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -1
  15. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +84 -53
  16. aiagents4pharma/talk2scholars/tests/test_main_agent.py +24 -0
  17. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +79 -15
  18. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +13 -10
  19. aiagents4pharma/talk2scholars/tests/test_s2_multi.py +27 -4
  20. aiagents4pharma/talk2scholars/tests/test_s2_search.py +19 -3
  21. aiagents4pharma/talk2scholars/tests/test_s2_single.py +27 -3
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +3 -2
  23. aiagents4pharma/talk2scholars/tests/test_zotero_human_in_the_loop.py +273 -0
  24. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +419 -1
  25. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +25 -18
  26. aiagents4pharma/talk2scholars/tests/test_zotero_write.py +123 -588
  27. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +2 -0
  28. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +11 -4
  29. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +5 -1
  30. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -26
  31. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +46 -22
  32. aiagents4pharma/talk2scholars/tools/s2/query_results.py +1 -1
  33. aiagents4pharma/talk2scholars/tools/s2/search.py +40 -12
  34. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +42 -16
  35. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -0
  36. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +125 -0
  37. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +35 -20
  38. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +198 -0
  39. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +86 -118
  40. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/METADATA +4 -3
  41. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/RECORD +44 -41
  42. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/WHEEL +1 -1
  43. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info/licenses}/LICENSE +0 -0
  44. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,6 @@ Unit tests for question_and_answer tool functionality.
3
3
  """
4
4
 
5
5
  from langchain.docstore.document import Document
6
-
7
6
  from ..tools.pdf import question_and_answer
8
7
  from ..tools.pdf.question_and_answer import (
9
8
  extract_text_from_pdf_data,
@@ -11,6 +10,8 @@ from ..tools.pdf.question_and_answer import (
11
10
  generate_answer,
12
11
  )
13
12
 
13
+ # pylint: disable=redefined-outer-name,too-few-public-methods
14
+
14
15
 
15
16
  def test_extract_text_from_pdf_data():
16
17
  """
@@ -46,14 +47,14 @@ DUMMY_PDF_BYTES = (
46
47
  )
47
48
 
48
49
 
49
- def fake_generate_answer(question, pdf_bytes, _llm_model):
50
+ def fake_generate_answer2(question, pdf_url, _text_embedding_model):
50
51
  """
51
- Fake generate_answer function to bypass external dependencies.
52
+ Fake generate_answer2 function to bypass external dependencies.
52
53
  """
53
54
  return {
54
55
  "answer": "Mock answer",
55
56
  "question": question,
56
- "pdf_bytes_length": len(pdf_bytes),
57
+ "pdf_url": pdf_url,
57
58
  }
58
59
 
59
60
 
@@ -61,30 +62,31 @@ def test_question_and_answer_tool_success(monkeypatch):
61
62
  """
62
63
  Test that question_and_answer_tool returns the expected result on success.
63
64
  """
64
- monkeypatch.setattr(
65
- question_and_answer, "generate_answer", fake_generate_answer
66
- )
67
- # Create a valid state with pdf_data containing both pdf_object and pdf_url,
68
- # and include a dummy llm_model.
65
+ # Patch generate_answer2 because the tool calls that.
66
+ monkeypatch.setattr(question_and_answer, "generate_answer2", fake_generate_answer2)
67
+ dummy_text_embedding_model = object() # Provide a dummy text embedding model.
68
+ # Create a valid state with pdf_data and include dummy llm_model and text_embedding_model.
69
69
  state = {
70
70
  "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
71
71
  "llm_model": object(), # Provide a dummy LLM model instance.
72
+ "text_embedding_model": dummy_text_embedding_model,
72
73
  }
73
74
  question = "What is in the PDF?"
74
- # Call the underlying function directly via .func to bypass the StructuredTool wrapper.
75
75
  result = question_and_answer_tool.func(
76
76
  question=question, tool_call_id="test_call_id", state=state
77
77
  )
78
78
  assert result["answer"] == "Mock answer"
79
79
  assert result["question"] == question
80
- assert result["pdf_bytes_length"] == len(DUMMY_PDF_BYTES)
80
+ assert result["pdf_url"] == "http://dummy.url"
81
81
 
82
82
 
83
83
  def test_question_and_answer_tool_no_pdf_data():
84
84
  """
85
85
  Test that an error is returned if the state lacks the 'pdf_data' key.
86
86
  """
87
- state = {} # pdf_data key is missing.
87
+ state = {
88
+ "text_embedding_model": object(), # Added to avoid KeyError.
89
+ }
88
90
  question = "Any question?"
89
91
  result = question_and_answer_tool.func(
90
92
  question=question, tool_call_id="test_call_id", state=state
@@ -97,7 +99,11 @@ def test_question_and_answer_tool_no_pdf_object():
97
99
  """
98
100
  Test that an error is returned if the pdf_object is missing within pdf_data.
99
101
  """
100
- state = {"pdf_data": {"pdf_object": None}}
102
+ state = {
103
+ "pdf_data": {"pdf_object": None},
104
+ "text_embedding_model": object(), # Added to avoid KeyError.
105
+ "llm_model": object(), # Dummy LLM model.
106
+ }
101
107
  question = "Any question?"
102
108
  result = question_and_answer_tool.func(
103
109
  question=question, tool_call_id="test_call_id", state=state
@@ -114,8 +120,9 @@ def test_question_and_answer_tool_no_llm_model():
114
120
  Test that an error is returned if the LLM model is missing in the state.
115
121
  """
116
122
  state = {
117
- "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"}
118
- # Note: llm_model is intentionally omitted.
123
+ "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
124
+ "text_embedding_model": object(), # Added to avoid KeyError.
125
+ # llm_model is intentionally omitted.
119
126
  }
120
127
  question = "What is in the PDF?"
121
128
  result = question_and_answer_tool.func(
@@ -124,6 +131,57 @@ def test_question_and_answer_tool_no_llm_model():
124
131
  assert result == {"error": "No LLM model found in state."}
125
132
 
126
133
 
134
+ def test_generate_answer2_actual(monkeypatch):
135
+ """
136
+ Test the actual behavior of generate_answer2 using fake dependencies
137
+ to exercise its internal logic.
138
+ """
139
+
140
+ # Create a fake PyPDFLoader that does not perform a network call.
141
+ class FakePyPDFLoader:
142
+ """class to fake PyPDFLoader"""
143
+
144
+ def __init__(self, file_path, headers=None):
145
+ """Initialize the fake PyPDFLoader."""
146
+ self.file_path = file_path
147
+ self.headers = headers
148
+
149
+ def lazy_load(self):
150
+ """Return a list with one fake Document."""
151
+ # Return a list with one fake Document.
152
+ return [Document(page_content="Answer for Test question?")]
153
+
154
+ monkeypatch.setattr(question_and_answer, "PyPDFLoader", FakePyPDFLoader)
155
+
156
+ # Create a fake vector store that returns a controlled result for similarity_search.
157
+ class FakeVectorStore:
158
+ """Fake vector store for similarity search."""
159
+
160
+ def similarity_search(self, query):
161
+ """Return a list with one Document containing our expected answer."""
162
+ # Return a list with one Document containing our expected answer.
163
+ return [Document(page_content=f"Answer for {query}")]
164
+
165
+ monkeypatch.setattr(
166
+ question_and_answer.InMemoryVectorStore,
167
+ "from_documents",
168
+ lambda docs, emb: FakeVectorStore(),
169
+ )
170
+
171
+ # Provide a dummy text embedding model.
172
+ dummy_text_embedding_model = object()
173
+ question = "Test question?"
174
+ pdf_url = "http://dummy.pdf"
175
+
176
+ # Call generate_answer2 without triggering an actual network call.
177
+ result = question_and_answer.generate_answer2(
178
+ question, pdf_url, dummy_text_embedding_model
179
+ )
180
+ # The function should join the page content from the similarity search.
181
+ expected = "Answer for Test question?"
182
+ assert result == expected
183
+
184
+
127
185
  def test_generate_answer(monkeypatch):
128
186
  """
129
187
  Test generate_answer function with controlled monkeypatched dependencies.
@@ -141,12 +199,15 @@ def test_generate_answer(monkeypatch):
141
199
  """
142
200
  Fake Annoy.from_documents function that returns a fake vector store.
143
201
  """
202
+
144
203
  # pylint: disable=too-few-public-methods, unused-argument
145
204
  class FakeVectorStore:
146
205
  """Fake vector store for similarity search."""
206
+
147
207
  def similarity_search(self, _question, k):
148
208
  """Return a list with a single dummy Document."""
149
209
  return [Document(page_content="dummy content")]
210
+
150
211
  return FakeVectorStore()
151
212
 
152
213
  monkeypatch.setattr(
@@ -157,9 +218,11 @@ def test_generate_answer(monkeypatch):
157
218
  """
158
219
  Fake load_qa_chain function that returns a fake QA chain.
159
220
  """
221
+
160
222
  # pylint: disable=too-few-public-methods, unused-argument
161
223
  class FakeChain:
162
224
  """Fake QA chain for testing generate_answer."""
225
+
163
226
  def invoke(self, **kwargs):
164
227
  """
165
228
  Fake invoke method that returns a mock answer.
@@ -169,6 +232,7 @@ def test_generate_answer(monkeypatch):
169
232
  "answer": "real mock answer",
170
233
  "question": input_data.get("question"),
171
234
  }
235
+
172
236
  return FakeChain()
173
237
 
174
238
  monkeypatch.setattr(question_and_answer, "load_qa_chain", fake_load_qa_chain)
@@ -22,9 +22,9 @@ def mock_router():
22
22
  """Creates a mock supervisor router that routes based on keyword matching."""
23
23
 
24
24
  def mock_supervisor_node(state):
25
+ """Mock supervisor node that routes based on keyword matching."""
25
26
  query = state["messages"][-1].content.lower()
26
-
27
- # Expanded keyword matching for S2 Agent
27
+ # Define keywords for each sub-agent.
28
28
  s2_keywords = [
29
29
  "paper",
30
30
  "research",
@@ -34,13 +34,19 @@ def mock_router():
34
34
  "references",
35
35
  ]
36
36
  zotero_keywords = ["zotero", "library", "saved papers", "academic library"]
37
+ pdf_keywords = ["pdf", "document", "read pdf"]
38
+ paper_download_keywords = ["download", "arxiv", "fetch paper", "paper download"]
37
39
 
40
+ # Priority ordering: Zotero, then paper download, then PDF, then S2.
38
41
  if any(keyword in query for keyword in zotero_keywords):
39
42
  return Command(goto="zotero_agent")
43
+ if any(keyword in query for keyword in paper_download_keywords):
44
+ return Command(goto="paper_download_agent")
45
+ if any(keyword in query for keyword in pdf_keywords):
46
+ return Command(goto="pdf_agent")
40
47
  if any(keyword in query for keyword in s2_keywords):
41
48
  return Command(goto="s2_agent")
42
-
43
- # If no match, default to ending the conversation
49
+ # Default to end if no keyword matches.
44
50
  return Command(goto=END)
45
51
 
46
52
  return mock_supervisor_node
@@ -55,10 +61,9 @@ def mock_router():
55
61
  ("Fetch my academic library.", "zotero_agent"),
56
62
  ("Retrieve citations.", "s2_agent"),
57
63
  ("Can you get journal articles?", "s2_agent"),
58
- (
59
- "Completely unrelated query.",
60
- "__end__",
61
- ), # NEW: Should trigger the `END` case
64
+ ("I want to read the PDF document.", "pdf_agent"),
65
+ ("Download the paper from arxiv.", "paper_download_agent"),
66
+ ("Completely unrelated query.", "__end__"),
62
67
  ],
63
68
  )
64
69
  def test_routing_logic(mock_state, mock_router, user_query, expected_agent):
@@ -66,6 +71,4 @@ def test_routing_logic(mock_state, mock_router, user_query, expected_agent):
66
71
  mock_state["messages"].append(HumanMessage(content=user_query))
67
72
  result = mock_router(mock_state)
68
73
 
69
- print(f"\nDEBUG: Query '{user_query}' routed to: {result.goto}")
70
-
71
74
  assert result.goto == expected_agent, f"Failed for query: {user_query}"
@@ -94,7 +94,7 @@ def dummy_requests_post_success(url, headers, params, data, timeout):
94
94
  {
95
95
  "paperId": "paperA",
96
96
  "title": "Multi Rec Paper A",
97
- "authors": ["Author X"],
97
+ "authors": [{"name": "Author X", "authorId": "AX"}],
98
98
  "year": 2019,
99
99
  "citationCount": 12,
100
100
  "url": "http://paperA",
@@ -103,7 +103,7 @@ def dummy_requests_post_success(url, headers, params, data, timeout):
103
103
  {
104
104
  "paperId": "paperB",
105
105
  "title": "Multi Rec Paper B",
106
- "authors": ["Author Y"],
106
+ "authors": [{"name": "Author Y", "authorId": "AY"}],
107
107
  "year": 2020,
108
108
  "citationCount": 18,
109
109
  "url": "http://paperB",
@@ -112,7 +112,7 @@ def dummy_requests_post_success(url, headers, params, data, timeout):
112
112
  {
113
113
  "paperId": "paperC",
114
114
  "title": "Multi Rec Paper C",
115
- "authors": None, # This one should be filtered out.
115
+ "authors": None, # This paper should be filtered out.
116
116
  "year": 2021,
117
117
  "citationCount": 25,
118
118
  "url": "http://paperC",
@@ -277,6 +277,29 @@ def test_multi_paper_rec_requests_exception(monkeypatch):
277
277
  }
278
278
  with pytest.raises(
279
279
  RuntimeError,
280
- match="Failed to connect to Semantic Scholar API. Please retry the same query.",
280
+ match="Failed to connect to Semantic Scholar API after 10 attempts."
281
+ "Please retry the same query.",
282
+ ):
283
+ get_multi_paper_recommendations.run(input_data)
284
+
285
+
286
+ def test_multi_paper_rec_no_response(monkeypatch):
287
+ """
288
+ Test that get_multi_paper_recommendations raises a RuntimeError when no response is obtained.
289
+ This is simulated by patching 'range' in the underlying function's globals to
290
+ return an empty iterator,
291
+ so that the for loop does not iterate and response remains None.
292
+ """
293
+ # Patch 'range' in the underlying function's globals (accessed via .func.__globals__)
294
+ monkeypatch.setitem(
295
+ get_multi_paper_recommendations.func.__globals__, "range", lambda x: iter([])
296
+ )
297
+ tool_call_id = "test_tool_call_id"
298
+ input_data = {
299
+ "paper_ids": ["p1", "p2"],
300
+ "tool_call_id": tool_call_id,
301
+ }
302
+ with pytest.raises(
303
+ RuntimeError, match="Failed to obtain a response from the Semantic Scholar API."
281
304
  ):
282
305
  get_multi_paper_recommendations.run(input_data)
@@ -85,7 +85,7 @@ def dummy_requests_get_success(url, params, timeout):
85
85
  {
86
86
  "paperId": "1",
87
87
  "title": "Paper 1",
88
- "authors": ["Author A"],
88
+ "authors": [{"name": "Author A", "authorId": "A1"}],
89
89
  "year": 2020,
90
90
  "citationCount": 10,
91
91
  "url": "http://paper1",
@@ -94,7 +94,7 @@ def dummy_requests_get_success(url, params, timeout):
94
94
  {
95
95
  "paperId": "2",
96
96
  "title": "Paper 2",
97
- "authors": ["Author B"],
97
+ "authors": [{"name": "Author B", "authorId": "B1"}],
98
98
  "year": 2021,
99
99
  "citationCount": 20,
100
100
  "url": "http://paper2",
@@ -256,7 +256,8 @@ def test_search_tool_requests_exception(monkeypatch):
256
256
  tool_call_id = "test_tool_call_id"
257
257
  with pytest.raises(
258
258
  RuntimeError,
259
- match="Failed to connect to Semantic Scholar API. Please retry the same query.",
259
+ match="Failed to connect to Semantic Scholar API after 10 attempts."
260
+ "Please retry the same query.",
260
261
  ):
261
262
  search_tool.run(
262
263
  {
@@ -264,3 +265,18 @@ def test_search_tool_requests_exception(monkeypatch):
264
265
  "tool_call_id": tool_call_id,
265
266
  }
266
267
  )
268
+
269
+
270
+ def test_search_tool_no_response(monkeypatch):
271
+ """
272
+ Test that search_tool raises a RuntimeError when no response is obtained.
273
+ This is simulated by patching 'range' in the original function's globals (a dict)
274
+ so that it returns an empty iterator, leaving response as None.
275
+ """
276
+ # Patch 'range' in the original function's globals using setitem.
277
+ monkeypatch.setitem(search_tool.func.__globals__, "range", lambda x: iter([]))
278
+ tool_call_id = "test_tool_call_id"
279
+ with pytest.raises(
280
+ RuntimeError, match="Failed to obtain a response from the Semantic Scholar API."
281
+ ):
282
+ search_tool.run({"query": "test", "tool_call_id": tool_call_id})
@@ -92,7 +92,7 @@ def dummy_requests_get_success(url, params, timeout):
92
92
  {
93
93
  "paperId": "paper1",
94
94
  "title": "Recommended Paper 1",
95
- "authors": ["Author A"],
95
+ "authors": [{"name": "Author A", "authorId": "A1"}],
96
96
  "year": 2020,
97
97
  "citationCount": 15,
98
98
  "url": "http://paper1",
@@ -101,7 +101,7 @@ def dummy_requests_get_success(url, params, timeout):
101
101
  {
102
102
  "paperId": "paper2",
103
103
  "title": "Recommended Paper 2",
104
- "authors": ["Author B"],
104
+ "authors": [{"name": "Author B", "authorId": "B1"}],
105
105
  "year": 2021,
106
106
  "citationCount": 25,
107
107
  "url": "http://paper2",
@@ -269,6 +269,30 @@ def test_single_paper_rec_requests_exception(monkeypatch):
269
269
  }
270
270
  with pytest.raises(
271
271
  RuntimeError,
272
- match="Failed to connect to Semantic Scholar API. Please retry the same query.",
272
+ match="Failed to connect to Semantic Scholar API after 10 attempts."
273
+ "Please retry the same query.",
274
+ ):
275
+ get_single_paper_recommendations.run(input_data)
276
+
277
+
278
+ def test_single_paper_rec_no_response(monkeypatch):
279
+ """
280
+ Test that get_single_paper_recommendations raises a RuntimeError
281
+ when no response is obtained from the API.
282
+
283
+ This is simulated by patching 'range' in the underlying function's globals
284
+ to return an empty iterator, so the for-loop never iterates and response remains None.
285
+ """
286
+ # Patch 'range' in the underlying function's globals (accessed via .func.__globals__)
287
+ monkeypatch.setitem(
288
+ get_single_paper_recommendations.func.__globals__, "range", lambda x: iter([])
289
+ )
290
+ tool_call_id = "test_tool_call_id"
291
+ input_data = {
292
+ "paper_id": "12345",
293
+ "tool_call_id": tool_call_id,
294
+ }
295
+ with pytest.raises(
296
+ RuntimeError, match="Failed to obtain a response from the Semantic Scholar API."
273
297
  ):
274
298
  get_single_paper_recommendations.run(input_data)
@@ -10,7 +10,8 @@ from langchain_openai import ChatOpenAI
10
10
  from ..agents.zotero_agent import get_app
11
11
  from ..state.state_talk2scholars import Talk2Scholars
12
12
 
13
- LLM_MODEL = ChatOpenAI(model='gpt-4o-mini', temperature=0)
13
+ LLM_MODEL = ChatOpenAI(model="gpt-4o-mini", temperature=0)
14
+
14
15
 
15
16
  @pytest.fixture(autouse=True)
16
17
  def mock_hydra_fixture():
@@ -39,7 +40,7 @@ def mock_tools_fixture():
39
40
  "retrieve_semantic_scholar_paper_id"
40
41
  ) as mock_s2_retrieve_id,
41
42
  mock.patch(
42
- "aiagents4pharma.talk2scholars.tools.zotero.zotero_read.zotero_search_tool"
43
+ "aiagents4pharma.talk2scholars.tools.zotero.zotero_read.zotero_read"
43
44
  ) as mock_zotero_query_results,
44
45
  ):
45
46
  mock_s2_display.return_value = {"result": "Mock Display Result"}