aiagents4pharma 1.30.0__py3-none-any.whl → 1.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +18 -10
  2. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -6
  3. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -10
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +18 -9
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +2 -2
  6. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +1 -0
  7. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +6 -1
  8. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +7 -1
  9. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +6 -1
  10. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -1
  11. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +4 -1
  12. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +84 -53
  13. aiagents4pharma/talk2scholars/tests/test_main_agent.py +24 -0
  14. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +79 -15
  15. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +12 -8
  16. aiagents4pharma/talk2scholars/tests/test_s2_multi.py +27 -4
  17. aiagents4pharma/talk2scholars/tests/test_s2_search.py +19 -3
  18. aiagents4pharma/talk2scholars/tests/test_s2_single.py +27 -3
  19. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +17 -10
  20. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +2 -0
  21. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +11 -4
  22. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +5 -1
  23. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -26
  24. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +46 -22
  25. aiagents4pharma/talk2scholars/tools/s2/query_results.py +1 -1
  26. aiagents4pharma/talk2scholars/tools/s2/search.py +40 -12
  27. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +42 -16
  28. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +33 -16
  29. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +39 -7
  30. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/METADATA +2 -2
  31. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/RECORD +34 -34
  32. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/WHEEL +1 -1
  33. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/LICENSE +0 -0
  34. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,14 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  """
4
- Main agent for the talk2scholars app using ReAct pattern.
4
+ Main agent module for initializing and running the Talk2Scholars application.
5
5
 
6
- This module implements a hierarchical agent system where a supervisor agent
7
- routes queries to specialized sub-agents. It follows the LangGraph patterns
8
- for multi-agent systems and implements proper state management.
6
+ This module sets up the hierarchical agent system using LangGraph and integrates
7
+ various sub-agents for handling different tasks such as semantic scholar, zotero,
8
+ PDF processing, and paper downloading.
9
+
10
+ Functions:
11
+ - get_app: Initializes and returns the LangGraph-based hierarchical agent system.
9
12
  """
10
13
 
11
14
  import logging
@@ -16,6 +19,8 @@ from langchain_core.language_models.chat_models import BaseChatModel
16
19
  from langgraph.checkpoint.memory import MemorySaver
17
20
  from ..agents.s2_agent import get_app as get_app_s2
18
21
  from ..agents.zotero_agent import get_app as get_app_zotero
22
+ from ..agents.pdf_agent import get_app as get_app_pdf
23
+ from ..agents.paper_download_agent import get_app as get_app_paper_download
19
24
  from ..state.state_talk2scholars import Talk2Scholars
20
25
 
21
26
  # Initialize logger
@@ -43,12 +48,13 @@ def get_app(uniq_id, llm_model: BaseChatModel):
43
48
  >>> app = get_app("thread_123")
44
49
  >>> result = app.invoke(initial_state)
45
50
  """
46
- if llm_model.model_name == "gpt-4o-mini":
47
- llm_model = ChatOpenAI(
48
- model="gpt-4o-mini",
49
- temperature=0,
50
- model_kwargs={"parallel_tool_calls": False},
51
- )
51
+ if hasattr(llm_model, "model_name"):
52
+ if llm_model.model_name == "gpt-4o-mini":
53
+ llm_model = ChatOpenAI(
54
+ model="gpt-4o-mini",
55
+ temperature=0,
56
+ model_kwargs={"parallel_tool_calls": False},
57
+ )
52
58
  # Load hydra configuration
53
59
  logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
54
60
  with hydra.initialize(version_base=None, config_path="../configs/"):
@@ -62,6 +68,8 @@ def get_app(uniq_id, llm_model: BaseChatModel):
62
68
  [
63
69
  get_app_s2(uniq_id, llm_model), # semantic scholar
64
70
  get_app_zotero(uniq_id, llm_model), # zotero
71
+ get_app_pdf(uniq_id, llm_model), # pdf
72
+ get_app_paper_download(uniq_id, llm_model), # paper download
65
73
  ],
66
74
  model=llm_model,
67
75
  state_schema=Talk2Scholars,
@@ -20,6 +20,7 @@ from ..tools.s2.query_results import query_results
20
20
  logging.basicConfig(level=logging.INFO)
21
21
  logger = logging.getLogger(__name__)
22
22
 
23
+
23
24
  def get_app(uniq_id, llm_model: BaseChatModel):
24
25
  """
25
26
  Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
@@ -39,14 +40,12 @@ def get_app(uniq_id, llm_model: BaseChatModel):
39
40
  with hydra.initialize(version_base=None, config_path="../configs"):
40
41
  cfg = hydra.compose(
41
42
  config_name="config",
42
- overrides=["agents/talk2scholars/paper_download_agent=default"]
43
+ overrides=["agents/talk2scholars/paper_download_agent=default"],
43
44
  )
44
45
  cfg = cfg.agents.talk2scholars.paper_download_agent
45
46
 
46
47
  # Define tools properly
47
- tools = ToolNode(
48
- [download_arxiv_paper, query_results]
49
- )
48
+ tools = ToolNode([download_arxiv_paper, query_results])
50
49
 
51
50
  # Define the model
52
51
  logger.info("Using OpenAI model %s", llm_model)
@@ -54,7 +53,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
54
53
  llm_model,
55
54
  tools=tools,
56
55
  state_schema=Talk2Scholars,
57
- prompt=cfg.prompt,
56
+ prompt=cfg.paper_download_agent,
58
57
  checkpointer=MemorySaver(),
59
58
  )
60
59
 
@@ -79,7 +78,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
79
78
  checkpointer = MemorySaver()
80
79
 
81
80
  # Compile the graph
82
- app = workflow.compile(checkpointer=checkpointer)
81
+ app = workflow.compile(checkpointer=checkpointer, name="agent_paper_download")
83
82
 
84
83
  # Logging the information and returning the app
85
84
  logger.info("Compiled the graph")
@@ -26,10 +26,7 @@ logging.basicConfig(level=logging.INFO)
26
26
  logger = logging.getLogger(__name__)
27
27
 
28
28
 
29
- def get_app(
30
- uniq_id,
31
- llm_model: BaseChatModel
32
- ):
29
+ def get_app(uniq_id, llm_model: BaseChatModel):
33
30
  """
34
31
  Initializes and returns the LangGraph application for the PDF agent.
35
32
 
@@ -40,7 +37,7 @@ def get_app(
40
37
 
41
38
  Args:
42
39
  uniq_id (str): A unique identifier for the current conversation session or thread.
43
- llm_model (BaseChatModel, optional): The language model instance to be used.
40
+ llm_model (BaseChatModel, optional): The language model instance to be used.
44
41
  Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0).
45
42
 
46
43
  Returns:
@@ -71,10 +68,7 @@ def get_app(
71
68
  Any: The response generated by the language model after processing the state.
72
69
  """
73
70
  logger.info("Creating Agent_PDF node with thread_id %s", uniq_id)
74
- response = model.invoke(
75
- state,
76
- {"configurable": {"thread_id": uniq_id}}
77
- )
71
+ response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
78
72
  return response
79
73
 
80
74
  # Define the tool node that includes the PDF QnA tool.
@@ -100,7 +94,7 @@ def get_app(
100
94
  checkpointer = MemorySaver()
101
95
 
102
96
  # Compile the graph into a runnable app.
103
- app = workflow.compile(checkpointer=checkpointer)
97
+ app = workflow.compile(checkpointer=checkpointer, name="agent_pdf")
104
98
  logger.info("Compiled the PDF agent graph.")
105
99
 
106
100
  return app
@@ -1,13 +1,22 @@
1
1
  _target_: agents.main_agent.get_app
2
2
  temperature: 0
3
3
  system_prompt: >
4
- You are the Talk2Scholars agent coordinating academic paper discovery and analysis.
4
+ You are Talk2Scholars agent coordinating academic paper discovery
5
+ and analysis with help of the following agents:
6
+ 1. Agent S2: This agent can be used to search and recommend papers
7
+ from Semantic Scholar. Use this agent when the user asks for
8
+ general paper/article searches and recommendations, or to retrieve information
9
+ from the last displayed results table or query abstract of last
10
+ displayed results.
11
+ 2. Agent Zotero: This agent can be used to retrieve, display, and query
12
+ papers/articles from the Zotero library. Use this agent only when the user
13
+ explicitly asks for papers from Zotero. This tool can also be used to
14
+ save papers in the zotero library.
15
+ 3. Agent PaperFetch: This agent can be used to download papers/articles
16
+ from ArXiv.
17
+ 4. Agent PDFQuery: This agent can be used to query contents of an
18
+ uploaded or downloaded PDF/paper/article.
5
19
 
6
- You have access to the following agents:
7
- 1. S2_agent: This agent can be used to search and recommend papers
8
- from Semantic Scholar. Use this agent when the user asks for
9
- general paper searches and recommendations.
10
- 2. Zotero_agent: This agent can be used to retrieve, display, and query
11
- papers from the Zotero library. Use this agent only when the user
12
- explicitly asks for papers from Zotero. This tool can also be used to
13
- save papers in under collections in the zotero library
20
+ Your final response should be a one sentence summary of the information
21
+ retrieved from the agents above. Do not repeat the information already
22
+ displayed to the user in the response of the agents.
@@ -15,5 +15,5 @@ s2_agent: >
15
15
  1. When user requests papers, use search/recommendation tools to find papers
16
16
  2. Use `display_results` tool to display the response from the search/recommendation tools
17
17
  3. Use `query_results` tool to query over the selected paper only when the user asks to
18
- 4. When the user wants recommendations, you can get the "paper_id" using `query_results` tool in the "last_displayed_results" key, then
19
- pass the "paper_id" to `search`, `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"
18
+ 4. When the user wants recommendations, you can get the "semantic_scholar_paper_id" using `query_results` tool in the "last_displayed_results" key, then
19
+ pass the "semantic_scholar_paper_id" to `search`, `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"
@@ -8,6 +8,7 @@ page:
8
8
  llms:
9
9
  available_models:
10
10
  - "OpenAI/gpt-4o-mini"
11
+ - "NVIDIA/llama-3.3-70b-instruct"
11
12
  # # Chat UI configuration
12
13
  # chat:
13
14
  # assistant_avatar: "🤖"
@@ -6,10 +6,15 @@ api_fields:
6
6
  - "title"
7
7
  - "abstract"
8
8
  - "year"
9
- - "authors"
9
+ - "authors.name"
10
+ - "authors.authorId"
10
11
  - "citationCount"
11
12
  - "url"
12
13
  - "externalIds"
14
+ - "venue"
15
+ - "publicationVenue" # Full object, instead of specific subfields
16
+ - "journal" # Full object, instead of specific subfields
17
+ - "publicationDate"
13
18
  # Commented fields that could be added later if needed
14
19
 
15
20
  # Default headers and params
@@ -6,10 +6,16 @@ api_fields:
6
6
  - "title"
7
7
  - "abstract"
8
8
  - "year"
9
- - "authors"
9
+ - "authors.name"
10
+ - "authors.authorId"
10
11
  - "citationCount"
11
12
  - "url"
12
13
  - "externalIds"
14
+ - "venue"
15
+ - "publicationVenue" # Full object, instead of specific subfields
16
+ - "journal" # Full object, instead of specific subfields
17
+ - "publicationDate"
18
+
13
19
  # Commented fields that could be added later if needed
14
20
  # - "publicationTypes"
15
21
  # - "openAccessPdf"
@@ -6,10 +6,15 @@ api_fields:
6
6
  - "title"
7
7
  - "abstract"
8
8
  - "year"
9
- - "authors"
9
+ - "authors.name"
10
+ - "authors.authorId"
10
11
  - "citationCount"
11
12
  - "url"
12
13
  - "externalIds"
14
+ - "venue"
15
+ - "publicationVenue" # Full object, instead of specific subfields
16
+ - "journal" # Full object, instead of specific subfields
17
+ - "publicationDate"
13
18
  # Commented fields that could be added later if needed
14
19
  # - "publicationTypes"
15
20
  # - "openAccessPdf"
@@ -53,4 +53,4 @@ zotero:
53
53
  "Web Page",
54
54
  ]
55
55
 
56
- filter_excluded_types: ["attachment", "note", "annotation"]
56
+ # filter_excluded_types: ["attachment", "note", "annotation"]
@@ -9,6 +9,7 @@ across agent interactions.
9
9
  import logging
10
10
  from typing import Annotated, Any, Dict
11
11
  from langchain_core.language_models import BaseChatModel
12
+ from langchain_core.embeddings import Embeddings
12
13
  from langgraph.prebuilt.chat_agent_executor import AgentState
13
14
 
14
15
  # Configure logging
@@ -54,6 +55,8 @@ class Talk2Scholars(AgentState):
54
55
  multi_papers (Dict[str, Any]): Stores multiple recommended papers from various sources.
55
56
  zotero_read (Dict[str, Any]): Stores the papers retrieved from Zotero.
56
57
  llm_model (BaseChatModel): The language model instance used for generating responses.
58
+ text_embedding_model (Embeddings): The text embedding model used for
59
+ similarity calculations.
57
60
  """
58
61
 
59
62
  # Agent state fields
@@ -63,4 +66,4 @@ class Talk2Scholars(AgentState):
63
66
  pdf_data: Annotated[Dict[str, Any], replace_dict]
64
67
  zotero_read: Annotated[Dict[str, Any], replace_dict]
65
68
  llm_model: BaseChatModel
66
- pdf_data: Annotated[Dict[str, Any], replace_dict]
69
+ text_embedding_model: Embeddings
@@ -1,58 +1,89 @@
1
1
  """
2
2
  Integration tests for talk2scholars system with OpenAI.
3
+ This test triggers all sub-agents by sending a conversation that covers:
4
+ - Searching Semantic Scholar (S2 agent)
5
+ - Retrieving Zotero results (Zotero agent)
6
+ - Querying PDF content (PDF agent)
7
+ - Downloading paper details from arXiv (Paper Download agent)
3
8
  """
4
9
 
5
- import os
6
- import pytest
7
- import hydra
8
- from langchain_openai import ChatOpenAI
9
- from langchain_core.messages import HumanMessage, AIMessage
10
- from ..agents.main_agent import get_app
11
- from ..state.state_talk2scholars import Talk2Scholars
10
+ # This will be covered in the next pr.
12
11
 
13
- # pylint: disable=redefined-outer-name
14
-
15
-
16
- @pytest.mark.skipif(
17
- not os.getenv("OPENAI_API_KEY"), reason="Requires OpenAI API key to run"
18
- )
19
- def test_main_agent_real_llm():
20
- """
21
- Test that the main agent invokes S2 agent correctly
22
- and updates the state with real LLM execution.
23
- """
24
-
25
- # Load Hydra Configuration EXACTLY like in main_agent.py
26
- with hydra.initialize(version_base=None, config_path="../configs"):
27
- cfg = hydra.compose(
28
- config_name="config", overrides=["agents/talk2scholars/main_agent=default"]
29
- )
30
- hydra_cfg = cfg.agents.talk2scholars.main_agent
31
-
32
- assert hydra_cfg is not None, "Hydra config failed to load"
33
-
34
- # Use the real OpenAI API (ensure env variable is set)
35
- llm = ChatOpenAI(model="gpt-4o-mini", temperature=hydra_cfg.temperature)
36
-
37
- # Initialize main agent workflow (WITH real Hydra config)
38
- thread_id = "test_thread"
39
- app = get_app(thread_id, llm)
40
-
41
- # Provide an actual user query
42
- initial_state = Talk2Scholars(
43
- messages=[HumanMessage(content="Find AI papers on transformers")]
44
- )
45
-
46
- # Invoke the agent (triggers supervisor → s2_agent)
47
- result = app.invoke(
48
- initial_state,
49
- {"configurable": {"config_id": thread_id, "thread_id": thread_id}},
50
- )
51
-
52
- # Assert that the supervisor routed correctly
53
- assert "messages" in result, "Expected messages in response"
54
-
55
- # Fix: Accept AIMessage as a valid response type
56
- assert isinstance(
57
- result["messages"][-1], (HumanMessage, AIMessage, str)
58
- ), "Last message should be a valid response"
12
+ #
13
+ # import os
14
+ # import pytest
15
+ # import hydra
16
+ # from langchain_openai import ChatOpenAI
17
+ # from langchain_core.messages import HumanMessage, AIMessage
18
+ # from ..agents.main_agent import get_app
19
+ # from ..state.state_talk2scholars import Talk2Scholars
20
+ #
21
+ # # pylint: disable=redefined-outer-name,too-few-public-methods
22
+ #
23
+ #
24
+ # @pytest.mark.skipif(
25
+ # not os.getenv("OPENAI_API_KEY"), reason="Requires OpenAI API key to run"
26
+ # )
27
+ # def test_main_agent_real_llm():
28
+ # """
29
+ # Integration test for the Talk2Scholars system using a real OpenAI LLM.
30
+ # This test verifies that the supervisor correctly routes to all sub-agents by
31
+ # providing a conversation with queries intended to trigger each agent.
32
+ # """
33
+ # # Load Hydra configuration EXACTLY like in main_agent.py
34
+ # with hydra.initialize(version_base=None, config_path="../configs"):
35
+ # cfg = hydra.compose(
36
+ # config_name="config", overrides=["agents/talk2scholars/main_agent=default"]
37
+ # )
38
+ # hydra_cfg = cfg.agents.talk2scholars.main_agent
39
+ # assert hydra_cfg is not None, "Hydra config failed to load"
40
+ #
41
+ # # Use the real OpenAI API (ensure OPENAI_API_KEY is set in environment)
42
+ # llm = ChatOpenAI(model="gpt-4o-mini", temperature=hydra_cfg.temperature)
43
+ #
44
+ # # Initialize the main agent workflow (with real Hydra config)
45
+ # thread_id = "test_thread"
46
+ # app = get_app(thread_id, llm)
47
+ #
48
+ # # Provide a multi-turn conversation intended to trigger all sub-agents:
49
+ # # - S2 agent: "Search Semantic Scholar for AI papers on transformers."
50
+ # # - Zotero agent: "Retrieve Zotero results for these papers."
51
+ # # - PDF agent: "Analyze the attached PDF and summarize its key findings."
52
+ # # - Paper Download agent: "Download the paper details from arXiv."
53
+ # initial_state = Talk2Scholars(
54
+ # messages=[
55
+ # HumanMessage(
56
+ # content="Search Semantic Scholar for AI papers on transformers."
57
+ # ),
58
+ # HumanMessage(content="Also, retrieve Zotero results for these papers."),
59
+ # HumanMessage(
60
+ # content="I have attached a PDF; analyze it and tell me the key findings."
61
+ # ),
62
+ # HumanMessage(content="Finally, download the paper from arXiv."),
63
+ # ]
64
+ # )
65
+ #
66
+ # # Invoke the agent (which routes to the appropriate sub-agents)
67
+ # result = app.invoke(
68
+ # initial_state,
69
+ # {"configurable": {"config_id": thread_id, "thread_id": thread_id}},
70
+ # )
71
+ #
72
+ # # Assert that the result contains messages and that the final message is valid.
73
+ # assert "messages" in result, "Expected 'messages' in the response"
74
+ # last_message = result["messages"][-1]
75
+ # assert isinstance(
76
+ # last_message, (HumanMessage, AIMessage, str)
77
+ # ), "Last message should be a valid response type"
78
+ #
79
+ # # Concatenate message texts (if available) to perform keyword checks.
80
+ # output_text = " ".join(
81
+ # msg.content if hasattr(msg, "content") else str(msg)
82
+ # for msg in result["messages"]
83
+ # ).lower()
84
+ #
85
+ # # Check for keywords that suggest each sub-agent was invoked.
86
+ # for keyword in ["semantic scholar", "zotero", "pdf", "arxiv"]:
87
+ # assert (
88
+ # keyword in output_text
89
+ # ), f"Expected keyword '{keyword}' in the output response"
@@ -65,6 +65,13 @@ def dummy_get_app_zotero(uniq_id, llm_model):
65
65
  return DummyWorkflow(supervisor_args={"agent": "zotero", "uniq_id": uniq_id})
66
66
 
67
67
 
68
+ def dummy_get_app_pdf(uniq_id, llm_model):
69
+ """Return a DummyWorkflow for the PDF agent."""
70
+ dummy_get_app_pdf.called_uniq_id = uniq_id
71
+ dummy_get_app_pdf.called_llm_model = llm_model
72
+ return DummyWorkflow(supervisor_args={"agent": "pdf", "uniq_id": uniq_id})
73
+
74
+
68
75
  def dummy_create_supervisor(apps, model, state_schema, **kwargs):
69
76
  """Return a DummyWorkflow for the supervisor."""
70
77
  dummy_create_supervisor.called_kwargs = kwargs
@@ -136,6 +143,15 @@ def patch_hydra(monkeypatch):
136
143
  )
137
144
 
138
145
 
146
+ def dummy_get_app_paper_download(uniq_id, llm_model):
147
+ """Return a DummyWorkflow for the paper download agent."""
148
+ dummy_get_app_paper_download.called_uniq_id = uniq_id
149
+ dummy_get_app_paper_download.called_llm_model = llm_model
150
+ return DummyWorkflow(
151
+ supervisor_args={"agent": "paper_download", "uniq_id": uniq_id}
152
+ )
153
+
154
+
139
155
  @pytest.fixture(autouse=True)
140
156
  def patch_sub_agents_and_supervisor(monkeypatch):
141
157
  """Patch the sub-agents and supervisor creation functions."""
@@ -146,6 +162,14 @@ def patch_sub_agents_and_supervisor(monkeypatch):
146
162
  "aiagents4pharma.talk2scholars.agents.main_agent.get_app_zotero",
147
163
  dummy_get_app_zotero,
148
164
  )
165
+ monkeypatch.setattr(
166
+ "aiagents4pharma.talk2scholars.agents.main_agent.get_app_pdf",
167
+ dummy_get_app_pdf,
168
+ )
169
+ monkeypatch.setattr(
170
+ "aiagents4pharma.talk2scholars.agents.main_agent.get_app_paper_download",
171
+ dummy_get_app_paper_download,
172
+ )
149
173
  monkeypatch.setattr(
150
174
  "aiagents4pharma.talk2scholars.agents.main_agent.create_supervisor",
151
175
  dummy_create_supervisor,
@@ -3,7 +3,6 @@ Unit tests for question_and_answer tool functionality.
3
3
  """
4
4
 
5
5
  from langchain.docstore.document import Document
6
-
7
6
  from ..tools.pdf import question_and_answer
8
7
  from ..tools.pdf.question_and_answer import (
9
8
  extract_text_from_pdf_data,
@@ -11,6 +10,8 @@ from ..tools.pdf.question_and_answer import (
11
10
  generate_answer,
12
11
  )
13
12
 
13
+ # pylint: disable=redefined-outer-name,too-few-public-methods
14
+
14
15
 
15
16
  def test_extract_text_from_pdf_data():
16
17
  """
@@ -46,14 +47,14 @@ DUMMY_PDF_BYTES = (
46
47
  )
47
48
 
48
49
 
49
- def fake_generate_answer(question, pdf_bytes, _llm_model):
50
+ def fake_generate_answer2(question, pdf_url, _text_embedding_model):
50
51
  """
51
- Fake generate_answer function to bypass external dependencies.
52
+ Fake generate_answer2 function to bypass external dependencies.
52
53
  """
53
54
  return {
54
55
  "answer": "Mock answer",
55
56
  "question": question,
56
- "pdf_bytes_length": len(pdf_bytes),
57
+ "pdf_url": pdf_url,
57
58
  }
58
59
 
59
60
 
@@ -61,30 +62,31 @@ def test_question_and_answer_tool_success(monkeypatch):
61
62
  """
62
63
  Test that question_and_answer_tool returns the expected result on success.
63
64
  """
64
- monkeypatch.setattr(
65
- question_and_answer, "generate_answer", fake_generate_answer
66
- )
67
- # Create a valid state with pdf_data containing both pdf_object and pdf_url,
68
- # and include a dummy llm_model.
65
+ # Patch generate_answer2 because the tool calls that.
66
+ monkeypatch.setattr(question_and_answer, "generate_answer2", fake_generate_answer2)
67
+ dummy_text_embedding_model = object() # Provide a dummy text embedding model.
68
+ # Create a valid state with pdf_data and include dummy llm_model and text_embedding_model.
69
69
  state = {
70
70
  "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
71
71
  "llm_model": object(), # Provide a dummy LLM model instance.
72
+ "text_embedding_model": dummy_text_embedding_model,
72
73
  }
73
74
  question = "What is in the PDF?"
74
- # Call the underlying function directly via .func to bypass the StructuredTool wrapper.
75
75
  result = question_and_answer_tool.func(
76
76
  question=question, tool_call_id="test_call_id", state=state
77
77
  )
78
78
  assert result["answer"] == "Mock answer"
79
79
  assert result["question"] == question
80
- assert result["pdf_bytes_length"] == len(DUMMY_PDF_BYTES)
80
+ assert result["pdf_url"] == "http://dummy.url"
81
81
 
82
82
 
83
83
  def test_question_and_answer_tool_no_pdf_data():
84
84
  """
85
85
  Test that an error is returned if the state lacks the 'pdf_data' key.
86
86
  """
87
- state = {} # pdf_data key is missing.
87
+ state = {
88
+ "text_embedding_model": object(), # Added to avoid KeyError.
89
+ }
88
90
  question = "Any question?"
89
91
  result = question_and_answer_tool.func(
90
92
  question=question, tool_call_id="test_call_id", state=state
@@ -97,7 +99,11 @@ def test_question_and_answer_tool_no_pdf_object():
97
99
  """
98
100
  Test that an error is returned if the pdf_object is missing within pdf_data.
99
101
  """
100
- state = {"pdf_data": {"pdf_object": None}}
102
+ state = {
103
+ "pdf_data": {"pdf_object": None},
104
+ "text_embedding_model": object(), # Added to avoid KeyError.
105
+ "llm_model": object(), # Dummy LLM model.
106
+ }
101
107
  question = "Any question?"
102
108
  result = question_and_answer_tool.func(
103
109
  question=question, tool_call_id="test_call_id", state=state
@@ -114,8 +120,9 @@ def test_question_and_answer_tool_no_llm_model():
114
120
  Test that an error is returned if the LLM model is missing in the state.
115
121
  """
116
122
  state = {
117
- "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"}
118
- # Note: llm_model is intentionally omitted.
123
+ "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
124
+ "text_embedding_model": object(), # Added to avoid KeyError.
125
+ # llm_model is intentionally omitted.
119
126
  }
120
127
  question = "What is in the PDF?"
121
128
  result = question_and_answer_tool.func(
@@ -124,6 +131,57 @@ def test_question_and_answer_tool_no_llm_model():
124
131
  assert result == {"error": "No LLM model found in state."}
125
132
 
126
133
 
134
+ def test_generate_answer2_actual(monkeypatch):
135
+ """
136
+ Test the actual behavior of generate_answer2 using fake dependencies
137
+ to exercise its internal logic.
138
+ """
139
+
140
+ # Create a fake PyPDFLoader that does not perform a network call.
141
+ class FakePyPDFLoader:
142
+ """class to fake PyPDFLoader"""
143
+
144
+ def __init__(self, file_path, headers=None):
145
+ """Initialize the fake PyPDFLoader."""
146
+ self.file_path = file_path
147
+ self.headers = headers
148
+
149
+ def lazy_load(self):
150
+ """Return a list with one fake Document."""
151
+ # Return a list with one fake Document.
152
+ return [Document(page_content="Answer for Test question?")]
153
+
154
+ monkeypatch.setattr(question_and_answer, "PyPDFLoader", FakePyPDFLoader)
155
+
156
+ # Create a fake vector store that returns a controlled result for similarity_search.
157
+ class FakeVectorStore:
158
+ """Fake vector store for similarity search."""
159
+
160
+ def similarity_search(self, query):
161
+ """Return a list with one Document containing our expected answer."""
162
+ # Return a list with one Document containing our expected answer.
163
+ return [Document(page_content=f"Answer for {query}")]
164
+
165
+ monkeypatch.setattr(
166
+ question_and_answer.InMemoryVectorStore,
167
+ "from_documents",
168
+ lambda docs, emb: FakeVectorStore(),
169
+ )
170
+
171
+ # Provide a dummy text embedding model.
172
+ dummy_text_embedding_model = object()
173
+ question = "Test question?"
174
+ pdf_url = "http://dummy.pdf"
175
+
176
+ # Call generate_answer2 without triggering an actual network call.
177
+ result = question_and_answer.generate_answer2(
178
+ question, pdf_url, dummy_text_embedding_model
179
+ )
180
+ # The function should join the page content from the similarity search.
181
+ expected = "Answer for Test question?"
182
+ assert result == expected
183
+
184
+
127
185
  def test_generate_answer(monkeypatch):
128
186
  """
129
187
  Test generate_answer function with controlled monkeypatched dependencies.
@@ -141,12 +199,15 @@ def test_generate_answer(monkeypatch):
141
199
  """
142
200
  Fake Annoy.from_documents function that returns a fake vector store.
143
201
  """
202
+
144
203
  # pylint: disable=too-few-public-methods, unused-argument
145
204
  class FakeVectorStore:
146
205
  """Fake vector store for similarity search."""
206
+
147
207
  def similarity_search(self, _question, k):
148
208
  """Return a list with a single dummy Document."""
149
209
  return [Document(page_content="dummy content")]
210
+
150
211
  return FakeVectorStore()
151
212
 
152
213
  monkeypatch.setattr(
@@ -157,9 +218,11 @@ def test_generate_answer(monkeypatch):
157
218
  """
158
219
  Fake load_qa_chain function that returns a fake QA chain.
159
220
  """
221
+
160
222
  # pylint: disable=too-few-public-methods, unused-argument
161
223
  class FakeChain:
162
224
  """Fake QA chain for testing generate_answer."""
225
+
163
226
  def invoke(self, **kwargs):
164
227
  """
165
228
  Fake invoke method that returns a mock answer.
@@ -169,6 +232,7 @@ def test_generate_answer(monkeypatch):
169
232
  "answer": "real mock answer",
170
233
  "question": input_data.get("question"),
171
234
  }
235
+
172
236
  return FakeChain()
173
237
 
174
238
  monkeypatch.setattr(question_and_answer, "load_qa_chain", fake_load_qa_chain)