aiagents4pharma 1.30.0__py3-none-any.whl → 1.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/main_agent.py +18 -10
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -6
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -10
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +18 -9
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +2 -2
- aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +1 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +6 -1
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +7 -1
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +6 -1
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -1
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +4 -1
- aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +84 -53
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +24 -0
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +79 -15
- aiagents4pharma/talk2scholars/tests/test_routing_logic.py +12 -8
- aiagents4pharma/talk2scholars/tests/test_s2_multi.py +27 -4
- aiagents4pharma/talk2scholars/tests/test_s2_search.py +19 -3
- aiagents4pharma/talk2scholars/tests/test_s2_single.py +27 -3
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +17 -10
- aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +2 -0
- aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +11 -4
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +5 -1
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -26
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +46 -22
- aiagents4pharma/talk2scholars/tools/s2/query_results.py +1 -1
- aiagents4pharma/talk2scholars/tools/s2/search.py +40 -12
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +42 -16
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +33 -16
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +39 -7
- {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/METADATA +2 -2
- {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/RECORD +34 -34
- {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/WHEEL +1 -1
- {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,14 @@
|
|
1
1
|
#!/usr/bin/env python3
|
2
2
|
|
3
3
|
"""
|
4
|
-
Main agent for
|
4
|
+
Main agent module for initializing and running the Talk2Scholars application.
|
5
5
|
|
6
|
-
This module
|
7
|
-
|
8
|
-
|
6
|
+
This module sets up the hierarchical agent system using LangGraph and integrates
|
7
|
+
various sub-agents for handling different tasks such as semantic scholar, zotero,
|
8
|
+
PDF processing, and paper downloading.
|
9
|
+
|
10
|
+
Functions:
|
11
|
+
- get_app: Initializes and returns the LangGraph-based hierarchical agent system.
|
9
12
|
"""
|
10
13
|
|
11
14
|
import logging
|
@@ -16,6 +19,8 @@ from langchain_core.language_models.chat_models import BaseChatModel
|
|
16
19
|
from langgraph.checkpoint.memory import MemorySaver
|
17
20
|
from ..agents.s2_agent import get_app as get_app_s2
|
18
21
|
from ..agents.zotero_agent import get_app as get_app_zotero
|
22
|
+
from ..agents.pdf_agent import get_app as get_app_pdf
|
23
|
+
from ..agents.paper_download_agent import get_app as get_app_paper_download
|
19
24
|
from ..state.state_talk2scholars import Talk2Scholars
|
20
25
|
|
21
26
|
# Initialize logger
|
@@ -43,12 +48,13 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
43
48
|
>>> app = get_app("thread_123")
|
44
49
|
>>> result = app.invoke(initial_state)
|
45
50
|
"""
|
46
|
-
if llm_model
|
47
|
-
llm_model
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
51
|
+
if hasattr(llm_model, "model_name"):
|
52
|
+
if llm_model.model_name == "gpt-4o-mini":
|
53
|
+
llm_model = ChatOpenAI(
|
54
|
+
model="gpt-4o-mini",
|
55
|
+
temperature=0,
|
56
|
+
model_kwargs={"parallel_tool_calls": False},
|
57
|
+
)
|
52
58
|
# Load hydra configuration
|
53
59
|
logger.log(logging.INFO, "Launching Talk2Scholars with thread_id %s", uniq_id)
|
54
60
|
with hydra.initialize(version_base=None, config_path="../configs/"):
|
@@ -62,6 +68,8 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
62
68
|
[
|
63
69
|
get_app_s2(uniq_id, llm_model), # semantic scholar
|
64
70
|
get_app_zotero(uniq_id, llm_model), # zotero
|
71
|
+
get_app_pdf(uniq_id, llm_model), # pdf
|
72
|
+
get_app_paper_download(uniq_id, llm_model), # paper download
|
65
73
|
],
|
66
74
|
model=llm_model,
|
67
75
|
state_schema=Talk2Scholars,
|
@@ -20,6 +20,7 @@ from ..tools.s2.query_results import query_results
|
|
20
20
|
logging.basicConfig(level=logging.INFO)
|
21
21
|
logger = logging.getLogger(__name__)
|
22
22
|
|
23
|
+
|
23
24
|
def get_app(uniq_id, llm_model: BaseChatModel):
|
24
25
|
"""
|
25
26
|
Initializes and returns the LangGraph application for the Talk2Scholars paper download agent.
|
@@ -39,14 +40,12 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
39
40
|
with hydra.initialize(version_base=None, config_path="../configs"):
|
40
41
|
cfg = hydra.compose(
|
41
42
|
config_name="config",
|
42
|
-
overrides=["agents/talk2scholars/paper_download_agent=default"]
|
43
|
+
overrides=["agents/talk2scholars/paper_download_agent=default"],
|
43
44
|
)
|
44
45
|
cfg = cfg.agents.talk2scholars.paper_download_agent
|
45
46
|
|
46
47
|
# Define tools properly
|
47
|
-
tools = ToolNode(
|
48
|
-
[download_arxiv_paper, query_results]
|
49
|
-
)
|
48
|
+
tools = ToolNode([download_arxiv_paper, query_results])
|
50
49
|
|
51
50
|
# Define the model
|
52
51
|
logger.info("Using OpenAI model %s", llm_model)
|
@@ -54,7 +53,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
54
53
|
llm_model,
|
55
54
|
tools=tools,
|
56
55
|
state_schema=Talk2Scholars,
|
57
|
-
prompt=cfg.
|
56
|
+
prompt=cfg.paper_download_agent,
|
58
57
|
checkpointer=MemorySaver(),
|
59
58
|
)
|
60
59
|
|
@@ -79,7 +78,7 @@ def get_app(uniq_id, llm_model: BaseChatModel):
|
|
79
78
|
checkpointer = MemorySaver()
|
80
79
|
|
81
80
|
# Compile the graph
|
82
|
-
app = workflow.compile(checkpointer=checkpointer)
|
81
|
+
app = workflow.compile(checkpointer=checkpointer, name="agent_paper_download")
|
83
82
|
|
84
83
|
# Logging the information and returning the app
|
85
84
|
logger.info("Compiled the graph")
|
@@ -26,10 +26,7 @@ logging.basicConfig(level=logging.INFO)
|
|
26
26
|
logger = logging.getLogger(__name__)
|
27
27
|
|
28
28
|
|
29
|
-
def get_app(
|
30
|
-
uniq_id,
|
31
|
-
llm_model: BaseChatModel
|
32
|
-
):
|
29
|
+
def get_app(uniq_id, llm_model: BaseChatModel):
|
33
30
|
"""
|
34
31
|
Initializes and returns the LangGraph application for the PDF agent.
|
35
32
|
|
@@ -40,7 +37,7 @@ def get_app(
|
|
40
37
|
|
41
38
|
Args:
|
42
39
|
uniq_id (str): A unique identifier for the current conversation session or thread.
|
43
|
-
llm_model (BaseChatModel, optional): The language model instance to be used.
|
40
|
+
llm_model (BaseChatModel, optional): The language model instance to be used.
|
44
41
|
Defaults to ChatOpenAI(model="gpt-4o-mini", temperature=0).
|
45
42
|
|
46
43
|
Returns:
|
@@ -71,10 +68,7 @@ def get_app(
|
|
71
68
|
Any: The response generated by the language model after processing the state.
|
72
69
|
"""
|
73
70
|
logger.info("Creating Agent_PDF node with thread_id %s", uniq_id)
|
74
|
-
response = model.invoke(
|
75
|
-
state,
|
76
|
-
{"configurable": {"thread_id": uniq_id}}
|
77
|
-
)
|
71
|
+
response = model.invoke(state, {"configurable": {"thread_id": uniq_id}})
|
78
72
|
return response
|
79
73
|
|
80
74
|
# Define the tool node that includes the PDF QnA tool.
|
@@ -100,7 +94,7 @@ def get_app(
|
|
100
94
|
checkpointer = MemorySaver()
|
101
95
|
|
102
96
|
# Compile the graph into a runnable app.
|
103
|
-
app = workflow.compile(checkpointer=checkpointer)
|
97
|
+
app = workflow.compile(checkpointer=checkpointer, name="agent_pdf")
|
104
98
|
logger.info("Compiled the PDF agent graph.")
|
105
99
|
|
106
100
|
return app
|
@@ -1,13 +1,22 @@
|
|
1
1
|
_target_: agents.main_agent.get_app
|
2
2
|
temperature: 0
|
3
3
|
system_prompt: >
|
4
|
-
You are
|
4
|
+
You are Talk2Scholars agent coordinating academic paper discovery
|
5
|
+
and analysis with help of the following agents:
|
6
|
+
1. Agent S2: This agent can be used to search and recommend papers
|
7
|
+
from Semantic Scholar. Use this agent when the user asks for
|
8
|
+
general paper/article searches and recommendations, or to retrieve information
|
9
|
+
from the last displayed results table or query abstract of last
|
10
|
+
displayed results.
|
11
|
+
2. Agent Zotero: This agent can be used to retrieve, display, and query
|
12
|
+
papers/articles from the Zotero library. Use this agent only when the user
|
13
|
+
explicitly asks for papers from Zotero. This tool can also be used to
|
14
|
+
save papers in the zotero library.
|
15
|
+
3. Agent PaperFetch: This agent can be used to download papers/articles
|
16
|
+
from ArXiv.
|
17
|
+
4. Agent PDFQuery: This agent can be used to query contents of an
|
18
|
+
uploaded or downloaded PDF/paper/article.
|
5
19
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
general paper searches and recommendations.
|
10
|
-
2. Zotero_agent: This agent can be used to retrieve, display, and query
|
11
|
-
papers from the Zotero library. Use this agent only when the user
|
12
|
-
explicitly asks for papers from Zotero. This tool can also be used to
|
13
|
-
save papers in under collections in the zotero library
|
20
|
+
Your final response should be a one sentence summary of the information
|
21
|
+
retrieved from the agents above. Do not repeat the information already
|
22
|
+
displayed to the user in the response of the agents.
|
@@ -15,5 +15,5 @@ s2_agent: >
|
|
15
15
|
1. When user requests papers, use search/recommendation tools to find papers
|
16
16
|
2. Use `display_results` tool to display the response from the search/recommendation tools
|
17
17
|
3. Use `query_results` tool to query over the selected paper only when the user asks to
|
18
|
-
4. When the user wants recommendations, you can get the "
|
19
|
-
pass the "
|
18
|
+
4. When the user wants recommendations, you can get the "semantic_scholar_paper_id" using `query_results` tool in the "last_displayed_results" key, then
|
19
|
+
pass the "semantic_scholar_paper_id" to `search`, `single_paper_rec` or `multi_paper_rec` tools depending on the user's query. Do not use "arxiv_id"
|
@@ -6,10 +6,15 @@ api_fields:
|
|
6
6
|
- "title"
|
7
7
|
- "abstract"
|
8
8
|
- "year"
|
9
|
-
- "authors"
|
9
|
+
- "authors.name"
|
10
|
+
- "authors.authorId"
|
10
11
|
- "citationCount"
|
11
12
|
- "url"
|
12
13
|
- "externalIds"
|
14
|
+
- "venue"
|
15
|
+
- "publicationVenue" # Full object, instead of specific subfields
|
16
|
+
- "journal" # Full object, instead of specific subfields
|
17
|
+
- "publicationDate"
|
13
18
|
# Commented fields that could be added later if needed
|
14
19
|
|
15
20
|
# Default headers and params
|
@@ -6,10 +6,16 @@ api_fields:
|
|
6
6
|
- "title"
|
7
7
|
- "abstract"
|
8
8
|
- "year"
|
9
|
-
- "authors"
|
9
|
+
- "authors.name"
|
10
|
+
- "authors.authorId"
|
10
11
|
- "citationCount"
|
11
12
|
- "url"
|
12
13
|
- "externalIds"
|
14
|
+
- "venue"
|
15
|
+
- "publicationVenue" # Full object, instead of specific subfields
|
16
|
+
- "journal" # Full object, instead of specific subfields
|
17
|
+
- "publicationDate"
|
18
|
+
|
13
19
|
# Commented fields that could be added later if needed
|
14
20
|
# - "publicationTypes"
|
15
21
|
# - "openAccessPdf"
|
@@ -6,10 +6,15 @@ api_fields:
|
|
6
6
|
- "title"
|
7
7
|
- "abstract"
|
8
8
|
- "year"
|
9
|
-
- "authors"
|
9
|
+
- "authors.name"
|
10
|
+
- "authors.authorId"
|
10
11
|
- "citationCount"
|
11
12
|
- "url"
|
12
13
|
- "externalIds"
|
14
|
+
- "venue"
|
15
|
+
- "publicationVenue" # Full object, instead of specific subfields
|
16
|
+
- "journal" # Full object, instead of specific subfields
|
17
|
+
- "publicationDate"
|
13
18
|
# Commented fields that could be added later if needed
|
14
19
|
# - "publicationTypes"
|
15
20
|
# - "openAccessPdf"
|
@@ -9,6 +9,7 @@ across agent interactions.
|
|
9
9
|
import logging
|
10
10
|
from typing import Annotated, Any, Dict
|
11
11
|
from langchain_core.language_models import BaseChatModel
|
12
|
+
from langchain_core.embeddings import Embeddings
|
12
13
|
from langgraph.prebuilt.chat_agent_executor import AgentState
|
13
14
|
|
14
15
|
# Configure logging
|
@@ -54,6 +55,8 @@ class Talk2Scholars(AgentState):
|
|
54
55
|
multi_papers (Dict[str, Any]): Stores multiple recommended papers from various sources.
|
55
56
|
zotero_read (Dict[str, Any]): Stores the papers retrieved from Zotero.
|
56
57
|
llm_model (BaseChatModel): The language model instance used for generating responses.
|
58
|
+
text_embedding_model (Embeddings): The text embedding model used for
|
59
|
+
similarity calculations.
|
57
60
|
"""
|
58
61
|
|
59
62
|
# Agent state fields
|
@@ -63,4 +66,4 @@ class Talk2Scholars(AgentState):
|
|
63
66
|
pdf_data: Annotated[Dict[str, Any], replace_dict]
|
64
67
|
zotero_read: Annotated[Dict[str, Any], replace_dict]
|
65
68
|
llm_model: BaseChatModel
|
66
|
-
|
69
|
+
text_embedding_model: Embeddings
|
@@ -1,58 +1,89 @@
|
|
1
1
|
"""
|
2
2
|
Integration tests for talk2scholars system with OpenAI.
|
3
|
+
This test triggers all sub-agents by sending a conversation that covers:
|
4
|
+
- Searching Semantic Scholar (S2 agent)
|
5
|
+
- Retrieving Zotero results (Zotero agent)
|
6
|
+
- Querying PDF content (PDF agent)
|
7
|
+
- Downloading paper details from arXiv (Paper Download agent)
|
3
8
|
"""
|
4
9
|
|
5
|
-
|
6
|
-
import pytest
|
7
|
-
import hydra
|
8
|
-
from langchain_openai import ChatOpenAI
|
9
|
-
from langchain_core.messages import HumanMessage, AIMessage
|
10
|
-
from ..agents.main_agent import get_app
|
11
|
-
from ..state.state_talk2scholars import Talk2Scholars
|
10
|
+
# This will be covered in the next pr.
|
12
11
|
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
12
|
+
#
|
13
|
+
# import os
|
14
|
+
# import pytest
|
15
|
+
# import hydra
|
16
|
+
# from langchain_openai import ChatOpenAI
|
17
|
+
# from langchain_core.messages import HumanMessage, AIMessage
|
18
|
+
# from ..agents.main_agent import get_app
|
19
|
+
# from ..state.state_talk2scholars import Talk2Scholars
|
20
|
+
#
|
21
|
+
# # pylint: disable=redefined-outer-name,too-few-public-methods
|
22
|
+
#
|
23
|
+
#
|
24
|
+
# @pytest.mark.skipif(
|
25
|
+
# not os.getenv("OPENAI_API_KEY"), reason="Requires OpenAI API key to run"
|
26
|
+
# )
|
27
|
+
# def test_main_agent_real_llm():
|
28
|
+
# """
|
29
|
+
# Integration test for the Talk2Scholars system using a real OpenAI LLM.
|
30
|
+
# This test verifies that the supervisor correctly routes to all sub-agents by
|
31
|
+
# providing a conversation with queries intended to trigger each agent.
|
32
|
+
# """
|
33
|
+
# # Load Hydra configuration EXACTLY like in main_agent.py
|
34
|
+
# with hydra.initialize(version_base=None, config_path="../configs"):
|
35
|
+
# cfg = hydra.compose(
|
36
|
+
# config_name="config", overrides=["agents/talk2scholars/main_agent=default"]
|
37
|
+
# )
|
38
|
+
# hydra_cfg = cfg.agents.talk2scholars.main_agent
|
39
|
+
# assert hydra_cfg is not None, "Hydra config failed to load"
|
40
|
+
#
|
41
|
+
# # Use the real OpenAI API (ensure OPENAI_API_KEY is set in environment)
|
42
|
+
# llm = ChatOpenAI(model="gpt-4o-mini", temperature=hydra_cfg.temperature)
|
43
|
+
#
|
44
|
+
# # Initialize the main agent workflow (with real Hydra config)
|
45
|
+
# thread_id = "test_thread"
|
46
|
+
# app = get_app(thread_id, llm)
|
47
|
+
#
|
48
|
+
# # Provide a multi-turn conversation intended to trigger all sub-agents:
|
49
|
+
# # - S2 agent: "Search Semantic Scholar for AI papers on transformers."
|
50
|
+
# # - Zotero agent: "Retrieve Zotero results for these papers."
|
51
|
+
# # - PDF agent: "Analyze the attached PDF and summarize its key findings."
|
52
|
+
# # - Paper Download agent: "Download the paper details from arXiv."
|
53
|
+
# initial_state = Talk2Scholars(
|
54
|
+
# messages=[
|
55
|
+
# HumanMessage(
|
56
|
+
# content="Search Semantic Scholar for AI papers on transformers."
|
57
|
+
# ),
|
58
|
+
# HumanMessage(content="Also, retrieve Zotero results for these papers."),
|
59
|
+
# HumanMessage(
|
60
|
+
# content="I have attached a PDF; analyze it and tell me the key findings."
|
61
|
+
# ),
|
62
|
+
# HumanMessage(content="Finally, download the paper from arXiv."),
|
63
|
+
# ]
|
64
|
+
# )
|
65
|
+
#
|
66
|
+
# # Invoke the agent (which routes to the appropriate sub-agents)
|
67
|
+
# result = app.invoke(
|
68
|
+
# initial_state,
|
69
|
+
# {"configurable": {"config_id": thread_id, "thread_id": thread_id}},
|
70
|
+
# )
|
71
|
+
#
|
72
|
+
# # Assert that the result contains messages and that the final message is valid.
|
73
|
+
# assert "messages" in result, "Expected 'messages' in the response"
|
74
|
+
# last_message = result["messages"][-1]
|
75
|
+
# assert isinstance(
|
76
|
+
# last_message, (HumanMessage, AIMessage, str)
|
77
|
+
# ), "Last message should be a valid response type"
|
78
|
+
#
|
79
|
+
# # Concatenate message texts (if available) to perform keyword checks.
|
80
|
+
# output_text = " ".join(
|
81
|
+
# msg.content if hasattr(msg, "content") else str(msg)
|
82
|
+
# for msg in result["messages"]
|
83
|
+
# ).lower()
|
84
|
+
#
|
85
|
+
# # Check for keywords that suggest each sub-agent was invoked.
|
86
|
+
# for keyword in ["semantic scholar", "zotero", "pdf", "arxiv"]:
|
87
|
+
# assert (
|
88
|
+
# keyword in output_text
|
89
|
+
# ), f"Expected keyword '{keyword}' in the output response"
|
@@ -65,6 +65,13 @@ def dummy_get_app_zotero(uniq_id, llm_model):
|
|
65
65
|
return DummyWorkflow(supervisor_args={"agent": "zotero", "uniq_id": uniq_id})
|
66
66
|
|
67
67
|
|
68
|
+
def dummy_get_app_pdf(uniq_id, llm_model):
|
69
|
+
"""Return a DummyWorkflow for the PDF agent."""
|
70
|
+
dummy_get_app_pdf.called_uniq_id = uniq_id
|
71
|
+
dummy_get_app_pdf.called_llm_model = llm_model
|
72
|
+
return DummyWorkflow(supervisor_args={"agent": "pdf", "uniq_id": uniq_id})
|
73
|
+
|
74
|
+
|
68
75
|
def dummy_create_supervisor(apps, model, state_schema, **kwargs):
|
69
76
|
"""Return a DummyWorkflow for the supervisor."""
|
70
77
|
dummy_create_supervisor.called_kwargs = kwargs
|
@@ -136,6 +143,15 @@ def patch_hydra(monkeypatch):
|
|
136
143
|
)
|
137
144
|
|
138
145
|
|
146
|
+
def dummy_get_app_paper_download(uniq_id, llm_model):
|
147
|
+
"""Return a DummyWorkflow for the paper download agent."""
|
148
|
+
dummy_get_app_paper_download.called_uniq_id = uniq_id
|
149
|
+
dummy_get_app_paper_download.called_llm_model = llm_model
|
150
|
+
return DummyWorkflow(
|
151
|
+
supervisor_args={"agent": "paper_download", "uniq_id": uniq_id}
|
152
|
+
)
|
153
|
+
|
154
|
+
|
139
155
|
@pytest.fixture(autouse=True)
|
140
156
|
def patch_sub_agents_and_supervisor(monkeypatch):
|
141
157
|
"""Patch the sub-agents and supervisor creation functions."""
|
@@ -146,6 +162,14 @@ def patch_sub_agents_and_supervisor(monkeypatch):
|
|
146
162
|
"aiagents4pharma.talk2scholars.agents.main_agent.get_app_zotero",
|
147
163
|
dummy_get_app_zotero,
|
148
164
|
)
|
165
|
+
monkeypatch.setattr(
|
166
|
+
"aiagents4pharma.talk2scholars.agents.main_agent.get_app_pdf",
|
167
|
+
dummy_get_app_pdf,
|
168
|
+
)
|
169
|
+
monkeypatch.setattr(
|
170
|
+
"aiagents4pharma.talk2scholars.agents.main_agent.get_app_paper_download",
|
171
|
+
dummy_get_app_paper_download,
|
172
|
+
)
|
149
173
|
monkeypatch.setattr(
|
150
174
|
"aiagents4pharma.talk2scholars.agents.main_agent.create_supervisor",
|
151
175
|
dummy_create_supervisor,
|
@@ -3,7 +3,6 @@ Unit tests for question_and_answer tool functionality.
|
|
3
3
|
"""
|
4
4
|
|
5
5
|
from langchain.docstore.document import Document
|
6
|
-
|
7
6
|
from ..tools.pdf import question_and_answer
|
8
7
|
from ..tools.pdf.question_and_answer import (
|
9
8
|
extract_text_from_pdf_data,
|
@@ -11,6 +10,8 @@ from ..tools.pdf.question_and_answer import (
|
|
11
10
|
generate_answer,
|
12
11
|
)
|
13
12
|
|
13
|
+
# pylint: disable=redefined-outer-name,too-few-public-methods
|
14
|
+
|
14
15
|
|
15
16
|
def test_extract_text_from_pdf_data():
|
16
17
|
"""
|
@@ -46,14 +47,14 @@ DUMMY_PDF_BYTES = (
|
|
46
47
|
)
|
47
48
|
|
48
49
|
|
49
|
-
def
|
50
|
+
def fake_generate_answer2(question, pdf_url, _text_embedding_model):
|
50
51
|
"""
|
51
|
-
Fake
|
52
|
+
Fake generate_answer2 function to bypass external dependencies.
|
52
53
|
"""
|
53
54
|
return {
|
54
55
|
"answer": "Mock answer",
|
55
56
|
"question": question,
|
56
|
-
"
|
57
|
+
"pdf_url": pdf_url,
|
57
58
|
}
|
58
59
|
|
59
60
|
|
@@ -61,30 +62,31 @@ def test_question_and_answer_tool_success(monkeypatch):
|
|
61
62
|
"""
|
62
63
|
Test that question_and_answer_tool returns the expected result on success.
|
63
64
|
"""
|
64
|
-
|
65
|
-
|
66
|
-
)
|
67
|
-
# Create a valid state with pdf_data
|
68
|
-
# and include a dummy llm_model.
|
65
|
+
# Patch generate_answer2 because the tool calls that.
|
66
|
+
monkeypatch.setattr(question_and_answer, "generate_answer2", fake_generate_answer2)
|
67
|
+
dummy_text_embedding_model = object() # Provide a dummy text embedding model.
|
68
|
+
# Create a valid state with pdf_data and include dummy llm_model and text_embedding_model.
|
69
69
|
state = {
|
70
70
|
"pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
|
71
71
|
"llm_model": object(), # Provide a dummy LLM model instance.
|
72
|
+
"text_embedding_model": dummy_text_embedding_model,
|
72
73
|
}
|
73
74
|
question = "What is in the PDF?"
|
74
|
-
# Call the underlying function directly via .func to bypass the StructuredTool wrapper.
|
75
75
|
result = question_and_answer_tool.func(
|
76
76
|
question=question, tool_call_id="test_call_id", state=state
|
77
77
|
)
|
78
78
|
assert result["answer"] == "Mock answer"
|
79
79
|
assert result["question"] == question
|
80
|
-
assert result["
|
80
|
+
assert result["pdf_url"] == "http://dummy.url"
|
81
81
|
|
82
82
|
|
83
83
|
def test_question_and_answer_tool_no_pdf_data():
|
84
84
|
"""
|
85
85
|
Test that an error is returned if the state lacks the 'pdf_data' key.
|
86
86
|
"""
|
87
|
-
state = {
|
87
|
+
state = {
|
88
|
+
"text_embedding_model": object(), # Added to avoid KeyError.
|
89
|
+
}
|
88
90
|
question = "Any question?"
|
89
91
|
result = question_and_answer_tool.func(
|
90
92
|
question=question, tool_call_id="test_call_id", state=state
|
@@ -97,7 +99,11 @@ def test_question_and_answer_tool_no_pdf_object():
|
|
97
99
|
"""
|
98
100
|
Test that an error is returned if the pdf_object is missing within pdf_data.
|
99
101
|
"""
|
100
|
-
state = {
|
102
|
+
state = {
|
103
|
+
"pdf_data": {"pdf_object": None},
|
104
|
+
"text_embedding_model": object(), # Added to avoid KeyError.
|
105
|
+
"llm_model": object(), # Dummy LLM model.
|
106
|
+
}
|
101
107
|
question = "Any question?"
|
102
108
|
result = question_and_answer_tool.func(
|
103
109
|
question=question, tool_call_id="test_call_id", state=state
|
@@ -114,8 +120,9 @@ def test_question_and_answer_tool_no_llm_model():
|
|
114
120
|
Test that an error is returned if the LLM model is missing in the state.
|
115
121
|
"""
|
116
122
|
state = {
|
117
|
-
"pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"}
|
118
|
-
#
|
123
|
+
"pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
|
124
|
+
"text_embedding_model": object(), # Added to avoid KeyError.
|
125
|
+
# llm_model is intentionally omitted.
|
119
126
|
}
|
120
127
|
question = "What is in the PDF?"
|
121
128
|
result = question_and_answer_tool.func(
|
@@ -124,6 +131,57 @@ def test_question_and_answer_tool_no_llm_model():
|
|
124
131
|
assert result == {"error": "No LLM model found in state."}
|
125
132
|
|
126
133
|
|
134
|
+
def test_generate_answer2_actual(monkeypatch):
|
135
|
+
"""
|
136
|
+
Test the actual behavior of generate_answer2 using fake dependencies
|
137
|
+
to exercise its internal logic.
|
138
|
+
"""
|
139
|
+
|
140
|
+
# Create a fake PyPDFLoader that does not perform a network call.
|
141
|
+
class FakePyPDFLoader:
|
142
|
+
"""class to fake PyPDFLoader"""
|
143
|
+
|
144
|
+
def __init__(self, file_path, headers=None):
|
145
|
+
"""Initialize the fake PyPDFLoader."""
|
146
|
+
self.file_path = file_path
|
147
|
+
self.headers = headers
|
148
|
+
|
149
|
+
def lazy_load(self):
|
150
|
+
"""Return a list with one fake Document."""
|
151
|
+
# Return a list with one fake Document.
|
152
|
+
return [Document(page_content="Answer for Test question?")]
|
153
|
+
|
154
|
+
monkeypatch.setattr(question_and_answer, "PyPDFLoader", FakePyPDFLoader)
|
155
|
+
|
156
|
+
# Create a fake vector store that returns a controlled result for similarity_search.
|
157
|
+
class FakeVectorStore:
|
158
|
+
"""Fake vector store for similarity search."""
|
159
|
+
|
160
|
+
def similarity_search(self, query):
|
161
|
+
"""Return a list with one Document containing our expected answer."""
|
162
|
+
# Return a list with one Document containing our expected answer.
|
163
|
+
return [Document(page_content=f"Answer for {query}")]
|
164
|
+
|
165
|
+
monkeypatch.setattr(
|
166
|
+
question_and_answer.InMemoryVectorStore,
|
167
|
+
"from_documents",
|
168
|
+
lambda docs, emb: FakeVectorStore(),
|
169
|
+
)
|
170
|
+
|
171
|
+
# Provide a dummy text embedding model.
|
172
|
+
dummy_text_embedding_model = object()
|
173
|
+
question = "Test question?"
|
174
|
+
pdf_url = "http://dummy.pdf"
|
175
|
+
|
176
|
+
# Call generate_answer2 without triggering an actual network call.
|
177
|
+
result = question_and_answer.generate_answer2(
|
178
|
+
question, pdf_url, dummy_text_embedding_model
|
179
|
+
)
|
180
|
+
# The function should join the page content from the similarity search.
|
181
|
+
expected = "Answer for Test question?"
|
182
|
+
assert result == expected
|
183
|
+
|
184
|
+
|
127
185
|
def test_generate_answer(monkeypatch):
|
128
186
|
"""
|
129
187
|
Test generate_answer function with controlled monkeypatched dependencies.
|
@@ -141,12 +199,15 @@ def test_generate_answer(monkeypatch):
|
|
141
199
|
"""
|
142
200
|
Fake Annoy.from_documents function that returns a fake vector store.
|
143
201
|
"""
|
202
|
+
|
144
203
|
# pylint: disable=too-few-public-methods, unused-argument
|
145
204
|
class FakeVectorStore:
|
146
205
|
"""Fake vector store for similarity search."""
|
206
|
+
|
147
207
|
def similarity_search(self, _question, k):
|
148
208
|
"""Return a list with a single dummy Document."""
|
149
209
|
return [Document(page_content="dummy content")]
|
210
|
+
|
150
211
|
return FakeVectorStore()
|
151
212
|
|
152
213
|
monkeypatch.setattr(
|
@@ -157,9 +218,11 @@ def test_generate_answer(monkeypatch):
|
|
157
218
|
"""
|
158
219
|
Fake load_qa_chain function that returns a fake QA chain.
|
159
220
|
"""
|
221
|
+
|
160
222
|
# pylint: disable=too-few-public-methods, unused-argument
|
161
223
|
class FakeChain:
|
162
224
|
"""Fake QA chain for testing generate_answer."""
|
225
|
+
|
163
226
|
def invoke(self, **kwargs):
|
164
227
|
"""
|
165
228
|
Fake invoke method that returns a mock answer.
|
@@ -169,6 +232,7 @@ def test_generate_answer(monkeypatch):
|
|
169
232
|
"answer": "real mock answer",
|
170
233
|
"question": input_data.get("question"),
|
171
234
|
}
|
235
|
+
|
172
236
|
return FakeChain()
|
173
237
|
|
174
238
|
monkeypatch.setattr(question_and_answer, "load_qa_chain", fake_load_qa_chain)
|