aiagents4pharma 1.27.1__py3-none-any.whl → 1.28.0__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the registry.
@@ -5,3 +5,4 @@ This file is used to import all the modules in the package.
  from . import main_agent
  from . import s2_agent
  from . import zotero_agent
+ from . import pdf_agent
@@ -0,0 +1,106 @@
+ #!/usr/bin/env python3
+ """
+ Agent for interacting with PDF documents via question and answer.
+
+ This module initializes and compiles a LangGraph application that enables users to query PDF
+ documents using a question_and_answer tool. It integrates a language model and follows
+ the ReAct pattern to process and answer queries related to PDF content.
+
+ Usage:
+     >>> app = get_app("unique_thread_id", llm_model)
+     >>> response = app.invoke(initial_state)
+ """
+
+ import logging
+ import hydra
+ from langchain_core.language_models.chat_models import BaseChatModel
+ from langgraph.graph import START, StateGraph
+ from langgraph.prebuilt import create_react_agent, ToolNode
+ from langgraph.checkpoint.memory import MemorySaver
+ from ..state.state_talk2scholars import Talk2Scholars
+ from ..tools.pdf.question_and_answer import question_and_answer_tool
+ from ..tools.s2.query_results import query_results
+
+ # Initialize logger
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ def get_app(
+     uniq_id,
+     llm_model: BaseChatModel
+ ):
+     """
+     Initializes and returns the LangGraph application for the PDF agent.
+
+     This function sets up the PDF agent by loading configuration settings via Hydra,
+     initializing a model, and creating a workflow graph that incorporates
+     PDF-specific tools. The agent is built using the ReAct pattern to facilitate interactive
+     querying and processing of PDF documents.
+
+     Args:
+         uniq_id (str): A unique identifier for the current conversation session or thread.
+         llm_model (BaseChatModel): The language model instance to be used.
+
+     Returns:
+         StateGraph: A compiled LangGraph application capable of handling PDF interactions.
+
+     Example:
+         >>> app = get_app("thread_123", llm_model)
+         >>> result = app.invoke(initial_state)
+     """
+     # Load configuration using Hydra.
+     with hydra.initialize(version_base=None, config_path="../configs"):
+         cfg = hydra.compose(
+             config_name="config",
+             overrides=["agents/talk2scholars/pdf_agent=default"],
+         )
+         cfg = cfg.agents.talk2scholars.pdf_agent
+         logger.info("Loaded pdf_agent configuration.")
+
+     def agent_pdf_node(state: Talk2Scholars):
+         """
+         Processes the current state by invoking the language model for PDF question and answer.
+
+         Args:
+             state (Talk2Scholars): The current conversation state containing query details
+                 and context.
+
+         Returns:
+             Any: The response generated by the language model after processing the state.
+         """
+         logger.info("Creating Agent_PDF node with thread_id %s", uniq_id)
+         response = model.invoke(
+             state,
+             {"configurable": {"thread_id": uniq_id}}
+         )
+         return response
+
+     # Define the tool node that includes the PDF QnA tool.
+     tools = ToolNode([question_and_answer_tool, query_results])
+
+     logger.info("Using model %s", llm_model)
+
+     # Create the agent using the provided BaseChatModel instance.
+     model = create_react_agent(
+         llm_model,
+         tools=tools,
+         state_schema=Talk2Scholars,
+         prompt=cfg.pdf_agent,
+         checkpointer=MemorySaver(),
+     )
+
+     # Define a new workflow graph with the state schema.
+     workflow = StateGraph(Talk2Scholars)
+     workflow.add_node("agent_pdf", agent_pdf_node)
+     workflow.add_edge(START, "agent_pdf")
+
+     # Initialize memory to persist state between runs.
+     checkpointer = MemorySaver()
+
+     # Compile the graph into a runnable app.
+     app = workflow.compile(checkpointer=checkpointer)
+     logger.info("Compiled the PDF agent graph.")
+
+     return app
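
For orientation, a minimal driver for the new PDF agent might look like the sketch below. This is an illustration, not part of the diff: the model choice, the local file path, and the exact shape of initial_state are assumptions pieced together from the docstrings, the Talk2Scholars state schema, and the tests further down.

    from langchain_core.messages import HumanMessage
    from langchain_openai import ChatOpenAI
    from aiagents4pharma.talk2scholars.agents.pdf_agent import get_app

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # assumed model choice
    app = get_app("thread_123", llm)

    with open("paper.pdf", "rb") as fh:  # hypothetical local PDF
        pdf_bytes = fh.read()

    initial_state = {
        "messages": [HumanMessage(content="Summarize the methods section.")],
        "pdf_data": {"pdf_object": pdf_bytes, "pdf_url": "file://paper.pdf"},
        "llm_model": llm,
    }
    response = app.invoke(
        initial_state,
        config={"configurable": {"thread_id": "thread_123"}},
    )
    print(response["messages"][-1].content)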
@@ -4,4 +4,5 @@ Import all the modules in the package
 
  from . import s2_agent
  from . import main_agent
+ from . import pdf_agent
  from . import zotero_agent
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -4,8 +4,10 @@ defaults:
  - agents/talk2scholars/s2_agent: default
  - agents/talk2scholars/zotero_agent: default
  - app/frontend: default
+ - agents/talk2scholars/pdf_agent: default
  - tools/search: default
  - tools/single_paper_recommendation: default
  - tools/multi_paper_recommendation: default
  - tools/retrieve_semantic_scholar_paper_id: default
+ - tools/question_and_answer: default
  - tools/zotero_read: default
@@ -5,4 +5,5 @@ Import all the modules in the package
  from . import search
  from . import single_paper_recommendation
  from . import multi_paper_recommendation
+ from . import question_and_answer
  from . import zotero_read
@@ -0,0 +1,3 @@
+ """
+ Import all the modules in the package
+ """
@@ -60,5 +60,6 @@ class Talk2Scholars(AgentState):
      last_displayed_papers: Annotated[Dict[str, Any], replace_dict]
      papers: Annotated[Dict[str, Any], replace_dict]
      multi_papers: Annotated[Dict[str, Any], replace_dict]
+     pdf_data: Annotated[Dict[str, Any], replace_dict]
      zotero_read: Annotated[Dict[str, Any], replace_dict]
      llm_model: BaseChatModel
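
The new pdf_data field reuses the replace_dict reducer shared by the other dictionary fields, so each update replaces the stored PDF payload rather than merging into it (inferred from the reducer's name and its uniform use here). Based on the question_and_answer tool's docstring, the payload under this key is expected to look roughly like:

    # Hypothetical state update written by whatever step uploads the PDF.
    state_update = {
        "pdf_data": {
            "pdf_object": pdf_bytes,                    # raw PDF binary consumed by the tool
            "pdf_url": "http://example.org/paper.pdf",  # provenance of the document
        }
    }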
@@ -0,0 +1,126 @@
+ """
+ Unit Tests for the PDF agent.
+ """
+
+ # pylint: disable=redefined-outer-name
+ from unittest import mock
+ import pytest
+ from langchain_core.messages import HumanMessage, AIMessage
+ from ..agents.pdf_agent import get_app
+ from ..state.state_talk2scholars import Talk2Scholars
+
+
+ @pytest.fixture(autouse=True)
+ def mock_hydra_fixture():
+     """Mock Hydra configuration to prevent external dependencies."""
+     with mock.patch("hydra.initialize"), mock.patch("hydra.compose") as mock_compose:
+         # Create a mock configuration with a pdf_agent section.
+         cfg_mock = mock.MagicMock()
+         # The pdf_agent config will be accessed as cfg.agents.talk2scholars.pdf_agent in get_app.
+         cfg_mock.agents.talk2scholars.pdf_agent.some_property = "Test prompt"
+         mock_compose.return_value = cfg_mock
+         yield mock_compose
+
+
+ @pytest.fixture
+ def mock_tools_fixture():
+     """Mock PDF agent tools to prevent execution of real API calls."""
+     with (
+         mock.patch(
+             "aiagents4pharma.talk2scholars.agents.pdf_agent.question_and_answer_tool"
+         ) as mock_question_and_answer_tool,
+         mock.patch(
+             "aiagents4pharma.talk2scholars.agents.pdf_agent.query_results"
+         ) as mock_query_results,
+     ):
+         mock_question_and_answer_tool.return_value = {
+             "result": "Mock Question and Answer Result"
+         }
+         mock_query_results.return_value = {"result": "Mock Query Result"}
+         yield [mock_question_and_answer_tool, mock_query_results]
+
+
+ @pytest.fixture
+ def mock_llm():
+     """Provide a dummy language model to pass into get_app."""
+     return mock.Mock()
+
+
+ @pytest.mark.usefixtures("mock_hydra_fixture")
+ def test_pdf_agent_initialization(mock_llm):
+     """Test that PDF agent initializes correctly with mock configuration."""
+     thread_id = "test_thread"
+     with mock.patch(
+         "aiagents4pharma.talk2scholars.agents.pdf_agent.create_react_agent"
+     ) as mock_create:
+         mock_create.return_value = mock.Mock()
+         app = get_app(thread_id, mock_llm)
+         assert app is not None
+         assert mock_create.called
+
+
+ def test_pdf_agent_invocation(mock_llm):
+     """Test that the PDF agent processes user input and returns a valid response."""
+     thread_id = "test_thread"
+     # Create a sample state with a human message.
+     mock_state = Talk2Scholars(
+         messages=[HumanMessage(content="Extract key data from PDF")]
+     )
+     with mock.patch(
+         "aiagents4pharma.talk2scholars.agents.pdf_agent.create_react_agent"
+     ) as mock_create:
+         mock_agent = mock.Mock()
+         mock_create.return_value = mock_agent
+         # Simulate a response from the PDF agent.
+         mock_agent.invoke.return_value = {
+             "messages": [
+                 AIMessage(content="PDF content extracted successfully")
+             ],
+             "pdf_data": {"page": 1, "text": "Sample PDF text"},
+         }
+         app = get_app(thread_id, mock_llm)
+         result = app.invoke(
+             mock_state,
+             config={
+                 "configurable": {
+                     "thread_id": thread_id,
+                     "checkpoint_ns": "test_ns",
+                     "checkpoint_id": "test_checkpoint",
+                 }
+             },
+         )
+         assert "messages" in result
+         assert "pdf_data" in result
+         assert result["pdf_data"]["page"] == 1
+
+
+ def test_pdf_agent_tools_assignment(request, mock_llm):
+     """Ensure that the correct tools are assigned to the PDF agent."""
+     thread_id = "test_thread"
+     mock_tools = request.getfixturevalue("mock_tools_fixture")
+     with (
+         mock.patch(
+             "aiagents4pharma.talk2scholars.agents.pdf_agent.create_react_agent"
+         ) as mock_create,
+         mock.patch(
+             "aiagents4pharma.talk2scholars.agents.pdf_agent.ToolNode"
+         ) as mock_toolnode,
+     ):
+         mock_agent = mock.Mock()
+         mock_create.return_value = mock_agent
+         mock_tool_instance = mock.Mock()
+         # For the PDF agent, we expect two tools: question_and_answer_tool and query_results.
+         mock_tool_instance.tools = mock_tools
+         mock_toolnode.return_value = mock_tool_instance
+         get_app(thread_id, mock_llm)
+         assert mock_toolnode.called
+         assert len(mock_tool_instance.tools) == 2
+
+
+ def test_pdf_agent_hydra_failure(mock_llm):
+     """Test exception handling when Hydra fails to load config for PDF agent."""
+     thread_id = "test_thread"
+     with mock.patch("hydra.initialize", side_effect=Exception("Hydra error")):
+         with pytest.raises(Exception) as exc_info:
+             get_app(thread_id, mock_llm)
+         assert "Hydra error" in str(exc_info.value)
@@ -0,0 +1,186 @@
+ """
+ Unit tests for question_and_answer tool functionality.
+ """
+
+ from langchain.docstore.document import Document
+
+ from ..tools.pdf import question_and_answer
+ from ..tools.pdf.question_and_answer import (
+     extract_text_from_pdf_data,
+     question_and_answer_tool,
+     generate_answer,
+ )
+
+
+ def test_extract_text_from_pdf_data():
+     """
+     Test that extract_text_from_pdf_data returns text containing 'Hello World'.
+     """
+     extracted_text = extract_text_from_pdf_data(DUMMY_PDF_BYTES)
+     assert "Hello World" in extracted_text
+
+
+ DUMMY_PDF_BYTES = (
+     b"%PDF-1.4\n"
+     b"%\xe2\xe3\xcf\xd3\n"
+     b"1 0 obj\n"
+     b"<< /Type /Catalog /Pages 2 0 R >>\n"
+     b"endobj\n"
+     b"2 0 obj\n"
+     b"<< /Type /Pages /Count 1 /Kids [3 0 R] >>\n"
+     b"endobj\n"
+     b"3 0 obj\n"
+     b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R "
+     b"/Resources << /Font << /F1 5 0 R >> >> >>\n"
+     b"endobj\n"
+     b"4 0 obj\n"
+     b"<< /Length 44 >>\n"
+     b"stream\nBT\n/F1 24 Tf\n72 712 Td\n(Hello World) Tj\nET\nendstream\n"
+     b"endobj\n"
+     b"5 0 obj\n"
+     b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\n"
+     b"endobj\n"
+     b"xref\n0 6\n0000000000 65535 f \n0000000010 00000 n \n0000000053 00000 n \n"
+     b"0000000100 00000 n \n0000000150 00000 n \n0000000200 00000 n \n"
+     b"trailer\n<< /Size 6 /Root 1 0 R >>\nstartxref\n250\n%%EOF\n"
+ )
+
+
+ def fake_generate_answer(question, pdf_bytes, _llm_model):
+     """
+     Fake generate_answer function to bypass external dependencies.
+     """
+     return {
+         "answer": "Mock answer",
+         "question": question,
+         "pdf_bytes_length": len(pdf_bytes),
+     }
+
+
+ def test_question_and_answer_tool_success(monkeypatch):
+     """
+     Test that question_and_answer_tool returns the expected result on success.
+     """
+     monkeypatch.setattr(
+         question_and_answer, "generate_answer", fake_generate_answer
+     )
+     # Create a valid state with pdf_data containing both pdf_object and pdf_url,
+     # and include a dummy llm_model.
+     state = {
+         "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
+         "llm_model": object(),  # Provide a dummy LLM model instance.
+     }
+     question = "What is in the PDF?"
+     # Call the underlying function directly via .func to bypass the StructuredTool wrapper.
+     result = question_and_answer_tool.func(
+         question=question, tool_call_id="test_call_id", state=state
+     )
+     assert result["answer"] == "Mock answer"
+     assert result["question"] == question
+     assert result["pdf_bytes_length"] == len(DUMMY_PDF_BYTES)
+
+
+ def test_question_and_answer_tool_no_pdf_data():
+     """
+     Test that an error is returned if the state lacks the 'pdf_data' key.
+     """
+     state = {}  # pdf_data key is missing.
+     question = "Any question?"
+     result = question_and_answer_tool.func(
+         question=question, tool_call_id="test_call_id", state=state
+     )
+     messages = result.update["messages"]
+     assert any("No pdf_data found in state." in msg.content for msg in messages)
+
+
+ def test_question_and_answer_tool_no_pdf_object():
+     """
+     Test that an error is returned if the pdf_object is missing within pdf_data.
+     """
+     state = {"pdf_data": {"pdf_object": None}}
+     question = "Any question?"
+     result = question_and_answer_tool.func(
+         question=question, tool_call_id="test_call_id", state=state
+     )
+     messages = result.update["messages"]
+     assert any(
+         "PDF binary data is missing in the pdf_data from state." in msg.content
+         for msg in messages
+     )
+
+
+ def test_question_and_answer_tool_no_llm_model():
+     """
+     Test that an error is returned if the LLM model is missing in the state.
+     """
+     state = {
+         "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"}
+         # Note: llm_model is intentionally omitted.
+     }
+     question = "What is in the PDF?"
+     result = question_and_answer_tool.func(
+         question=question, tool_call_id="test_call_id", state=state
+     )
+     assert result == {"error": "No LLM model found in state."}
+
+
+ def test_generate_answer(monkeypatch):
+     """
+     Test generate_answer function with controlled monkeypatched dependencies.
+     """
+
+     def fake_split_text(_self, _text):
+         """Fake split_text method that returns controlled chunks."""
+         return ["chunk1", "chunk2"]
+
+     monkeypatch.setattr(
+         question_and_answer.CharacterTextSplitter, "split_text", fake_split_text
+     )
+
+     def fake_annoy_from_documents(_documents, _embeddings):
+         """
+         Fake Annoy.from_documents function that returns a fake vector store.
+         """
+         # pylint: disable=too-few-public-methods, unused-argument
+         class FakeVectorStore:
+             """Fake vector store for similarity search."""
+             def similarity_search(self, _question, k):
+                 """Return a list with a single dummy Document."""
+                 return [Document(page_content="dummy content")]
+         return FakeVectorStore()
+
+     monkeypatch.setattr(
+         question_and_answer.Annoy, "from_documents", fake_annoy_from_documents
+     )
+
+     def fake_load_qa_chain(_llm, chain_type):  # chain_type matches the keyword argument
+         """
+         Fake load_qa_chain function that returns a fake QA chain.
+         """
+         # pylint: disable=too-few-public-methods, unused-argument
+         class FakeChain:
+             """Fake QA chain for testing generate_answer."""
+             def invoke(self, **kwargs):
+                 """
+                 Fake invoke method that returns a mock answer.
+                 """
+                 input_data = kwargs.get("input")
+                 return {
+                     "answer": "real mock answer",
+                     "question": input_data.get("question"),
+                 }
+         return FakeChain()
+
+     monkeypatch.setattr(question_and_answer, "load_qa_chain", fake_load_qa_chain)
+     # Set dummy configuration values so that generate_answer can run.
+     question_and_answer.cfg.chunk_size = 1000
+     question_and_answer.cfg.chunk_overlap = 0
+     question_and_answer.cfg.openai_api_key = "dummy_key"
+     question_and_answer.cfg.num_retrievals = 1
+     question_and_answer.cfg.qa_chain_type = "dummy-chain"
+
+     question = "What is in the PDF?"
+     dummy_llm_model = object()  # A dummy model placeholder.
+     answer = generate_answer(question, DUMMY_PDF_BYTES, dummy_llm_model)
+     assert answer["answer"] == "real mock answer"
+     assert answer["question"] == question
@@ -5,4 +5,5 @@ Import statements
  """
 
  from . import s2
+ from . import pdf
  from . import zotero
@@ -0,0 +1,5 @@
+ '''
+ This file is used to import all the modules in the package.
+ '''
+
+ from . import question_and_answer
@@ -0,0 +1,170 @@
+ #!/usr/bin/env python3
+ """
+ question_and_answer: Tool for performing Q&A on PDF documents using retrieval augmented generation.
+
+ This module provides functionality to extract text from PDF binary data, split it into
+ chunks, retrieve relevant segments via a vector store, and generate an answer to a
+ user-provided question using a language model chain.
+ """
+
+ import io
+ import logging
+ from typing import Annotated, Dict, Any, List
+
+ from PyPDF2 import PdfReader
+ from pydantic import BaseModel, Field
+ import hydra
+
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.docstore.document import Document
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain_community.vectorstores import Annoy
+ from langchain_openai import OpenAIEmbeddings
+ from langchain_core.language_models.chat_models import BaseChatModel
+
+ from langchain_core.messages import ToolMessage
+ from langchain_core.tools import tool
+ from langchain_core.tools.base import InjectedToolCallId
+ from langgraph.types import Command
+ from langgraph.prebuilt import InjectedState
+
+ # Set up logging.
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+ logger.setLevel(logging.INFO)
+
+ # Load configuration using Hydra.
+ with hydra.initialize(version_base=None, config_path="../../configs"):
+     cfg = hydra.compose(config_name="config", overrides=["tools/question_and_answer=default"])
+     cfg = cfg.tools.question_and_answer
+     logger.info("Loaded Question and Answer tool configuration.")
+
+ class QuestionAndAnswerInput(BaseModel):
+     """
+     Input schema for the PDF Question and Answer tool.
+
+     Attributes:
+         question (str): The question to ask regarding the PDF content.
+         tool_call_id (str): Unique identifier for the tool call, injected automatically.
+         state (dict): The current agent state, injected automatically.
+     """
+     question: str = Field(
+         description="The question to ask regarding the PDF content."
+     )
+     tool_call_id: Annotated[str, InjectedToolCallId]
+     state: Annotated[dict, InjectedState]
+
+ def extract_text_from_pdf_data(pdf_bytes: bytes) -> str:
+     """
+     Extract text content from PDF binary data.
+
+     This function uses PyPDF2 to read the provided PDF bytes and concatenates the text
+     extracted from each page.
+
+     Args:
+         pdf_bytes (bytes): The binary data of the PDF document.
+
+     Returns:
+         str: The complete text extracted from the PDF.
+     """
+     reader = PdfReader(io.BytesIO(pdf_bytes))
+     text = ""
+     for page in reader.pages:
+         page_text = page.extract_text() or ""
+         text += page_text
+     return text
+
+ def generate_answer(question: str, pdf_bytes: bytes, llm_model: BaseChatModel) -> Dict[str, Any]:
+     """
+     Generate an answer for a question using retrieval augmented generation on PDF content.
+
+     This function extracts text from the PDF data, splits the text into manageable chunks,
+     performs a similarity search to retrieve the most relevant segments, and then uses a
+     question-answering chain (built using the provided llm_model) to generate an answer.
+
+     Args:
+         question (str): The question to be answered.
+         pdf_bytes (bytes): The binary content of the PDF document.
+         llm_model (BaseChatModel): The language model instance to use for answering.
+
+     Returns:
+         Dict[str, Any]: A dictionary containing the answer generated by the language model.
+     """
+     text = extract_text_from_pdf_data(pdf_bytes)
+     logger.info("Extracted text from PDF.")
+     text_splitter = CharacterTextSplitter(
+         separator="\n",
+         chunk_size=cfg.chunk_size,
+         chunk_overlap=cfg.chunk_overlap
+     )
+     chunks = text_splitter.split_text(text)
+     documents: List[Document] = [Document(page_content=chunk) for chunk in chunks]
+     logger.info("Split PDF text into %d chunks.", len(documents))
+
+     embeddings = OpenAIEmbeddings(openai_api_key=cfg.openai_api_key)
+     vector_store = Annoy.from_documents(documents, embeddings)
+     search_results = vector_store.similarity_search(
+         question,
+         k=cfg.num_retrievals
+     )
+     logger.info("Retrieved %d relevant document chunks.", len(search_results))
+     # Use the provided llm_model to build the QA chain.
+     qa_chain = load_qa_chain(llm_model, chain_type=cfg.qa_chain_type)
+     answer = qa_chain.invoke(
+         input={"input_documents": search_results, "question": question}
+     )
+     return answer
+
+ @tool(args_schema=QuestionAndAnswerInput)
+ def question_and_answer_tool(
+     question: str,
+     tool_call_id: Annotated[str, InjectedToolCallId],
+     state: Annotated[dict, InjectedState],
+ ) -> Dict[str, Any]:
+     """
+     Answer a question using PDF content stored in the state via retrieval augmented generation.
+
+     This tool retrieves the PDF binary data from the state (under the key "pdf_data"), extracts its
+     textual content, and generates an answer to the specified question. It also extracts the
+     llm_model (of type BaseChatModel) from the state to use for answering.
+
+     Args:
+         question (str): The question regarding the PDF content.
+         tool_call_id (str): Unique identifier for the current tool call.
+         state (dict): A dictionary representing the current state, expected to contain PDF data
+             under the key "pdf_data" with a sub-key "pdf_object" for the binary content,
+             and a key "llm_model" holding the language model instance.
+
+     Returns:
+         Dict[str, Any]: A dictionary containing the generated answer or an error message.
+     """
+     logger.info("Starting PDF Question and Answer tool using PDF data from state.")
+     pdf_state = state.get("pdf_data")
+     if not pdf_state:
+         error_msg = "No pdf_data found in state."
+         logger.error(error_msg)
+         return Command(
+             update={
+                 "messages": [
+                     ToolMessage(content=error_msg, tool_call_id=tool_call_id)
+                 ]
+             }
+         )
+     pdf_bytes = pdf_state.get("pdf_object")
+     if not pdf_bytes:
+         error_msg = "PDF binary data is missing in the pdf_data from state."
+         logger.error(error_msg)
+         return Command(
+             update={
+                 "messages": [
+                     ToolMessage(content=error_msg, tool_call_id=tool_call_id)
+                 ]
+             }
+         )
+     # Retrieve llm_model from state; return an error if it is missing.
+     llm_model = state.get("llm_model")
+     if not llm_model:
+         logger.error("Missing LLM model instance in state.")
+         return {"error": "No LLM model found in state."}
+     answer = generate_answer(question, pdf_bytes, llm_model)
+     logger.info("Generated answer: %s", answer)
+     return answer
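
The generate_answer helper can also be exercised outside the agent loop. The sketch below is an illustration, not shipped code: it assumes the Hydra config supplies a valid openai_api_key (OpenAIEmbeddings is hard-wired above), and it prints whatever dictionary the QA chain returns rather than assuming particular keys.

    from langchain_openai import ChatOpenAI
    from aiagents4pharma.talk2scholars.tools.pdf.question_and_answer import generate_answer

    with open("paper.pdf", "rb") as fh:  # hypothetical local PDF
        pdf_bytes = fh.read()

    llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)  # assumed model choice
    result = generate_answer("What datasets were used?", pdf_bytes, llm)
    print(result)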
@@ -44,7 +44,7 @@ def query_results(question: str, state: Annotated[dict, InjectedState]) -> str:
          raise NoPapersFoundError(
              "No papers found. A search needs to be performed first."
          )
-     context_key = state.get("last_displayed_papers")
+     context_key = state.get("last_displayed_papers", "pdf_data")
      dic_papers = state.get(context_key)
      df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
      df_agent = create_pandas_dataframe_agent(
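
The one-line change above relies on dict.get's optional second argument: when "last_displayed_papers" is absent, the fallback string "pdf_data" is returned and used as the context key, so query_results can operate on the PDF payload even when no paper search has populated the state. A minimal illustration:

    # dict.get returns the second argument instead of None when the key is missing.
    state = {"pdf_data": {"paper1": {"Title": "..."}}}
    context_key = state.get("last_displayed_papers", "pdf_data")  # -> "pdf_data"
    dic_papers = state.get(context_key)                           # -> the pdf_data dict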
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: aiagents4pharma
- Version: 1.27.1
+ Version: 1.28.0
  Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: MIT License
@@ -135,16 +135,18 @@ aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py,sha256
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py,sha256=7gwwtfzKhB8GuOBD47XRi0NprwEXkOzwNl5eeu-hDTI,86
  aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py,sha256=m5p0yoJb7I19ua5yeQfXPf7c4r6S1XPwttsrM7Qoy94,9336
  aiagents4pharma/talk2scholars/__init__.py,sha256=gphERyVKZHvOnMQsml7TIHlaIshHJ75R1J3FKExkfuY,120
- aiagents4pharma/talk2scholars/agents/__init__.py,sha256=WxEauzCzLEGyhdIRkxSBpNW5c_Uzf7iJUdM57IQkXH8,144
+ aiagents4pharma/talk2scholars/agents/__init__.py,sha256=ZwFiHOlDGJk1601J5xEZDy0btPzqiOk2UCocKxohde8,168
  aiagents4pharma/talk2scholars/agents/main_agent.py,sha256=nZIhOyEUSHECM4-wEHbDrfHRLkqoxW0H4fy6-MpA6N8,9397
+ aiagents4pharma/talk2scholars/agents/pdf_agent.py,sha256=c9-_z5qp5Zkgh6piEIlgI4uo4OMXD3janZNmfYwnFCg,3729
  aiagents4pharma/talk2scholars/agents/s2_agent.py,sha256=ZiXtQVX2UbIyMOSXajuloWepEm7DKs6ZpPS0HgHzw0g,4492
  aiagents4pharma/talk2scholars/agents/zotero_agent.py,sha256=flIvg1ORaMiQpGEbsRM4zJHRNXi6UUv7emHDjH5HVY4,3961
  aiagents4pharma/talk2scholars/configs/__init__.py,sha256=tf2gz8n7M4ko6xLdX_C925ELVIxoP6SgkPcbeh59ad4,151
- aiagents4pharma/talk2scholars/configs/config.yaml,sha256=IBrHX_mACNb7R4rrI_zbWgscAMUdIAkOg9LDgLN1o28,386
+ aiagents4pharma/talk2scholars/configs/config.yaml,sha256=Lk5kZSDENqCMhushMxDIyLCzLtH7IpvVP_9f5BaUAMQ,469
  aiagents4pharma/talk2scholars/configs/agents/__init__.py,sha256=yyh7PB2oY_JulnpSQCWS4wwCH_uzIdt47O2Ay48x_oU,75
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=MI4RmoQQ2P-JQgCJ8XEucqF6g2VlNNi37yxcsXFj9Oo,122
+ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py,sha256=64GEWAoKOd_YHLi27eSOcOC5eSLK0IG_FNra3ZBt02Y,146
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml,sha256=wHbFTQSfdRc0JO2lbzXXHliFNz40Oza-mGmVphNOoPw,2615
+ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml,sha256=WQOHG1WwnoQSUyIRfEEK6LLGwmWy2gaZNXpb12WsgNk,1975
  aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
@@ -152,9 +154,10 @@ aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.
  aiagents4pharma/talk2scholars/configs/app/__init__.py,sha256=JoSZV6N669kGMv5zLDszwf0ZjcRHx9TJfIqGhIIdPXE,70
  aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml,sha256=wsELBdRLv6UqZ9QZfwpS7K4xfMj5s-a99-aXqIs6WEI,868
- aiagents4pharma/talk2scholars/configs/tools/__init__.py,sha256=z5PzYS1WyoC4PiWVIIOGICldps2yMh2E_2HFZA9ltpQ,177
+ aiagents4pharma/talk2scholars/configs/tools/__init__.py,sha256=NDXBZVtEYpOVL0EMm69ffoFAZw9G4tQiwsSFdxRrxLQ,211
  aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml,sha256=iEsEW89MlQwKsAW4ZAxLt4pDBwA1qxImYQ2dfONIf6c,442
+ aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml,sha256=HG-N8yRjlX9zFwbIBvaDI9ndKjfL-gqPTCCPMLgdUpw,271
  aiagents4pharma/talk2scholars/configs/tools/search/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
@@ -164,30 +167,34 @@ aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.
  aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py,sha256=fqQQ-GlRcbzru2KmEk3oMma0R6_SzGM8dOXzYeU4oVA,46
  aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml,sha256=iILspz9EvN8jpVHzMsW3L9BDEST5eqOUO7TnhxwXBrI,468
  aiagents4pharma/talk2scholars/state/__init__.py,sha256=S6SxlszIMZSIMJehjevPF9sKyR-PAwWb5TEdo6xWXE8,103
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=DoCtKP2qd69mXPwfOb-aYw9Hq2fYmx6b76S-HlsVSNo,2382
+ aiagents4pharma/talk2scholars/state/state_talk2scholars.py,sha256=E0epqlBurzNcMzN4WV6nh--YkVAesbxQTuBBlJhESVA,2436
  aiagents4pharma/talk2scholars/tests/__init__.py,sha256=U3PsTiUZaUBD1IZanFGkDIOdFieDVJtGKQ5-woYUo8c,45
  aiagents4pharma/talk2scholars/tests/test_call_s2.py,sha256=ZL5HmnYNVyaBJgPGQi9JnbD1d1rtWnWusVxVRVW3aHc,3375
  aiagents4pharma/talk2scholars/tests/test_call_zotero.py,sha256=N4g6Pt2vuaxIhHQbIqlMaDUF4O7vIvRqa7pPIkpL8FI,3314
  aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py,sha256=SAMG-Kb2S9sei8Us5vUWCUJikTKXPZVKQ6aJJPEhJsc,1880
  aiagents4pharma/talk2scholars/tests/test_main_agent.py,sha256=8FKujCVhkurCe5IE6OGPTmz1p4eH1CDi467vM6VtM5A,4318
+ aiagents4pharma/talk2scholars/tests/test_pdf_agent.py,sha256=TN4Sq5-SCxv-9VfFyq7sOlBlxbekmnWuB7-qh4MrhkA,4656
+ aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py,sha256=TpCDiGfsC2y6bOkm0ZTXjT1Vp8D-Po25wiEH5aDT_DA,6491
  aiagents4pharma/talk2scholars/tests/test_routing_logic.py,sha256=AZrvaEBDk51KL6edrZY3GpQ_N6VbrlADqXFeg_jxDoQ,2284
  aiagents4pharma/talk2scholars/tests/test_s2_agent.py,sha256=BhW1wGc-wUPS4fwNBQRtBXJaJ_i7L6t_G9Bq57fK7rI,7784
  aiagents4pharma/talk2scholars/tests/test_s2_tools.py,sha256=QEwraJk9_Kp6ZSGYyYDXWH62wIjSwi1Pptwwbx1fuG0,13176
  aiagents4pharma/talk2scholars/tests/test_state.py,sha256=_iHXvoZnU_eruf8l1sQKBSCIVnxNkH_9VzkVtZZA6bY,384
  aiagents4pharma/talk2scholars/tests/test_zotero_agent.py,sha256=3TKz6yjNfYulaQv-MBv1zXCmR9xh9g3ju4Ge5HDdt1o,6136
  aiagents4pharma/talk2scholars/tests/test_zotero_tool.py,sha256=LI7KBTxPga7E-841pugjpNqtWgoIz0mDIJEZzdIL9eI,5759
- aiagents4pharma/talk2scholars/tools/__init__.py,sha256=-9iXVIGzFLak6a14Ib8yDg1bfiHgJz2nAhwWEk1jhOk,89
+ aiagents4pharma/talk2scholars/tools/__init__.py,sha256=UtGutYNNaRcr2nOmT_XqbTiaJpgVYKo3KVGVPFVrX2Y,107
+ aiagents4pharma/talk2scholars/tools/pdf/__init__.py,sha256=WOm-o-fFzyjFZBaHg658Gjzdiu1Kt-h9xvzvw0hR7aE,103
+ aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py,sha256=22JvT7F0rY11TF40pBfe9Cn2Y-6Tx73NfWDt4NJv700,6639
  aiagents4pharma/talk2scholars/tools/s2/__init__.py,sha256=wytqCmGm8Fbl8y5qLdIkxhhG8VHLYMifCGjbH_LK2Fc,258
  aiagents4pharma/talk2scholars/tools/s2/display_results.py,sha256=UR0PtEHGDpOhPH0Di5HT8-Fip2RkEMTJgzROsChb1gc,2959
  aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py,sha256=QM30Oq3518cuEWwpfA5R7NzNmNklYUkt9Y1D5jdjmG4,5430
- aiagents4pharma/talk2scholars/tools/s2/query_results.py,sha256=EUfzRh5Qc_tMl5fDIFb9PIsQkkrU4Xb5MR0sud_X5-c,2017
+ aiagents4pharma/talk2scholars/tools/s2/query_results.py,sha256=S4yBNtg1loDu4ckLPrW4H8GAswriPaRU4U08cOuw2HE,2028
  aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py,sha256=Lg1L4HQCN2LaQEyWtLD73O67PMoXkPHi-Y8rCzHS0A4,2499
  aiagents4pharma/talk2scholars/tools/s2/search.py,sha256=i5KMFJWK31CjYtVT1McJpLzgcwvyTHZe2aHZlscfK3Q,4667
  aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py,sha256=7PoZfcstxDThWX6NYOgxN_9M_nwgMPAALch8OmjraVY,5568
  aiagents4pharma/talk2scholars/tools/zotero/__init__.py,sha256=1UW4r5ECvAwYpo1Fjf7lQPO--M8I85baYCHocFOAq4M,53
  aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py,sha256=NJ65fAJ4u2Zq15uvEajVOhI4QnNvyqA6FHPaEDqvMw0,4321
- aiagents4pharma-1.27.1.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
- aiagents4pharma-1.27.1.dist-info/METADATA,sha256=kTCOwxjPuhVeXqavYrNYu08GDiJsykcb_8Vz7_xK_l0,13252
- aiagents4pharma-1.27.1.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
- aiagents4pharma-1.27.1.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
- aiagents4pharma-1.27.1.dist-info/RECORD,,
+ aiagents4pharma-1.28.0.dist-info/LICENSE,sha256=IcIbyB1Hyk5ZDah03VNQvJkbNk2hkBCDqQ8qtnCvB4Q,1077
+ aiagents4pharma-1.28.0.dist-info/METADATA,sha256=jG17DZJJ8a8hrmwOECb_eDodny-pGm93rTuu9nYnacc,13252
+ aiagents4pharma-1.28.0.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
+ aiagents4pharma-1.28.0.dist-info/top_level.txt,sha256=-AH8rMmrSnJtq7HaAObS78UU-cTCwvX660dSxeM7a0A,16
+ aiagents4pharma-1.28.0.dist-info/RECORD,,