aiagents4pharma 1.31.0__py3-none-any.whl → 1.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +44 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +1 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +90 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
- aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
- aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
- aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/METADATA +3 -1
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/RECORD +37 -37
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/WHEEL +1 -1
- aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
- aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
- aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.33.0.dist-info}/top_level.txt +0 -0
```diff
--- a/aiagents4pharma/talk2scholars/tools/s2/__init__.py
+++ b/aiagents4pharma/talk2scholars/tools/s2/__init__.py
@@ -2,18 +2,18 @@
 This file is used to import all the modules in the package.
 """
 
-from . import display_results
+from . import display_dataframe
 from . import multi_paper_rec
 from . import search
 from . import single_paper_rec
-from . import query_results
+from . import query_dataframe
 from . import retrieve_semantic_scholar_paper_id
 
 __all__ = [
-    "display_results",
+    "display_dataframe",
     "multi_paper_rec",
     "search",
     "single_paper_rec",
-    "query_results",
+    "query_dataframe",
     "retrieve_semantic_scholar_paper_id",
 ]
```
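The hunk above amounts to a rename of the package's public surface: `display_results` becomes `display_dataframe` and `query_results` becomes `query_dataframe` (matching the file rename and deletion in the file list). A minimal sketch of what downstream imports look like after upgrading; the module path comes from the file list, and the snippet is illustrative rather than part of the package:

```python
# Illustrative only: the renamed s2 tool modules in 1.33.0.
# Code still importing display_results or query_results will raise ImportError.
from aiagents4pharma.talk2scholars.tools.s2 import (
    display_dataframe,
    query_dataframe,
)
```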
```diff
--- a/aiagents4pharma/talk2scholars/tools/s2/display_results.py
+++ b/aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py
@@ -2,11 +2,13 @@
 
 
 """
-Tool for
+Tool for rendering the most recently displayed papers as a DataFrame artifact for the front-end.
 
-This module defines a tool that retrieves
-
-
+This module defines a tool that retrieves the paper metadata stored under the state key
+'last_displayed_papers' and returns it as an artifact (dictionary of papers). The front-end
+can then render this artifact as a pandas DataFrame for display. If no papers are found,
+a NoPapersFoundError is raised to indicate that a search or recommendation should be
+performed first.
 """
 
 
@@ -38,34 +40,30 @@ class NoPapersFoundError(Exception):
     """
 
 
-@tool("
-def
+@tool("display_dataframe", parse_docstring=True)
+def display_dataframe(
     tool_call_id: Annotated[str, InjectedToolCallId],
     state: Annotated[dict, InjectedState],
 ) -> Command:
     """
-
+    Render the last set of retrieved papers as a DataFrame in the front-end.
 
-    This function
-
-
+    This function reads the 'last_displayed_papers' key from state, fetches the
+    corresponding metadata dictionary, and returns a Command with a ToolMessage
+    containing the artifact (dictionary) for the front-end to render as a DataFrame.
+    If no papers are found in state, it raises a NoPapersFoundError to indicate
+    that a search or recommendation must be performed first.
 
     Args:
-        tool_call_id (
-        state (dict): The agent's state containing
+        tool_call_id (InjectedToolCallId): Unique ID of this tool invocation.
+        state (dict): The agent's state containing the 'last_displayed_papers' reference.
 
     Returns:
-        Command: A command
-
+        Command: A command whose update contains a ToolMessage with the artifact
+            (papers dict) for DataFrame rendering in the UI.
 
     Raises:
-        NoPapersFoundError: If no
-
-    Example:
-        >>> state = {"last_displayed_papers": {"paper1": "Title 1", "paper2": "Title 2"}}
-        >>> result = display_results(tool_call_id="123", state=state)
-        >>> print(result.update["messages"][0].content)
-        "2 papers found. Papers are attached as an artifact."
+        NoPapersFoundError: If no entries exist under 'last_displayed_papers' in state.
     """
     logger.info("Displaying papers")
     context_key = state.get("last_displayed_papers")
```
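Note the indirection in the last line of the hunk: `last_displayed_papers` holds the *name* of the state key that contains the papers, not the papers themselves. A minimal sketch of that lookup, assuming a plain dict in place of the real agent state:

```python
# Sketch of the state indirection used by display_dataframe (not package code).
state = {
    "article_data": {"key1": {"Title": "Paper 1"}, "key2": {"Title": "Paper 2"}},
    "last_displayed_papers": "article_data",  # a pointer to another state key
}

context_key = state.get("last_displayed_papers")  # -> "article_data"
papers = state.get(context_key)                   # -> the actual papers dict
assert len(papers) == 2
```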
```diff
--- /dev/null
+++ b/aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+"""
+Tool for querying the metadata table of the last displayed papers.
+
+This tool loads the most recently displayed papers into a pandas DataFrame and uses an
+LLM-driven pandas agent to answer metadata-level questions (e.g., filter by author, list titles).
+It is intended for metadata exploration only, and does not perform content-based retrieval
+or summarization. For PDF-level question answering, use the 'question_and_answer_agent'.
+"""
+
+import logging
+from typing import Annotated
+import pandas as pd
+from langchain_experimental.agents import create_pandas_dataframe_agent
+from langchain_core.tools import tool
+from langgraph.prebuilt import InjectedState
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class NoPapersFoundError(Exception):
+    """Exception raised when no papers are found in the state."""
+
+
+@tool("query_dataframe", parse_docstring=True)
+def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str:
+    """
+    Perform a tabular query on the most recently displayed papers.
+
+    This function loads the last displayed papers into a pandas DataFrame and uses a
+    pandas DataFrame agent to answer metadata-level questions (e.g., "Which papers have
+    'Transformer' in the title?", "List authors of paper X"). It does not perform PDF
+    content analysis or summarization; for content-level question answering, use the
+    'question_and_answer_agent'.
+
+    Args:
+        question (str): The metadata query to ask over the papers table.
+        state (dict): The agent's state containing 'last_displayed_papers'
+            key referencing the metadata table in state.
+
+    Returns:
+        str: The LLM's response to the metadata query.
+
+    Raises:
+        NoPapersFoundError: If no papers have been displayed yet.
+    """
+    logger.info("Querying last displayed papers with question: %s", question)
+    llm_model = state.get("llm_model")
+    if not state.get("last_displayed_papers"):
+        logger.info("No papers displayed so far, raising NoPapersFoundError")
+        raise NoPapersFoundError(
+            "No papers found. A search needs to be performed first."
+        )
+    context_key = state.get("last_displayed_papers")
+    dic_papers = state.get(context_key)
+    df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
+    df_agent = create_pandas_dataframe_agent(
+        llm_model,
+        allow_dangerous_code=True,
+        agent_type="tool-calling",
+        df=df_papers,
+        max_iterations=5,
+        include_df_in_prompt=True,
+        number_of_head_rows=df_papers.shape[0],
+        verbose=True,
+    )
+    llm_result = df_agent.invoke(question, stream_mode=None)
+    return llm_result["output"]
```
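Since `query_dataframe` takes its LLM and data from injected state, a direct call is mainly meaningful in tests. A hedged invocation sketch, where the model class and all state values are stand-ins (any LangChain chat model should work here; `ChatOpenAI` and the model name are assumptions, not something this diff prescribes):

```python
# Hypothetical test-style invocation; at runtime LangGraph injects `state`.
from langchain_openai import ChatOpenAI  # assumed model; any chat model works

from aiagents4pharma.talk2scholars.tools.s2.query_dataframe import query_dataframe

state = {
    "llm_model": ChatOpenAI(model="gpt-4o-mini"),  # placeholder model choice
    "last_displayed_papers": "article_data",
    "article_data": {
        "key1": {"Title": "Attention Is All You Need", "Authors": ["Vaswani"]},
    },
}

# InjectedState args are excluded from the tool-call schema but can be
# supplied explicitly when invoking the tool directly.
answer = query_dataframe.invoke({"question": "List all titles.", "state": state})
print(answer)
```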
```diff
--- a/aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py
+++ b/aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py
@@ -5,16 +5,22 @@ Utility for zotero read tool.
 """
 
 import logging
-
+import tempfile
+from typing import Any, Dict, List, Tuple, Optional
+import concurrent.futures
+
 import hydra
+import requests
 from pyzotero import zotero
-from .zotero_path import get_item_collections
 
+from .zotero_path import get_item_collections
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# pylint: disable=broad-exception-caught
+
 
 class ZoteroSearchData:
     """Helper class to organize Zotero search-related data."""
@@ -33,8 +39,10 @@ class ZoteroSearchData:
         self.cfg = self._load_config()
         self.zot = self._init_zotero_client()
         self.item_to_collections = get_item_collections(self.zot)
-        self.
+        self.article_data = {}
         self.content = ""
+        # Create a session for connection pooling
+        self.session = requests.Session()
 
     def process_search(self) -> None:
         """Process the search request and prepare results."""
@@ -45,7 +53,7 @@ class ZoteroSearchData:
     def get_search_results(self) -> Dict[str, Any]:
         """Get the search results and content."""
         return {
-            "
+            "article_data": self.article_data,
             "content": self.content,
         }
 
@@ -97,50 +105,218 @@ class ZoteroSearchData:
 
         return items
 
+    def _download_zotero_pdf(self, attachment_key: str) -> Optional[Tuple[str, str]]:
+        """Download a PDF from Zotero by attachment key. Returns (file_path, filename) or None."""
+        zotero_pdf_url = (
+            f"https://api.zotero.org/users/{self.cfg.user_id}/items/"
+            f"{attachment_key}/file"
+        )
+        headers = {"Zotero-API-Key": self.cfg.api_key}
+
+        try:
+            # Use session for connection pooling
+            response = self.session.get(
+                zotero_pdf_url, headers=headers, stream=True, timeout=10
+            )
+            response.raise_for_status()
+
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+                # Increased chunk size for better performance
+                for chunk in response.iter_content(chunk_size=16384):
+                    temp_file.write(chunk)
+                temp_file_path = temp_file.name
+
+            content_disp = response.headers.get("Content-Disposition", "")
+            filename = (
+                content_disp.split("filename=")[-1].strip('"')
+                if "filename=" in content_disp
+                else "downloaded.pdf"
+            )
+
+            return temp_file_path, filename
+
+        except Exception as e:
+            logger.error(
+                "Failed to download Zotero PDF for attachment %s: %s", attachment_key, e
+            )
+            return None
+
+    def _download_pdfs_in_parallel(
+        self, attachment_item_map: Dict[str, str]
+    ) -> Dict[str, Tuple[str, str, str]]:
+        """
+        Download multiple PDFs in parallel using ThreadPoolExecutor.
+
+        Args:
+            attachment_item_map: Dictionary mapping attachment keys to parent item keys
+
+        Returns:
+            Dictionary mapping parent item keys to (file_path, filename, attachment_key)
+        """
+        results = {}
+        max_workers = min(10, len(attachment_item_map))  # Set reasonable limit
+
+        if not attachment_item_map:
+            return results
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # Create a dictionary mapping Future objects to attachment keys
+            future_to_key = {
+                executor.submit(self._download_zotero_pdf, attachment_key): (
+                    attachment_key,
+                    item_key,
+                )
+                for attachment_key, item_key in attachment_item_map.items()
+            }
+
+            for future in concurrent.futures.as_completed(future_to_key):
+                attachment_key, item_key = future_to_key[future]
+                try:
+                    result = future.result()
+                    if result:
+                        temp_file_path, resolved_filename = result
+                        results[item_key] = (
+                            temp_file_path,
+                            resolved_filename,
+                            attachment_key,
+                        )
+                except Exception as e:
+                    logger.error(
+                        "Failed to download PDF for key %s: %s", attachment_key, e
+                    )
+
+        return results
+
+    # pylint: disable=too-many-locals, too-many-branches
     def _filter_and_format_papers(self, items: List[Dict[str, Any]]) -> None:
-        """Filter and format papers from items."""
+        """Filter and format papers from Zotero items, including standalone PDFs."""
         filter_item_types = (
             self.cfg.zotero.filter_item_types if self.only_articles else []
         )
         logger.debug("Filtering item types: %s", filter_item_types)
 
+        # Maps to track attachments for batch processing
+        orphaned_pdfs = {}  # attachment_key -> item key (same for orphans)
+        item_attachments = {}  # item_key -> [attachment_keys]
+
+        # First pass: process all items without downloading PDFs
         for item in items:
             if not isinstance(item, dict):
                 continue
 
-            data = item.get("data")
-            if not isinstance(data, dict):
-                continue
-
+            data = item.get("data", {})
             item_type = data.get("itemType", "N/A")
-            logger.debug("Item type: %s", item_type)
-
             key = data.get("key")
             if not key:
                 continue
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # CASE 1: Top-level item (e.g., journalArticle)
+            if item_type != "attachment":
+                collection_paths = self.item_to_collections.get(key, ["/Unknown"])
+
+                self.article_data[key] = {
+                    "Title": data.get("title", "N/A"),
+                    "Abstract": data.get("abstractNote", "N/A"),
+                    "Publication Date": data.get("date", "N/A"),
+                    "URL": data.get("url", "N/A"),
+                    "Type": item_type,
+                    "Collections": collection_paths,
+                    "Citation Count": data.get("citationCount", "N/A"),
+                    "Venue": data.get("venue", "N/A"),
+                    "Publication Venue": data.get("publicationTitle", "N/A"),
+                    "Journal Name": data.get("journalAbbreviation", "N/A"),
+                    "Authors": [
+                        f"{creator.get('firstName', '')} {creator.get('lastName', '')}".strip()
+                        for creator in data.get("creators", [])
+                        if isinstance(creator, dict)
+                        and creator.get("creatorType") == "author"
+                    ],
+                    "source": "zotero",
+                }
+                # We'll collect attachment info in second pass
+
+            # CASE 2: Standalone orphaned PDF attachment
+            elif data.get("contentType") == "application/pdf" and not data.get(
+                "parentItem"
+            ):
+                attachment_key = key
+                filename = data.get("filename", "unknown.pdf")
+
+                # Add to orphaned PDFs for batch processing
+                orphaned_pdfs[attachment_key] = (
+                    attachment_key  # Same key as both attachment and "item"
+                )
+
+                # Create the entry without PDF info yet
+                self.article_data[key] = {
+                    "Title": filename,
+                    "Abstract": "No abstract available",
+                    "Publication Date": "N/A",
+                    "URL": "N/A",
+                    "Type": "orphan_attachment",
+                    "Collections": ["/(No Collection)"],
+                    "Citation Count": "N/A",
+                    "Venue": "N/A",
+                    "Publication Venue": "N/A",
+                    "Journal Name": "N/A",
+                    "Authors": ["(Unknown)"],
+                    "source": "zotero",
+                }
 
-
+        # Second pass: collect attachment info for all items
+        for item_key, item_data in self.article_data.items():
+            if item_data["Type"] != "orphan_attachment":
+                try:
+                    children = self.zot.children(item_key)
+                    pdf_attachments = [
+                        child
+                        for child in children
+                        if isinstance(child, dict)
+                        and child.get("data", {}).get("contentType")
+                        == "application/pdf"
+                    ]
+
+                    if pdf_attachments:
+                        attachment = pdf_attachments[0]
+                        attachment_data = attachment.get("data", {})
+                        attachment_key = attachment_data.get("key")
+                        filename = attachment_data.get("filename", "unknown.pdf")
+
+                        if attachment_key:
+                            # Add to item attachments map
+                            item_attachments[attachment_key] = item_key
+                            # Add basic info
+                            self.article_data[item_key]["filename"] = filename
+                except Exception as e:
+                    logger.error(
+                        "Failed to get attachments for item %s: %s", item_key, e
+                    )
+
+        # Now download all PDFs in parallel - first orphaned PDFs
+        logger.info("Downloading %d orphaned PDFs in parallel", len(orphaned_pdfs))
+        orphan_results = self._download_pdfs_in_parallel(orphaned_pdfs)
+
+        # Update orphan data
+        for item_key, (file_path, filename, attachment_key) in orphan_results.items():
+            self.article_data[item_key]["filename"] = filename
+            self.article_data[item_key]["pdf_url"] = file_path
+            self.article_data[item_key]["attachment_key"] = attachment_key
+            logger.info("Downloaded orphaned Zotero PDF to: %s", file_path)
+
+        # Download regular item attachments
+        logger.info(
+            "Downloading %d regular item PDFs in parallel", len(item_attachments)
+        )
+        item_results = self._download_pdfs_in_parallel(item_attachments)
+
+        # Update item data
+        for item_key, (file_path, filename, attachment_key) in item_results.items():
+            self.article_data[item_key]["filename"] = filename
+            self.article_data[item_key]["pdf_url"] = file_path
+            self.article_data[item_key]["attachment_key"] = attachment_key
+            logger.info("Downloaded Zotero PDF to: %s", file_path)
+
+        if not self.article_data:
             logger.error(
                 "No matching papers returned from Zotero for query: '%s'", self.query
             )
@@ -148,11 +324,13 @@ class ZoteroSearchData:
                 "No matching papers returned from Zotero. Please retry the same query."
            )
 
-        logger.info(
+        logger.info(
+            "Filtered %d items (including orphaned attachments)", len(self.article_data)
+        )
 
     def _create_content(self) -> None:
         """Create the content message for the response."""
-        top_papers = list(self.
+        top_papers = list(self.article_data.values())[:2]
         top_papers_info = "\n".join(
             [
                 f"{i+1}. {paper['Title']} ({paper['Type']})"
@@ -162,6 +340,6 @@ class ZoteroSearchData:
 
         self.content = "Retrieval was successful. Papers are attached as an artifact."
         self.content += " And here is a summary of the retrieval results:\n"
-        self.content += f"Number of papers found: {len(self.
+        self.content += f"Number of papers found: {len(self.article_data)}\n"
         self.content += f"Query: {self.query}\n"
         self.content += "Here are a few of these papers:\n" + top_papers_info
```
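The performance-relevant changes above are a shared `requests.Session` (connection pooling) plus a `ThreadPoolExecutor` that fans downloads out and collects results as they complete. A stripped-down sketch of the same pattern with placeholder URLs and none of the Zotero specifics:

```python
# Generic parallel-download sketch mirroring _download_pdfs_in_parallel.
import concurrent.futures

import requests

session = requests.Session()  # one session -> pooled TCP connections

def fetch(url: str) -> bytes:
    """Download one URL; exceptions propagate to the caller via the future."""
    response = session.get(url, stream=True, timeout=10)
    response.raise_for_status()
    return response.content

urls = ["https://example.org/a.pdf", "https://example.org/b.pdf"]  # placeholders
results = {}
with concurrent.futures.ThreadPoolExecutor(max_workers=min(10, len(urls))) as pool:
    future_to_url = {pool.submit(fetch, url): url for url in urls}
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            results[url] = future.result()
        except Exception as exc:  # broad catch-and-log, as in the helper above
            print(f"download failed for {url}: {exc}")
```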
```diff
--- a/aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py
+++ b/aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py
@@ -62,13 +62,13 @@ def zotero_read(
 
     return Command(
         update={
-            "
-            "last_displayed_papers": "
+            "article_data": results["article_data"],
+            "last_displayed_papers": "article_data",
             "messages": [
                 ToolMessage(
                     content=results["content"],
                     tool_call_id=tool_call_id,
-                    artifact=results["
+                    artifact=results["article_data"],
                 )
             ],
         }
```
```diff
--- a/aiagents4pharma-1.31.0.dist-info/METADATA
+++ b/aiagents4pharma-1.33.0.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: aiagents4pharma
-Version: 1.31.0
+Version: 1.33.0
 Summary: AI Agents for drug discovery, drug development, and other pharmaceutical R&D.
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: MIT License
@@ -275,6 +275,8 @@ To use **Talk2AIAgents4Pharma**, **Talk2BioModels**, **Talk2KnowledgeGraphs**, o
 
 Only for **Talk2Scholars**, you also need a **Zotero API key**, which you can generate [here](https://www.zotero.org/user/login#applications). _(For all other agents, the Zotero key is not required.)_
 
+To use **Talk2Scholars**, you must have **FAISS** installed through **Conda**. Follow installation instructions for your OS [here](https://github.com/VirtualPatientEngine/AIAgents4Pharma/tree/main/aiagents4pharma/talk2scholars/install.md).
+
 To use **Talk2AIAgents4Pharma** or **Talk2KnowledgeGraphs**, you must have **Ollama** installed. Follow installation instructions for your OS [here](https://ollama.com/download).
 
 After installing, pull the `nomic-embed-text` model and start the server by running:
```
|