aiagents4pharma 1.28.0__py3-none-any.whl → 1.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +35 -209
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +86 -0
- aiagents4pharma/talk2scholars/agents/s2_agent.py +10 -6
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +12 -6
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +2 -48
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +5 -28
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +5 -21
- aiagents4pharma/talk2scholars/configs/config.yaml +3 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +1 -0
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +42 -1
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +1 -0
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +186 -111
- aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +142 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +154 -0
- aiagents4pharma/talk2scholars/tests/test_s2_display.py +74 -0
- aiagents4pharma/talk2scholars/tests/test_s2_multi.py +282 -0
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +78 -0
- aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +65 -0
- aiagents4pharma/talk2scholars/tests/test_s2_search.py +266 -0
- aiagents4pharma/talk2scholars/tests/test_s2_single.py +274 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +57 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +412 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_write.py +626 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +17 -0
- aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +43 -0
- aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +108 -0
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +60 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +50 -34
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +8 -8
- aiagents4pharma/talk2scholars/tools/s2/search.py +36 -23
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +44 -38
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +2 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +63 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +64 -19
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +247 -0
- {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.30.0.dist-info}/METADATA +6 -5
- {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.30.0.dist-info}/RECORD +48 -30
- aiagents4pharma/talk2scholars/tests/test_call_s2.py +0 -100
- aiagents4pharma/talk2scholars/tests/test_call_zotero.py +0 -94
- aiagents4pharma/talk2scholars/tests/test_s2_tools.py +0 -355
- aiagents4pharma/talk2scholars/tests/test_zotero_tool.py +0 -171
- {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.30.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.30.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.28.0.dist-info → aiagents4pharma-1.30.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,108 @@
"""
Arxiv Paper Downloader

This module provides an implementation of `AbstractPaperDownloader` for arXiv.
It connects to the arXiv API, retrieves metadata for a research paper, and
downloads the corresponding PDF.

By using an abstract base class, this implementation is extendable to other
APIs like PubMed, IEEE Xplore, etc.
"""
import xml.etree.ElementTree as ET
from typing import Any, Dict
import logging
import hydra
import requests
from .abstract_downloader import AbstractPaperDownloader

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ArxivPaperDownloader(AbstractPaperDownloader):
    """
    Downloader class for arXiv.

    This class interfaces with the arXiv API to fetch metadata
    and retrieve PDFs of academic papers based on their arXiv IDs.
    """

    def __init__(self):
        """
        Initialize the arXiv paper downloader.

        API endpoint, timeout, streaming chunk size and the PDF base URL are
        all pulled from the Hydra configuration tree.
        """
        with hydra.initialize(version_base=None, config_path="../../configs"):
            composed = hydra.compose(
                config_name="config",
                overrides=["tools/download_arxiv_paper=default"],
            )
        arxiv_cfg = composed.tools.download_arxiv_paper
        self.api_url = arxiv_cfg.api_url
        self.request_timeout = arxiv_cfg.request_timeout
        self.chunk_size = arxiv_cfg.chunk_size
        self.pdf_base_url = arxiv_cfg.pdf_base_url

    def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
        """
        Fetch metadata from arXiv for a given paper ID.

        Args:
            paper_id (str): The arXiv ID of the paper.

        Returns:
            Dict[str, Any]: A dictionary containing metadata, including the
            raw Atom XML response under the key ``"xml"``.
        """
        logger.info("Fetching metadata from arXiv for paper ID: %s", paper_id)
        query_url = f"{self.api_url}?search_query=id:{paper_id}&start=0&max_results=1"
        response = requests.get(query_url, timeout=self.request_timeout)
        response.raise_for_status()
        return {"xml": response.text}

    def download_pdf(self, paper_id: str) -> Dict[str, Any]:
        """
        Download the PDF of a paper from arXiv.

        The paper's metadata is fetched first to locate the PDF link inside
        the Atom feed; the file is then streamed down in chunks.

        Args:
            paper_id (str): The arXiv ID of the paper.

        Returns:
            Dict[str, Any]: A dictionary containing:
                - `pdf_object`: The binary content of the downloaded PDF.
                - `pdf_url`: The URL from which the PDF was fetched.
                - `arxiv_id`: The arXiv ID of the downloaded paper.

        Raises:
            RuntimeError: If no PDF link can be found in the metadata.
        """
        metadata = self.fetch_metadata(paper_id)

        # Walk the Atom feed; the first <link> whose title attribute is
        # "pdf" carries the download URL.
        root = ET.fromstring(metadata["xml"])
        namespaces = {"atom": "http://www.w3.org/2005/Atom"}
        pdf_url = None
        found = False
        for entry in root.findall("atom:entry", namespaces):
            if found:
                break
            for link in entry.findall("atom:link", namespaces):
                if link.attrib.get("title") == "pdf":
                    pdf_url = link.attrib.get("href")
                    found = True
                    break

        if not pdf_url:
            raise RuntimeError(f"Failed to download PDF for arXiv ID {paper_id}.")

        logger.info("Downloading PDF from: %s", pdf_url)
        pdf_resp = requests.get(pdf_url, stream=True, timeout=self.request_timeout)
        pdf_resp.raise_for_status()

        # Assemble the PDF from streamed chunks; filter(None, ...) drops
        # empty keep-alive chunks.
        pdf_object = b"".join(
            filter(None, pdf_resp.iter_content(chunk_size=self.chunk_size))
        )

        return {
            "pdf_object": pdf_object,
            "pdf_url": pdf_url,
            "arxiv_id": paper_id,
        }
@@ -0,0 +1,60 @@
|
|
1
|
+
# File: aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py
|
2
|
+
"""
|
3
|
+
This module defines the `download_arxiv_paper` tool, which leverages the
|
4
|
+
`ArxivPaperDownloader` class to fetch and download academic papers from arXiv
|
5
|
+
based on their unique arXiv ID.
|
6
|
+
"""
|
7
|
+
from typing import Annotated, Any
|
8
|
+
from pydantic import BaseModel, Field
|
9
|
+
from langchain_core.tools import tool
|
10
|
+
from langchain_core.messages import ToolMessage
|
11
|
+
from langchain_core.tools.base import InjectedToolCallId
|
12
|
+
from langgraph.types import Command
|
13
|
+
|
14
|
+
# Local import from the same package:
|
15
|
+
from .arxiv_downloader import ArxivPaperDownloader
|
16
|
+
|
17
|
+
class DownloadArxivPaperInput(BaseModel):
    """
    Input schema for the arXiv paper download tool.

    Pydantic validates these fields before the tool body runs.
    """
    # The arXiv identifier of the paper to download.
    arxiv_id: str = Field(
        description="The arXiv paper ID used to retrieve the paper details and PDF."
    )
    # Tool-call identifier supplied via injection (InjectedToolCallId),
    # not by the LLM.
    tool_call_id: Annotated[str, InjectedToolCallId]
|
26
|
+
|
27
|
+
@tool(args_schema=DownloadArxivPaperInput, parse_docstring=True)
|
28
|
+
def download_arxiv_paper(
|
29
|
+
arxiv_id: str,
|
30
|
+
tool_call_id: Annotated[str, InjectedToolCallId],
|
31
|
+
) -> Command[Any]:
|
32
|
+
"""
|
33
|
+
Download an arXiv paper's PDF using its unique arXiv ID.
|
34
|
+
|
35
|
+
This function:
|
36
|
+
1. Creates an `ArxivPaperDownloader` instance.
|
37
|
+
2. Fetches metadata from arXiv using the provided `arxiv_id`.
|
38
|
+
3. Downloads the PDF from the returned link.
|
39
|
+
4. Returns a `Command` object containing the PDF data and a success message.
|
40
|
+
|
41
|
+
Args:
|
42
|
+
arxiv_id (str): The unique arXiv paper ID.
|
43
|
+
tool_call_id (InjectedToolCallId): A unique identifier for tracking this tool call.
|
44
|
+
|
45
|
+
Returns:
|
46
|
+
Command[Any]: Contains metadata and messages about the success of the operation.
|
47
|
+
"""
|
48
|
+
downloader = ArxivPaperDownloader()
|
49
|
+
|
50
|
+
# If the downloader fails or the arxiv_id is invalid, this might raise an error
|
51
|
+
pdf_data = downloader.download_pdf(arxiv_id)
|
52
|
+
|
53
|
+
content = f"Successfully downloaded PDF for arXiv ID {arxiv_id}"
|
54
|
+
|
55
|
+
return Command(
|
56
|
+
update={
|
57
|
+
"pdf_data": pdf_data,
|
58
|
+
"messages": [ToolMessage(content=content, tool_call_id=tool_call_id)],
|
59
|
+
}
|
60
|
+
)
|
@@ -16,6 +16,7 @@ from langchain_core.tools.base import InjectedToolCallId
|
|
16
16
|
from langgraph.types import Command
|
17
17
|
from pydantic import BaseModel, Field
|
18
18
|
|
19
|
+
# pylint: disable=R0914,R0912,R0915
|
19
20
|
|
20
21
|
# Configure logging
|
21
22
|
logging.basicConfig(level=logging.INFO)
|
@@ -26,7 +27,7 @@ class MultiPaperRecInput(BaseModel):
|
|
26
27
|
"""Input schema for multiple paper recommendations tool."""
|
27
28
|
|
28
29
|
paper_ids: List[str] = Field(
|
29
|
-
description=
|
30
|
+
description="List of Semantic Scholar Paper IDs to get recommendations for"
|
30
31
|
)
|
31
32
|
limit: int = Field(
|
32
33
|
default=2,
|
@@ -44,14 +45,6 @@ class MultiPaperRecInput(BaseModel):
|
|
44
45
|
model_config = {"arbitrary_types_allowed": True}
|
45
46
|
|
46
47
|
|
47
|
-
# Load hydra configuration
|
48
|
-
with hydra.initialize(version_base=None, config_path="../../configs"):
|
49
|
-
cfg = hydra.compose(
|
50
|
-
config_name="config", overrides=["tools/multi_paper_recommendation=default"]
|
51
|
-
)
|
52
|
-
cfg = cfg.tools.multi_paper_recommendation
|
53
|
-
|
54
|
-
|
55
48
|
@tool(args_schema=MultiPaperRecInput, parse_docstring=True)
|
56
49
|
def get_multi_paper_recommendations(
|
57
50
|
paper_ids: List[str],
|
@@ -73,7 +66,14 @@ def get_multi_paper_recommendations(
|
|
73
66
|
Returns:
|
74
67
|
Dict[str, Any]: The recommendations and related information.
|
75
68
|
"""
|
76
|
-
|
69
|
+
# Load hydra configuration
|
70
|
+
with hydra.initialize(version_base=None, config_path="../../configs"):
|
71
|
+
cfg = hydra.compose(
|
72
|
+
config_name="config", overrides=["tools/multi_paper_recommendation=default"]
|
73
|
+
)
|
74
|
+
cfg = cfg.tools.multi_paper_recommendation
|
75
|
+
logger.info("Loaded configuration for multi-paper recommendation tool")
|
76
|
+
logger.info(
|
77
77
|
"Starting multi-paper recommendations search with paper IDs: %s", paper_ids
|
78
78
|
)
|
79
79
|
|
@@ -89,45 +89,61 @@ def get_multi_paper_recommendations(
|
|
89
89
|
if year:
|
90
90
|
params["year"] = year
|
91
91
|
|
92
|
-
#
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
92
|
+
# Wrap API call in try/except to catch connectivity issues and validate response format
|
93
|
+
try:
|
94
|
+
response = requests.post(
|
95
|
+
endpoint,
|
96
|
+
headers=headers,
|
97
|
+
params=params,
|
98
|
+
data=json.dumps(payload),
|
99
|
+
timeout=cfg.request_timeout,
|
100
|
+
)
|
101
|
+
response.raise_for_status() # Raises HTTPError for bad responses
|
102
|
+
except requests.exceptions.RequestException as e:
|
103
|
+
logger.error(
|
104
|
+
"Failed to connect to Semantic Scholar API for multi-paper recommendations: %s",
|
105
|
+
e,
|
106
|
+
)
|
107
|
+
raise RuntimeError(
|
108
|
+
"Failed to connect to Semantic Scholar API. Please retry the same query."
|
109
|
+
) from e
|
110
|
+
|
111
|
+
logger.info(
|
101
112
|
"API Response Status for multi-paper recommendations: %s", response.status_code
|
102
113
|
)
|
114
|
+
logger.info("Request params: %s", params)
|
103
115
|
|
104
116
|
data = response.json()
|
105
|
-
recommendations = data.get("recommendedPapers", [])
|
106
117
|
|
118
|
+
# Check for expected data format
|
119
|
+
if "recommendedPapers" not in data:
|
120
|
+
logger.error("Unexpected API response format: %s", data)
|
121
|
+
raise RuntimeError(
|
122
|
+
"Unexpected response from Semantic Scholar API. The results could not be "
|
123
|
+
"retrieved due to an unexpected format. "
|
124
|
+
"Please modify your search query and try again."
|
125
|
+
)
|
126
|
+
|
127
|
+
recommendations = data.get("recommendedPapers", [])
|
107
128
|
if not recommendations:
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
)
|
115
|
-
]
|
116
|
-
}
|
129
|
+
logger.error(
|
130
|
+
"No recommendations returned from API for paper IDs: %s", paper_ids
|
131
|
+
)
|
132
|
+
raise RuntimeError(
|
133
|
+
"No recommendations were found for your query. Consider refining your search "
|
134
|
+
"by using more specific keywords or different terms."
|
117
135
|
)
|
118
136
|
|
119
137
|
# Create a dictionary to store the papers
|
120
138
|
filtered_papers = {
|
121
139
|
paper["paperId"]: {
|
122
|
-
|
140
|
+
"paper_id": paper["paperId"],
|
123
141
|
"Title": paper.get("title", "N/A"),
|
124
142
|
"Abstract": paper.get("abstract", "N/A"),
|
125
143
|
"Year": paper.get("year", "N/A"),
|
126
144
|
"Citation Count": paper.get("citationCount", "N/A"),
|
127
145
|
"URL": paper.get("url", "N/A"),
|
128
|
-
|
129
|
-
# "ArXiv", "N/A"
|
130
|
-
# ), # Extract arXiv ID
|
146
|
+
"arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
|
131
147
|
}
|
132
148
|
for paper in recommendations
|
133
149
|
if paper.get("title") and paper.get("authors")
|
@@ -156,7 +172,7 @@ def get_multi_paper_recommendations(
|
|
156
172
|
|
157
173
|
return Command(
|
158
174
|
update={
|
159
|
-
"multi_papers": filtered_papers, #
|
175
|
+
"multi_papers": filtered_papers, # Sending the dictionary directly
|
160
176
|
"last_displayed_papers": "multi_papers",
|
161
177
|
"messages": [
|
162
178
|
ToolMessage(
|
@@ -19,14 +19,6 @@ from pydantic import Field
|
|
19
19
|
logging.basicConfig(level=logging.INFO)
|
20
20
|
logger = logging.getLogger(__name__)
|
21
21
|
|
22
|
-
# Load hydra configuration
|
23
|
-
with hydra.initialize(version_base=None, config_path="../../configs"):
|
24
|
-
cfg = hydra.compose(
|
25
|
-
config_name="config",
|
26
|
-
overrides=["tools/retrieve_semantic_scholar_paper_id=default"],
|
27
|
-
)
|
28
|
-
cfg = cfg.tools.retrieve_semantic_scholar_paper_id
|
29
|
-
|
30
22
|
|
31
23
|
@tool("retrieve_semantic_scholar_paper_id", parse_docstring=True)
|
32
24
|
def retrieve_semantic_scholar_paper_id(
|
@@ -49,6 +41,14 @@ def retrieve_semantic_scholar_paper_id(
|
|
49
41
|
Returns:
|
50
42
|
ToolMessage: A message containing the paper ID.
|
51
43
|
"""
|
44
|
+
# Load hydra configuration
|
45
|
+
with hydra.initialize(version_base=None, config_path="../../configs"):
|
46
|
+
cfg = hydra.compose(
|
47
|
+
config_name="config",
|
48
|
+
overrides=["tools/retrieve_semantic_scholar_paper_id=default"],
|
49
|
+
)
|
50
|
+
cfg = cfg.tools.retrieve_semantic_scholar_paper_id
|
51
|
+
logger.info("Loaded configuration for Semantic Scholar paper ID retrieval tool")
|
52
52
|
logger.info("Retrieving ID of paper with title: %s", paper_title)
|
53
53
|
endpoint = cfg.api_endpoint
|
54
54
|
params = {
|
@@ -37,12 +37,6 @@ class SearchInput(BaseModel):
|
|
37
37
|
tool_call_id: Annotated[str, InjectedToolCallId]
|
38
38
|
|
39
39
|
|
40
|
-
# Load hydra configuration
|
41
|
-
with hydra.initialize(version_base=None, config_path="../../configs"):
|
42
|
-
cfg = hydra.compose(config_name="config", overrides=["tools/search=default"])
|
43
|
-
cfg = cfg.tools.search
|
44
|
-
|
45
|
-
|
46
40
|
@tool("search_tool", args_schema=SearchInput, parse_docstring=True)
|
47
41
|
def search_tool(
|
48
42
|
query: str,
|
@@ -56,13 +50,18 @@ def search_tool(
|
|
56
50
|
Args:
|
57
51
|
query (str): The search query string to find academic papers.
|
58
52
|
tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
|
59
|
-
limit (int, optional): The maximum number of results to return. Defaults to
|
53
|
+
limit (int, optional): The maximum number of results to return. Defaults to 5.
|
60
54
|
year (str, optional): Year range for papers.
|
61
55
|
Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
|
62
56
|
|
63
57
|
Returns:
|
64
58
|
The number of papers found on Semantic Scholar.
|
65
59
|
"""
|
60
|
+
# Load hydra configuration
|
61
|
+
with hydra.initialize(version_base=None, config_path="../../configs"):
|
62
|
+
cfg = hydra.compose(config_name="config", overrides=["tools/search=default"])
|
63
|
+
cfg = cfg.tools.search
|
64
|
+
logger.info("Loaded configuration for search tool")
|
66
65
|
logger.info("Searching for papers on %s", query)
|
67
66
|
endpoint = cfg.api_endpoint
|
68
67
|
params = {
|
@@ -75,33 +74,47 @@ def search_tool(
|
|
75
74
|
if year:
|
76
75
|
params["year"] = year
|
77
76
|
|
78
|
-
|
77
|
+
# Wrap API call in try/except to catch connectivity issues
|
78
|
+
try:
|
79
|
+
response = requests.get(endpoint, params=params, timeout=10)
|
80
|
+
response.raise_for_status() # Raises HTTPError for bad responses
|
81
|
+
except requests.exceptions.RequestException as e:
|
82
|
+
logger.error("Failed to connect to Semantic Scholar API: %s", e)
|
83
|
+
raise RuntimeError(
|
84
|
+
"Failed to connect to Semantic Scholar API. Please retry the same query."
|
85
|
+
) from e
|
86
|
+
|
79
87
|
data = response.json()
|
88
|
+
|
89
|
+
# Check for expected data format
|
90
|
+
if "data" not in data:
|
91
|
+
logger.error("Unexpected API response format: %s", data)
|
92
|
+
raise RuntimeError(
|
93
|
+
"Unexpected response from Semantic Scholar API. The results could not be "
|
94
|
+
"retrieved due to an unexpected format. "
|
95
|
+
"Please modify your search query and try again."
|
96
|
+
)
|
97
|
+
|
80
98
|
papers = data.get("data", [])
|
81
|
-
logger.info("Received %d papers", len(papers))
|
82
99
|
if not papers:
|
83
|
-
|
84
|
-
|
85
|
-
"messages": [
|
86
|
-
ToolMessage(
|
87
|
-
content="No papers found. Please try a different search query.",
|
88
|
-
tool_call_id=tool_call_id,
|
89
|
-
)
|
90
|
-
]
|
91
|
-
}
|
100
|
+
logger.error(
|
101
|
+
"No papers returned from Semantic Scholar API for query: %s", query
|
92
102
|
)
|
103
|
+
raise RuntimeError(
|
104
|
+
"No papers were found for your query. Consider refining your search "
|
105
|
+
"by using more specific keywords or different terms."
|
106
|
+
)
|
107
|
+
|
93
108
|
# Create a dictionary to store the papers
|
94
109
|
filtered_papers = {
|
95
110
|
paper["paperId"]: {
|
96
|
-
|
111
|
+
"paper_id": paper["paperId"],
|
97
112
|
"Title": paper.get("title", "N/A"),
|
98
113
|
"Abstract": paper.get("abstract", "N/A"),
|
99
114
|
"Year": paper.get("year", "N/A"),
|
100
115
|
"Citation Count": paper.get("citationCount", "N/A"),
|
101
116
|
"URL": paper.get("url", "N/A"),
|
102
|
-
|
103
|
-
# "ArXiv", "N/A"
|
104
|
-
# ), # Extract arXiv ID
|
117
|
+
"arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
|
105
118
|
}
|
106
119
|
for paper in papers
|
107
120
|
if paper.get("title") and paper.get("authors")
|
@@ -129,7 +142,7 @@ def search_tool(
|
|
129
142
|
|
130
143
|
return Command(
|
131
144
|
update={
|
132
|
-
"papers": filtered_papers, #
|
145
|
+
"papers": filtered_papers, # Sending the dictionary directly
|
133
146
|
"last_displayed_papers": "papers",
|
134
147
|
"messages": [
|
135
148
|
ToolMessage(
|
@@ -40,14 +40,6 @@ class SinglePaperRecInput(BaseModel):
|
|
40
40
|
model_config = {"arbitrary_types_allowed": True}
|
41
41
|
|
42
42
|
|
43
|
-
# Load hydra configuration
|
44
|
-
with hydra.initialize(version_base=None, config_path="../../configs"):
|
45
|
-
cfg = hydra.compose(
|
46
|
-
config_name="config", overrides=["tools/single_paper_recommendation=default"]
|
47
|
-
)
|
48
|
-
cfg = cfg.tools.single_paper_recommendation
|
49
|
-
|
50
|
-
|
51
43
|
@tool(args_schema=SinglePaperRecInput, parse_docstring=True)
|
52
44
|
def get_single_paper_recommendations(
|
53
45
|
paper_id: str,
|
@@ -56,19 +48,27 @@ def get_single_paper_recommendations(
|
|
56
48
|
year: Optional[str] = None,
|
57
49
|
) -> Command[Any]:
|
58
50
|
"""
|
59
|
-
Get recommendations for
|
51
|
+
Get recommendations for a single paper using its Semantic Scholar ID.
|
60
52
|
No other ID types are supported.
|
61
53
|
|
62
54
|
Args:
|
63
55
|
paper_id (str): The Semantic Scholar Paper ID to get recommendations for.
|
64
56
|
tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
|
65
|
-
limit (int, optional): The maximum number of recommendations to return. Defaults to
|
57
|
+
limit (int, optional): The maximum number of recommendations to return. Defaults to 5.
|
66
58
|
year (str, optional): Year range for papers.
|
67
59
|
Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
|
68
60
|
|
69
61
|
Returns:
|
70
62
|
Dict[str, Any]: The recommendations and related information.
|
71
63
|
"""
|
64
|
+
# Load hydra configuration
|
65
|
+
with hydra.initialize(version_base=None, config_path="../../configs"):
|
66
|
+
cfg = hydra.compose(
|
67
|
+
config_name="config",
|
68
|
+
overrides=["tools/single_paper_recommendation=default"],
|
69
|
+
)
|
70
|
+
cfg = cfg.tools.single_paper_recommendation
|
71
|
+
logger.info("Loaded configuration for single paper recommendation tool")
|
72
72
|
logger.info(
|
73
73
|
"Starting single paper recommendations search with paper ID: %s", paper_id
|
74
74
|
)
|
@@ -84,48 +84,54 @@ def get_single_paper_recommendations(
|
|
84
84
|
if year:
|
85
85
|
params["year"] = year
|
86
86
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
87
|
+
# Wrap API call in try/except to catch connectivity issues and check response format
|
88
|
+
try:
|
89
|
+
response = requests.get(endpoint, params=params, timeout=cfg.request_timeout)
|
90
|
+
response.raise_for_status() # Raises HTTPError for bad responses
|
91
|
+
except requests.exceptions.RequestException as e:
|
92
|
+
logger.error(
|
93
|
+
"Failed to connect to Semantic Scholar API for recommendations: %s", e
|
94
|
+
)
|
95
|
+
raise RuntimeError(
|
96
|
+
"Failed to connect to Semantic Scholar API. Please retry the same query."
|
97
|
+
) from e
|
98
|
+
|
99
|
+
logger.info(
|
92
100
|
"API Response Status for recommendations of paper %s: %s",
|
93
101
|
paper_id,
|
94
102
|
response.status_code,
|
95
103
|
)
|
96
|
-
|
97
|
-
raise ValueError("Invalid paper ID or API error.")
|
98
|
-
# print(f"Request params: {params}")
|
99
|
-
logging.info("Request params: %s", params)
|
104
|
+
logger.info("Request params: %s", params)
|
100
105
|
|
101
106
|
data = response.json()
|
102
|
-
recommendations = data.get("recommendedPapers", [])
|
103
107
|
|
108
|
+
# Check for expected data format
|
109
|
+
if "recommendedPapers" not in data:
|
110
|
+
logger.error("Unexpected API response format: %s", data)
|
111
|
+
raise RuntimeError(
|
112
|
+
"Unexpected response from Semantic Scholar API. The results could not be "
|
113
|
+
"retrieved due to an unexpected format. "
|
114
|
+
"Please modify your search query and try again."
|
115
|
+
)
|
116
|
+
|
117
|
+
recommendations = data.get("recommendedPapers", [])
|
104
118
|
if not recommendations:
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
ToolMessage(
|
110
|
-
content=f"No recommendations found for {paper_id}.",
|
111
|
-
tool_call_id=tool_call_id,
|
112
|
-
)
|
113
|
-
],
|
114
|
-
}
|
119
|
+
logger.error("No recommendations returned from API for paper: %s", paper_id)
|
120
|
+
raise RuntimeError(
|
121
|
+
"No recommendations were found for your query. Consider refining your search "
|
122
|
+
"by using more specific keywords or different terms."
|
115
123
|
)
|
116
124
|
|
117
125
|
# Extract paper ID and title from recommendations
|
118
126
|
filtered_papers = {
|
119
127
|
paper["paperId"]: {
|
120
|
-
|
128
|
+
"paper_id": paper["paperId"],
|
121
129
|
"Title": paper.get("title", "N/A"),
|
122
130
|
"Abstract": paper.get("abstract", "N/A"),
|
123
131
|
"Year": paper.get("year", "N/A"),
|
124
132
|
"Citation Count": paper.get("citationCount", "N/A"),
|
125
133
|
"URL": paper.get("url", "N/A"),
|
126
|
-
|
127
|
-
# "ArXiv", "N/A"
|
128
|
-
# ), # Extract arXiv ID
|
134
|
+
"arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
|
129
135
|
}
|
130
136
|
for paper in recommendations
|
131
137
|
if paper.get("title") and paper.get("authors")
|
@@ -143,10 +149,10 @@ def get_single_paper_recommendations(
|
|
143
149
|
logger.info("Filtered %d papers", len(filtered_papers))
|
144
150
|
|
145
151
|
content = (
|
146
|
-
"Recommendations based on single paper were successful. "
|
147
|
-
"Papers are attached as an artifact."
|
152
|
+
"Recommendations based on the single paper were successful. "
|
153
|
+
"Papers are attached as an artifact. "
|
154
|
+
"Here is a summary of the recommendations:\n"
|
148
155
|
)
|
149
|
-
content += " Here is a summary of the recommendations:\n"
|
150
156
|
content += f"Number of papers found: {len(filtered_papers)}\n"
|
151
157
|
content += f"Query Paper ID: {paper_id}\n"
|
152
158
|
content += f"Year: {year}\n" if year else ""
|
@@ -154,7 +160,7 @@ def get_single_paper_recommendations(
|
|
154
160
|
|
155
161
|
return Command(
|
156
162
|
update={
|
157
|
-
"papers": filtered_papers, #
|
163
|
+
"papers": filtered_papers, # Sending the dictionary directly
|
158
164
|
"last_displayed_papers": "papers",
|
159
165
|
"messages": [
|
160
166
|
ToolMessage(
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#!/usr/bin/env python3

"""
Utility functions for Zotero tools.
"""

import logging

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get_item_collections(zot):
    """
    Fetch all Zotero collections and map item keys to their full collection paths.

    A path looks like "/Parent/Child" and is produced by walking each
    collection's ``parentCollection`` links up to a root (a falsy parent).

    Args:
        zot (Zotero): An initialized Zotero client.

    Returns:
        dict: A dictionary mapping item keys to a list of full collection paths.

    Note:
        One ``collection_items`` request is issued per collection, so the
        request count grows linearly with the number of collections.
    """
    logger.info("Fetching Zotero collections...")

    # Fetch all collections
    collections = zot.collections()

    # Create mappings: collection key -> name and collection key -> parent key
    collection_map = {col["key"]: col["data"]["name"] for col in collections}
    parent_map = {
        col["key"]: col["data"].get("parentCollection") for col in collections
    }

    def build_collection_path(col_key):
        """Return the "/root/.../leaf" path for a single collection key."""
        path = []
        # Walk upward until the parent link is falsy (root reached).
        while col_key:
            path.insert(0, collection_map.get(col_key, "Unknown"))
            col_key = parent_map.get(col_key)
        return "/" + "/".join(path)  # Convert to "/path/to/collection"

    collection_paths = {key: build_collection_path(key) for key in collection_map}

    # Group: item key -> list of collection paths it belongs to.
    item_to_collections = {}

    for collection in collections:
        collection_key = collection["key"]
        # Fetch items in the collection
        for item in zot.collection_items(collection_key):
            item_key = item["data"]["key"]
            # setdefault replaces the explicit first-time/append branching.
            item_to_collections.setdefault(item_key, []).append(
                collection_paths[collection_key]
            )

    logger.info("Successfully mapped items to collection paths.")

    return item_to_collections