aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
- aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
- aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
- aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
- aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
- aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
- aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
- aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
- aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
- aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
- aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
- aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
- aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
- aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py

```diff
@@ -1,7 +1,11 @@
 #!/usr/bin/env python3
 
 """
-
+Recommend research papers related to a set of input papers using Semantic Scholar.
+
+Given a list of Semantic Scholar paper IDs, this tool aggregates related works
+(citations and references) from each input paper and returns a consolidated list
+of recommended papers.
 """
 
 import logging
@@ -20,47 +24,66 @@ logger = logging.getLogger(__name__)
 
 
 class MultiPaperRecInput(BaseModel):
-    """
+    """Defines the input schema for the multi-paper recommendation tool.
+
+    Attributes:
+        paper_ids: List of 40-character Semantic Scholar Paper IDs (provide at least two).
+        limit: Maximum total number of recommendations to return (1-500).
+        year: Optional publication year filter; supports formats:
+            'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'.
+        tool_call_id: Internal tool call identifier injected by the system.
+    """
 
     paper_ids: List[str] = Field(
-        description="List of Semantic Scholar Paper IDs
+        description="List of 40-character Semantic Scholar Paper IDs"
+        "(at least two) to base recommendations on"
     )
     limit: int = Field(
         default=10,
-        description="Maximum total number of recommendations to return",
+        description="Maximum total number of recommendations to return (1-500)",
         ge=1,
         le=500,
     )
     year: Optional[str] = Field(
         default=None,
-        description="
-        "YYYY-
+        description="Publication year filter; supports formats:"
+        "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
     )
     tool_call_id: Annotated[str, InjectedToolCallId]
 
     model_config = {"arbitrary_types_allowed": True}
 
 
-@tool(
+@tool(
+    args_schema=MultiPaperRecInput,
+    parse_docstring=True,
+)
 def get_multi_paper_recommendations(
     paper_ids: List[str],
     tool_call_id: Annotated[str, InjectedToolCallId],
-    limit: int =
+    limit: int = 10,
     year: Optional[str] = None,
 ) -> Command[Any]:
     """
-
-
+    Return recommended papers based on multiple Semantic Scholar paper IDs.
+
+    This tool accepts a list of Semantic Scholar paper IDs and returns a set of
+    recommended papers by aggregating related works (citations and references)
+    from each input paper.
 
     Args:
-        paper_ids (List[str]):
-
-
-
-
+        paper_ids (List[str]): List of 40-character Semantic Scholar paper IDs.
+            Provide at least two IDs.
+        tool_call_id (str): Internal tool call identifier injected by the system.
+        limit (int, optional): Maximum total number of recommendations to return. Defaults to 10.
+        year (str, optional): Publication year filter; supports formats: 'YYYY',
+            'YYYY-', '-YYYY', 'YYYY:YYYY'. Defaults to None.
 
     Returns:
-
+        Command: A Command object containing:
+            - multi_papers: List of recommended papers.
+            - last_displayed_papers: Same list for display purposes.
+            - messages: List containing a ToolMessage with recommendations details.
     """
     # Create recommendation data object to organize variables
     rec_data = MultiPaperRecData(paper_ids, limit, year, tool_call_id)
```
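The switch to `@tool(args_schema=MultiPaperRecInput, parse_docstring=True)` means the documented bounds are now enforced by pydantic before the tool body runs. Below is a minimal sketch of that validation, using a hypothetical stand-in model with the same field constraints as the diff (the real class also carries the injected `tool_call_id` and `model_config`):

```python
from typing import List, Optional

from pydantic import BaseModel, Field, ValidationError


class MultiPaperRecInputSketch(BaseModel):
    """Hypothetical stand-in mirroring the constraints shown in MultiPaperRecInput."""

    paper_ids: List[str] = Field(
        description="List of 40-character Semantic Scholar Paper IDs (at least two)"
    )
    limit: int = Field(default=10, ge=1, le=500)
    year: Optional[str] = Field(default=None)


# Valid input: limit falls back to its default of 10.
ok = MultiPaperRecInputSketch(paper_ids=["a" * 40, "b" * 40])
print(ok.limit)  # -> 10

# An out-of-range limit is rejected by the ge/le bounds before the tool runs.
try:
    MultiPaperRecInputSketch(paper_ids=["a" * 40, "b" * 40], limit=1000)
except ValidationError as err:
    print("rejected:", err)
```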
aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py

```diff
@@ -10,11 +10,16 @@ or summarization. For PDF-level question answering, use the 'question_and_answer
 """
 
 import logging
-from typing import Annotated
+from typing import Annotated, Optional, Any
+
 import pandas as pd
-from
+from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
+from langchain_core.tools.base import InjectedToolCallId
+from langchain_experimental.agents import create_pandas_dataframe_agent
 from langgraph.prebuilt import InjectedState
+from langgraph.types import Command
+from pydantic import BaseModel, Field
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -25,8 +30,71 @@ class NoPapersFoundError(Exception):
     """Exception raised when no papers are found in the state."""
 
 
-
-
+class QueryDataFrameInput(BaseModel):
+    """
+    Pydantic schema for querying the metadata of displayed papers.
+
+    Fields:
+        question: A free-text prompt or Python expression to query the papers DataFrame.
+        tool_call_id: LangGraph-injected identifier for tracking the tool invocation.
+        state: Agent state dictionary. Must include:
+            - 'last_displayed_papers': dictionary of paper metadata (rows = papers).
+            - 'llm_model': model used to instantiate the DataFrame agent.
+
+    Notes:
+        - This tool is only for metadata queries. It does not perform full-text PDF analysis.
+        - You can access standard metadata columns such as 'Title', 'Authors',
+          'Venue', 'Year', and the unified 'paper_ids' (list of <source>:<ID>).
+        - To extract raw paper IDs for downloading, set extract_ids=True and specify:
+            - id_column: 'paper_ids' (default)
+            - row_number: 1-based index of the paper to return (optional—for a single ID)
+          Example: {{"question":"","extract_ids":True,"row_number":4}}
+    """
+
+    question: str = Field(
+        description=(
+            "The metadata query to run over the papers DataFrame. Can be natural language "
+            "(e.g., 'List all titles by author X') or Python code "
+            "(e.g., df['arxiv_id'].dropna().tolist())."
+        )
+    )
+    extract_ids: bool = Field(
+        default=False,
+        description=(
+            "If true, instruct the DataFrame agent to extract values from the"
+            "specified ID column via a Python expression."
+        ),
+    )
+    id_column: str = Field(
+        default="paper_ids",
+        description=(
+            "Name of the metadata column containing a list of paper IDs to"
+            "extract when extract_ids=True."
+        ),
+    )
+    row_number: Optional[int] = Field(
+        default=None,
+        description=(
+            "1-based index of the ID to extract from the list; if provided, returns only"
+            "that single ID."
+        ),
+    )
+    tool_call_id: Annotated[str, InjectedToolCallId]
+    state: Annotated[dict, InjectedState]
+
+
+@tool(
+    "query_dataframe",
+    args_schema=QueryDataFrameInput,
+    parse_docstring=True,
+    return_direct=True,
+)
+def query_dataframe(
+    question: str,
+    state: Annotated[dict, InjectedState],
+    tool_call_id: str,
+    **kwargs: Any,
+) -> Command:
     """
     Perform a tabular query on the most recently displayed papers.
 
@@ -38,29 +106,63 @@ def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str
 
     Args:
         question (str): The metadata query to ask over the papers table.
-
-
+        extract_ids (bool): If true, modify the query to instruct the DataFrame agent
+            to extract values from the specified ID column via Python code.
+        id_column (str): Name of the metadata column to extract values from when extract_ids=True.
+        row_number (int, optional): 1-based index of the ID to extract from the list; if provided,
+            returns only that single ID.
+        state (dict): The agent's state containing 'last_displayed_papers' key
+            referencing the metadata table in state.
+        tool_call_id (str): LangGraph-injected identifier for this tool call.
 
     Returns:
-
+        Command: A structured response containing a ToolMessage with the query result.
 
     Raises:
         NoPapersFoundError: If no papers have been displayed yet.
     """
     logger.info("Querying last displayed papers with question: %s", question)
     llm_model = state.get("llm_model")
+    if llm_model is None:
+        raise ValueError("Missing 'llm_model' in state.")
+
     context_val = state.get("last_displayed_papers")
     if not context_val:
         logger.info("No papers displayed so far, raising NoPapersFoundError")
         raise NoPapersFoundError(
             "No papers found. A search needs to be performed first."
         )
-
+
+    # Resolve the paper dictionary
     if isinstance(context_val, dict):
         dic_papers = context_val
     else:
         dic_papers = state.get(context_val)
+
+    if not isinstance(dic_papers, dict):
+        raise ValueError(
+            "Could not resolve a valid metadata dictionary from 'last_displayed_papers'"
+        )
+
     df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
+    # Prepare the query: if extracting IDs, let the DataFrame agent handle it via Python code
+    extract_ids_flag = kwargs.get("extract_ids", False)
+    id_column = kwargs.get("id_column", "paper_ids")
+    row_number = kwargs.get("row_number")
+    question_to_agent = question
+    if extract_ids_flag:
+        if not id_column:
+            raise ValueError("Must specify 'id_column' when extract_ids=True.")
+        if row_number is not None:
+            question_to_agent = (
+                f"df['{id_column}'].dropna().str[0].tolist()[{row_number-1}]"
+            )
+        else:
+            question_to_agent = f"df['{id_column}'].dropna().str[0].tolist()"
+        logger.info(
+            "extract_ids enabled: asking agent to run expression: %s", question_to_agent
+        )
+
     df_agent = create_pandas_dataframe_agent(
         llm_model,
         allow_dangerous_code=True,
@@ -71,5 +173,17 @@ def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str
         number_of_head_rows=df_papers.shape[0],
         verbose=True,
     )
-
-
+
+    llm_result = df_agent.invoke({"input": question_to_agent}, stream_mode=None)
+    response_text = llm_result["output"]
+
+    return Command(
+        update={
+            "messages": [
+                ToolMessage(
+                    content=response_text,
+                    tool_call_id=tool_call_id,
+                )
+            ],
+        }
+    )
```
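The most interesting part of the new `query_dataframe` is how `extract_ids`, `id_column`, and `row_number` are turned into a pandas expression for the DataFrame agent. A small sketch of that expression-building step on a hypothetical `last_displayed_papers` table follows; the real tool hands the expression to `create_pandas_dataframe_agent` rather than evaluating it directly, so the `eval` call here is only for illustration:

```python
from typing import Optional

import pandas as pd


def build_extract_expression(id_column: str, row_number: Optional[int] = None) -> str:
    """Mirror of the expression query_dataframe hands to the DataFrame agent."""
    expr = f"df['{id_column}'].dropna().str[0].tolist()"
    if row_number is not None:
        expr += f"[{row_number - 1}]"  # row_number is 1-based
    return expr


# Hypothetical metadata shaped like 'last_displayed_papers' (rows = papers).
papers = {
    "p1": {"Title": "Paper one", "paper_ids": ["arxiv:2101.00001", "doi:10.1/abc"]},
    "p2": {"Title": "Paper two", "paper_ids": ["pubmed:123456"]},
}
df = pd.DataFrame.from_dict(papers, orient="index")

expr = build_extract_expression("paper_ids", row_number=2)
print(expr)                    # df['paper_ids'].dropna().str[0].tolist()[1]
print(eval(expr, {"df": df}))  # 'pubmed:123456' (.str[0] takes each paper's first ID)
```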
aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py

```diff
@@ -1,7 +1,12 @@
 #!/usr/bin/env python3
 
 """
-
+Tool for retrieving a Semantic Scholar paper ID given a paper title.
+
+This tool queries the Semantic Scholar API for the best match of the provided paper title
+and returns the unique Semantic Scholar paperId. Use when you have a known title and need its
+Semantic Scholar identifier for further metadata retrieval or pipeline integration. Do not
+use this tool for broad literature search; use the `search` tool instead.
 """
 
 import logging
@@ -12,7 +17,7 @@ from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
 from langgraph.types import Command
-from pydantic import Field
+from pydantic import BaseModel, Field
 
 
 # Configure logging
@@ -20,26 +25,51 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 
-
-
-
+class RetrieveSemanticScholarPaperIdInput(BaseModel):
+    """
+    Pydantic schema for retrieving a Semantic Scholar paper ID.
+
+    Fields:
+        paper_title: The title (full or partial) of the paper to look up on Semantic Scholar.
+        tool_call_id: LangGraph-injected identifier for tracking the tool invocation.
+    """
+
     paper_title: str = Field(
-        description="The title
-    )
+        ..., description="The paper title to search for on Semantic Scholar."
+    )
+    tool_call_id: Annotated[str, InjectedToolCallId]
+
+
+@tool(
+    "retrieve_semantic_scholar_paper_id",
+    args_schema=RetrieveSemanticScholarPaperIdInput,
+    parse_docstring=True,
+)
+def retrieve_semantic_scholar_paper_id(
+    paper_title: str,
+    tool_call_id: str,
 ) -> Command[Any]:
     """
-
-    and retrieve the paper Semantic Scholar ID.
+    Search for a paper by title on Semantic Scholar and return its unique paper ID.
 
-    This
-
+    This tool issues a GET request to the Semantic Scholar API to find the best match
+    for the given paper title, then returns the paper's Semantic Scholar ID.
+
+    Use when you have a known title (full or partial) and need the Semantic Scholar ID
+    to fetch additional metadata or perform downstream lookups. Do not use this tool
+    for broad literature searches; for general search use the `search` tool.
 
     Args:
-
-
+        paper_title (str): The title of the paper to look up.
+        tool_call_id (str): LangGraph-injected identifier for this tool call.
 
     Returns:
-
+        Command: A structured response containing a ToolMessage whose content is
+            the Semantic Scholar paper ID string (e.g., 'abc123xyz').
+
+    Raises:
+        ValueError: If no matching paper is found for the given title.
+        requests.RequestException: If the API request fails.
     """
     # Load hydra configuration
     with hydra.initialize(version_base=None, config_path="../../configs"):
@@ -64,14 +94,16 @@ def retrieve_semantic_scholar_paper_id(
     if not papers:
         logger.error("No papers found for query: %s", paper_title)
         raise ValueError(f"No papers found for query: {paper_title}. Try again.")
-    #
+    # Extract the paper ID from the top result
     paper_id = papers[0]["paperId"]
-
+    logger.info("Found paper ID: %s", paper_id)
+    # Prepare the response content (just the ID)
+    response_text = paper_id
     return Command(
         update={
             "messages": [
                 ToolMessage(
-                    content=
+                    content=response_text,
                     tool_call_id=tool_call_id,
                 )
             ],
```
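For context, the request this tool issues looks roughly like the sketch below. The endpoint, fields, and timeout actually come from the package's hydra config, so treat the URL and parameters here as assumptions rather than the package's exact values:

```python
import requests

# Assumed endpoint; the real value is read from the hydra config.
SEMANTIC_SCHOLAR_SEARCH_URL = "https://api.semanticscholar.org/graph/v1/paper/search"


def lookup_paper_id(paper_title: str, timeout: int = 10) -> str:
    """Return the Semantic Scholar paperId of the best match for a title."""
    response = requests.get(
        SEMANTIC_SCHOLAR_SEARCH_URL,
        params={"query": paper_title, "limit": 1, "fields": "title,paperId"},
        timeout=timeout,
    )
    response.raise_for_status()
    papers = response.json().get("data", [])
    if not papers:
        raise ValueError(f"No papers found for query: {paper_title}. Try again.")
    return papers[0]["paperId"]


# Example (uncomment to run against the live API):
# print(lookup_paper_id("Attention Is All You Need"))
```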
aiagents4pharma/talk2scholars/tools/s2/search.py

```diff
@@ -1,7 +1,10 @@
 #!/usr/bin/env python3
 
 """
-
+Search for academic papers on Semantic Scholar by title or keywords.
+
+Given a text query, this tool retrieves relevant papers from Semantic Scholar,
+optionally filtered by publication year.
 """
 
 import logging
@@ -19,42 +22,62 @@ logger = logging.getLogger(__name__)
 
 
 class SearchInput(BaseModel):
-    """
+    """Defines the input schema for the paper search tool.
+
+    Attributes:
+        query: Full or partial paper title or keywords to search for.
+        limit: Maximum number of search results to return (1-100).
+        year: Optional publication year filter; supports 'YYYY',
+            'YYYY-', '-YYYY', 'YYYY:YYYY'.
+        tool_call_id: Internal tool call identifier injected by the system.
+    """
 
     query: str = Field(
-        description="
-        "Be specific and include relevant academic terms."
+        description="Full or partial paper title or keywords to search for"
     )
     limit: int = Field(
-        default=10,
+        default=10,
+        description="Maximum number of search results to return (1-100)",
+        ge=1,
+        le=100,
     )
     year: Optional[str] = Field(
         default=None,
-        description="
-        "YYYY-
+        description="Publication year filter; supports formats:"
+        "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
     )
     tool_call_id: Annotated[str, InjectedToolCallId]
 
 
-@tool(
+@tool(
+    "search_tool",
+    args_schema=SearchInput,
+    parse_docstring=True,
+)
 def search_tool(
     query: str,
     tool_call_id: Annotated[str, InjectedToolCallId],
-    limit: int =
+    limit: int = 10,
     year: Optional[str] = None,
 ) -> Command[Any]:
     """
-
+    Return academic papers from Semantic Scholar matching a title or keyword query.
+
+    This tool searches Semantic Scholar for papers whose titles or keywords
+    match the given text, optionally filtered by publication year.
 
     Args:
-        query (str):
-        tool_call_id (
-        limit (int, optional):
-        year (str, optional):
-
+        query (str): Full or partial paper title or keywords to search for.
+        tool_call_id (str): Internal tool call identifier injected by the system.
+        limit (int, optional): Maximum number of search results to return. Defaults to 5.
+        year (str, optional): Publication year filter; supports 'YYYY',
+            'YYYY-', '-YYYY', 'YYYY:YYYY'. Defaults to None.
 
     Returns:
-
+        Command: A Command object containing:
+            - papers: List of matching papers.
+            - last_displayed_papers: Same list for display purposes.
+            - messages: List containing a ToolMessage with search results details.
     """
     # Create search data object to organize variables
     search_data = SearchData(query, limit, year, tool_call_id)
```
aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py

```diff
@@ -1,7 +1,10 @@
 #!/usr/bin/env python3
 
 """
-
+Recommend research papers related to a single input paper using Semantic Scholar.
+
+Given a Semantic Scholar paper ID, this tool retrieves related works
+(citations and references) and returns a curated list of recommended papers.
 """
 
 import logging
@@ -19,27 +22,37 @@ logger = logging.getLogger(__name__)
 
 
 class SinglePaperRecInput(BaseModel):
-    """
+    """Defines the input schema for the single-paper recommendation tool.
+
+    Attributes:
+        paper_id: 40-character Semantic Scholar Paper ID to base recommendations on.
+        limit: Maximum number of recommendations to return (1-500).
+        year: Optional publication year filter; supports 'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'.
+        tool_call_id: Internal tool call identifier injected by the system.
+    """
 
     paper_id: str = Field(
-        description="Semantic Scholar Paper ID to
+        description="40-character Semantic Scholar Paper ID to base recommendations on"
     )
     limit: int = Field(
-        default=
-        description="Maximum number of recommendations to return",
+        default=10,
+        description="Maximum number of recommendations to return (1-500)",
         ge=1,
         le=500,
     )
     year: Optional[str] = Field(
         default=None,
-        description="
-        "YYYY-
+        description="Publication year filter; supports formats::"
+        "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
    )
     tool_call_id: Annotated[str, InjectedToolCallId]
     model_config = {"arbitrary_types_allowed": True}
 
 
-@tool(
+@tool(
+    args_schema=SinglePaperRecInput,
+    parse_docstring=True,
+)
 def get_single_paper_recommendations(
     paper_id: str,
     tool_call_id: Annotated[str, InjectedToolCallId],
@@ -47,18 +60,23 @@ def get_single_paper_recommendations(
     year: Optional[str] = None,
 ) -> Command[Any]:
     """
-
-
+    Return recommended papers for a single Semantic Scholar paper ID.
+
+    This tool accepts a single Semantic Scholar paper ID and returns related works
+    by aggregating citations and references.
 
     Args:
-        paper_id (str):
-        tool_call_id (
-        limit (int, optional):
-        year (str, optional):
-
+        paper_id (str): 40-character Semantic Scholar paper ID.
+        tool_call_id (str): Internal tool call identifier injected by the system.
+        limit (int, optional): Maximum number of recommendations to return. Defaults to 5.
+        year (str, optional): Publication year filter; supports 'YYYY', 'YYYY-',
+            '-YYYY', 'YYYY:YYYY'. Defaults to None.
 
     Returns:
-
+        Command: A Command object containing:
+            - papers: List of recommended papers.
+            - last_displayed_papers: Same list for display purposes.
+            - messages: List containing a ToolMessage with recommendation details.
     """
     # Create recommendation data object to organize variables
     rec_data = SinglePaperRecData(paper_id, limit, year, tool_call_id)
```
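All of the S2 tools above now report their results through the same `Command`/`ToolMessage` shape listed in their Returns sections. A hypothetical helper illustrating that update structure is sketched below; the key names follow the docstrings in the diff, while the actual construction lives in the `*_helper` utilities:

```python
from langchain_core.messages import ToolMessage
from langgraph.types import Command


def build_tool_response(content: str, tool_call_id: str, papers: dict) -> Command:
    """Hypothetical helper mirroring the update shape described in the docstrings."""
    return Command(
        update={
            "papers": papers,
            "last_displayed_papers": papers,
            "messages": [ToolMessage(content=content, tool_call_id=tool_call_id)],
        }
    )
```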
aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py

```diff
@@ -127,8 +127,26 @@ class MultiPaperRecData:
 
     def _filter_papers(self) -> None:
         """Filter and format papers."""
-
-
+        # Build filtered recommendations with unified paper_ids
+        filtered: Dict[str, Any] = {}
+        for paper in self.recommendations:
+            if not paper.get("title") or not paper.get("authors"):
+                continue
+            ext = paper.get("externalIds", {}) or {}
+            ids: List[str] = []
+            arxiv = ext.get("ArXiv")
+            if arxiv:
+                ids.append(f"arxiv:{arxiv}")
+            pubmed = ext.get("PubMed")
+            if pubmed:
+                ids.append(f"pubmed:{pubmed}")
+            pmc = ext.get("PubMedCentral")
+            if pmc:
+                ids.append(f"pmc:{pmc}")
+            doi_id = ext.get("DOI")
+            if doi_id:
+                ids.append(f"doi:{doi_id}")
+            metadata = {
                 "semantic_scholar_paper_id": paper["paperId"],
                 "Title": paper.get("title", "N/A"),
                 "Abstract": paper.get("abstract", "N/A"),
@@ -142,27 +160,42 @@ class MultiPaperRecData:
                     for author in paper.get("authors", [])
                 ],
                 "URL": paper.get("url", "N/A"),
-                "arxiv_id":
-                "
+                "arxiv_id": arxiv or "N/A",
+                "pm_id": pubmed or "N/A",
+                "pmc_id": pmc or "N/A",
+                "doi": doi_id or "N/A",
+                "paper_ids": ids,
+                "source": "semantic_scholar",
             }
-
-
-            }
+            filtered[paper["paperId"]] = metadata
+        self.filtered_papers = filtered
 
         logger.info("Filtered %d papers", len(self.filtered_papers))
 
+    def _get_snippet(self, abstract: str) -> str:
+        """Extract the first one or two sentences from an abstract."""
+        if not abstract or abstract == "N/A":
+            return ""
+        sentences = abstract.split(". ")
+        snippet_sentences = sentences[:2]
+        snippet = ". ".join(snippet_sentences)
+        if not snippet.endswith("."):
+            snippet += "."
+        return snippet
+
     def _create_content(self) -> None:
         """Create the content message for the response."""
         top_papers = list(self.filtered_papers.values())[:3]
-
-
-
-
-
-
-
-
-
+        entries: list[str] = []
+        for i, paper in enumerate(top_papers):
+            title = paper.get("Title", "N/A")
+            year = paper.get("Year", "N/A")
+            snippet = self._get_snippet(paper.get("Abstract", ""))
+            entry = f"{i+1}. {title} ({year})"
+            if snippet:
+                entry += f"\n   Abstract snippet: {snippet}"
+            entries.append(entry)
+        top_papers_info = "\n".join(entries)
 
         self.content = (
             "Recommendations based on multiple papers were successful. "
```
|