aiagents4pharma 1.39.0__py3-none-any.whl → 1.39.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +7 -7
  2. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +88 -12
  3. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +5 -0
  4. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +5 -0
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +1 -20
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +1 -26
  7. aiagents4pharma/talk2scholars/configs/tools/download_arxiv_paper/default.yaml +4 -0
  8. aiagents4pharma/talk2scholars/configs/tools/download_biorxiv_paper/default.yaml +2 -0
  9. aiagents4pharma/talk2scholars/configs/tools/download_medrxiv_paper/default.yaml +2 -0
  10. aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +22 -0
  11. aiagents4pharma/talk2scholars/tests/test_main_agent.py +20 -2
  12. aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +28 -0
  13. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +107 -29
  14. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +2 -3
  15. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +194 -543
  16. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +2 -2
  17. aiagents4pharma/talk2scholars/tests/{test_s2_display.py → test_s2_display_dataframe.py} +2 -3
  18. aiagents4pharma/talk2scholars/tests/test_s2_query_dataframe.py +201 -0
  19. aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +7 -6
  20. aiagents4pharma/talk2scholars/tests/test_s2_utils_ext_ids.py +413 -0
  21. aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +140 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +0 -1
  23. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +16 -18
  24. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +92 -37
  25. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -575
  26. aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +10 -0
  27. aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
  28. aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +77 -0
  29. aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +83 -0
  30. aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +125 -0
  31. aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +162 -0
  32. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +33 -10
  33. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +39 -16
  34. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +124 -10
  35. aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +49 -17
  36. aiagents4pharma/talk2scholars/tools/s2/search.py +39 -16
  37. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +34 -16
  38. aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +49 -16
  39. aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +51 -16
  40. aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +50 -17
  41. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/METADATA +58 -105
  42. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/RECORD +45 -32
  43. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +0 -89
  44. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +0 -74
  45. aiagents4pharma/talk2scholars/tests/test_s2_query.py +0 -95
  46. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/WHEEL +0 -0
  47. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/licenses/LICENSE +0 -0
  48. {aiagents4pharma-1.39.0.dist-info → aiagents4pharma-1.39.2.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,11 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  """
4
- This tool is used to return recommendations based on multiple papers
4
+ Recommend research papers related to a set of input papers using Semantic Scholar.
5
+
6
+ Given a list of Semantic Scholar paper IDs, this tool aggregates related works
7
+ (citations and references) from each input paper and returns a consolidated list
8
+ of recommended papers.
5
9
  """
6
10
 
7
11
  import logging
@@ -20,47 +24,66 @@ logger = logging.getLogger(__name__)
20
24
 
21
25
 
22
26
  class MultiPaperRecInput(BaseModel):
23
- """Input schema for multiple paper recommendations tool."""
27
+ """Defines the input schema for the multi-paper recommendation tool.
28
+
29
+ Attributes:
30
+ paper_ids: List of 40-character Semantic Scholar Paper IDs (provide at least two).
31
+ limit: Maximum total number of recommendations to return (1-500).
32
+ year: Optional publication year filter; supports formats:
33
+ 'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'.
34
+ tool_call_id: Internal tool call identifier injected by the system.
35
+ """
24
36
 
25
37
  paper_ids: List[str] = Field(
26
- description="List of Semantic Scholar Paper IDs to get recommendations for"
38
+ description="List of 40-character Semantic Scholar Paper IDs"
39
+ "(at least two) to base recommendations on"
27
40
  )
28
41
  limit: int = Field(
29
42
  default=10,
30
- description="Maximum total number of recommendations to return",
43
+ description="Maximum total number of recommendations to return (1-500)",
31
44
  ge=1,
32
45
  le=500,
33
46
  )
34
47
  year: Optional[str] = Field(
35
48
  default=None,
36
- description="Year range in format: YYYY for specific year, "
37
- "YYYY- for papers after year, -YYYY for papers before year, or YYYY:YYYY for range",
49
+ description="Publication year filter; supports formats:"
50
+ "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
38
51
  )
39
52
  tool_call_id: Annotated[str, InjectedToolCallId]
40
53
 
41
54
  model_config = {"arbitrary_types_allowed": True}
42
55
 
43
56
 
44
- @tool(args_schema=MultiPaperRecInput, parse_docstring=True)
57
+ @tool(
58
+ args_schema=MultiPaperRecInput,
59
+ parse_docstring=True,
60
+ )
45
61
  def get_multi_paper_recommendations(
46
62
  paper_ids: List[str],
47
63
  tool_call_id: Annotated[str, InjectedToolCallId],
48
- limit: int = 2,
64
+ limit: int = 10,
49
65
  year: Optional[str] = None,
50
66
  ) -> Command[Any]:
51
67
  """
52
- Get recommendations for a group of multiple papers using the Semantic Scholar IDs.
53
- No other paper IDs are supported.
68
+ Return recommended papers based on multiple Semantic Scholar paper IDs.
69
+
70
+ This tool accepts a list of Semantic Scholar paper IDs and returns a set of
71
+ recommended papers by aggregating related works (citations and references)
72
+ from each input paper.
54
73
 
55
74
  Args:
56
- paper_ids (List[str]): The list of paper IDs to base recommendations on.
57
- tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
58
- limit (int, optional): The maximum number of recommendations to return. Defaults to 2.
59
- year (str, optional): Year range for papers.
60
- Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
75
+ paper_ids (List[str]): List of 40-character Semantic Scholar paper IDs.
76
+ Provide at least two IDs.
77
+ tool_call_id (str): Internal tool call identifier injected by the system.
78
+ limit (int, optional): Maximum total number of recommendations to return. Defaults to 10.
79
+ year (str, optional): Publication year filter; supports formats: 'YYYY',
80
+ 'YYYY-', '-YYYY', 'YYYY:YYYY'. Defaults to None.
61
81
 
62
82
  Returns:
63
- Dict[str, Any]: The recommendations and related information.
83
+ Command: A Command object containing:
84
+ - multi_papers: List of recommended papers.
85
+ - last_displayed_papers: Same list for display purposes.
86
+ - messages: List containing a ToolMessage with recommendations details.
64
87
  """
65
88
  # Create recommendation data object to organize variables
66
89
  rec_data = MultiPaperRecData(paper_ids, limit, year, tool_call_id)
@@ -10,11 +10,16 @@ or summarization. For PDF-level question answering, use the 'question_and_answer
10
10
  """
11
11
 
12
12
  import logging
13
- from typing import Annotated
13
+ from typing import Annotated, Optional, Any
14
+
14
15
  import pandas as pd
15
- from langchain_experimental.agents import create_pandas_dataframe_agent
16
+ from langchain_core.messages import ToolMessage
16
17
  from langchain_core.tools import tool
18
+ from langchain_core.tools.base import InjectedToolCallId
19
+ from langchain_experimental.agents import create_pandas_dataframe_agent
17
20
  from langgraph.prebuilt import InjectedState
21
+ from langgraph.types import Command
22
+ from pydantic import BaseModel, Field
18
23
 
19
24
  # Configure logging
20
25
  logging.basicConfig(level=logging.INFO)
@@ -25,8 +30,71 @@ class NoPapersFoundError(Exception):
25
30
  """Exception raised when no papers are found in the state."""
26
31
 
27
32
 
28
- @tool("query_dataframe", parse_docstring=True)
29
- def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str:
33
+ class QueryDataFrameInput(BaseModel):
34
+ """
35
+ Pydantic schema for querying the metadata of displayed papers.
36
+
37
+ Fields:
38
+ question: A free-text prompt or Python expression to query the papers DataFrame.
39
+ tool_call_id: LangGraph-injected identifier for tracking the tool invocation.
40
+ state: Agent state dictionary. Must include:
41
+ - 'last_displayed_papers': dictionary of paper metadata (rows = papers).
42
+ - 'llm_model': model used to instantiate the DataFrame agent.
43
+
44
+ Notes:
45
+ - This tool is only for metadata queries. It does not perform full-text PDF analysis.
46
+ - You can access standard metadata columns such as 'Title', 'Authors',
47
+ 'Venue', 'Year', and the unified 'paper_ids' (list of <source>:<ID>).
48
+ - To extract raw paper IDs for downloading, set extract_ids=True and specify:
49
+ - id_column: 'paper_ids' (default)
50
+ - row_number: 1-based index of the paper to return (optional—for a single ID)
51
+ Example: {{"question":"","extract_ids":True,"row_number":4}}
52
+ """
53
+
54
+ question: str = Field(
55
+ description=(
56
+ "The metadata query to run over the papers DataFrame. Can be natural language "
57
+ "(e.g., 'List all titles by author X') or Python code "
58
+ "(e.g., df['arxiv_id'].dropna().tolist())."
59
+ )
60
+ )
61
+ extract_ids: bool = Field(
62
+ default=False,
63
+ description=(
64
+ "If true, instruct the DataFrame agent to extract values from the"
65
+ "specified ID column via a Python expression."
66
+ ),
67
+ )
68
+ id_column: str = Field(
69
+ default="paper_ids",
70
+ description=(
71
+ "Name of the metadata column containing a list of paper IDs to"
72
+ "extract when extract_ids=True."
73
+ ),
74
+ )
75
+ row_number: Optional[int] = Field(
76
+ default=None,
77
+ description=(
78
+ "1-based index of the ID to extract from the list; if provided, returns only"
79
+ "that single ID."
80
+ ),
81
+ )
82
+ tool_call_id: Annotated[str, InjectedToolCallId]
83
+ state: Annotated[dict, InjectedState]
84
+
85
+
86
+ @tool(
87
+ "query_dataframe",
88
+ args_schema=QueryDataFrameInput,
89
+ parse_docstring=True,
90
+ return_direct=True,
91
+ )
92
+ def query_dataframe(
93
+ question: str,
94
+ state: Annotated[dict, InjectedState],
95
+ tool_call_id: str,
96
+ **kwargs: Any,
97
+ ) -> Command:
30
98
  """
31
99
  Perform a tabular query on the most recently displayed papers.
32
100
 
@@ -38,29 +106,63 @@ def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str
38
106
 
39
107
  Args:
40
108
  question (str): The metadata query to ask over the papers table.
41
- state (dict): The agent's state containing 'last_displayed_papers'
42
- key referencing the metadata table in state.
109
+ extract_ids (bool): If true, modify the query to instruct the DataFrame agent
110
+ to extract values from the specified ID column via Python code.
111
+ id_column (str): Name of the metadata column to extract values from when extract_ids=True.
112
+ row_number (int, optional): 1-based index of the ID to extract from the list; if provided,
113
+ returns only that single ID.
114
+ state (dict): The agent's state containing 'last_displayed_papers' key
115
+ referencing the metadata table in state.
116
+ tool_call_id (str): LangGraph-injected identifier for this tool call.
43
117
 
44
118
  Returns:
45
- str: The LLM's response to the metadata query.
119
+ Command: A structured response containing a ToolMessage with the query result.
46
120
 
47
121
  Raises:
48
122
  NoPapersFoundError: If no papers have been displayed yet.
49
123
  """
50
124
  logger.info("Querying last displayed papers with question: %s", question)
51
125
  llm_model = state.get("llm_model")
126
+ if llm_model is None:
127
+ raise ValueError("Missing 'llm_model' in state.")
128
+
52
129
  context_val = state.get("last_displayed_papers")
53
130
  if not context_val:
54
131
  logger.info("No papers displayed so far, raising NoPapersFoundError")
55
132
  raise NoPapersFoundError(
56
133
  "No papers found. A search needs to be performed first."
57
134
  )
58
- # Support both key reference (str) and direct mapping
135
+
136
+ # Resolve the paper dictionary
59
137
  if isinstance(context_val, dict):
60
138
  dic_papers = context_val
61
139
  else:
62
140
  dic_papers = state.get(context_val)
141
+
142
+ if not isinstance(dic_papers, dict):
143
+ raise ValueError(
144
+ "Could not resolve a valid metadata dictionary from 'last_displayed_papers'"
145
+ )
146
+
63
147
  df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
148
+ # Prepare the query: if extracting IDs, let the DataFrame agent handle it via Python code
149
+ extract_ids_flag = kwargs.get("extract_ids", False)
150
+ id_column = kwargs.get("id_column", "paper_ids")
151
+ row_number = kwargs.get("row_number")
152
+ question_to_agent = question
153
+ if extract_ids_flag:
154
+ if not id_column:
155
+ raise ValueError("Must specify 'id_column' when extract_ids=True.")
156
+ if row_number is not None:
157
+ question_to_agent = (
158
+ f"df['{id_column}'].dropna().str[0].tolist()[{row_number-1}]"
159
+ )
160
+ else:
161
+ question_to_agent = f"df['{id_column}'].dropna().str[0].tolist()"
162
+ logger.info(
163
+ "extract_ids enabled: asking agent to run expression: %s", question_to_agent
164
+ )
165
+
64
166
  df_agent = create_pandas_dataframe_agent(
65
167
  llm_model,
66
168
  allow_dangerous_code=True,
@@ -71,5 +173,17 @@ def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str
71
173
  number_of_head_rows=df_papers.shape[0],
72
174
  verbose=True,
73
175
  )
74
- llm_result = df_agent.invoke(question, stream_mode=None)
75
- return llm_result["output"]
176
+
177
+ llm_result = df_agent.invoke({"input": question_to_agent}, stream_mode=None)
178
+ response_text = llm_result["output"]
179
+
180
+ return Command(
181
+ update={
182
+ "messages": [
183
+ ToolMessage(
184
+ content=response_text,
185
+ tool_call_id=tool_call_id,
186
+ )
187
+ ],
188
+ }
189
+ )
@@ -1,7 +1,12 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  """
4
- This tool is used to search for academic papers on Semantic Scholar.
4
+ Tool for retrieving a Semantic Scholar paper ID given a paper title.
5
+
6
+ This tool queries the Semantic Scholar API for the best match of the provided paper title
7
+ and returns the unique Semantic Scholar paperId. Use when you have a known title and need its
8
+ Semantic Scholar identifier for further metadata retrieval or pipeline integration. Do not
9
+ use this tool for broad literature search; use the `search` tool instead.
5
10
  """
6
11
 
7
12
  import logging
@@ -12,7 +17,7 @@ from langchain_core.messages import ToolMessage
12
17
  from langchain_core.tools import tool
13
18
  from langchain_core.tools.base import InjectedToolCallId
14
19
  from langgraph.types import Command
15
- from pydantic import Field
20
+ from pydantic import BaseModel, Field
16
21
 
17
22
 
18
23
  # Configure logging
@@ -20,26 +25,51 @@ logging.basicConfig(level=logging.INFO)
20
25
  logger = logging.getLogger(__name__)
21
26
 
22
27
 
23
- @tool("retrieve_semantic_scholar_paper_id", parse_docstring=True)
24
- def retrieve_semantic_scholar_paper_id(
25
- tool_call_id: Annotated[str, InjectedToolCallId],
28
+ class RetrieveSemanticScholarPaperIdInput(BaseModel):
29
+ """
30
+ Pydantic schema for retrieving a Semantic Scholar paper ID.
31
+
32
+ Fields:
33
+ paper_title: The title (full or partial) of the paper to look up on Semantic Scholar.
34
+ tool_call_id: LangGraph-injected identifier for tracking the tool invocation.
35
+ """
36
+
26
37
  paper_title: str = Field(
27
- description="The title of the paper to search for on Semantic Scholar."
28
- ),
38
+ ..., description="The paper title to search for on Semantic Scholar."
39
+ )
40
+ tool_call_id: Annotated[str, InjectedToolCallId]
41
+
42
+
43
+ @tool(
44
+ "retrieve_semantic_scholar_paper_id",
45
+ args_schema=RetrieveSemanticScholarPaperIdInput,
46
+ parse_docstring=True,
47
+ )
48
+ def retrieve_semantic_scholar_paper_id(
49
+ paper_title: str,
50
+ tool_call_id: str,
29
51
  ) -> Command[Any]:
30
52
  """
31
- This tool can be used to search for a paper on Semantic Scholar
32
- and retrieve the paper Semantic Scholar ID.
53
+ Search for a paper by title on Semantic Scholar and return its unique paper ID.
33
54
 
34
- This is useful for when an article is retrieved from users Zotero library
35
- and the Semantic Scholar ID is needed to retrieve more information about the paper.
55
+ This tool issues a GET request to the Semantic Scholar API to find the best match
56
+ for the given paper title, then returns the paper's Semantic Scholar ID.
57
+
58
+ Use when you have a known title (full or partial) and need the Semantic Scholar ID
59
+ to fetch additional metadata or perform downstream lookups. Do not use this tool
60
+ for broad literature searches; for general search use the `search` tool.
36
61
 
37
62
  Args:
38
- tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
39
- paper_title (str): The title of the paper to search for on Semantic Scholar.
63
+ paper_title (str): The title of the paper to look up.
64
+ tool_call_id (str): LangGraph-injected identifier for this tool call.
40
65
 
41
66
  Returns:
42
- ToolMessage: A message containing the paper ID.
67
+ Command: A structured response containing a ToolMessage whose content is
68
+ the Semantic Scholar paper ID string (e.g., 'abc123xyz').
69
+
70
+ Raises:
71
+ ValueError: If no matching paper is found for the given title.
72
+ requests.RequestException: If the API request fails.
43
73
  """
44
74
  # Load hydra configuration
45
75
  with hydra.initialize(version_base=None, config_path="../../configs"):
@@ -64,14 +94,16 @@ def retrieve_semantic_scholar_paper_id(
64
94
  if not papers:
65
95
  logger.error("No papers found for query: %s", paper_title)
66
96
  raise ValueError(f"No papers found for query: {paper_title}. Try again.")
67
- # Get the paper ID
97
+ # Extract the paper ID from the top result
68
98
  paper_id = papers[0]["paperId"]
69
-
99
+ logger.info("Found paper ID: %s", paper_id)
100
+ # Prepare the response content (just the ID)
101
+ response_text = paper_id
70
102
  return Command(
71
103
  update={
72
104
  "messages": [
73
105
  ToolMessage(
74
- content=f"Paper ID for '{paper_title}' is: {paper_id}",
106
+ content=response_text,
75
107
  tool_call_id=tool_call_id,
76
108
  )
77
109
  ],
@@ -1,7 +1,10 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  """
4
- This tool is used to search for academic papers on Semantic Scholar.
4
+ Search for academic papers on Semantic Scholar by title or keywords.
5
+
6
+ Given a text query, this tool retrieves relevant papers from Semantic Scholar,
7
+ optionally filtered by publication year.
5
8
  """
6
9
 
7
10
  import logging
@@ -19,42 +22,62 @@ logger = logging.getLogger(__name__)
19
22
 
20
23
 
21
24
  class SearchInput(BaseModel):
22
- """Input schema for the search papers tool."""
25
+ """Defines the input schema for the paper search tool.
26
+
27
+ Attributes:
28
+ query: Full or partial paper title or keywords to search for.
29
+ limit: Maximum number of search results to return (1-100).
30
+ year: Optional publication year filter; supports 'YYYY',
31
+ 'YYYY-', '-YYYY', 'YYYY:YYYY'.
32
+ tool_call_id: Internal tool call identifier injected by the system.
33
+ """
23
34
 
24
35
  query: str = Field(
25
- description="Search query string to find academic papers."
26
- "Be specific and include relevant academic terms."
36
+ description="Full or partial paper title or keywords to search for"
27
37
  )
28
38
  limit: int = Field(
29
- default=10, description="Maximum number of results to return", ge=1, le=100
39
+ default=10,
40
+ description="Maximum number of search results to return (1-100)",
41
+ ge=1,
42
+ le=100,
30
43
  )
31
44
  year: Optional[str] = Field(
32
45
  default=None,
33
- description="Year range in format: YYYY for specific year, "
34
- "YYYY- for papers after year, -YYYY for papers before year, or YYYY:YYYY for range",
46
+ description="Publication year filter; supports formats:"
47
+ "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
35
48
  )
36
49
  tool_call_id: Annotated[str, InjectedToolCallId]
37
50
 
38
51
 
39
- @tool("search_tool", args_schema=SearchInput, parse_docstring=True)
52
+ @tool(
53
+ "search_tool",
54
+ args_schema=SearchInput,
55
+ parse_docstring=True,
56
+ )
40
57
  def search_tool(
41
58
  query: str,
42
59
  tool_call_id: Annotated[str, InjectedToolCallId],
43
- limit: int = 5,
60
+ limit: int = 10,
44
61
  year: Optional[str] = None,
45
62
  ) -> Command[Any]:
46
63
  """
47
- Search for academic papers on Semantic Scholar.
64
+ Return academic papers from Semantic Scholar matching a title or keyword query.
65
+
66
+ This tool searches Semantic Scholar for papers whose titles or keywords
67
+ match the given text, optionally filtered by publication year.
48
68
 
49
69
  Args:
50
- query (str): The search query string to find academic papers.
51
- tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
52
- limit (int, optional): The maximum number of results to return. Defaults to 5.
53
- year (str, optional): Year range for papers.
54
- Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
70
+ query (str): Full or partial paper title or keywords to search for.
71
+ tool_call_id (str): Internal tool call identifier injected by the system.
72
+ limit (int, optional): Maximum number of search results to return. Defaults to 10.
73
+ year (str, optional): Publication year filter; supports 'YYYY',
74
+ 'YYYY-', '-YYYY', 'YYYY:YYYY'. Defaults to None.
55
75
 
56
76
  Returns:
57
- The number of papers found on Semantic Scholar.
77
+ Command: A Command object containing:
78
+ - papers: List of matching papers.
79
+ - last_displayed_papers: Same list for display purposes.
80
+ - messages: List containing a ToolMessage with search results details.
58
81
  """
59
82
  # Create search data object to organize variables
60
83
  search_data = SearchData(query, limit, year, tool_call_id)
@@ -1,7 +1,10 @@
1
1
  #!/usr/bin/env python3
2
2
 
3
3
  """
4
- This tool is used to return recommendations for a single paper.
4
+ Recommend research papers related to a single input paper using Semantic Scholar.
5
+
6
+ Given a Semantic Scholar paper ID, this tool retrieves related works
7
+ (citations and references) and returns a curated list of recommended papers.
5
8
  """
6
9
 
7
10
  import logging
@@ -19,27 +22,37 @@ logger = logging.getLogger(__name__)
19
22
 
20
23
 
21
24
  class SinglePaperRecInput(BaseModel):
22
- """Input schema for single paper recommendation tool."""
25
+ """Defines the input schema for the single-paper recommendation tool.
26
+
27
+ Attributes:
28
+ paper_id: 40-character Semantic Scholar Paper ID to base recommendations on.
29
+ limit: Maximum number of recommendations to return (1-500).
30
+ year: Optional publication year filter; supports 'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'.
31
+ tool_call_id: Internal tool call identifier injected by the system.
32
+ """
23
33
 
24
34
  paper_id: str = Field(
25
- description="Semantic Scholar Paper ID to get recommendations for (40-character string)"
35
+ description="40-character Semantic Scholar Paper ID to base recommendations on"
26
36
  )
27
37
  limit: int = Field(
28
- default=5,
29
- description="Maximum number of recommendations to return",
38
+ default=10,
39
+ description="Maximum number of recommendations to return (1-500)",
30
40
  ge=1,
31
41
  le=500,
32
42
  )
33
43
  year: Optional[str] = Field(
34
44
  default=None,
35
- description="Year range in format: YYYY for specific year, "
36
- "YYYY- for papers after year, -YYYY for papers before year, or YYYY:YYYY for range",
45
+ description="Publication year filter; supports formats::"
46
+ "'YYYY', 'YYYY-', '-YYYY', 'YYYY:YYYY'",
37
47
  )
38
48
  tool_call_id: Annotated[str, InjectedToolCallId]
39
49
  model_config = {"arbitrary_types_allowed": True}
40
50
 
41
51
 
42
- @tool(args_schema=SinglePaperRecInput, parse_docstring=True)
52
+ @tool(
53
+ args_schema=SinglePaperRecInput,
54
+ parse_docstring=True,
55
+ )
43
56
  def get_single_paper_recommendations(
44
57
  paper_id: str,
45
58
  tool_call_id: Annotated[str, InjectedToolCallId],
@@ -47,18 +60,23 @@ def get_single_paper_recommendations(
47
60
  year: Optional[str] = None,
48
61
  ) -> Command[Any]:
49
62
  """
50
- Get recommendations for a single paper using its Semantic Scholar ID.
51
- No other ID types are supported.
63
+ Return recommended papers for a single Semantic Scholar paper ID.
64
+
65
+ This tool accepts a single Semantic Scholar paper ID and returns related works
66
+ by aggregating citations and references.
52
67
 
53
68
  Args:
54
- paper_id (str): The Semantic Scholar Paper ID to get recommendations for.
55
- tool_call_id (Annotated[str, InjectedToolCallId]): The tool call ID.
56
- limit (int, optional): The maximum number of recommendations to return. Defaults to 5.
57
- year (str, optional): Year range for papers.
58
- Supports formats like "2024-", "-2024", "2024:2025". Defaults to None.
69
+ paper_id (str): 40-character Semantic Scholar paper ID.
70
+ tool_call_id (str): Internal tool call identifier injected by the system.
71
+ limit (int, optional): Maximum number of recommendations to return. Defaults to 5.
72
+ year (str, optional): Publication year filter; supports 'YYYY', 'YYYY-',
73
+ '-YYYY', 'YYYY:YYYY'. Defaults to None.
59
74
 
60
75
  Returns:
61
- Dict[str, Any]: The recommendations and related information.
76
+ Command: A Command object containing:
77
+ - papers: List of recommended papers.
78
+ - last_displayed_papers: Same list for display purposes.
79
+ - messages: List containing a ToolMessage with recommendation details.
62
80
  """
63
81
  # Create recommendation data object to organize variables
64
82
  rec_data = SinglePaperRecData(paper_id, limit, year, tool_call_id)
@@ -127,8 +127,26 @@ class MultiPaperRecData:
127
127
 
128
128
  def _filter_papers(self) -> None:
129
129
  """Filter and format papers."""
130
- self.filtered_papers = {
131
- paper["paperId"]: {
130
+ # Build filtered recommendations with unified paper_ids
131
+ filtered: Dict[str, Any] = {}
132
+ for paper in self.recommendations:
133
+ if not paper.get("title") or not paper.get("authors"):
134
+ continue
135
+ ext = paper.get("externalIds", {}) or {}
136
+ ids: List[str] = []
137
+ arxiv = ext.get("ArXiv")
138
+ if arxiv:
139
+ ids.append(f"arxiv:{arxiv}")
140
+ pubmed = ext.get("PubMed")
141
+ if pubmed:
142
+ ids.append(f"pubmed:{pubmed}")
143
+ pmc = ext.get("PubMedCentral")
144
+ if pmc:
145
+ ids.append(f"pmc:{pmc}")
146
+ doi_id = ext.get("DOI")
147
+ if doi_id:
148
+ ids.append(f"doi:{doi_id}")
149
+ metadata = {
132
150
  "semantic_scholar_paper_id": paper["paperId"],
133
151
  "Title": paper.get("title", "N/A"),
134
152
  "Abstract": paper.get("abstract", "N/A"),
@@ -142,27 +160,42 @@ class MultiPaperRecData:
142
160
  for author in paper.get("authors", [])
143
161
  ],
144
162
  "URL": paper.get("url", "N/A"),
145
- "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
146
- "doi": paper.get("externalIds", {}).get("DOI", "N/A"),
163
+ "arxiv_id": arxiv or "N/A",
164
+ "pm_id": pubmed or "N/A",
165
+ "pmc_id": pmc or "N/A",
166
+ "doi": doi_id or "N/A",
167
+ "paper_ids": ids,
168
+ "source": "semantic_scholar",
147
169
  }
148
- for paper in self.recommendations
149
- if paper.get("title") and paper.get("authors")
150
- }
170
+ filtered[paper["paperId"]] = metadata
171
+ self.filtered_papers = filtered
151
172
 
152
173
  logger.info("Filtered %d papers", len(self.filtered_papers))
153
174
 
175
+ def _get_snippet(self, abstract: str) -> str:
176
+ """Extract the first one or two sentences from an abstract."""
177
+ if not abstract or abstract == "N/A":
178
+ return ""
179
+ sentences = abstract.split(". ")
180
+ snippet_sentences = sentences[:2]
181
+ snippet = ". ".join(snippet_sentences)
182
+ if not snippet.endswith("."):
183
+ snippet += "."
184
+ return snippet
185
+
154
186
  def _create_content(self) -> None:
155
187
  """Create the content message for the response."""
156
188
  top_papers = list(self.filtered_papers.values())[:3]
157
- top_papers_info = "\n".join(
158
- [
159
- f"{i+1}. {paper['Title']} ({paper['Year']}; "
160
- f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
161
- f"arXiv ID: {paper['arxiv_id']})"
162
- f"doi: {paper['doi']})"
163
- for i, paper in enumerate(top_papers)
164
- ]
165
- )
189
+ entries: list[str] = []
190
+ for i, paper in enumerate(top_papers):
191
+ title = paper.get("Title", "N/A")
192
+ year = paper.get("Year", "N/A")
193
+ snippet = self._get_snippet(paper.get("Abstract", ""))
194
+ entry = f"{i+1}. {title} ({year})"
195
+ if snippet:
196
+ entry += f"\n Abstract snippet: {snippet}"
197
+ entries.append(entry)
198
+ top_papers_info = "\n".join(entries)
166
199
 
167
200
  self.content = (
168
201
  "Recommendations based on multiple papers were successful. "