aiagents4pharma 1.40.0__py3-none-any.whl → 1.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
- {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/METADATA +27 -115
- {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/RECORD +45 -23
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
- {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.40.0.dist-info → aiagents4pharma-1.41.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,8 @@
|
|
1
1
|
"""
|
2
|
-
Vectorstore class for managing
|
2
|
+
Vectorstore class for managing PDF embeddings with Milvus.
|
3
|
+
Manages GPU normalization and similarity search and MMR operations.
|
4
|
+
With automatic handling of COSINE to IP conversion for GPU compatibility.
|
5
|
+
Supports both GPU and CPU configurations.
|
3
6
|
"""
|
4
7
|
|
5
8
|
import logging
|
@@ -7,13 +10,18 @@ import os
|
|
7
10
|
import time
|
8
11
|
from typing import Any, Dict, List, Optional
|
9
12
|
|
10
|
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11
|
-
from langchain_community.document_loaders import PyPDFLoader
|
12
|
-
from langchain_community.vectorstores import FAISS
|
13
13
|
from langchain_core.documents import Document
|
14
14
|
from langchain_core.embeddings import Embeddings
|
15
|
-
from
|
15
|
+
from langchain_milvus import Milvus
|
16
16
|
|
17
|
+
from .collection_manager import ensure_collection_exists
|
18
|
+
from .gpu_detection import (
|
19
|
+
detect_nvidia_gpu,
|
20
|
+
get_optimal_index_config,
|
21
|
+
log_index_configuration,
|
22
|
+
)
|
23
|
+
from .singleton_manager import VectorstoreSingleton
|
24
|
+
from .vector_normalization import wrap_embedding_model_if_needed
|
17
25
|
|
18
26
|
# Set up logging with configurable level
|
19
27
|
log_level = os.environ.get("LOG_LEVEL", "INFO")
|
@@ -24,8 +32,8 @@ logger.setLevel(getattr(logging, log_level))
|
|
24
32
|
|
25
33
|
class Vectorstore:
|
26
34
|
"""
|
27
|
-
|
28
|
-
|
35
|
+
Enhanced Vectorstore class with GPU normalization support.
|
36
|
+
Automatically handles COSINE -> IP conversion for GPU compatibility.
|
29
37
|
"""
|
30
38
|
|
31
39
|
def __init__(
|
@@ -35,13 +43,13 @@ class Vectorstore:
|
|
35
43
|
config: Any = None,
|
36
44
|
):
|
37
45
|
"""
|
38
|
-
Initialize the document store.
|
46
|
+
Initialize the document store with Milvus and GPU optimization.
|
39
47
|
|
40
48
|
Args:
|
41
49
|
embedding_model: The embedding model to use
|
42
|
-
metadata_fields: Fields to include in document metadata
|
50
|
+
metadata_fields: Fields to include in document metadata
|
51
|
+
config: Configuration object containing Milvus connection details
|
43
52
|
"""
|
44
|
-
self.embedding_model = embedding_model
|
45
53
|
self.config = config
|
46
54
|
self.metadata_fields = metadata_fields or [
|
47
55
|
"title",
|
@@ -50,113 +58,286 @@ class Vectorstore:
|
|
50
58
|
"chunk_id",
|
51
59
|
]
|
52
60
|
self.initialization_time = time.time()
|
53
|
-
|
61
|
+
|
62
|
+
# GPU detection with config override (SINGLE CALL)
|
63
|
+
self.has_gpu = detect_nvidia_gpu(config)
|
64
|
+
|
65
|
+
# Additional check for force CPU mode
|
66
|
+
if (
|
67
|
+
config
|
68
|
+
and hasattr(config, "gpu_detection")
|
69
|
+
and getattr(config.gpu_detection, "force_cpu_mode", False)
|
70
|
+
):
|
71
|
+
logger.info("Running in forced CPU mode (config override)")
|
72
|
+
self.has_gpu = False
|
73
|
+
|
74
|
+
# Determine if we want to use COSINE similarity
|
75
|
+
self.use_cosine = True # Default preference
|
76
|
+
if config and hasattr(config, "similarity_metric"):
|
77
|
+
self.use_cosine = getattr(config.similarity_metric, "use_cosine", True)
|
78
|
+
|
79
|
+
# Wrap embedding model with normalization if needed for GPU
|
80
|
+
self.original_embedding_model = embedding_model
|
81
|
+
self.embedding_model = wrap_embedding_model_if_needed(
|
82
|
+
embedding_model, self.has_gpu, self.use_cosine
|
83
|
+
)
|
84
|
+
|
85
|
+
# Configure index parameters AFTER determining GPU usage and normalization
|
86
|
+
embedding_dim = config.milvus.embedding_dim if config else 768
|
87
|
+
self.index_params, self.search_params = get_optimal_index_config(
|
88
|
+
self.has_gpu, embedding_dim, self.use_cosine
|
89
|
+
)
|
90
|
+
|
91
|
+
# Log the configuration
|
92
|
+
log_index_configuration(self.index_params, self.search_params, self.use_cosine)
|
54
93
|
|
55
94
|
# Track loaded papers to prevent duplicate loading
|
56
95
|
self.loaded_papers = set()
|
57
|
-
self.vector_store_class = FAISS
|
58
|
-
logger.info("Using FAISS vector store")
|
59
96
|
|
60
|
-
#
|
97
|
+
# Initialize Milvus connection parameters with environment variable fallback
|
98
|
+
self.connection_args = {
|
99
|
+
"host": (
|
100
|
+
config.milvus.host if config else os.getenv("MILVUS_HOST", "127.0.0.1")
|
101
|
+
),
|
102
|
+
"port": (
|
103
|
+
config.milvus.port if config else int(os.getenv("MILVUS_PORT", "19530"))
|
104
|
+
),
|
105
|
+
}
|
106
|
+
# Log the connection parameters being used
|
107
|
+
logger.info(
|
108
|
+
"Using Milvus connection: %s:%s",
|
109
|
+
self.connection_args["host"],
|
110
|
+
self.connection_args["port"],
|
111
|
+
)
|
112
|
+
self.collection_name = (
|
113
|
+
config.milvus.collection_name if config else "pdf_rag_documents"
|
114
|
+
)
|
115
|
+
self.db_name = config.milvus.db_name if config else "pdf_rag_db"
|
116
|
+
|
117
|
+
# Get singleton instance
|
118
|
+
self._singleton = VectorstoreSingleton()
|
119
|
+
|
120
|
+
# Connect to Milvus (reuses existing connection if available)
|
121
|
+
self._connect_milvus()
|
122
|
+
|
123
|
+
# Create collection with proper metric type
|
124
|
+
self.collection = ensure_collection_exists(
|
125
|
+
self.collection_name, self.config, self.index_params, self.has_gpu
|
126
|
+
)
|
127
|
+
|
128
|
+
# Initialize the LangChain Milvus vector store
|
129
|
+
self.vector_store = self._initialize_vector_store()
|
130
|
+
|
131
|
+
# Load existing papers AFTER vector store is ready
|
132
|
+
self._load_existing_paper_ids()
|
133
|
+
|
134
|
+
# CRITICAL: Load collection into memory/GPU after any existing data is identified
|
135
|
+
logger.info(
|
136
|
+
"Calling _ensure_collection_loaded() for %s processing...",
|
137
|
+
"GPU" if self.has_gpu else "CPU",
|
138
|
+
)
|
139
|
+
self._ensure_collection_loaded()
|
140
|
+
|
141
|
+
# Store for document metadata (keeping for compatibility)
|
61
142
|
self.documents: Dict[str, Document] = {}
|
62
|
-
self.vector_store: Optional[VectorStore] = None
|
63
143
|
self.paper_metadata: Dict[str, Dict[str, Any]] = {}
|
64
|
-
# Cache for document chunk embeddings to avoid recomputation
|
65
|
-
self.embeddings: Dict[str, Any] = {}
|
66
144
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
"""
|
74
|
-
Add a paper to the document store.
|
145
|
+
# Log final configuration
|
146
|
+
metric_info = (
|
147
|
+
"IP (normalized for COSINE)"
|
148
|
+
if self.has_gpu and self.use_cosine
|
149
|
+
else self.index_params["metric_type"]
|
150
|
+
)
|
75
151
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
152
|
+
logger.info(
|
153
|
+
"Milvus vector store initialized with collection: %s (GPU: %s, Metric: %s)",
|
154
|
+
self.collection_name,
|
155
|
+
"enabled" if self.has_gpu else "disabled",
|
156
|
+
metric_info,
|
157
|
+
)
|
158
|
+
|
159
|
+
def _connect_milvus(self) -> None:
|
160
|
+
"""Establish connection to Milvus server using singleton."""
|
161
|
+
self._singleton.get_connection(
|
162
|
+
self.connection_args["host"], self.connection_args["port"], self.db_name
|
163
|
+
)
|
164
|
+
|
165
|
+
def _initialize_vector_store(self) -> Milvus:
|
166
|
+
"""Initialize or load the Milvus vector store with proper embedding model."""
|
167
|
+
# Use the wrapped embedding model (with normalization if needed)
|
168
|
+
vector_store = self._singleton.get_vector_store(
|
169
|
+
self.collection_name, self.embedding_model, self.connection_args
|
170
|
+
)
|
171
|
+
|
172
|
+
return vector_store
|
85
173
|
|
86
|
-
|
174
|
+
def _load_existing_paper_ids(self):
|
175
|
+
"""Load already embedded paper IDs using LangChain's collection access."""
|
176
|
+
logger.info("Checking for existing papers via LangChain collection...")
|
87
177
|
|
88
|
-
#
|
89
|
-
self.
|
178
|
+
# Access the collection through LangChain's wrapper
|
179
|
+
langchain_collection = getattr(self.vector_store, "col", None)
|
90
180
|
|
91
|
-
|
92
|
-
|
93
|
-
documents = loader.load()
|
94
|
-
logger.info("Loaded %d pages from %s", len(documents), paper_id)
|
181
|
+
if langchain_collection is None:
|
182
|
+
langchain_collection = getattr(self.vector_store, "collection", None)
|
95
183
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
"Configuration is required for text splitting in Vectorstore."
|
184
|
+
if langchain_collection is None:
|
185
|
+
logger.warning(
|
186
|
+
"No LangChain collection found, proceeding with empty loaded_papers"
|
100
187
|
)
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
188
|
+
return
|
189
|
+
|
190
|
+
# Force flush and check entity count
|
191
|
+
langchain_collection.flush()
|
192
|
+
num_entities = langchain_collection.num_entities
|
106
193
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
# Add paper metadata to each chunk
|
118
|
-
chunk.metadata.update(
|
119
|
-
{
|
120
|
-
"paper_id": paper_id,
|
121
|
-
"title": paper_metadata.get("Title", "Unknown"),
|
122
|
-
"chunk_id": i,
|
123
|
-
# Keep existing page number if available
|
124
|
-
"page": chunk.metadata.get("page", 0),
|
125
|
-
}
|
194
|
+
logger.info("LangChain collection entity count: %d", num_entities)
|
195
|
+
|
196
|
+
if num_entities > 0:
|
197
|
+
logger.info("Loading existing paper IDs from LangChain collection...")
|
198
|
+
|
199
|
+
results = langchain_collection.query(
|
200
|
+
expr="", # No filter - get all
|
201
|
+
output_fields=["paper_id"],
|
202
|
+
limit=16384, # Max limit
|
203
|
+
consistency_level="Strong",
|
126
204
|
)
|
127
205
|
|
128
|
-
#
|
129
|
-
for
|
130
|
-
|
131
|
-
chunk.metadata[field] = paper_metadata[field]
|
206
|
+
# Extract unique paper IDs
|
207
|
+
existing_paper_ids = set(result["paper_id"] for result in results)
|
208
|
+
self.loaded_papers.update(existing_paper_ids)
|
132
209
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
# Cache embedding if available
|
137
|
-
if chunk_embeddings[i] is not None:
|
138
|
-
self.embeddings[doc_id] = chunk_embeddings[i]
|
210
|
+
logger.info("Found %d unique papers in collection", len(existing_paper_ids))
|
211
|
+
else:
|
212
|
+
logger.info("Collection is empty - no existing papers")
|
139
213
|
|
140
|
-
|
141
|
-
|
142
|
-
|
214
|
+
def similarity_search(self, query: str, **kwargs: Any) -> List[Document]:
|
215
|
+
"""
|
216
|
+
Perform similarity search on the vector store.
|
217
|
+
Query embedding will be automatically normalized if using GPU with COSINE.
|
218
|
+
Keyword args:
|
219
|
+
k: int = 4
|
220
|
+
filter: Optional[Dict[str, Any]] = None
|
221
|
+
plus any other kwargs to pass through to the underlying vector_store.
|
222
|
+
"""
|
223
|
+
# Extract our parameters
|
224
|
+
k: int = kwargs.pop("k", 4)
|
225
|
+
filter_: Optional[Dict[str, Any]] = kwargs.pop("filter", None)
|
226
|
+
|
227
|
+
# Build Milvus expr from filter_, if present
|
228
|
+
expr = None
|
229
|
+
if filter_:
|
230
|
+
conditions = []
|
231
|
+
for key, value in filter_.items():
|
232
|
+
if isinstance(value, str):
|
233
|
+
conditions.append(f'{key} == "{value}"')
|
234
|
+
elif isinstance(value, list):
|
235
|
+
vals = ", ".join(
|
236
|
+
f'"{v}"' if isinstance(v, str) else str(v) for v in value
|
237
|
+
)
|
238
|
+
conditions.append(f"{key} in [{vals}]")
|
239
|
+
else:
|
240
|
+
conditions.append(f"{key} == {value}")
|
241
|
+
expr = " and ".join(conditions)
|
242
|
+
|
243
|
+
# Delegate to the wrapped store
|
244
|
+
return self.vector_store.similarity_search(
|
245
|
+
query=query, k=k, expr=expr, **kwargs
|
246
|
+
)
|
143
247
|
|
144
|
-
def
|
248
|
+
def max_marginal_relevance_search(
|
249
|
+
self, query: str, **kwargs: Any
|
250
|
+
) -> List[Document]:
|
145
251
|
"""
|
146
|
-
|
147
|
-
|
252
|
+
Perform MMR search on the vector store.
|
253
|
+
Query embedding will be automatically normalized if using GPU with COSINE.
|
254
|
+
Keyword args:
|
255
|
+
k: int = 4
|
256
|
+
fetch_k: int = 20
|
257
|
+
lambda_mult: float = 0.5
|
258
|
+
filter: Optional[Dict[str, Any]] = None
|
259
|
+
plus any other kwargs to pass through.
|
148
260
|
"""
|
149
|
-
|
150
|
-
|
151
|
-
|
261
|
+
# Extract our parameters
|
262
|
+
k: int = kwargs.pop("k", 4)
|
263
|
+
fetch_k: int = kwargs.pop("fetch_k", 20)
|
264
|
+
lambda_mult: float = kwargs.pop("lambda_mult", 0.5)
|
265
|
+
filter_: Optional[Dict[str, Any]] = kwargs.pop("filter", None)
|
152
266
|
|
153
|
-
|
154
|
-
|
155
|
-
|
267
|
+
# Build Milvus expr from filter_, if present
|
268
|
+
expr = None
|
269
|
+
if filter_:
|
270
|
+
conditions = []
|
271
|
+
for key, value in filter_.items():
|
272
|
+
if isinstance(value, str):
|
273
|
+
conditions.append(f'{key} == "{value}"')
|
274
|
+
elif isinstance(value, list):
|
275
|
+
vals = ", ".join(
|
276
|
+
f'"{v}"' if isinstance(v, str) else str(v) for v in value
|
277
|
+
)
|
278
|
+
conditions.append(f"{key} in [{vals}]")
|
279
|
+
else:
|
280
|
+
conditions.append(f"{key} == {value}")
|
281
|
+
expr = " and ".join(conditions)
|
156
282
|
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
283
|
+
# Delegate to the wrapped store
|
284
|
+
return self.vector_store.max_marginal_relevance_search(
|
285
|
+
query=query,
|
286
|
+
k=k,
|
287
|
+
fetch_k=fetch_k,
|
288
|
+
lambda_mult=lambda_mult,
|
289
|
+
expr=expr,
|
290
|
+
**kwargs,
|
161
291
|
)
|
162
|
-
|
292
|
+
|
293
|
+
def _ensure_collection_loaded(self):
|
294
|
+
"""Ensure collection is loaded into memory/GPU after data insertion."""
|
295
|
+
# Get the collection
|
296
|
+
collection = getattr(self.vector_store, "col", None)
|
297
|
+
if collection is None:
|
298
|
+
collection = getattr(self.vector_store, "collection", None)
|
299
|
+
|
300
|
+
if collection is None:
|
301
|
+
logger.warning("Cannot access collection for loading")
|
302
|
+
return
|
303
|
+
|
304
|
+
# Force flush to ensure we see all data
|
305
|
+
logger.info("Flushing collection to ensure data visibility...")
|
306
|
+
collection.flush()
|
307
|
+
|
308
|
+
# Check entity count after flush
|
309
|
+
num_entities = collection.num_entities
|
310
|
+
logger.info("Collection entity count after flush: %d", num_entities)
|
311
|
+
|
312
|
+
if num_entities > 0:
|
313
|
+
hardware_type = "GPU" if self.has_gpu else "CPU"
|
314
|
+
logger.info(
|
315
|
+
"Loading collection with %d entities into %s memory...",
|
316
|
+
num_entities,
|
317
|
+
hardware_type,
|
318
|
+
)
|
319
|
+
|
320
|
+
# Load collection into memory (CPU or GPU)
|
321
|
+
collection.load()
|
322
|
+
|
323
|
+
# Verify loading was successful
|
324
|
+
final_count = collection.num_entities
|
325
|
+
logger.info(
|
326
|
+
"Collection successfully loaded into %s memory with %d entities",
|
327
|
+
hardware_type,
|
328
|
+
final_count,
|
329
|
+
)
|
330
|
+
else:
|
331
|
+
logger.info("Collection is empty, skipping load operation")
|
332
|
+
|
333
|
+
def get_embedding_info(self) -> Dict[str, Any]:
|
334
|
+
"""Get information about the embedding configuration."""
|
335
|
+
return {
|
336
|
+
"has_gpu": self.has_gpu,
|
337
|
+
"use_cosine": self.use_cosine,
|
338
|
+
"metric_type": self.index_params["metric_type"],
|
339
|
+
"index_type": self.index_params["index_type"],
|
340
|
+
"normalization_enabled": hasattr(self.embedding_model, "normalize_for_gpu"),
|
341
|
+
"original_model_type": type(self.original_embedding_model).__name__,
|
342
|
+
"wrapped_model_type": type(self.embedding_model).__name__,
|
343
|
+
}
|
@@ -65,25 +65,28 @@ def get_multi_paper_recommendations(
|
|
65
65
|
year: Optional[str] = None,
|
66
66
|
) -> Command[Any]:
|
67
67
|
"""
|
68
|
-
|
68
|
+
Recommend related research papers using the Semantic Scholar API.
|
69
69
|
|
70
|
-
This tool
|
71
|
-
|
72
|
-
|
70
|
+
This tool is designed to suggest relevant papers based on a list of
|
71
|
+
input Semantic Scholar paper IDs.
|
72
|
+
|
73
|
+
It fetches citations and references for each input paper and aggregates
|
74
|
+
them to generate a set of
|
75
|
+
recommended papers.
|
73
76
|
|
74
77
|
Args:
|
75
78
|
paper_ids (List[str]): List of 40-character Semantic Scholar paper IDs.
|
76
|
-
|
79
|
+
Provide at least two IDs to improve the relevance of recommendations.
|
77
80
|
tool_call_id (str): Internal tool call identifier injected by the system.
|
78
|
-
limit (int, optional): Maximum
|
79
|
-
year (str, optional):
|
80
|
-
|
81
|
+
limit (int, optional): Maximum number of recommendations to return. Defaults to 10.
|
82
|
+
year (str, optional): Filter recommendations by publication year.
|
83
|
+
Supports formats: 'YYYY', 'YYYY-', '-YYYY', or 'YYYY:YYYY'. Defaults to None.
|
81
84
|
|
82
85
|
Returns:
|
83
86
|
Command: A Command object containing:
|
84
87
|
- multi_papers: List of recommended papers.
|
85
88
|
- last_displayed_papers: Same list for display purposes.
|
86
|
-
- messages: List containing a ToolMessage with
|
89
|
+
- messages: List containing a ToolMessage with recommendation details.
|
87
90
|
"""
|
88
91
|
# Create recommendation data object to organize variables
|
89
92
|
rec_data = MultiPaperRecData(paper_ids, limit, year, tool_call_id)
|
@@ -50,22 +50,23 @@ def retrieve_semantic_scholar_paper_id(
|
|
50
50
|
tool_call_id: str,
|
51
51
|
) -> Command[Any]:
|
52
52
|
"""
|
53
|
-
|
53
|
+
Retrieve a Semantic Scholar paper ID using a paper title.
|
54
54
|
|
55
|
-
This tool
|
56
|
-
|
55
|
+
This tool searches Semantic Scholar for the best match to the provided paper title
|
56
|
+
and returns the corresponding unique paper ID. It is intended to support downstream
|
57
|
+
tasks such as recommendations, metadata lookups, or citation graph queries.
|
57
58
|
|
58
|
-
Use when you
|
59
|
-
|
60
|
-
|
59
|
+
Use this tool when you know the full or partial title of a paper and need its
|
60
|
+
Semantic Scholar ID.
|
61
|
+
For broad literature searches or topic-based queries, use a general `search` tool instead.
|
61
62
|
|
62
63
|
Args:
|
63
|
-
paper_title (str): The title of the paper to look up.
|
64
|
+
paper_title (str): The full or partial title of the paper to look up.
|
64
65
|
tool_call_id (str): LangGraph-injected identifier for this tool call.
|
65
66
|
|
66
67
|
Returns:
|
67
68
|
Command: A structured response containing a ToolMessage whose content is
|
68
|
-
|
69
|
+
the Semantic Scholar paper ID string (e.g., 'abc123xyz').
|
69
70
|
|
70
71
|
Raises:
|
71
72
|
ValueError: If no matching paper is found for the given title.
|
@@ -60,17 +60,17 @@ def get_single_paper_recommendations(
|
|
60
60
|
year: Optional[str] = None,
|
61
61
|
) -> Command[Any]:
|
62
62
|
"""
|
63
|
-
|
63
|
+
Recommend related research papers using the Semantic Scholar API for a single paper ID.
|
64
64
|
|
65
|
-
This tool
|
66
|
-
|
65
|
+
This tool is designed to suggest relevant papers based on one input Semantic Scholar paper ID.
|
66
|
+
It fetches citations and references for the given paper and returns a set of recommended works.
|
67
67
|
|
68
68
|
Args:
|
69
69
|
paper_id (str): 40-character Semantic Scholar paper ID.
|
70
70
|
tool_call_id (str): Internal tool call identifier injected by the system.
|
71
71
|
limit (int, optional): Maximum number of recommendations to return. Defaults to 5.
|
72
|
-
year (str, optional):
|
73
|
-
|
72
|
+
year (str, optional): Filter recommendations by publication year.
|
73
|
+
Supports formats: 'YYYY', 'YYYY-', '-YYYY', or 'YYYY:YYYY'. Defaults to None.
|
74
74
|
|
75
75
|
Returns:
|
76
76
|
Command: A Command object containing:
|