aiagents4pharma 1.30.0__py3-none-any.whl → 1.30.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +18 -10
  2. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +5 -6
  3. aiagents4pharma/talk2scholars/agents/pdf_agent.py +4 -10
  4. aiagents4pharma/talk2scholars/agents/zotero_agent.py +9 -7
  5. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +18 -9
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +2 -2
  7. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +9 -15
  8. aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +1 -0
  9. aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +6 -1
  10. aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +7 -1
  11. aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +6 -1
  12. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -1
  13. aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
  14. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -1
  15. aiagents4pharma/talk2scholars/tests/test_llm_main_integration.py +84 -53
  16. aiagents4pharma/talk2scholars/tests/test_main_agent.py +24 -0
  17. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +79 -15
  18. aiagents4pharma/talk2scholars/tests/test_routing_logic.py +13 -10
  19. aiagents4pharma/talk2scholars/tests/test_s2_multi.py +27 -4
  20. aiagents4pharma/talk2scholars/tests/test_s2_search.py +19 -3
  21. aiagents4pharma/talk2scholars/tests/test_s2_single.py +27 -3
  22. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +3 -2
  23. aiagents4pharma/talk2scholars/tests/test_zotero_human_in_the_loop.py +273 -0
  24. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +419 -1
  25. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +25 -18
  26. aiagents4pharma/talk2scholars/tests/test_zotero_write.py +123 -588
  27. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +2 -0
  28. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +11 -4
  29. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +5 -1
  30. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +73 -26
  31. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +46 -22
  32. aiagents4pharma/talk2scholars/tools/s2/query_results.py +1 -1
  33. aiagents4pharma/talk2scholars/tools/s2/search.py +40 -12
  34. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +42 -16
  35. aiagents4pharma/talk2scholars/tools/zotero/__init__.py +1 -0
  36. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +125 -0
  37. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +35 -20
  38. aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +198 -0
  39. aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +86 -118
  40. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/METADATA +4 -3
  41. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/RECORD +44 -41
  42. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/WHEEL +1 -1
  43. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info/licenses}/LICENSE +0 -0
  44. {aiagents4pharma-1.30.0.dist-info → aiagents4pharma-1.30.2.dist-info}/top_level.txt +0 -0
aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py
@@ -9,6 +9,8 @@ inherit from this class and implement its methods.
 
 from abc import ABC, abstractmethod
 from typing import Any, Dict
+
+
 class AbstractPaperDownloader(ABC):
     """
     Abstract base class for scholarly paper downloaders.
aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py
@@ -8,6 +8,7 @@ downloads the corresponding PDF.
 By using an abstract base class, this implementation is extendable to other
 APIs like PubMed, IEEE Xplore, etc.
 """
+
 import xml.etree.ElementTree as ET
 from typing import Any, Dict
 import logging
@@ -19,6 +20,7 @@ from .abstract_downloader import AbstractPaperDownloader
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+
 class ArxivPaperDownloader(AbstractPaperDownloader):
     """
     Downloader class for arXiv.
@@ -35,13 +37,13 @@ class ArxivPaperDownloader(AbstractPaperDownloader):
         """
         with hydra.initialize(version_base=None, config_path="../../configs"):
             cfg = hydra.compose(
-                config_name="config",
-                overrides=["tools/download_arxiv_paper=default"]
+                config_name="config", overrides=["tools/download_arxiv_paper=default"]
             )
         self.api_url = cfg.tools.download_arxiv_paper.api_url
         self.request_timeout = cfg.tools.download_arxiv_paper.request_timeout
         self.chunk_size = cfg.tools.download_arxiv_paper.chunk_size
         self.pdf_base_url = cfg.tools.download_arxiv_paper.pdf_base_url
+
     def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
         """
         Fetch metadata from arXiv for a given paper ID.
@@ -95,11 +97,16 @@ class ArxivPaperDownloader(AbstractPaperDownloader):
         logger.info("Downloading PDF from: %s", pdf_url)
         pdf_response = requests.get(pdf_url, stream=True, timeout=self.request_timeout)
         pdf_response.raise_for_status()
+        # print (pdf_response)
 
         # Combine the PDF data from chunks.
         pdf_object = b"".join(
-            chunk for chunk in pdf_response.iter_content(chunk_size=self.chunk_size) if chunk
-        )
+            chunk
+            for chunk in pdf_response.iter_content(chunk_size=self.chunk_size)
+            if chunk
+        )
+        # print (pdf_object)
+        print("PDF_URL", pdf_url)
 
         return {
             "pdf_object": pdf_object,
aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py
@@ -14,16 +14,19 @@ from langgraph.types import Command
 # Local import from the same package:
 from .arxiv_downloader import ArxivPaperDownloader
 
+
 class DownloadArxivPaperInput(BaseModel):
     """
     Input schema for the arXiv paper download tool.
     (Optional: if you decide to keep Pydantic validation in the future)
     """
+
     arxiv_id: str = Field(
         description="The arXiv paper ID used to retrieve the paper details and PDF."
-        )
+    )
     tool_call_id: Annotated[str, InjectedToolCallId]
 
+
 @tool(args_schema=DownloadArxivPaperInput, parse_docstring=True)
 def download_arxiv_paper(
     arxiv_id: str,
@@ -49,6 +52,7 @@ def download_arxiv_paper(
 
     # If the downloader fails or the arxiv_id is invalid, this might raise an error
     pdf_data = downloader.download_pdf(arxiv_id)
+    # print (pdf_data)
 
     content = f"Successfully downloaded PDF for arXiv ID {arxiv_id}"
 
aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py
@@ -2,8 +2,8 @@
 """
 question_and_answer: Tool for performing Q&A on PDF documents using retrieval augmented generation.
 
-This module provides functionality to extract text from PDF binary data, split it into 
-chunks, retrieve relevant segments via a vector store, and generate an answer to a 
+This module provides functionality to extract text from PDF binary data, split it into
+chunks, retrieve relevant segments via a vector store, and generate an answer to a
 user-provided question using a language model chain.
 """
 
@@ -18,13 +18,15 @@ import hydra
 from langchain.chains.question_answering import load_qa_chain
 from langchain.docstore.document import Document
 from langchain.text_splitter import CharacterTextSplitter
-from langchain_community.vectorstores import Annoy
-from langchain_openai import OpenAIEmbeddings
 from langchain_core.language_models.chat_models import BaseChatModel
-
+from langchain_core.vectorstores import InMemoryVectorStore
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
 from langchain_core.tools.base import InjectedToolCallId
+from langchain_core.embeddings import Embeddings
+from langchain_community.vectorstores import Annoy
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_openai import OpenAIEmbeddings
 from langgraph.types import Command
 from langgraph.prebuilt import InjectedState
 
@@ -35,10 +37,13 @@ logger.setLevel(logging.INFO)
 
 # Load configuration using Hydra.
 with hydra.initialize(version_base=None, config_path="../../configs"):
-    cfg = hydra.compose(config_name="config", overrides=["tools/question_and_answer=default"])
+    cfg = hydra.compose(
+        config_name="config", overrides=["tools/question_and_answer=default"]
+    )
     cfg = cfg.tools.question_and_answer
     logger.info("Loaded Question and Answer tool configuration.")
 
+
 class QuestionAndAnswerInput(BaseModel):
     """
     Input schema for the PDF Question and Answer tool.
@@ -47,12 +52,12 @@ class QuestionAndAnswerInput(BaseModel):
         question (str): The question to ask regarding the PDF content.
         tool_call_id (str): Unique identifier for the tool call, injected automatically.
     """
-    question: str = Field(
-        description="The question to ask regarding the PDF content."
-    )
+
+    question: str = Field(description="The question to ask regarding the PDF content.")
     tool_call_id: Annotated[str, InjectedToolCallId]
     state: Annotated[dict, InjectedState]
 
+
 def extract_text_from_pdf_data(pdf_bytes: bytes) -> str:
     """
     Extract text content from PDF binary data.
@@ -73,7 +78,10 @@ def extract_text_from_pdf_data(pdf_bytes: bytes) -> str:
         text += page_text
     return text
 
-def generate_answer(question: str, pdf_bytes: bytes, llm_model: BaseChatModel) -> Dict[str, Any]:
+
+def generate_answer(
+    question: str, pdf_bytes: bytes, llm_model: BaseChatModel
+) -> Dict[str, Any]:
     """
     Generate an answer for a question using retrieval augmented generation on PDF content.
 
@@ -92,9 +100,7 @@ def generate_answer(question: str, pdf_bytes: bytes, llm_model: BaseChatModel) -> Dict[str, Any]:
     text = extract_text_from_pdf_data(pdf_bytes)
     logger.info("Extracted text from PDF.")
     text_splitter = CharacterTextSplitter(
-        separator="\n",
-        chunk_size=cfg.chunk_size,
-        chunk_overlap=cfg.chunk_overlap
+        separator="\n", chunk_size=cfg.chunk_size, chunk_overlap=cfg.chunk_overlap
     )
     chunks = text_splitter.split_text(text)
     documents: List[Document] = [Document(page_content=chunk) for chunk in chunks]
@@ -102,10 +108,7 @@ def generate_answer(question: str, pdf_bytes: bytes, llm_model: BaseChatModel) -> Dict[str, Any]:
 
     embeddings = OpenAIEmbeddings(openai_api_key=cfg.openai_api_key)
     vector_store = Annoy.from_documents(documents, embeddings)
-    search_results = vector_store.similarity_search(
-        question,
-        k=cfg.num_retrievals
-    )
+    search_results = vector_store.similarity_search(question, k=cfg.num_retrievals)
     logger.info("Retrieved %d relevant document chunks.", len(search_results))
     # Use the provided llm_model to build the QA chain.
     qa_chain = load_qa_chain(llm_model, chain_type=cfg.qa_chain_type)
@@ -114,6 +117,49 @@ def generate_answer(question: str, pdf_bytes: bytes, llm_model: BaseChatModel) -> Dict[str, Any]:
     )
     return answer
 
+
+def generate_answer2(
+    question: str, pdf_url: str, text_embedding_model: Embeddings
+) -> Dict[str, Any]:
+    """
+    Generate an answer for a question using retrieval augmented generation on PDF content.
+
+    This function extracts text from the PDF data, splits the text into manageable chunks,
+    performs a similarity search to retrieve the most relevant segments, and then uses a
+    question-answering chain (built using the provided llm_model) to generate an answer.
+
+    Args:
+        question (str): The question to be answered.
+        pdf_bytes (bytes): The binary content of the PDF document.
+        llm_model (BaseChatModel): The language model instance to use for answering.
+
+    Returns:
+        Dict[str, Any]: A dictionary containing the answer generated by the language model.
+    """
+    # text = extract_text_from_pdf_data(pdf_bytes)
+    # logger.info("Extracted text from PDF.")
+    logger.log(logging.INFO, "searching the article with the question: %s", question)
+    # Load the article
+    # loader = PyPDFLoader(state['pdf_file_name'])
+    # loader = PyPDFLoader("https://arxiv.org/pdf/2310.08365")
+    loader = PyPDFLoader(pdf_url)
+    # Load the pages of the article
+    pages = []
+    for page in loader.lazy_load():
+        pages.append(page)
+    # Set up text embedding model
+    # text_embedding_model = state['text_embedding_model']
+    # text_embedding_model = OpenAIEmbeddings(openai_api_key=cfg.openai_api_key)
+    logging.info("Loaded text embedding model %s", text_embedding_model)
+    # Create a vector store from the pages
+    vector_store = InMemoryVectorStore.from_documents(pages, text_embedding_model)
+    # Search the article with the question
+    docs = vector_store.similarity_search(question)
+    # Return the content of the pages
+    return "\n".join([doc.page_content for doc in docs])
+    # return answer
+
+
 @tool(args_schema=QuestionAndAnswerInput)
 def question_and_answer_tool(
     question: str,
@@ -124,7 +170,7 @@ def question_and_answer_tool(
     Answer a question using PDF content stored in the state via retrieval augmented generation.
 
     This tool retrieves the PDF binary data from the state (under the key "pdf_data"), extracts its
-    textual content, and generates an answer to the specified question. It also extracts the 
+    textual content, and generates an answer to the specified question. It also extracts the
     llm_model (of type BaseChatModel) from the state to use for answering.
 
     Args:
@@ -138,15 +184,15 @@ def question_and_answer_tool(
         Dict[str, Any]: A dictionary containing the generated answer or an error message.
     """
     logger.info("Starting PDF Question and Answer tool using PDF data from state.")
+    # print (state['text_embedding_model'])
+    text_embedding_model = state["text_embedding_model"]
     pdf_state = state.get("pdf_data")
     if not pdf_state:
         error_msg = "No pdf_data found in state."
         logger.error(error_msg)
         return Command(
             update={
-                "messages": [
-                    ToolMessage(content=error_msg, tool_call_id=tool_call_id)
-                ]
+                "messages": [ToolMessage(content=error_msg, tool_call_id=tool_call_id)]
             }
         )
     pdf_bytes = pdf_state.get("pdf_object")
@@ -155,16 +201,17 @@ def question_and_answer_tool(
         logger.error(error_msg)
         return Command(
             update={
-                "messages": [
-                    ToolMessage(content=error_msg, tool_call_id=tool_call_id)
-                ]
+                "messages": [ToolMessage(content=error_msg, tool_call_id=tool_call_id)]
             }
         )
+    pdf_url = pdf_state.get("pdf_url")
     # Retrieve llm_model from state; use a default if not provided.
    llm_model = state.get("llm_model")
     if not llm_model:
         logger.error("Missing LLM model instance in state.")
         return {"error": "No LLM model found in state."}
-    answer = generate_answer(question, pdf_bytes, llm_model)
-    logger.info("Generated answer: %s", answer)
+    # answer = generate_answer(question, pdf_bytes, llm_model)
+    print(pdf_url)
+    answer = generate_answer2(question, pdf_url, text_embedding_model)
+    # logger.info("Generated answer: %s", answer)
     return answer
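For context, generate_answer2 above replaces the bytes-based Annoy flow with URL-based retrieval: PyPDFLoader loads one Document per page, InMemoryVectorStore indexes them with the embedding model taken from the agent state, and the text of the most similar pages is returned. A minimal standalone sketch of that flow (the OpenAI embedding model and default k are assumptions for illustration, not values read from the package config):

from langchain_community.document_loaders import PyPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import OpenAIEmbeddings  # assumes OPENAI_API_KEY is set

def retrieve_relevant_pages(question: str, pdf_url: str) -> str:
    # Load the PDF from its URL, one Document per page
    pages = list(PyPDFLoader(pdf_url).lazy_load())
    # Index the pages in an in-memory vector store
    vector_store = InMemoryVectorStore.from_documents(pages, OpenAIEmbeddings())
    # Return the text of the most similar pages (default k=4)
    docs = vector_store.similarity_search(question)
    return "\n".join(doc.page_content for doc in docs)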
aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py
@@ -30,7 +30,7 @@ class MultiPaperRecInput(BaseModel):
         description="List of Semantic Scholar Paper IDs to get recommendations for"
     )
     limit: int = Field(
-        default=2,
+        default=10,
         description="Maximum total number of recommendations to return",
         ge=1,
         le=500,
@@ -90,23 +90,33 @@ def get_multi_paper_recommendations(
         params["year"] = year
 
     # Wrap API call in try/except to catch connectivity issues and validate response format
-    try:
-        response = requests.post(
-            endpoint,
-            headers=headers,
-            params=params,
-            data=json.dumps(payload),
-            timeout=cfg.request_timeout,
-        )
-        response.raise_for_status()  # Raises HTTPError for bad responses
-    except requests.exceptions.RequestException as e:
-        logger.error(
-            "Failed to connect to Semantic Scholar API for multi-paper recommendations: %s",
-            e,
-        )
-        raise RuntimeError(
-            "Failed to connect to Semantic Scholar API. Please retry the same query."
-        ) from e
+    response = None
+    for attempt in range(10):
+        try:
+            response = requests.post(
+                endpoint,
+                headers=headers,
+                params=params,
+                data=json.dumps(payload),
+                timeout=cfg.request_timeout,
+            )
+            response.raise_for_status()  # Raises HTTPError for bad responses
+            break  # Exit loop if request is successful
+        except requests.exceptions.RequestException as e:
+            logger.error(
+                "Attempt %d: Failed to connect to Semantic Scholar API for "
+                "multi-paper recommendations: %s",
+                attempt + 1,
+                e,
+            )
+            if attempt == 9:  # Last attempt
+                raise RuntimeError(
+                    "Failed to connect to Semantic Scholar API after 10 attempts."
+                    "Please retry the same query."
+                ) from e
+
+    if response is None:
+        raise RuntimeError("Failed to obtain a response from the Semantic Scholar API.")
 
     logger.info(
         "API Response Status for multi-paper recommendations: %s", response.status_code
@@ -137,11 +147,22 @@ def get_multi_paper_recommendations(
     # Create a dictionary to store the papers
     filtered_papers = {
         paper["paperId"]: {
-            "paper_id": paper["paperId"],
+            "semantic_scholar_paper_id": paper["paperId"],
             "Title": paper.get("title", "N/A"),
             "Abstract": paper.get("abstract", "N/A"),
             "Year": paper.get("year", "N/A"),
+            "Publication Date": paper.get("publicationDate", "N/A"),
+            "Venue": paper.get("venue", "N/A"),
+            # "Publication Venue": (paper.get("publicationVenue") or {}).get("name", "N/A"),
+            # "Venue Type": (paper.get("publicationVenue") or {}).get("name", "N/A"),
+            "Journal Name": (paper.get("journal") or {}).get("name", "N/A"),
+            # "Journal Volume": paper.get("journal", {}).get("volume", "N/A"),
+            # "Journal Pages": paper.get("journal", {}).get("pages", "N/A"),
             "Citation Count": paper.get("citationCount", "N/A"),
+            "Authors": [
+                f"{author.get('name', 'N/A')} (ID: {author.get('authorId', 'N/A')})"
+                for author in paper.get("authors", [])
+            ],
             "URL": paper.get("url", "N/A"),
             "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
         }
@@ -153,7 +174,10 @@ def get_multi_paper_recommendations(
     top_papers = list(filtered_papers.values())[:3]
     top_papers_info = "\n".join(
         [
-            f"{i+1}. {paper['Title']} ({paper['Year']})"
+            # f"{i+1}. {paper['Title']} ({paper['Year']})"
+            f"{i+1}. {paper['Title']} ({paper['Year']}; "
+            f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
+            f"arXiv ID: {paper['arxiv_id']})"
             for i, paper in enumerate(top_papers)
         ]
     )
@@ -165,10 +189,10 @@ def get_multi_paper_recommendations(
         "Papers are attached as an artifact."
     )
     content += " Here is a summary of the recommendations:\n"
-    content += f"Number of papers found: {len(filtered_papers)}\n"
+    content += f"Number of recommended papers found: {len(filtered_papers)}\n"
     content += f"Query Paper IDs: {', '.join(paper_ids)}\n"
     content += f"Year: {year}\n" if year else ""
-    content += "Top papers:\n" + top_papers_info
+    content += "Here are a few of these papers:\n" + top_papers_info
 
     return Command(
         update={
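For context, the Semantic Scholar tools (search, single-paper and multi-paper recommendations) now share the bounded-retry pattern shown above: up to 10 immediate attempts, re-raising as RuntimeError on the last failure. A condensed illustrative sketch of that pattern (the endpoint, payload, and attempt count are placeholders; like the diff, it applies no backoff between attempts):

import requests

def post_with_retries(endpoint: str, payload: dict, attempts: int = 10) -> requests.Response:
    last_error = None
    for attempt in range(attempts):
        try:
            response = requests.post(endpoint, json=payload, timeout=10)
            response.raise_for_status()  # HTTPError on 4xx/5xx
            return response  # success: stop retrying
        except requests.exceptions.RequestException as exc:
            last_error = exc  # keep the most recent failure for the final error
    raise RuntimeError(f"Request failed after {attempts} attempts.") from last_error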
aiagents4pharma/talk2scholars/tools/s2/query_results.py
@@ -44,7 +44,7 @@ def query_results(question: str, state: Annotated[dict, InjectedState]) -> str:
         raise NoPapersFoundError(
             "No papers found. A search needs to be performed first."
         )
-    context_key = state.get("last_displayed_papers","pdf_data")
+    context_key = state.get("last_displayed_papers", "pdf_data")
     dic_papers = state.get(context_key)
     df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
     df_agent = create_pandas_dataframe_agent(
aiagents4pharma/talk2scholars/tools/s2/search.py
@@ -14,6 +14,7 @@ from langchain_core.tools.base import InjectedToolCallId
 from langgraph.types import Command
 from pydantic import BaseModel, Field
 
+# pylint: disable=R0914,R0912,R0915
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -27,7 +28,7 @@ class SearchInput(BaseModel):
         "Be specific and include relevant academic terms."
     )
     limit: int = Field(
-        default=5, description="Maximum number of results to return", ge=1, le=100
+        default=10, description="Maximum number of results to return", ge=1, le=100
     )
     year: Optional[str] = Field(
         default=None,
@@ -75,14 +76,26 @@ def search_tool(
         params["year"] = year
 
     # Wrap API call in try/except to catch connectivity issues
-    try:
-        response = requests.get(endpoint, params=params, timeout=10)
-        response.raise_for_status()  # Raises HTTPError for bad responses
-    except requests.exceptions.RequestException as e:
-        logger.error("Failed to connect to Semantic Scholar API: %s", e)
-        raise RuntimeError(
-            "Failed to connect to Semantic Scholar API. Please retry the same query."
-        ) from e
+    response = None
+    for attempt in range(10):
+        try:
+            response = requests.get(endpoint, params=params, timeout=10)
+            response.raise_for_status()  # Raises HTTPError for bad responses
+            break  # Exit loop if request is successful
+        except requests.exceptions.RequestException as e:
+            logger.error(
+                "Attempt %d: Failed to connect to Semantic Scholar API: %s",
+                attempt + 1,
+                e,
+            )
+            if attempt == 9:  # Last attempt
+                raise RuntimeError(
+                    "Failed to connect to Semantic Scholar API after 10 attempts."
+                    "Please retry the same query."
+                ) from e
+
+    if response is None:
+        raise RuntimeError("Failed to obtain a response from the Semantic Scholar API.")
 
     data = response.json()
 
@@ -108,11 +121,22 @@ def search_tool(
     # Create a dictionary to store the papers
     filtered_papers = {
         paper["paperId"]: {
-            "paper_id": paper["paperId"],
+            "semantic_scholar_paper_id": paper["paperId"],
             "Title": paper.get("title", "N/A"),
             "Abstract": paper.get("abstract", "N/A"),
             "Year": paper.get("year", "N/A"),
+            "Publication Date": paper.get("publicationDate", "N/A"),
+            "Venue": paper.get("venue", "N/A"),
+            # "Publication Venue": (paper.get("publicationVenue") or {}).get("name", "N/A"),
+            # "Venue Type": (paper.get("publicationVenue") or {}).get("name", "N/A"),
+            "Journal Name": (paper.get("journal") or {}).get("name", "N/A"),
+            # "Journal Volume": paper.get("journal", {}).get("volume", "N/A"),
+            # "Journal Pages": paper.get("journal", {}).get("pages", "N/A"),
             "Citation Count": paper.get("citationCount", "N/A"),
+            "Authors": [
+                f"{author.get('name', 'N/A')} (ID: {author.get('authorId', 'N/A')})"
+                for author in paper.get("authors", [])
+            ],
             "URL": paper.get("url", "N/A"),
             "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
         }
@@ -126,11 +150,15 @@ def search_tool(
     top_papers = list(filtered_papers.values())[:3]
     top_papers_info = "\n".join(
         [
-            f"{i+1}. {paper['Title']} ({paper['Year']})"
+            f"{i+1}. {paper['Title']} ({paper['Year']}; "
+            f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
+            f"arXiv ID: {paper['arxiv_id']})"
             for i, paper in enumerate(top_papers)
         ]
     )
 
+    logger.info("-----------Filtered %d papers", len(filtered_papers))
+
     content = (
         "Search was successful. Papers are attached as an artifact. "
         "Here is a summary of the search results:\n"
@@ -138,7 +166,7 @@ def search_tool(
     content += f"Number of papers found: {len(filtered_papers)}\n"
     content += f"Query: {query}\n"
     content += f"Year: {year}\n" if year else ""
-    content += "Top papers:\n" + top_papers_info
+    content += "Top 3 papers:\n" + top_papers_info
 
     return Command(
         update={
aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py
@@ -14,6 +14,7 @@ from langchain_core.tools.base import InjectedToolCallId
 from langgraph.types import Command
 from pydantic import BaseModel, Field
 
+# pylint: disable=R0914,R0912,R0915
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -44,7 +45,7 @@ class SinglePaperRecInput(BaseModel):
 def get_single_paper_recommendations(
     paper_id: str,
     tool_call_id: Annotated[str, InjectedToolCallId],
-    limit: int = 5,
+    limit: int = 10,
     year: Optional[str] = None,
 ) -> Command[Any]:
     """
@@ -85,16 +86,28 @@ def get_single_paper_recommendations(
         params["year"] = year
 
     # Wrap API call in try/except to catch connectivity issues and check response format
-    try:
-        response = requests.get(endpoint, params=params, timeout=cfg.request_timeout)
-        response.raise_for_status()  # Raises HTTPError for bad responses
-    except requests.exceptions.RequestException as e:
-        logger.error(
-            "Failed to connect to Semantic Scholar API for recommendations: %s", e
-        )
-        raise RuntimeError(
-            "Failed to connect to Semantic Scholar API. Please retry the same query."
-        ) from e
+    response = None
+    for attempt in range(10):
+        try:
+            response = requests.get(
+                endpoint, params=params, timeout=cfg.request_timeout
+            )
+            response.raise_for_status()  # Raises HTTPError for bad responses
+            break  # Exit loop if request is successful
+        except requests.exceptions.RequestException as e:
+            logger.error(
+                "Attempt %d: Failed to connect to Semantic Scholar API for recommendations: %s",
+                attempt + 1,
+                e,
+            )
+            if attempt == 9:  # Last attempt
+                raise RuntimeError(
+                    "Failed to connect to Semantic Scholar API after 10 attempts."
+                    "Please retry the same query."
+                ) from e
+
+    if response is None:
+        raise RuntimeError("Failed to obtain a response from the Semantic Scholar API.")
 
     logger.info(
         "API Response Status for recommendations of paper %s: %s",
@@ -125,11 +138,22 @@ def get_single_paper_recommendations(
     # Extract paper ID and title from recommendations
     filtered_papers = {
         paper["paperId"]: {
-            "paper_id": paper["paperId"],
+            "semantic_scholar_paper_id": paper["paperId"],
             "Title": paper.get("title", "N/A"),
             "Abstract": paper.get("abstract", "N/A"),
             "Year": paper.get("year", "N/A"),
+            "Publication Date": paper.get("publicationDate", "N/A"),
+            "Venue": paper.get("venue", "N/A"),
+            # "Publication Venue": (paper.get("publicationVenue") or {}).get("name", "N/A"),
+            # "Venue Type": (paper.get("publicationVenue") or {}).get("name", "N/A"),
+            "Journal Name": (paper.get("journal") or {}).get("name", "N/A"),
+            # "Journal Volume": paper.get("journal", {}).get("volume", "N/A"),
+            # "Journal Pages": paper.get("journal", {}).get("pages", "N/A"),
             "Citation Count": paper.get("citationCount", "N/A"),
+            "Authors": [
+                f"{author.get('name', 'N/A')} (ID: {author.get('authorId', 'N/A')})"
+                for author in paper.get("authors", [])
+            ],
             "URL": paper.get("url", "N/A"),
             "arxiv_id": paper.get("externalIds", {}).get("ArXiv", "N/A"),
         }
@@ -141,7 +165,10 @@ def get_single_paper_recommendations(
     top_papers = list(filtered_papers.values())[:3]
     top_papers_info = "\n".join(
         [
-            f"{i+1}. {paper['Title']} ({paper['Year']})"
+            # f"{i+1}. {paper['Title']} ({paper['Year']})"
+            f"{i+1}. {paper['Title']} ({paper['Year']}; "
+            f"semantic_scholar_paper_id: {paper['semantic_scholar_paper_id']}; "
+            f"arXiv ID: {paper['arxiv_id']})"
            for i, paper in enumerate(top_papers)
         ]
     )
@@ -153,10 +180,9 @@ def get_single_paper_recommendations(
         "Papers are attached as an artifact. "
         "Here is a summary of the recommendations:\n"
     )
-    content += f"Number of papers found: {len(filtered_papers)}\n"
+    content += f"Number of recommended papers found: {len(filtered_papers)}\n"
     content += f"Query Paper ID: {paper_id}\n"
-    content += f"Year: {year}\n" if year else ""
-    content += "Top papers:\n" + top_papers_info
+    content += "Here are a few of these papers:\n" + top_papers_info
 
     return Command(
         update={
aiagents4pharma/talk2scholars/tools/zotero/__init__.py
@@ -5,3 +5,4 @@ Import statements
 from . import zotero_read
 from . import zotero_write
 from . import utils
+from . import zotero_review