aiagents4pharma 1.31.0__py3-none-any.whl → 1.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. aiagents4pharma/talk2scholars/agents/main_agent.py +4 -3
  2. aiagents4pharma/talk2scholars/agents/paper_download_agent.py +3 -4
  3. aiagents4pharma/talk2scholars/agents/pdf_agent.py +6 -7
  4. aiagents4pharma/talk2scholars/agents/s2_agent.py +23 -20
  5. aiagents4pharma/talk2scholars/agents/zotero_agent.py +11 -11
  6. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +19 -19
  7. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +20 -15
  8. aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +27 -6
  9. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +7 -7
  10. aiagents4pharma/talk2scholars/tests/test_main_agent.py +16 -16
  11. aiagents4pharma/talk2scholars/tests/test_paper_download_agent.py +17 -24
  12. aiagents4pharma/talk2scholars/tests/test_paper_download_tools.py +152 -135
  13. aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +9 -16
  14. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +790 -218
  15. aiagents4pharma/talk2scholars/tests/test_s2_agent.py +9 -9
  16. aiagents4pharma/talk2scholars/tests/test_s2_display.py +8 -8
  17. aiagents4pharma/talk2scholars/tests/test_s2_query.py +8 -8
  18. aiagents4pharma/talk2scholars/tests/test_zotero_agent.py +12 -12
  19. aiagents4pharma/talk2scholars/tests/test_zotero_path.py +11 -12
  20. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +400 -22
  21. aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +0 -6
  22. aiagents4pharma/talk2scholars/tools/paper_download/download_arxiv_input.py +89 -31
  23. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +540 -156
  24. aiagents4pharma/talk2scholars/tools/s2/__init__.py +4 -4
  25. aiagents4pharma/talk2scholars/tools/s2/{display_results.py → display_dataframe.py} +19 -21
  26. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +71 -0
  27. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +213 -35
  28. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +3 -3
  29. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/METADATA +3 -1
  30. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/RECORD +33 -35
  31. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/WHEEL +1 -1
  32. aiagents4pharma/talk2scholars/tools/paper_download/abstract_downloader.py +0 -45
  33. aiagents4pharma/talk2scholars/tools/paper_download/arxiv_downloader.py +0 -115
  34. aiagents4pharma/talk2scholars/tools/s2/query_results.py +0 -61
  35. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/licenses/LICENSE +0 -0
  36. {aiagents4pharma-1.31.0.dist-info → aiagents4pharma-1.32.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.0.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,45 +0,0 @@
1
- """
2
- Abstract Base Class for Paper Downloaders.
3
-
4
- This module defines the `AbstractPaperDownloader` class, which serves as a
5
- base class for downloading scholarly papers from different sources
6
- (e.g., arXiv, PubMed, IEEE Xplore). Any specific downloader should
7
- inherit from this class and implement its methods.
8
- """
9
-
10
- from abc import ABC, abstractmethod
11
- from typing import Any, Dict
12
-
13
-
14
- class AbstractPaperDownloader(ABC):
15
- """
16
- Abstract base class for scholarly paper downloaders.
17
-
18
- This is designed to be extended for different paper sources
19
- like arXiv, PubMed, IEEE Xplore, etc. Each implementation
20
- must define methods for fetching metadata and downloading PDFs.
21
- """
22
-
23
- @abstractmethod
24
- def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
25
- """
26
- Fetch metadata for a given paper ID.
27
-
28
- Args:
29
- paper_id (str): The unique identifier for the paper.
30
-
31
- Returns:
32
- Dict[str, Any]: The metadata dictionary (format depends on the data source).
33
- """
34
-
35
- @abstractmethod
36
- def download_pdf(self, paper_id: str) -> bytes:
37
- """
38
- Download the PDF for a given paper ID.
39
-
40
- Args:
41
- paper_id (str): The unique identifier for the paper.
42
-
43
- Returns:
44
- bytes: The binary content of the downloaded PDF.
45
- """
@@ -1,115 +0,0 @@
1
- """
2
- Arxiv Paper Downloader
3
-
4
- This module provides an implementation of `AbstractPaperDownloader` for arXiv.
5
- It connects to the arXiv API, retrieves metadata for a research paper, and
6
- downloads the corresponding PDF.
7
-
8
- By using an abstract base class, this implementation is extendable to other
9
- APIs like PubMed, IEEE Xplore, etc.
10
- """
11
-
12
- import xml.etree.ElementTree as ET
13
- from typing import Any, Dict
14
- import logging
15
- import hydra
16
- import requests
17
- from .abstract_downloader import AbstractPaperDownloader
18
-
19
- # Configure logging
20
- logging.basicConfig(level=logging.INFO)
21
- logger = logging.getLogger(__name__)
22
-
23
-
24
- class ArxivPaperDownloader(AbstractPaperDownloader):
25
- """
26
- Downloader class for arXiv.
27
-
28
- This class interfaces with the arXiv API to fetch metadata
29
- and retrieve PDFs of academic papers based on their arXiv IDs.
30
- """
31
-
32
- def __init__(self):
33
- """
34
- Initializes the arXiv paper downloader.
35
-
36
- Uses Hydra for configuration management to retrieve API details.
37
- """
38
- with hydra.initialize(version_base=None, config_path="../../configs"):
39
- cfg = hydra.compose(
40
- config_name="config", overrides=["tools/download_arxiv_paper=default"]
41
- )
42
- self.api_url = cfg.tools.download_arxiv_paper.api_url
43
- self.request_timeout = cfg.tools.download_arxiv_paper.request_timeout
44
- self.chunk_size = cfg.tools.download_arxiv_paper.chunk_size
45
- self.pdf_base_url = cfg.tools.download_arxiv_paper.pdf_base_url
46
-
47
- def fetch_metadata(self, paper_id: str) -> Dict[str, Any]:
48
- """
49
- Fetch metadata from arXiv for a given paper ID.
50
-
51
- Args:
52
- paper_id (str): The arXiv ID of the paper.
53
-
54
- Returns:
55
- Dict[str, Any]: A dictionary containing metadata, including the XML response.
56
- """
57
- logger.info("Fetching metadata from arXiv for paper ID: %s", paper_id)
58
- api_url = f"{self.api_url}?search_query=id:{paper_id}&start=0&max_results=1"
59
- response = requests.get(api_url, timeout=self.request_timeout)
60
- response.raise_for_status()
61
- return {"xml": response.text}
62
-
63
- def download_pdf(self, paper_id: str) -> Dict[str, Any]:
64
- """
65
- Download the PDF of a paper from arXiv.
66
-
67
- This function first retrieves the paper's metadata to locate the PDF link
68
- before downloading the file.
69
-
70
- Args:
71
- paper_id (str): The arXiv ID of the paper.
72
-
73
- Returns:
74
- Dict[str, Any]: A dictionary containing:
75
- - `pdf_object`: The binary content of the downloaded PDF.
76
- - `pdf_url`: The URL from which the PDF was fetched.
77
- - `arxiv_id`: The arXiv ID of the downloaded paper.
78
- """
79
- metadata = self.fetch_metadata(paper_id)
80
-
81
- # Parse the XML response to locate the PDF link.
82
- root = ET.fromstring(metadata["xml"])
83
- ns = {"atom": "http://www.w3.org/2005/Atom"}
84
- pdf_url = next(
85
- (
86
- link.attrib.get("href")
87
- for entry in root.findall("atom:entry", ns)
88
- for link in entry.findall("atom:link", ns)
89
- if link.attrib.get("title") == "pdf"
90
- ),
91
- None,
92
- )
93
-
94
- if not pdf_url:
95
- raise RuntimeError(f"Failed to download PDF for arXiv ID {paper_id}.")
96
-
97
- logger.info("Downloading PDF from: %s", pdf_url)
98
- pdf_response = requests.get(pdf_url, stream=True, timeout=self.request_timeout)
99
- pdf_response.raise_for_status()
100
- # print (pdf_response)
101
-
102
- # Combine the PDF data from chunks.
103
- pdf_object = b"".join(
104
- chunk
105
- for chunk in pdf_response.iter_content(chunk_size=self.chunk_size)
106
- if chunk
107
- )
108
- # print (pdf_object)
109
- print("PDF_URL", pdf_url)
110
-
111
- return {
112
- "pdf_object": pdf_object,
113
- "pdf_url": pdf_url,
114
- "arxiv_id": paper_id,
115
- }
@@ -1,61 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- """
4
- This tool is used to display the table of studies.
5
- """
6
-
7
- import logging
8
- from typing import Annotated
9
- import pandas as pd
10
- from langchain_experimental.agents import create_pandas_dataframe_agent
11
- from langchain_core.tools import tool
12
- from langgraph.prebuilt import InjectedState
13
-
14
- # Configure logging
15
- logging.basicConfig(level=logging.INFO)
16
- logger = logging.getLogger(__name__)
17
-
18
-
19
- class NoPapersFoundError(Exception):
20
- """Exception raised when no papers are found in the state."""
21
-
22
-
23
- @tool("query_results", parse_docstring=True)
24
- def query_results(question: str, state: Annotated[dict, InjectedState]) -> str:
25
- """
26
- Query the last displayed papers from the state. If no papers are found,
27
- raises an exception.
28
-
29
- Use this also to get the last displayed papers from the state,
30
- and then use the papers to get recommendations for a single paper or
31
- multiple papers.
32
-
33
- Args:
34
- question (str): The question to ask the agent.
35
- state (dict): The state of the agent containing the papers.
36
-
37
- Returns:
38
- str: A message with the last displayed papers.
39
- """
40
- logger.info("Querying last displayed papers with question: %s", question)
41
- llm_model = state.get("llm_model")
42
- if not state.get("last_displayed_papers"):
43
- logger.info("No papers displayed so far, raising NoPapersFoundError")
44
- raise NoPapersFoundError(
45
- "No papers found. A search needs to be performed first."
46
- )
47
- context_key = state.get("last_displayed_papers", "pdf_data")
48
- dic_papers = state.get(context_key)
49
- df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
50
- df_agent = create_pandas_dataframe_agent(
51
- llm_model,
52
- allow_dangerous_code=True,
53
- agent_type="tool-calling",
54
- df=df_papers,
55
- max_iterations=5,
56
- include_df_in_prompt=True,
57
- number_of_head_rows=df_papers.shape[0],
58
- verbose=True,
59
- )
60
- llm_result = df_agent.invoke(question, stream_mode=None)
61
- return llm_result["output"]