PyPI - aiagents4pharma - Versions diffs - 0.0.0__py3-none-any.whl - Mend

aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py ADDED Viewed

@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+"""
+Utility for zotero read tool.
+"""
+import logging
+from typing import Any
+import hydra
+import requests
+from pyzotero import zotero
+from .zotero_path import get_item_collections
+from .zotero_pdf_downloader import download_pdfs_in_parallel
+# Configure logging.
+# NOTE(review): basicConfig() runs at import time and targets the root logger;
+# confirm that configuring global logging from this helper module is intended.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Broad `except Exception` handlers are used below, hence the pylint opt-out.
+# pylint: disable=broad-exception-caught
+class ZoteroSearchData:
+    """Helper class to organize Zotero search-related data.
+    Construction loads the hydra configuration, builds the Zotero client and
+    stores the mapping returned by ``get_item_collections`` (used below as
+    item key -> list of collection paths). ``process_search`` runs the query
+    and ``get_search_results`` exposes the outcome.
+    Args:
+        query: Search string; an empty or whitespace-only query fetches items
+            without a search term.
+        only_articles: When true, top-level items are restricted to the item
+            types listed in ``cfg.zotero.filter_item_types``.
+        limit: Maximum number of items requested for a non-empty query
+            (capped at ``cfg.zotero.max_limit``).
+        download_pdfs: Whether PDF attachments are downloaded during the
+            search or only recorded by their attachment key.
+        **_kwargs: Accepted and ignored.
+    """
+    def __init__(
+        self,
+        query: str,
+        only_articles: bool,
+        limit: int,
+        download_pdfs: bool = True,
+        **_kwargs,
+    ):
+        self.query = query
+        self.only_articles = only_articles
+        self.limit = limit
+        # Control whether to fetch PDF attachments now
+        self.download_pdfs = download_pdfs
+        self.cfg = self._load_config()
+        self.zot = self._init_zotero_client()
+        self.item_to_collections = get_item_collections(self.zot)
+        self.article_data = {}
+        self.content = ""
+        # Create a session for connection pooling
+        self.session = requests.Session()
+    def close(self) -> None:
+        """Release the pooled HTTP connections held by ``self.session``."""
+        self.session.close()
+    def process_search(self) -> None:
+        """Process the search request and prepare results.
+        Raises:
+            RuntimeError: If fetching fails or nothing usable is returned.
+        """
+        try:
+            items = self._fetch_items()
+            self._filter_and_format_papers(items)
+            self._create_content()
+        finally:
+            # The session is only needed for the PDF downloads above; drop its
+            # pooled connections instead of leaking them with the instance.
+            self.close()
+    def get_search_results(self) -> dict[str, Any]:
+        """Get the search results and content.
+        Returns:
+            A dict with ``article_data`` (item key -> metadata dict) and
+            ``content`` (the summary message).
+        """
+        return {
+            "article_data": self.article_data,
+            "content": self.content,
+        }
+    def _load_config(self) -> Any:
+        """Load hydra configuration and return its ``tools.zotero_read`` node."""
+        with hydra.initialize(version_base=None, config_path="../../../configs"):
+            cfg = hydra.compose(config_name="config", overrides=["tools/zotero_read=default"])
+            logger.info("Loaded configuration for Zotero search tool")
+            return cfg.tools.zotero_read
+    def _init_zotero_client(self) -> zotero.Zotero:
+        """Initialize Zotero client from the configured credentials."""
+        logger.info(
+            "Searching Zotero for query: '%s' (only_articles: %s, limit: %d)",
+            self.query,
+            self.only_articles,
+            self.limit,
+        )
+        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)
+    def _fetch_items(self) -> list[dict[str, Any]]:
+        """Fetch items from Zotero.
+        Returns:
+            The non-empty list of raw items returned by the Zotero client.
+        Raises:
+            RuntimeError: If the request fails or no items come back.
+        """
+        try:
+            max_limit = self.cfg.zotero.max_limit
+            if not self.query.strip():
+                logger.info(
+                    "Empty query provided, fetching all items up to max_limit: %d",
+                    max_limit,
+                )
+                items = self.zot.items(limit=max_limit)
+            else:
+                items = self.zot.items(q=self.query, limit=min(self.limit, max_limit))
+        except Exception as e:
+            logger.error("Failed to fetch items from Zotero: %s", e)
+            raise RuntimeError(
+                "Failed to fetch items from Zotero. Please retry the same query."
+            ) from e
+        logger.info("Received %d items from Zotero", len(items))
+        if not items:
+            logger.error("No items returned from Zotero for query: '%s'", self.query)
+            raise RuntimeError("No items returned from Zotero. Please retry the same query.")
+        return items
+    def _collect_item_attachments(self) -> dict[str, str]:
+        """Collect PDF attachment keys for non-orphan items.
+        Only the first PDF child of each item is used; its filename is stored
+        on the item's entry. A failed lookup is logged and that item skipped.
+        Returns:
+            Mapping of attachment key -> parent item key.
+        """
+        item_attachments: dict[str, str] = {}
+        for item_key, item_data in self.article_data.items():
+            if item_data.get("Type") == "orphan_attachment":
+                continue
+            try:
+                children = self.zot.children(item_key)
+                for child in children:
+                    data = child.get("data", {})
+                    if data.get("contentType") != "application/pdf":
+                        continue
+                    attachment_key = data.get("key")
+                    if attachment_key:
+                        item_attachments[attachment_key] = item_key
+                        item_data["filename"] = data.get("filename", "unknown.pdf")
+                        break
+            except Exception as e:
+                logger.error("Failed to get attachments for item %s: %s", item_key, e)
+        return item_attachments
+    def _download_attachments(self, attachment_map: dict[str, str]) -> dict[str, Any]:
+        """Download the given attachments through the shared HTTP session."""
+        return download_pdfs_in_parallel(
+            self.session,
+            self.cfg.user_id,
+            self.cfg.api_key,
+            attachment_map,
+            chunk_size=getattr(self.cfg, "chunk_size", None),
+        )
+    def _record_downloads(self, results: dict[str, Any], log_message: str) -> None:
+        """Store filename, local path and attachment key for each download."""
+        for item_key, (file_path, filename, attachment_key) in results.items():
+            entry = self.article_data[item_key]
+            entry["filename"] = filename
+            entry["pdf_url"] = file_path
+            entry["attachment_key"] = attachment_key
+            logger.info(log_message, file_path)
+    def _process_orphaned_pdfs(self, orphaned_pdfs: dict[str, str]) -> None:
+        """Download or record orphaned PDF attachments.
+        Args:
+            orphaned_pdfs: Mapping of attachment key -> item key (identical
+                for orphans).
+        """
+        if not orphaned_pdfs:
+            # Nothing to do; avoid handing an empty job to the downloader.
+            return
+        if self.download_pdfs:
+            logger.info("Downloading %d orphaned PDFs in parallel", len(orphaned_pdfs))
+            self._record_downloads(
+                self._download_attachments(orphaned_pdfs),
+                "Downloaded orphaned Zotero PDF to: %s",
+            )
+            return
+        logger.info("Skipping orphaned PDF downloads (download_pdfs=False)")
+        for attachment_key in orphaned_pdfs:
+            entry = self.article_data[attachment_key]
+            entry["attachment_key"] = attachment_key
+            entry["filename"] = entry.get("Title", attachment_key)
+    def _process_item_pdfs(self, item_attachments: dict[str, str]) -> None:
+        """Download or record regular item PDF attachments.
+        Args:
+            item_attachments: Mapping of attachment key -> parent item key.
+        """
+        if not item_attachments:
+            # Nothing to do; avoid handing an empty job to the downloader.
+            return
+        if self.download_pdfs:
+            logger.info("Downloading %d regular item PDFs in parallel", len(item_attachments))
+            self._record_downloads(
+                self._download_attachments(item_attachments),
+                "Downloaded Zotero PDF to: %s",
+            )
+            return
+        logger.info("Skipping regular PDF downloads (download_pdfs=False)")
+        for attachment_key, item_key in item_attachments.items():
+            self.article_data[item_key]["attachment_key"] = attachment_key
+    def _format_top_level_item(
+        self, data: dict[str, Any], item_type: str, key: str
+    ) -> dict[str, Any]:
+        """Build the metadata entry for a regular (non-attachment) item."""
+        return {
+            "Title": data.get("title", "N/A"),
+            "Abstract": data.get("abstractNote", "N/A"),
+            "Publication Date": data.get("date", "N/A"),
+            "URL": data.get("url", "N/A"),
+            "Type": item_type,
+            "Collections": self.item_to_collections.get(key, ["/Unknown"]),
+            "Citation Count": data.get("citationCount", "N/A"),
+            "Venue": data.get("venue", "N/A"),
+            "Publication Venue": data.get("publicationTitle", "N/A"),
+            "Journal Name": data.get("journalAbbreviation", "N/A"),
+            "Authors": [
+                f"{creator.get('firstName', '')} {creator.get('lastName', '')}".strip()
+                for creator in data.get("creators", [])
+                if isinstance(creator, dict) and creator.get("creatorType") == "author"
+            ],
+            "source": "zotero",
+        }
+    @staticmethod
+    def _format_orphan_attachment(filename: str) -> dict[str, Any]:
+        """Build the placeholder entry for a standalone PDF attachment."""
+        return {
+            "Title": filename,
+            "Abstract": "No abstract available",
+            "Publication Date": "N/A",
+            "URL": "N/A",
+            "Type": "orphan_attachment",
+            "Collections": ["/(No Collection)"],
+            "Citation Count": "N/A",
+            "Venue": "N/A",
+            "Publication Venue": "N/A",
+            "Journal Name": "N/A",
+            "Authors": ["(Unknown)"],
+            "source": "zotero",
+        }
+    def _filter_and_format_papers(self, items: list[dict[str, Any]]) -> None:
+        """Filter and format papers from Zotero items, including standalone PDFs.
+        Populates ``self.article_data`` and then resolves PDF attachments.
+        Raises:
+            RuntimeError: If no item survives filtering.
+        """
+        filter_item_types = self.cfg.zotero.filter_item_types if self.only_articles else []
+        logger.debug("Filtering item types: %s", filter_item_types)
+        # attachment_key -> item key (same for orphans)
+        orphaned_pdfs: dict[str, str] = {}
+        # First pass: process all items without downloading PDFs
+        for item in items:
+            if not isinstance(item, dict):
+                continue
+            data = item.get("data", {})
+            item_type = data.get("itemType", "N/A")
+            key = data.get("key")
+            if not key:
+                continue
+            # CASE 1: Top-level item (e.g., journalArticle)
+            if item_type != "attachment":
+                # Apply the only_articles filter, which was previously computed
+                # but never used. Orphan PDFs below are not affected by it.
+                if filter_item_types and item_type not in filter_item_types:
+                    continue
+                self.article_data[key] = self._format_top_level_item(data, item_type, key)
+            # CASE 2: Standalone orphaned PDF attachment
+            elif data.get("contentType") == "application/pdf" and not data.get("parentItem"):
+                # Same key serves as both attachment and "item"
+                orphaned_pdfs[key] = key
+                self.article_data[key] = self._format_orphan_attachment(
+                    data.get("filename", "unknown.pdf")
+                )
+        # Second pass: collect and process attachments
+        item_attachments = self._collect_item_attachments()
+        self._process_orphaned_pdfs(orphaned_pdfs)
+        self._process_item_pdfs(item_attachments)
+        # Ensure we have some results
+        if not self.article_data:
+            logger.error("No matching papers returned from Zotero for query: '%s'", self.query)
+            raise RuntimeError(
+                "No matching papers returned from Zotero. Please retry the same query."
+            )
+        logger.info("Filtered %d items (including orphaned attachments)", len(self.article_data))
+    def _create_content(self) -> None:
+        """Create the content message for the response.
+        The message lists the result count, the query and the first two
+        entries of ``self.article_data``.
+        """
+        top_papers = list(self.article_data.values())[:2]
+        top_papers_info = "\n".join(
+            f"{i + 1}. {paper['Title']} ({paper['Type']})" for i, paper in enumerate(top_papers)
+        )
+        self.content = "".join(
+            [
+                "Retrieval was successful. Papers are attached as an artifact.",
+                " And here is a summary of the retrieval results:\n",
+                f"Number of papers found: {len(self.article_data)}\n",
+                f"Query: {self.query}\n",
+                "Here are a few of these papers:\n",
+                top_papers_info,
+            ]
+        )

aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py ADDED Viewed

@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+"""
+Utility for reviewing papers and saving them to Zotero.
+"""
+import logging
+# Configure logging.
+# NOTE(review): basicConfig() runs at import time and targets the root logger;
+# confirm that configuring global logging from this helper module is intended.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+class ReviewData:
+    """Helper class to organize review-related data.
+    On construction it precomputes a short preview of the fetched papers and
+    the review payload asking a human whether to save them to Zotero.
+    Args:
+        collection_path: Target Zotero collection path shown to the reviewer.
+        fetched_papers: Mapping of paper ID -> paper metadata dict; the
+            ``Title`` and ``Authors`` keys are read here.
+        tool_call_id: Stored as given; not used inside this class.
+        state: Stored as given; not used inside this class.
+    """
+    # Number of papers listed in the preview before it is truncated.
+    MAX_PREVIEW_PAPERS = 5
+    # Number of author names shown per paper before "et al." is appended.
+    MAX_PREVIEW_AUTHORS = 2
+    def __init__(
+        self,
+        collection_path: str,
+        fetched_papers: dict,
+        tool_call_id: str,
+        state: dict,
+    ):
+        self.collection_path = collection_path
+        self.fetched_papers = fetched_papers
+        self.tool_call_id = tool_call_id
+        self.state = state
+        self.total_papers = len(fetched_papers)
+        self.papers_summary = self._create_papers_summary()
+        self.papers_preview = "\n".join(self.papers_summary)
+        self.review_info = self._create_review_info()
+    def get_approval_message(self) -> str:
+        """Get the formatted approval message for the review."""
+        return (
+            f"Human approved saving {self.total_papers} papers to Zotero "
+            f"collection '{self.collection_path}'."
+        )
+    def get_custom_path_approval_message(self, custom_path: str) -> str:
+        """Get the formatted approval message for a custom collection path."""
+        return f"Human approved saving papers to custom Zotero collection '{custom_path}'."
+    def _format_authors(self, authors) -> str:
+        """Return up to MAX_PREVIEW_AUTHORS names, with "et al." if truncated.
+        A trailing `` (ID: ...)`` suffix is stripped from each name. A missing
+        or ``None`` author list yields an empty string.
+        """
+        authors = authors or []
+        shown = [author.split(" (ID: ")[0] for author in authors[: self.MAX_PREVIEW_AUTHORS]]
+        text = ", ".join(shown)
+        if len(authors) > self.MAX_PREVIEW_AUTHORS:
+            text += " et al."
+        return text
+    def _create_papers_summary(self) -> list[str]:
+        """Create a summary of papers for review.
+        Returns:
+            One ``- <title> by <authors>`` line per previewed paper (the
+            ``by`` part is dropped when no authors are known), followed by a
+            ``... and N more papers`` line when the preview is truncated.
+        """
+        summary = []
+        for index, (paper_id, paper) in enumerate(self.fetched_papers.items()):
+            # Stop at the preview limit without copying the whole mapping.
+            if index >= self.MAX_PREVIEW_PAPERS:
+                break
+            logger.info("Paper ID: %s", paper_id)
+            title = paper.get("Title", "N/A")
+            authors = self._format_authors(paper.get("Authors"))
+            # Avoid a dangling "by " when the paper has no author entries.
+            summary.append(f"- {title} by {authors}" if authors else f"- {title}")
+        remaining = self.total_papers - self.MAX_PREVIEW_PAPERS
+        if remaining > 0:
+            summary.append(f"... and {remaining} more papers")
+        return summary
+    def _create_review_info(self) -> dict:
+        """Create the review information dictionary.
+        Returns:
+            Dict with the action name, target collection, paper count, the
+            preview text and the question put to the reviewer.
+        """
+        return {
+            "action": "save_to_zotero",
+            "collection_path": self.collection_path,
+            "total_papers": self.total_papers,
+            "papers_preview": self.papers_preview,
+            "message": (
+                f"Would you like to save {self.total_papers} papers to Zotero "
+                f"collection '{self.collection_path}'? Please respond with a "
+                f"structured decision using one of the following options: 'approve', "
+                f"'reject', or 'custom' (with a custom_path)."
+            ),
+        }

aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py ADDED Viewed

@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+"""
+Utility for zotero write tool.
+"""
+import logging
+from typing import Any
+import hydra
+from pyzotero import zotero
+from .zotero_path import (
+    fetch_papers_for_save,
+    find_or_create_collection,
+)
+# Configure logging.
+# NOTE(review): basicConfig() runs at import time and targets the root logger;
+# confirm that configuring global logging from this helper module is intended.
+logging.basicConfig(level=logging.INFO)
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+class ZoteroWriteData:
+    """Helper class to organize Zotero write-related data.
+    Construction loads the hydra configuration, builds the Zotero client and
+    reads the papers to save via ``fetch_papers_for_save(state)``.
+    ``process_write`` then validates, formats and uploads them.
+    Args:
+        tool_call_id: Stored as given; not used inside this class.
+        collection_path: Requested Zotero collection path.
+        state: State dict; passed to ``fetch_papers_for_save`` and read for
+            its optional ``query`` entry.
+    """
+    # pyzotero refuses create_items() calls with more than 50 items, so
+    # uploads are split into batches of at most this size.
+    MAX_ITEMS_PER_REQUEST = 50
+    def __init__(
+        self,
+        tool_call_id: str,
+        collection_path: str,
+        state: dict,
+    ):
+        self.tool_call_id = tool_call_id
+        self.collection_path = collection_path
+        self.state = state
+        self.cfg = self._load_config()
+        self.zot = self._init_zotero_client()
+        self.fetched_papers = fetch_papers_for_save(state)
+        # Lookup form of the path: trailing slashes removed, lower-cased.
+        self.normalized_path = collection_path.rstrip("/").lower()
+        self.zotero_items = []
+        self.content = ""
+    def _load_config(self) -> Any:
+        """Load hydra configuration and return its ``tools.zotero_write`` node."""
+        with hydra.initialize(version_base=None, config_path="../../../configs"):
+            cfg = hydra.compose(config_name="config", overrides=["tools/zotero_write=default"])
+            logger.info("Loaded configuration for Zotero write tool")
+            return cfg.tools.zotero_write
+    def _init_zotero_client(self) -> zotero.Zotero:
+        """Initialize Zotero client from the configured credentials."""
+        logger.info(
+            "Saving fetched papers to Zotero under collection path: %s",
+            self.collection_path,
+        )
+        return zotero.Zotero(self.cfg.user_id, self.cfg.library_type, self.cfg.api_key)
+    def _validate_papers(self) -> None:
+        """Validate that papers exist to save.
+        Raises:
+            ValueError: If there are no fetched papers.
+        """
+        if not self.fetched_papers:
+            raise ValueError(
+                "No fetched papers were found to save. "
+                "Please retrieve papers using Zotero Read or Semantic Scholar first."
+            )
+    def _find_collection(self) -> str:
+        """Find the target collection (it is never created here).
+        Returns:
+            The key of the collection matching ``self.normalized_path``.
+        Raises:
+            ValueError: If no collection matches; the message lists the names
+                of the collections that were found.
+        """
+        matched_collection_key = find_or_create_collection(
+            self.zot, self.normalized_path, create_missing=False
+        )
+        if matched_collection_key:
+            return matched_collection_key
+        names_display = ", ".join(col["data"]["name"] for col in self.zot.collections())
+        raise ValueError(
+            f"Error: The collection path '{self.collection_path}' does "
+            f"not exist in Zotero. "
+            f"Available collections are: {names_display}. "
+            f"Please try saving to one of these existing collections."
+        )
+    @staticmethod
+    def _to_creator(name: str) -> dict[str, str]:
+        """Convert a display name into a Zotero ``author`` creator dict.
+        The text before the first space becomes ``firstName`` and the rest
+        ``lastName``; a name without a space is stored as ``lastName`` only.
+        """
+        first_name, separator, last_name = name.partition(" ")
+        if separator:
+            return {"creatorType": "author", "firstName": first_name, "lastName": last_name}
+        return {"creatorType": "author", "lastName": name}
+    def _format_papers_for_zotero(self, matched_collection_key: str) -> None:
+        """Format papers for Zotero and assign to the specified collection.
+        Rebuilds ``self.zotero_items`` from ``self.fetched_papers``; every
+        paper is written as a ``journalArticle``.
+        """
+        # Start from scratch so a repeated call cannot queue duplicates.
+        self.zotero_items = []
+        for paper_id, paper in self.fetched_papers.items():
+            citations = paper.get("Citation Count", "N/A")
+            venue = paper.get("Venue", "N/A")
+            publication_venue = paper.get("Publication Venue", "N/A")
+            item = {
+                "itemType": "journalArticle",
+                "title": paper.get("Title", "N/A"),
+                "abstractNote": paper.get("Abstract", "N/A"),
+                "date": paper.get("Publication Date", "N/A"),
+                "url": paper.get("URL", "N/A"),
+                "extra": f"Paper ID: {paper_id}\nCitations: {citations}",
+                "collections": [matched_collection_key],
+                "publicationTitle": (publication_venue if publication_venue != "N/A" else venue),
+                "journalAbbreviation": paper.get("Journal Name", "N/A"),
+                "creators": [
+                    # Drop the " (ID: ...)" suffix some sources append to names.
+                    self._to_creator(author.split(" (ID: ")[0])
+                    for author in paper.get("Authors", [])
+                ],
+            }
+            # Leave unknown volume/pages out rather than sending them as null.
+            for field, source_key in (("volume", "Journal Volume"), ("pages", "Journal Pages")):
+                value = paper.get(source_key, "N/A")
+                if value is not None and value != "N/A":
+                    item[field] = value
+            self.zotero_items.append(item)
+    def _save_to_zotero(self) -> None:
+        """Save items to Zotero in batches of ``MAX_ITEMS_PER_REQUEST``.
+        Raises:
+            RuntimeError: If any upload call fails; batches sent before the
+                failure are not rolled back.
+        """
+        batch_size = self.MAX_ITEMS_PER_REQUEST
+        try:
+            for start in range(0, len(self.zotero_items), batch_size):
+                response = self.zot.create_items(self.zotero_items[start : start + batch_size])
+                # Zotero can report per-item failures in the response without
+                # an exception being raised, so surface them in the log.
+                failed = response.get("failed") if isinstance(response, dict) else None
+                if failed:
+                    logger.warning("Zotero rejected %d item(s): %s", len(failed), failed)
+                logger.info("Papers successfully saved to Zotero: %s", response)
+        except Exception as e:
+            logger.error("Error saving to Zotero: %s", str(e))
+            raise RuntimeError(f"Error saving papers to Zotero: {str(e)}") from e
+    def _create_content(self, collection_name: str) -> None:
+        """Create the content message for the response.
+        Args:
+            collection_name: Display name of the collection that was written.
+        """
+        top_papers = list(self.fetched_papers.values())[:2]
+        top_papers_info = "\n".join(
+            f"{i + 1}. {paper.get('Title', 'N/A')} ({paper.get('URL', 'N/A')})"
+            for i, paper in enumerate(top_papers)
+        )
+        self.content = "".join(
+            [
+                "Save was successful. Papers have been saved to Zotero collection ",
+                f"'{collection_name}' with the requested path '{self.get_collection_path()}'.\n",
+                "Summary of saved papers:\n",
+                f"Number of articles saved: {self.get_paper_count()}\n",
+                f"Query: {self.state.get('query', 'N/A')}\n",
+                "Here are a few of these articles:\n",
+                top_papers_info,
+            ]
+        )
+    def process_write(self) -> dict[str, Any]:
+        """Process the write operation and return results.
+        Returns:
+            Dict with ``content`` (summary message) and ``fetched_papers``.
+        Raises:
+            ValueError: If there is nothing to save or the collection is missing.
+            RuntimeError: If the upload to Zotero fails.
+        """
+        self._validate_papers()
+        matched_collection_key = self._find_collection()
+        self._format_papers_for_zotero(matched_collection_key)
+        self._save_to_zotero()
+        # Get collection name for feedback; stays empty if the key is not listed.
+        collection_name = next(
+            (
+                col["data"]["name"]
+                for col in self.zot.collections()
+                if col["key"] == matched_collection_key
+            ),
+            "",
+        )
+        self._create_content(collection_name)
+        return {
+            "content": self.content,
+            "fetched_papers": self.fetched_papers,
+        }
+    def get_paper_count(self) -> int:
+        """Get the number of papers to be saved.
+        Returns:
+            int: The number of papers in the fetched papers dictionary.
+        """
+        return len(self.fetched_papers)
+    def get_collection_path(self) -> str:
+        """Get the collection path as originally requested.
+        Returns:
+            str: The unmodified ``collection_path`` given at construction
+            (the normalized form is kept in ``normalized_path``).
+        """
+        return self.collection_path