aiagents4pharma 1.36.0__py3-none-any.whl → 1.38.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +12 -4
  2. aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +2 -2
  3. aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +7 -6
  4. aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +1 -0
  5. aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
  6. aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +1 -0
  7. aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +12 -11
  8. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +152 -0
  9. aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +36 -65
  10. aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +1 -0
  11. aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
  12. aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +1 -0
  13. aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +292 -0
  14. aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +1 -0
  15. aiagents4pharma/talk2scholars/state/state_talk2scholars.py +33 -7
  16. aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +59 -3
  17. aiagents4pharma/talk2scholars/tests/test_read_helper_utils.py +110 -0
  18. aiagents4pharma/talk2scholars/tests/test_s2_display.py +20 -1
  19. aiagents4pharma/talk2scholars/tests/test_s2_query.py +17 -0
  20. aiagents4pharma/talk2scholars/tests/test_state.py +25 -1
  21. aiagents4pharma/talk2scholars/tests/test_zotero_pdf_downloader_utils.py +46 -0
  22. aiagents4pharma/talk2scholars/tests/test_zotero_read.py +35 -40
  23. aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +62 -40
  24. aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +6 -2
  25. aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +2 -1
  26. aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +7 -3
  27. aiagents4pharma/talk2scholars/tools/s2/search.py +2 -1
  28. aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +2 -1
  29. aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +79 -136
  30. aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +147 -0
  31. aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +42 -9
  32. {aiagents4pharma-1.36.0.dist-info → aiagents4pharma-1.38.0.dist-info}/METADATA +2 -1
  33. {aiagents4pharma-1.36.0.dist-info → aiagents4pharma-1.38.0.dist-info}/RECORD +36 -29
  34. {aiagents4pharma-1.36.0.dist-info → aiagents4pharma-1.38.0.dist-info}/WHEEL +1 -1
  35. {aiagents4pharma-1.36.0.dist-info → aiagents4pharma-1.38.0.dist-info}/licenses/LICENSE +0 -0
  36. {aiagents4pharma-1.36.0.dist-info → aiagents4pharma-1.38.0.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,10 @@
 """
-Tool for performing Q&A on PDF documents using retrieval augmented generation.
-This module provides functionality to load PDFs from URLs, split them into
-chunks, retrieve relevant segments via semantic search, and generate answers
-to user-provided questions using a language model chain.
+PDF Question & Answer Tool
+
+This LangGraph tool answers user questions by leveraging a pre-built FAISS vector store
+of embedded PDF document chunks. Given a question, it retrieves the most relevant text
+segments from the loaded PDFs, invokes an LLM for answer generation, and returns the
+response with source attribution.
 """
 
 import logging
@@ -52,19 +54,18 @@ def load_hydra_config() -> Any:
 
 class QuestionAndAnswerInput(BaseModel):
     """
-    Input schema for the PDF Question and Answer tool.
-
-    This schema defines the inputs required for querying academic or research-related
-    PDFs to answer a specific question using a language model and document retrieval.
+    Input schema for the PDF Q&A tool.
 
     Attributes:
-        question (str): The question to ask regarding the PDF content.
-        paper_ids (Optional[List[str]]): Optional list of specific paper IDs to query.
-            If not provided, the system will determine relevant papers automatically.
-        use_all_papers (bool): Whether to use all available papers for answering the question.
-            If True, the system will include all loaded papers regardless of relevance filtering.
-        tool_call_id (str): Unique identifier for the tool call, injected automatically.
-        state (dict): Shared application state, injected automatically.
+        question (str): Free-text question to answer based on PDF content.
+        paper_ids (Optional[List[str]]): If provided, restricts retrieval to these paper IDs.
+        use_all_papers (bool): If True, include all loaded papers without semantic ranking.
+        tool_call_id (str): Internal ID injected by LangGraph for this tool call.
+        state (dict): Shared agent state containing:
+            - 'article_data': dict of paper metadata with 'pdf_url' keys
+            - 'text_embedding_model': embedding model instance
+            - 'llm_model': chat/LLM instance
+            - 'vector_store': pre-built Vectorstore for retrieval
     """
 
     question: str = Field(description="The question to ask regarding the PDF content.")
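Editor's note: for orientation, a minimal sketch of how this schema is exercised from an agent. The state keys mirror the docstring above; the model classes and explicit invocation style are illustrative assumptions, not part of this diff (LangGraph normally injects `tool_call_id` and `state`).

```python
# Illustrative only: model classes and values are placeholders.
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

state = {
    "article_data": {"paper1": {"pdf_url": "https://example.org/paper1.pdf"}},
    "text_embedding_model": OpenAIEmbeddings(),
    "llm_model": ChatOpenAI(),
}

# LangGraph normally injects tool_call_id and state; passed explicitly here.
result = question_and_answer.invoke({
    "question": "What methods does paper1 use?",
    "tool_call_id": "call-1",
    "state": state,
})
```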
@@ -119,6 +120,8 @@ class Vectorstore:
         self.documents: Dict[str, Document] = {}
         self.vector_store: Optional[VectorStore] = None
         self.paper_metadata: Dict[str, Dict[str, Any]] = {}
+        # Cache for document chunk embeddings to avoid recomputation
+        self.embeddings: Dict[str, Any] = {}
 
     def add_paper(
         self,
@@ -160,6 +163,10 @@ class Vectorstore:
         # Split documents and add metadata for each chunk
         chunks = splitter.split_documents(documents)
         logger.info("Split %s into %d chunks", paper_id, len(chunks))
+        # Embed and cache chunk embeddings
+        chunk_texts = [chunk.page_content for chunk in chunks]
+        chunk_embeddings = self.embedding_model.embed_documents(chunk_texts)
+        logger.info("Embedded %d chunks for paper %s", len(chunk_embeddings), paper_id)
 
         # Enhance document metadata
         for i, chunk in enumerate(chunks):
@@ -182,6 +189,9 @@ class Vectorstore:
             # Store chunk
             doc_id = f"{paper_id}_{i}"
             self.documents[doc_id] = chunk
+            # Cache embedding if available
+            if chunk_embeddings[i] is not None:
+                self.embeddings[doc_id] = chunk_embeddings[i]
 
         # Mark as loaded to prevent duplicate loading
         self.loaded_papers.add(paper_id)
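Editor's note: the two additions above amount to an embed-once cache keyed by `{paper_id}_{chunk_id}`. A standalone sketch of the same pattern, with illustrative names rather than the package's own API:

```python
from typing import Any, Dict, List

class EmbeddingCache:
    """Embed each chunk once and reuse the vector on later retrievals."""

    def __init__(self, embedding_model: Any) -> None:
        self.embedding_model = embedding_model
        self.embeddings: Dict[str, List[float]] = {}

    def get(self, doc_id: str, text: str) -> List[float]:
        # Compute and store only on a cache miss.
        if doc_id not in self.embeddings:
            self.embeddings[doc_id] = self.embedding_model.embed_documents([text])[0]
        return self.embeddings[doc_id]
```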
@@ -295,12 +305,16 @@ class Vectorstore:
             logger.warning("No documents found after filtering by paper_ids.")
             return []
 
-        texts = [doc.page_content for doc in all_docs]
-
-        # Step 3: Batch embed all documents
-        logger.info("Starting batch embedding for %d chunks...", len(texts))
-        all_embeddings = self.embedding_model.embed_documents(texts)
-        logger.info("Completed embedding for %d chunks...", len(texts))
+        # Step 3: Retrieve or compute embeddings for all documents using cache
+        logger.info("Retrieving embeddings for %d chunks...", len(all_docs))
+        all_embeddings = []
+        for doc in all_docs:
+            doc_id = f"{doc.metadata['paper_id']}_{doc.metadata['chunk_id']}"
+            if doc_id not in self.embeddings:
+                logger.info("Embedding missing chunk %s", doc_id)
+                emb = self.embedding_model.embed_documents([doc.page_content])[0]
+                self.embeddings[doc_id] = emb
+            all_embeddings.append(self.embeddings[doc_id])
 
         # Step 4: Apply MMR
         mmr_indices = maximal_marginal_relevance(
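Editor's note: the cached vectors feed LangChain's `maximal_marginal_relevance`, which balances relevance to the query against redundancy among the selected chunks. A hedged usage sketch with toy vectors; the import path matches recent langchain-core releases and may differ in older ones, and the parameter values are illustrative:

```python
import numpy as np
from langchain_core.vectorstores.utils import maximal_marginal_relevance

# Toy vectors standing in for the cached chunk embeddings above.
query_embedding = np.array([0.1, 0.9])
all_embeddings = [[0.1, 0.9], [0.11, 0.89], [0.9, 0.1]]

# lambda_mult=1.0 favors pure relevance; 0.0 favors pure diversity.
mmr_indices = maximal_marginal_relevance(
    query_embedding, all_embeddings, lambda_mult=0.5, k=2
)
print(mmr_indices)  # indices of the selected, de-duplicated chunks
```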
@@ -392,6 +406,10 @@ def generate_answer(
     }
 
 
+# Shared pre-built Vectorstore for RAG (set externally, e.g., by Streamlit startup)
+prebuilt_vector_store: Optional[Vectorstore] = None
+
+
 @tool(args_schema=QuestionAndAnswerInput, parse_docstring=True)
 def question_and_answer(
     question: str,
@@ -401,30 +419,29 @@ def question_and_answer(
     use_all_papers: bool = False,
 ) -> Command[Any]:
     """
-    Answer a question using PDF content with advanced retrieval augmented generation.
+    Generate an answer to a user question using Retrieval-Augmented Generation (RAG) over PDFs.
 
-    This tool retrieves PDF documents from URLs, processes them using semantic search,
-    and generates an answer to the user's question based on the most relevant content.
-    It can work with multiple papers simultaneously and provides source attribution.
+    This tool expects that a FAISS vector store of PDF document chunks has already been built
+    and stored in shared state. It retrieves the most relevant chunks for the input question,
+    invokes an LLM to craft a response, and returns the answer with source attribution.
 
     Args:
-        question (str): The question to answer based on PDF content.
-        paper_ids (Optional[List[str]]): Optional list of specific paper IDs to query.
-        use_all_papers (bool): Whether to use all available papers.
-        tool_call_id (str): Unique identifier for the current tool call.
-        state (dict): Current state dictionary containing article data and required models.
-            Expected keys:
-            - "article_data": Dictionary containing article metadata including PDF URLs
-            - "text_embedding_model": Model for generating embeddings
-            - "llm_model": Language model for generating answers
-            - "vector_store": Optional Vectorstore instance
+        question (str): The free-text question to answer.
+        state (dict): Injected agent state mapping that must include:
+            - 'article_data': mapping of paper IDs to metadata (including 'pdf_url')
+            - 'text_embedding_model': the embedding model instance
+            - 'llm_model': the chat/LLM instance
+        tool_call_id (str): Internal identifier for this tool call.
+        paper_ids (Optional[List[str]]): Specific paper IDs to restrict retrieval (default: None).
+        use_all_papers (bool): If True, bypasses semantic ranking and includes all papers.
 
     Returns:
-        Dict[str, Any]: A dictionary wrapped in a Command that updates the conversation
-            with either the answer or an error message.
+        Command[Any]: A LangGraph Command that updates the conversation state:
+            - 'messages': a single ToolMessage containing the generated answer text.
 
     Raises:
-        ValueError: If required components are missing or if PDF processing fails.
+        ValueError: If required models or 'article_data' are missing from state.
+        RuntimeError: If no relevant document chunks can be retrieved.
     """
     # Load configuration
     config = load_hydra_config()
@@ -456,8 +473,13 @@ def question_and_answer(
         logger.error("%s: %s", call_id, error_msg)
         raise ValueError(error_msg)
 
-    # Always use a fresh in-memory document store for this Q&A call
-    vector_store = Vectorstore(embedding_model=text_embedding_model)
+    # Use shared pre-built Vectorstore if provided, else create a new one
+    if prebuilt_vector_store is not None:
+        vector_store = prebuilt_vector_store
+        logger.info("Using shared pre-built vector store from the memory")
+    else:
+        vector_store = Vectorstore(embedding_model=text_embedding_model)
+        logger.info("Initialized new vector store (no pre-built store found)")
 
     # Check if there are papers from different sources
     has_uploaded_papers = any(
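Editor's note: together with the module-level `prebuilt_vector_store` added earlier, this fallback lets a host app build the index once at startup and share it across tool calls. A hedged wiring sketch; the embedding model is a placeholder, and `add_paper`'s full signature is abbreviated in this diff:

```python
# Hypothetical startup wiring; embedding_model is a placeholder instance.
from aiagents4pharma.talk2scholars.tools.pdf import question_and_answer as qa

store = qa.Vectorstore(embedding_model=embedding_model)
# ... populate the store via store.add_paper(...) for each loaded PDF ...

# Later question_and_answer calls reuse this store instead of rebuilding.
qa.prebuilt_vector_store = store
```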
@@ -66,8 +66,12 @@ def display_dataframe(
         NoPapersFoundError: If no entries exist under 'last_displayed_papers' in state.
     """
     logger.info("Displaying papers")
-    context_key = state.get("last_displayed_papers")
-    artifact = state.get(context_key)
+    context_val = state.get("last_displayed_papers")
+    # Support both key reference (str) and direct mapping
+    if isinstance(context_val, dict):
+        artifact = context_val
+    else:
+        artifact = state.get(context_val)
     if not artifact:
         logger.info("No papers found in state, raising NoPapersFoundError")
         raise NoPapersFoundError(
@@ -71,7 +71,8 @@ def get_multi_paper_recommendations(
     return Command(
         update={
             "multi_papers": results["papers"],
-            "last_displayed_papers": "multi_papers",
+            # Store the latest multi-paper results mapping directly for display
+            "last_displayed_papers": results["papers"],
             "messages": [
                 ToolMessage(
                     content=results["content"],
@@ -49,13 +49,17 @@ def query_dataframe(question: str, state: Annotated[dict, InjectedState]) -> str
     """
     logger.info("Querying last displayed papers with question: %s", question)
     llm_model = state.get("llm_model")
-    if not state.get("last_displayed_papers"):
+    context_val = state.get("last_displayed_papers")
+    if not context_val:
         logger.info("No papers displayed so far, raising NoPapersFoundError")
         raise NoPapersFoundError(
             "No papers found. A search needs to be performed first."
         )
-    context_key = state.get("last_displayed_papers")
-    dic_papers = state.get(context_key)
+    # Support both key reference (str) and direct mapping
+    if isinstance(context_val, dict):
+        dic_papers = context_val
+    else:
+        dic_papers = state.get(context_val)
     df_papers = pd.DataFrame.from_dict(dic_papers, orient="index")
     df_agent = create_pandas_dataframe_agent(
         llm_model,
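Editor's note: both the display and query tools now accept `last_displayed_papers` either as a string key into state (the old convention) or as the papers mapping itself (what the search and recommendation tools below now store). A small self-contained sketch of the normalization, mirroring the branches above:

```python
def resolve_papers(state: dict) -> dict:
    """Return the papers mapping whether stored directly or referenced by key."""
    context_val = state.get("last_displayed_papers")
    if isinstance(context_val, dict):
        return context_val           # new style: mapping stored directly
    return state.get(context_val)    # old style: string key into state

# Old style still resolves...
assert resolve_papers({"last_displayed_papers": "papers", "papers": {"p1": {}}}) == {"p1": {}}
# ...and so does the new direct mapping.
assert resolve_papers({"last_displayed_papers": {"p1": {}}}) == {"p1": {}}
```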
@@ -65,7 +65,8 @@ def search_tool(
     return Command(
         update={
             "papers": results["papers"],
-            "last_displayed_papers": "papers",
+            # Store the latest results mapping directly for display
+            "last_displayed_papers": results["papers"],
             "messages": [
                 ToolMessage(
                     content=results["content"],
@@ -69,7 +69,8 @@ def get_single_paper_recommendations(
     return Command(
         update={
             "papers": results["papers"],
-            "last_displayed_papers": "papers",
+            # Store the latest single-paper results mapping directly for display
+            "last_displayed_papers": results["papers"],
             "messages": [
                 ToolMessage(
                     content=results["content"],
@@ -5,15 +5,14 @@ Utility for zotero read tool.
 """
 
 import logging
-import tempfile
-from typing import Any, Dict, List, Tuple, Optional
-import concurrent.futures
+from typing import Any, Dict, List
 
 import hydra
 import requests
 from pyzotero import zotero
 
 from .zotero_path import get_item_collections
+from .zotero_pdf_downloader import download_pdfs_in_parallel
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -30,12 +29,14 @@ class ZoteroSearchData:
         query: str,
         only_articles: bool,
         limit: int,
-        tool_call_id: str,
+        download_pdfs: bool = True,
+        **_kwargs,
     ):
         self.query = query
         self.only_articles = only_articles
         self.limit = limit
-        self.tool_call_id = tool_call_id
+        # Control whether to fetch PDF attachments now
+        self.download_pdfs = download_pdfs
         self.cfg = self._load_config()
         self.zot = self._init_zotero_client()
         self.item_to_collections = get_item_collections(self.zot)
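Editor's note: with this constructor change, callers opt out of eager PDF fetching via `download_pdfs`, and `**_kwargs` absorbs the now-removed `tool_call_id` for backward compatibility. A brief construction sketch; the query values are placeholders:

```python
# Metadata-only search: record attachment keys now, fetch files later.
search = ZoteroSearchData(
    query="CRISPR delivery",
    only_articles=True,
    limit=20,
    download_pdfs=False,
)
```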
@@ -105,89 +106,75 @@ class ZoteroSearchData:
 
         return items
 
-    def _download_zotero_pdf(self, attachment_key: str) -> Optional[Tuple[str, str]]:
-        """Download a PDF from Zotero by attachment key. Returns (file_path, filename) or None."""
-        zotero_pdf_url = (
-            f"https://api.zotero.org/users/{self.cfg.user_id}/items/"
-            f"{attachment_key}/file"
-        )
-        headers = {"Zotero-API-Key": self.cfg.api_key}
+    def _collect_item_attachments(self) -> Dict[str, str]:
+        """Collect PDF attachment keys for non-orphan items."""
+        item_attachments: Dict[str, str] = {}
+        for item_key, item_data in self.article_data.items():
+            if item_data.get("Type") == "orphan_attachment":
+                continue
+            try:
+                children = self.zot.children(item_key)
+                for child in children:
+                    data = child.get("data", {})
+                    if data.get("contentType") == "application/pdf":
+                        attachment_key = data.get("key")
+                        filename = data.get("filename", "unknown.pdf")
+                        if attachment_key:
+                            item_attachments[attachment_key] = item_key
+                            self.article_data[item_key]["filename"] = filename
+                        break
+            except Exception as e:
+                logger.error("Failed to get attachments for item %s: %s", item_key, e)
+        return item_attachments
+
+    def _process_orphaned_pdfs(self, orphaned_pdfs: Dict[str, str]) -> None:
+        """Download or record orphaned PDF attachments."""
+        if self.download_pdfs:
+            logger.info("Downloading %d orphaned PDFs in parallel", len(orphaned_pdfs))
+            results = download_pdfs_in_parallel(
+                self.session,
+                self.cfg.user_id,
+                self.cfg.api_key,
+                orphaned_pdfs,
+                chunk_size=getattr(self.cfg, "chunk_size", None),
+            )
+            for item_key, (file_path, filename, attachment_key) in results.items():
+                self.article_data[item_key]["filename"] = filename
+                self.article_data[item_key]["pdf_url"] = file_path
+                self.article_data[item_key]["attachment_key"] = attachment_key
+                logger.info("Downloaded orphaned Zotero PDF to: %s", file_path)
+        else:
+            logger.info("Skipping orphaned PDF downloads (download_pdfs=False)")
+            for attachment_key in orphaned_pdfs:
+                self.article_data[attachment_key]["attachment_key"] = attachment_key
+                self.article_data[attachment_key]["filename"] = (
+                    self.article_data[attachment_key].get("Title", attachment_key)
+                )
 
-        try:
-            # Use session for connection pooling
-            response = self.session.get(
-                zotero_pdf_url, headers=headers, stream=True, timeout=10
+    def _process_item_pdfs(self, item_attachments: Dict[str, str]) -> None:
+        """Download or record regular item PDF attachments."""
+        if self.download_pdfs:
+            logger.info(
+                "Downloading %d regular item PDFs in parallel", len(item_attachments)
             )
-            response.raise_for_status()
-
-            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-                # Increased chunk size for better performance
-                for chunk in response.iter_content(chunk_size=16384):
-                    temp_file.write(chunk)
-                temp_file_path = temp_file.name
-
-            content_disp = response.headers.get("Content-Disposition", "")
-            filename = (
-                content_disp.split("filename=")[-1].strip('"')
-                if "filename=" in content_disp
-                else "downloaded.pdf"
+            results = download_pdfs_in_parallel(
+                self.session,
+                self.cfg.user_id,
+                self.cfg.api_key,
+                item_attachments,
+                chunk_size=getattr(self.cfg, "chunk_size", None),
            )
+        else:
+            logger.info("Skipping regular PDF downloads (download_pdfs=False)")
+            results = {}
+            for attachment_key, item_key in item_attachments.items():
+                self.article_data[item_key]["attachment_key"] = attachment_key
+        for item_key, (file_path, filename, attachment_key) in results.items():
+            self.article_data[item_key]["filename"] = filename
+            self.article_data[item_key]["pdf_url"] = file_path
+            self.article_data[item_key]["attachment_key"] = attachment_key
+            logger.info("Downloaded Zotero PDF to: %s", file_path)
 
-            return temp_file_path, filename
-
-        except Exception as e:
-            logger.error(
-                "Failed to download Zotero PDF for attachment %s: %s", attachment_key, e
-            )
-            return None
-
-    def _download_pdfs_in_parallel(
-        self, attachment_item_map: Dict[str, str]
-    ) -> Dict[str, Tuple[str, str, str]]:
-        """
-        Download multiple PDFs in parallel using ThreadPoolExecutor.
-
-        Args:
-            attachment_item_map: Dictionary mapping attachment keys to parent item keys
-
-        Returns:
-            Dictionary mapping parent item keys to (file_path, filename, attachment_key)
-        """
-        results = {}
-        max_workers = min(10, len(attachment_item_map))  # Set reasonable limit
-
-        if not attachment_item_map:
-            return results
-
-        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
-            # Create a dictionary mapping Future objects to attachment keys
-            future_to_key = {
-                executor.submit(self._download_zotero_pdf, attachment_key): (
-                    attachment_key,
-                    item_key,
-                )
-                for attachment_key, item_key in attachment_item_map.items()
-            }
-
-            for future in concurrent.futures.as_completed(future_to_key):
-                attachment_key, item_key = future_to_key[future]
-                try:
-                    result = future.result()
-                    if result:
-                        temp_file_path, resolved_filename = result
-                        results[item_key] = (
-                            temp_file_path,
-                            resolved_filename,
-                            attachment_key,
-                        )
-                except Exception as e:
-                    logger.error(
-                        "Failed to download PDF for key %s: %s", attachment_key, e
-                    )
-
-        return results
-
-    # pylint: disable=too-many-locals, too-many-branches
     def _filter_and_format_papers(self, items: List[Dict[str, Any]]) -> None:
         """Filter and format papers from Zotero items, including standalone PDFs."""
         filter_item_types = (
@@ -196,8 +183,7 @@ class ZoteroSearchData:
         logger.debug("Filtering item types: %s", filter_item_types)
 
         # Maps to track attachments for batch processing
-        orphaned_pdfs = {}  # attachment_key -> item key (same for orphans)
-        item_attachments = {}  # item_key -> [attachment_keys]
+        orphaned_pdfs: Dict[str, str] = {}  # attachment_key -> item key (same for orphans)
 
         # First pass: process all items without downloading PDFs
         for item in items:
@@ -263,59 +249,16 @@ class ZoteroSearchData:
                 "source": "zotero",
             }
 
-        # Second pass: collect attachment info for all items
-        for item_key, item_data in self.article_data.items():
-            if item_data["Type"] != "orphan_attachment":
-                try:
-                    children = self.zot.children(item_key)
-                    pdf_attachments = [
-                        child
-                        for child in children
-                        if isinstance(child, dict)
-                        and child.get("data", {}).get("contentType")
-                        == "application/pdf"
-                    ]
-
-                    if pdf_attachments:
-                        attachment = pdf_attachments[0]
-                        attachment_data = attachment.get("data", {})
-                        attachment_key = attachment_data.get("key")
-                        filename = attachment_data.get("filename", "unknown.pdf")
+        # Collect and process attachments
+        item_attachments = self._collect_item_attachments()
 
-                        if attachment_key:
-                            # Add to item attachments map
-                            item_attachments[attachment_key] = item_key
-                            # Add basic info
-                            self.article_data[item_key]["filename"] = filename
-                except Exception as e:
-                    logger.error(
-                        "Failed to get attachments for item %s: %s", item_key, e
-                    )
+        # Process orphaned PDFs
+        self._process_orphaned_pdfs(orphaned_pdfs)
 
-        # Now download all PDFs in parallel - first orphaned PDFs
-        logger.info("Downloading %d orphaned PDFs in parallel", len(orphaned_pdfs))
-        orphan_results = self._download_pdfs_in_parallel(orphaned_pdfs)
-
-        # Update orphan data
-        for item_key, (file_path, filename, attachment_key) in orphan_results.items():
-            self.article_data[item_key]["filename"] = filename
-            self.article_data[item_key]["pdf_url"] = file_path
-            self.article_data[item_key]["attachment_key"] = attachment_key
-            logger.info("Downloaded orphaned Zotero PDF to: %s", file_path)
-
-        # Download regular item attachments
-        logger.info(
-            "Downloading %d regular item PDFs in parallel", len(item_attachments)
-        )
-        item_results = self._download_pdfs_in_parallel(item_attachments)
-
-        # Update item data
-        for item_key, (file_path, filename, attachment_key) in item_results.items():
-            self.article_data[item_key]["filename"] = filename
-            self.article_data[item_key]["pdf_url"] = file_path
-            self.article_data[item_key]["attachment_key"] = attachment_key
-            logger.info("Downloaded Zotero PDF to: %s", file_path)
+        # Process regular item PDFs
+        self._process_item_pdfs(item_attachments)
 
+        # Ensure we have some results
         if not self.article_data:
             logger.error(
                 "No matching papers returned from Zotero for query: '%s'", self.query
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""
+Utility functions for downloading PDFs from Zotero.
+"""
+
+import logging
+import tempfile
+from typing import Optional, Tuple, Dict
+import concurrent.futures
+import requests
+
+logger = logging.getLogger(__name__)
+
+
+def download_zotero_pdf(
+    session: requests.Session,
+    user_id: str,
+    api_key: str,
+    attachment_key: str,
+    **kwargs,
+) -> Optional[Tuple[str, str]]:
+    """
+    Download a PDF from Zotero by attachment key.
+
+    Args:
+        session: requests.Session for HTTP requests.
+        user_id: Zotero user ID.
+        api_key: Zotero API key.
+        attachment_key: Zotero attachment item key.
+        kwargs:
+            timeout (int): Request timeout in seconds (default: 10).
+            chunk_size (int, optional): Chunk size for streaming.
+
+    Returns:
+        Tuple of (local_file_path, filename) if successful, else None.
+    """
+    # Extract optional parameters
+    timeout = kwargs.get("timeout", 10)
+    chunk_size = kwargs.get("chunk_size")
+    # Log configured parameters for verification
+    logger.info("download_zotero_pdf params -> timeout=%s, chunk_size=%s", timeout, chunk_size)
+    # Log download start
+    logger.info(
+        "Downloading Zotero PDF for attachment %s from Zotero API", attachment_key
+    )
+    zotero_pdf_url = (
+        f"https://api.zotero.org/users/{user_id}/items/" f"{attachment_key}/file"
+    )
+    headers = {"Zotero-API-Key": api_key}
+
+    try:
+        response = session.get(
+            zotero_pdf_url, headers=headers, stream=True, timeout=timeout
+        )
+        response.raise_for_status()
+
+        # Download to a temporary file first
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            for chunk in response.iter_content(chunk_size=chunk_size):
+                temp_file.write(chunk)
+            temp_file_path = temp_file.name
+            # Temp file written to %s
+            logger.info("Zotero PDF downloaded to temporary file: %s", temp_file_path)
+
+        # Determine filename from Content-Disposition header or default
+        if "filename=" in response.headers.get("Content-Disposition", ""):
+            filename = (
+                response.headers.get("Content-Disposition", "")
+                .split("filename=")[-1]
+                .strip('"')
+            )
+        else:
+            filename = "downloaded.pdf"
+
+        return temp_file_path, filename
+
+    except (requests.exceptions.RequestException, OSError) as e:
+        logger.error(
+            "Failed to download Zotero PDF for attachment %s: %s", attachment_key, e
+        )
+        return None
+
+
+def download_pdfs_in_parallel(
+    session: requests.Session,
+    user_id: str,
+    api_key: str,
+    attachment_item_map: Dict[str, str],
+    **kwargs,
+) -> Dict[str, Tuple[str, str, str]]:
+    """
+    Download multiple PDFs in parallel using ThreadPoolExecutor.
+
+    Args:
+        session: requests.Session for HTTP requests.
+        user_id: Zotero user ID.
+        api_key: Zotero API key.
+        attachment_item_map: Mapping of attachment_key to parent item_key.
+        kwargs:
+            max_workers (int, optional): Maximum number of worker threads (default: min(10, n)).
+            chunk_size (int, optional): Chunk size for streaming.
+
+    Returns:
+        Mapping of parent item_key to (local_file_path, filename, attachment_key).
+    """
+    # Extract optional parameters
+    max_workers = kwargs.get("max_workers")
+    chunk_size = kwargs.get("chunk_size")
+    # Log configured parameters for verification
+    logger.info(
+        "download_pdfs_in_parallel params -> max_workers=%s, chunk_size=%s",
+        max_workers,
+        chunk_size,
+    )
+    results: Dict[str, Tuple[str, str, str]] = {}
+    if not attachment_item_map:
+        return results
+
+    with concurrent.futures.ThreadPoolExecutor(
+        max_workers=(
+            max_workers
+            if max_workers is not None
+            else min(10, len(attachment_item_map))
+        )
+    ) as executor:
+        future_to_keys = {
+            executor.submit(
+                download_zotero_pdf,
+                session,
+                user_id,
+                api_key,
+                attachment_key,
+                chunk_size=chunk_size,
+            ): (attachment_key, item_key)
+            for attachment_key, item_key in attachment_item_map.items()
+        }
+
+        for future in concurrent.futures.as_completed(future_to_keys):
+            attachment_key, item_key = future_to_keys[future]
+            try:
+                res = future.result()
+                if res:
+                    results[item_key] = (*res, attachment_key)
+            except (requests.exceptions.RequestException, OSError) as e:
+                logger.error("Failed to download PDF for key %s: %s", attachment_key, e)
+
+    return results
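Editor's note: a short usage sketch of the new helper module; the user ID, API key, and attachment keys below are placeholders:

```python
import requests

session = requests.Session()
attachment_map = {"ATTACHKEY1": "ITEMKEY1", "ATTACHKEY2": "ITEMKEY2"}

results = download_pdfs_in_parallel(
    session,
    "1234567",               # Zotero user ID (placeholder)
    "YOUR_ZOTERO_API_KEY",   # API key (placeholder)
    attachment_map,
    max_workers=4,
    chunk_size=16384,
)
for item_key, (path, filename, attachment_key) in results.items():
    print(f"{item_key}: {filename} saved at {path}")
```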