aiagents4pharma 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiagents4pharma/__init__.py +11 -0
- aiagents4pharma/talk2aiagents4pharma/.dockerignore +13 -0
- aiagents4pharma/talk2aiagents4pharma/Dockerfile +133 -0
- aiagents4pharma/talk2aiagents4pharma/README.md +1 -0
- aiagents4pharma/talk2aiagents4pharma/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/agents/__init__.py +6 -0
- aiagents4pharma/talk2aiagents4pharma/agents/main_agent.py +70 -0
- aiagents4pharma/talk2aiagents4pharma/configs/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/configs/agents/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/configs/agents/main_agent/default.yaml +29 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2aiagents4pharma/configs/app/frontend/default.yaml +102 -0
- aiagents4pharma/talk2aiagents4pharma/configs/config.yaml +4 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2aiagents4pharma/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2aiagents4pharma/install.md +154 -0
- aiagents4pharma/talk2aiagents4pharma/states/__init__.py +5 -0
- aiagents4pharma/talk2aiagents4pharma/states/state_talk2aiagents4pharma.py +18 -0
- aiagents4pharma/talk2aiagents4pharma/tests/__init__.py +3 -0
- aiagents4pharma/talk2aiagents4pharma/tests/test_main_agent.py +312 -0
- aiagents4pharma/talk2biomodels/.dockerignore +13 -0
- aiagents4pharma/talk2biomodels/Dockerfile +104 -0
- aiagents4pharma/talk2biomodels/README.md +1 -0
- aiagents4pharma/talk2biomodels/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/agents/__init__.py +6 -0
- aiagents4pharma/talk2biomodels/agents/t2b_agent.py +104 -0
- aiagents4pharma/talk2biomodels/api/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/api/ols.py +75 -0
- aiagents4pharma/talk2biomodels/api/uniprot.py +36 -0
- aiagents4pharma/talk2biomodels/configs/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/agents/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/agents/t2b_agent/default.yaml +14 -0
- aiagents4pharma/talk2biomodels/configs/app/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/__init__.py +0 -0
- aiagents4pharma/talk2biomodels/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2biomodels/configs/config.yaml +7 -0
- aiagents4pharma/talk2biomodels/configs/tools/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/ask_question/default.yaml +30 -0
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/custom_plotter/default.yaml +8 -0
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/configs/tools/get_annotation/default.yaml +8 -0
- aiagents4pharma/talk2biomodels/install.md +63 -0
- aiagents4pharma/talk2biomodels/models/__init__.py +5 -0
- aiagents4pharma/talk2biomodels/models/basico_model.py +125 -0
- aiagents4pharma/talk2biomodels/models/sys_bio_model.py +60 -0
- aiagents4pharma/talk2biomodels/states/__init__.py +6 -0
- aiagents4pharma/talk2biomodels/states/state_talk2biomodels.py +49 -0
- aiagents4pharma/talk2biomodels/tests/BIOMD0000000449_url.xml +1585 -0
- aiagents4pharma/talk2biomodels/tests/__init__.py +3 -0
- aiagents4pharma/talk2biomodels/tests/article_on_model_537.pdf +0 -0
- aiagents4pharma/talk2biomodels/tests/test_api.py +31 -0
- aiagents4pharma/talk2biomodels/tests/test_ask_question.py +42 -0
- aiagents4pharma/talk2biomodels/tests/test_basico_model.py +67 -0
- aiagents4pharma/talk2biomodels/tests/test_get_annotation.py +190 -0
- aiagents4pharma/talk2biomodels/tests/test_getmodelinfo.py +92 -0
- aiagents4pharma/talk2biomodels/tests/test_integration.py +116 -0
- aiagents4pharma/talk2biomodels/tests/test_load_biomodel.py +35 -0
- aiagents4pharma/talk2biomodels/tests/test_param_scan.py +71 -0
- aiagents4pharma/talk2biomodels/tests/test_query_article.py +184 -0
- aiagents4pharma/talk2biomodels/tests/test_save_model.py +47 -0
- aiagents4pharma/talk2biomodels/tests/test_search_models.py +35 -0
- aiagents4pharma/talk2biomodels/tests/test_simulate_model.py +44 -0
- aiagents4pharma/talk2biomodels/tests/test_steady_state.py +86 -0
- aiagents4pharma/talk2biomodels/tests/test_sys_bio_model.py +67 -0
- aiagents4pharma/talk2biomodels/tools/__init__.py +17 -0
- aiagents4pharma/talk2biomodels/tools/ask_question.py +125 -0
- aiagents4pharma/talk2biomodels/tools/custom_plotter.py +165 -0
- aiagents4pharma/talk2biomodels/tools/get_annotation.py +342 -0
- aiagents4pharma/talk2biomodels/tools/get_modelinfo.py +159 -0
- aiagents4pharma/talk2biomodels/tools/load_arguments.py +134 -0
- aiagents4pharma/talk2biomodels/tools/load_biomodel.py +44 -0
- aiagents4pharma/talk2biomodels/tools/parameter_scan.py +310 -0
- aiagents4pharma/talk2biomodels/tools/query_article.py +64 -0
- aiagents4pharma/talk2biomodels/tools/save_model.py +98 -0
- aiagents4pharma/talk2biomodels/tools/search_models.py +96 -0
- aiagents4pharma/talk2biomodels/tools/simulate_model.py +137 -0
- aiagents4pharma/talk2biomodels/tools/steady_state.py +187 -0
- aiagents4pharma/talk2biomodels/tools/utils.py +23 -0
- aiagents4pharma/talk2cells/README.md +1 -0
- aiagents4pharma/talk2cells/__init__.py +5 -0
- aiagents4pharma/talk2cells/agents/__init__.py +6 -0
- aiagents4pharma/talk2cells/agents/scp_agent.py +87 -0
- aiagents4pharma/talk2cells/states/__init__.py +6 -0
- aiagents4pharma/talk2cells/states/state_talk2cells.py +15 -0
- aiagents4pharma/talk2cells/tests/scp_agent/test_scp_agent.py +22 -0
- aiagents4pharma/talk2cells/tools/__init__.py +6 -0
- aiagents4pharma/talk2cells/tools/scp_agent/__init__.py +6 -0
- aiagents4pharma/talk2cells/tools/scp_agent/display_studies.py +27 -0
- aiagents4pharma/talk2cells/tools/scp_agent/search_studies.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/.dockerignore +13 -0
- aiagents4pharma/talk2knowledgegraphs/Dockerfile +131 -0
- aiagents4pharma/talk2knowledgegraphs/README.md +1 -0
- aiagents4pharma/talk2knowledgegraphs/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/agents/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/agents/t2kg_agent.py +99 -0
- aiagents4pharma/talk2knowledgegraphs/configs/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/agents/t2kg_agent/default.yaml +62 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +79 -0
- aiagents4pharma/talk2knowledgegraphs/configs/config.yaml +13 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/graphrag_reasoning/default.yaml +24 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/__init__.py +0 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +33 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_extraction/default.yaml +43 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/tools/subgraph_summarization/default.yaml +9 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/__init__.py +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/database/milvus/default.yaml +61 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/biobridge_primekg.py +607 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/dataset.py +25 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/primekg.py +212 -0
- aiagents4pharma/talk2knowledgegraphs/datasets/starkqa_primekg.py +210 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/cpu/docker-compose.yml +93 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/.env.example +23 -0
- aiagents4pharma/talk2knowledgegraphs/docker-compose/gpu/docker-compose.yml +108 -0
- aiagents4pharma/talk2knowledgegraphs/entrypoint.sh +180 -0
- aiagents4pharma/talk2knowledgegraphs/install.md +165 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +886 -0
- aiagents4pharma/talk2knowledgegraphs/states/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/states/state_talk2knowledgegraphs.py +40 -0
- aiagents4pharma/talk2knowledgegraphs/tests/__init__.py +0 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_agents_t2kg_agent.py +318 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_biobridge_primekg.py +248 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_dataset.py +33 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_primekg.py +86 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_datasets_starkqa_primekg.py +125 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_graphrag_reasoning.py +257 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_milvus_multimodal_subgraph_extraction.py +1444 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_multimodal_subgraph_extraction.py +159 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_extraction.py +152 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_tools_subgraph_summarization.py +201 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_database_milvus_connection_manager.py +812 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_embeddings.py +51 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_huggingface.py +49 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_nim_molmim.py +59 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_ollama.py +63 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_embeddings_sentencetransformer.py +47 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_enrichments.py +40 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ollama.py +94 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_ols.py +70 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_pubchem.py +45 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_reactome.py +44 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_enrichments_uniprot.py +48 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_extractions_milvus_multimodal_pcst.py +759 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_kg_utils.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/tests/test_utils_pubchem_utils.py +123 -0
- aiagents4pharma/talk2knowledgegraphs/tools/__init__.py +11 -0
- aiagents4pharma/talk2knowledgegraphs/tools/graphrag_reasoning.py +138 -0
- aiagents4pharma/talk2knowledgegraphs/tools/load_arguments.py +22 -0
- aiagents4pharma/talk2knowledgegraphs/tools/milvus_multimodal_subgraph_extraction.py +965 -0
- aiagents4pharma/talk2knowledgegraphs/tools/multimodal_subgraph_extraction.py +374 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_extraction.py +291 -0
- aiagents4pharma/talk2knowledgegraphs/tools/subgraph_summarization.py +123 -0
- aiagents4pharma/talk2knowledgegraphs/utils/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/database/milvus_connection_manager.py +586 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/embeddings.py +81 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/huggingface.py +111 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/nim_molmim.py +54 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/ollama.py +87 -0
- aiagents4pharma/talk2knowledgegraphs/utils/embeddings/sentence_transformer.py +73 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/__init__.py +12 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/enrichments.py +37 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ollama.py +129 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/ols_terms.py +89 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/pubchem_strings.py +78 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/reactome_pathways.py +71 -0
- aiagents4pharma/talk2knowledgegraphs/utils/enrichments/uniprot_proteins.py +98 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/__init__.py +5 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/milvus_multimodal_pcst.py +762 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/multimodal_pcst.py +298 -0
- aiagents4pharma/talk2knowledgegraphs/utils/extractions/pcst.py +229 -0
- aiagents4pharma/talk2knowledgegraphs/utils/kg_utils.py +67 -0
- aiagents4pharma/talk2knowledgegraphs/utils/pubchem_utils.py +104 -0
- aiagents4pharma/talk2scholars/.dockerignore +13 -0
- aiagents4pharma/talk2scholars/Dockerfile +104 -0
- aiagents4pharma/talk2scholars/README.md +1 -0
- aiagents4pharma/talk2scholars/__init__.py +7 -0
- aiagents4pharma/talk2scholars/agents/__init__.py +13 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +89 -0
- aiagents4pharma/talk2scholars/agents/paper_download_agent.py +96 -0
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +101 -0
- aiagents4pharma/talk2scholars/agents/s2_agent.py +135 -0
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +127 -0
- aiagents4pharma/talk2scholars/configs/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +52 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/paper_download_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +44 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +19 -0
- aiagents4pharma/talk2scholars/configs/app/__init__.py +7 -0
- aiagents4pharma/talk2scholars/configs/app/frontend/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/app/frontend/default.yaml +72 -0
- aiagents4pharma/talk2scholars/configs/config.yaml +16 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +21 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/paper_download/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/paper_download/default.yaml +124 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +62 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/retrieve_semantic_scholar_paper_id/default.yaml +12 -0
- aiagents4pharma/talk2scholars/configs/tools/search/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +26 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +57 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/default.yaml +55 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/cpu/docker-compose.yml +90 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/.env.example +21 -0
- aiagents4pharma/talk2scholars/docker-compose/gpu/docker-compose.yml +105 -0
- aiagents4pharma/talk2scholars/install.md +122 -0
- aiagents4pharma/talk2scholars/state/__init__.py +7 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +98 -0
- aiagents4pharma/talk2scholars/tests/__init__.py +3 -0
- aiagents4pharma/talk2scholars/tests/test_agents_main_agent.py +256 -0
- aiagents4pharma/talk2scholars/tests/test_agents_paper_agents_download_agent.py +139 -0
- aiagents4pharma/talk2scholars/tests/test_agents_pdf_agent.py +114 -0
- aiagents4pharma/talk2scholars/tests/test_agents_s2_agent.py +198 -0
- aiagents4pharma/talk2scholars/tests/test_agents_zotero_agent.py +160 -0
- aiagents4pharma/talk2scholars/tests/test_s2_tools_display_dataframe.py +91 -0
- aiagents4pharma/talk2scholars/tests/test_s2_tools_query_dataframe.py +191 -0
- aiagents4pharma/talk2scholars/tests/test_states_state.py +38 -0
- aiagents4pharma/talk2scholars/tests/test_tools_paper_downloader.py +507 -0
- aiagents4pharma/talk2scholars/tests/test_tools_question_and_answer_tool.py +105 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_multi.py +307 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_retrieve.py +67 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_search.py +286 -0
- aiagents4pharma/talk2scholars/tests/test_tools_s2_single.py +298 -0
- aiagents4pharma/talk2scholars/tests/test_utils_arxiv_downloader.py +469 -0
- aiagents4pharma/talk2scholars/tests/test_utils_base_paper_downloader.py +598 -0
- aiagents4pharma/talk2scholars/tests/test_utils_biorxiv_downloader.py +669 -0
- aiagents4pharma/talk2scholars/tests/test_utils_medrxiv_downloader.py +500 -0
- aiagents4pharma/talk2scholars/tests/test_utils_nvidia_nim_reranker.py +117 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_answer_formatter.py +67 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_batch_processor.py +92 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_collection_manager.py +173 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_document_processor.py +68 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_generate_answer.py +72 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_gpu_detection.py +129 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py +88 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py +190 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py +159 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_store.py +406 -0
- aiagents4pharma/talk2scholars/tests/test_utils_pubmed_downloader.py +1007 -0
- aiagents4pharma/talk2scholars/tests/test_utils_read_helper_utils.py +106 -0
- aiagents4pharma/talk2scholars/tests/test_utils_s2_utils_ext_ids.py +403 -0
- aiagents4pharma/talk2scholars/tests/test_utils_tool_helper_utils.py +85 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_human_in_the_loop.py +266 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_path.py +496 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_pdf_downloader_utils.py +46 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_read.py +743 -0
- aiagents4pharma/talk2scholars/tests/test_utils_zotero_write.py +151 -0
- aiagents4pharma/talk2scholars/tools/__init__.py +9 -0
- aiagents4pharma/talk2scholars/tools/paper_download/__init__.py +12 -0
- aiagents4pharma/talk2scholars/tools/paper_download/paper_downloader.py +442 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/__init__.py +22 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/arxiv_downloader.py +207 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/base_paper_downloader.py +336 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/biorxiv_downloader.py +313 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/medrxiv_downloader.py +196 -0
- aiagents4pharma/talk2scholars/tools/paper_download/utils/pubmed_downloader.py +323 -0
- aiagents4pharma/talk2scholars/tools/pdf/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +37 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +198 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +59 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +150 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +97 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +113 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +86 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +150 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +327 -0
- aiagents4pharma/talk2scholars/tools/s2/__init__.py +21 -0
- aiagents4pharma/talk2scholars/tools/s2/display_dataframe.py +110 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +111 -0
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +233 -0
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +128 -0
- aiagents4pharma/talk2scholars/tools/s2/search.py +101 -0
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +102 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/multi_helper.py +223 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/search_helper.py +205 -0
- aiagents4pharma/talk2scholars/tools/s2/utils/single_helper.py +216 -0
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +7 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/read_helper.py +270 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/review_helper.py +74 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/write_helper.py +194 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +180 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_pdf_downloader.py +133 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +105 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_review.py +162 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +91 -0
- aiagents4pharma-0.0.0.dist-info/METADATA +335 -0
- aiagents4pharma-0.0.0.dist-info/RECORD +336 -0
- aiagents4pharma-0.0.0.dist-info/WHEEL +4 -0
- aiagents4pharma-0.0.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""
|
|
2
|
+
GPU Detection Utility for Milvus Index Selection
|
|
3
|
+
Handle COSINE -> IP conversion for GPU indexes
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import subprocess
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def detect_nvidia_gpu(config=None) -> bool:
    """
    Detect if NVIDIA GPU is available and should be used.

    Detection shells out to ``nvidia-smi``; any failure to run it (missing
    binary, permission problem, timeout) is treated as "no GPU" rather than
    an error, so callers can always fall back to the CPU code path.

    Args:
        config: Hydra config object that may contain a ``gpu_detection.force_cpu_mode``
            flag. When the flag is truthy, CPU mode is forced regardless of hardware.

    Returns:
        bool: True if GPU should be used, False if CPU should be used
    """

    # Check for force CPU mode in config
    if config and hasattr(config, "gpu_detection"):
        force_cpu = getattr(config.gpu_detection, "force_cpu_mode", False)
        if force_cpu:
            logger.info(
                "Force CPU mode enabled in config - using CPU even though GPU may be available"
            )
            return False

    # Normal GPU detection logic
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,  # non-zero exit is handled below, not raised
        )

        if result.returncode == 0 and result.stdout.strip():
            gpu_names = result.stdout.strip().split("\n")
            logger.info("Detected NVIDIA GPU(s): %s", gpu_names)
            logger.info("To force CPU mode, set 'force_cpu_mode: true' in config")
            return True

        logger.info("nvidia-smi command failed or no GPUs detected")
        return False

    # OSError covers FileNotFoundError (nvidia-smi not installed) as well as
    # PermissionError and similar spawn failures; previously only
    # FileNotFoundError was caught, so other OS-level errors crashed the caller.
    except (subprocess.TimeoutExpired, OSError) as e:
        logger.info("NVIDIA GPU detection failed: %s", e)
        return False
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_optimal_index_config(
    has_gpu: bool, embedding_dim: int = 768, use_cosine: bool = True
) -> tuple[dict[str, Any], dict[str, Any]]:
    """
    Get optimal index and search parameters based on GPU availability.

    IMPORTANT: GPU indexes don't support COSINE distance. When using GPU with COSINE,
    vectors must be normalized and IP distance used instead.

    Args:
        has_gpu (bool): Whether NVIDIA GPU is available
        embedding_dim (int): Dimension of embeddings
        use_cosine (bool): Whether to use cosine similarity (will be converted to IP for GPU)

    Returns:
        Tuple[Dict[str, Any], Dict[str, Any]]: (index_params, search_params)
    """
    # --- CPU path: IVF_FLAT, which supports COSINE natively -----------------
    if not has_gpu:
        logger.info("Configuring CPU index (IVF_FLAT) - no NVIDIA GPU detected")

        cpu_metric = "COSINE" if use_cosine else "IP"
        # Dynamic nlist based on dimension, clamped to [64, 1024].
        nlist = min(1024, max(64, embedding_dim // 8))

        cpu_index = {
            "index_type": "IVF_FLAT",
            "metric_type": cpu_metric,
            "params": {"nlist": nlist},
        }
        # nprobe slightly higher than original for better recall.
        cpu_search = {"metric_type": cpu_metric, "params": {"nprobe": 16}}
        return cpu_index, cpu_search

    # --- GPU path: GPU_CAGRA, which only supports IP ------------------------
    logger.info("Configuring GPU_CAGRA index for NVIDIA GPU")

    if use_cosine:
        # Inner Product on normalized vectors is equivalent to cosine similarity.
        logger.warning(
            "GPU indexes don't support COSINE distance. "
            "Vectors will be normalized and IP distance will be used instead."
        )
    gpu_metric = "IP"  # GPU indexes always use IP here

    gpu_index = {
        "index_type": "GPU_CAGRA",
        "metric_type": gpu_metric,
        "params": {
            "intermediate_graph_degree": 64,  # Higher for better recall
            "graph_degree": 32,  # Balanced performance/recall
            "build_algo": "IVF_PQ",  # Higher quality build
            "cache_dataset_on_device": "true",  # Cache for better recall
            "adapt_for_cpu": "false",  # Pure GPU mode
        },
    }
    gpu_search = {
        "metric_type": gpu_metric,
        "params": {
            "itopk_size": 128,  # Power of 2, good for intermediate results
            "search_width": 16,  # Balanced entry points
            "team_size": 16,  # Optimize for typical vector dimensions
        },
    }
    return gpu_index, gpu_search
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def log_index_configuration(
    index_params: dict[str, Any], search_params: dict[str, Any], use_cosine: bool = True
) -> None:
    """Log the selected index configuration for debugging."""
    chosen_index = index_params.get("index_type", "Unknown")
    chosen_metric = index_params.get("metric_type", "Unknown")

    logger.info("=== Milvus Index Configuration ===")
    logger.info("Index Type: %s", chosen_index)
    logger.info("Metric Type: %s", chosen_metric)

    # GPU indexes emulate cosine similarity via IP on normalized vectors;
    # call that out so the metric in the logs isn't mistaken for a bug.
    cosine_via_ip = (
        chosen_index == "GPU_CAGRA" and use_cosine and chosen_metric == "IP"
    )
    if cosine_via_ip:
        logger.info("NOTE: Using IP with normalized vectors to simulate COSINE for GPU")

    logger.info("Index Params: %s", index_params.get("params", {}))
    logger.info("Search Params: %s", search_params.get("params", {}))
    logger.info("===================================")
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NVIDIA NIM Reranker Utility for Milvus Integration
|
|
3
|
+
Rerank chunks instead of papers following traditional RAG pipeline
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from langchain_core.documents import Document
|
|
11
|
+
from langchain_nvidia_ai_endpoints import NVIDIARerank
|
|
12
|
+
|
|
13
|
+
# Set up logging with configurable level
# LOG_LEVEL must be a standard logging level name ("DEBUG", "INFO", ...);
# an unrecognized name makes getattr(logging, log_level) raise AttributeError
# at import time.
log_level = os.environ.get("LOG_LEVEL", "INFO")
# NOTE(review): basicConfig() at import time configures the root logger as a
# module side effect — confirm this is intended for library code, since it can
# override the host application's logging setup.
logging.basicConfig(level=getattr(logging, log_level))
logger = logging.getLogger(__name__)
# Level is also pinned on this module's logger so it follows LOG_LEVEL even if
# the root logger is reconfigured later.
logger.setLevel(getattr(logging, log_level))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def rerank_chunks(
    chunks: list[Document], query: str, config: Any, top_k: int = 25
) -> list[Document]:
    """
    Rerank chunks by relevance to the query using NVIDIA's reranker.

    This follows the traditional RAG pipeline: first retrieve chunks, then rerank them.

    Args:
        chunks (list[Document]): List of chunks to rerank
        query (str): The query string
        config (Any): Configuration containing reranker settings; this function
            reads ``config.reranker.api_key`` and ``config.reranker.model``
        top_k (int): Number of top chunks to return after reranking

    Returns:
        list[Document]: Reranked chunks (top_k most relevant)

    Raises:
        ValueError: If ``config.reranker.api_key`` is not set.
    """
    logger.info(
        "Starting NVIDIA chunk reranker for query: '%s' with %d chunks, top_k=%d",
        query[:50] + "..." if len(query) > 50 else query,
        len(chunks),
        top_k,
    )

    # If we have fewer chunks than top_k, reranking cannot shrink the set —
    # skip the API call entirely.
    if len(chunks) <= top_k:
        logger.info(
            "Number of chunks (%d) <= top_k (%d), returning all chunks without reranking",
            len(chunks),
            top_k,
        )
        return chunks

    # Get API key from config
    api_key = config.reranker.api_key
    if not api_key:
        logger.error("No NVIDIA API key found in configuration for reranking")
        raise ValueError("Configuration 'reranker.api_key' must be set for reranking")

    logger.info("Using NVIDIA reranker model: %s", config.reranker.model)

    # Initialize reranker with truncation to handle long chunks
    reranker = NVIDIARerank(
        model=config.reranker.model,
        api_key=api_key,
        truncate="END",  # Truncate at the end if too long
    )

    # Log chunk metadata for debugging
    logger.debug(
        "Reranking chunks from papers: %s",
        list({chunk.metadata.get("paper_id", "unknown") for chunk in chunks})[:5],
    )

    # Rerank the chunks
    logger.info("Calling NVIDIA reranker API with %d chunks...", len(chunks))
    reranked_chunks = reranker.compress_documents(query=query, documents=chunks)

    for i, doc in enumerate(reranked_chunks[:top_k]):
        score = doc.metadata.get("relevance_score")
        source = doc.metadata.get("paper_id", "unknown")
        # BUG FIX: the score can be absent; the old code substituted the string
        # "N/A" into a %.4f placeholder, which raises inside the logging
        # machinery and silently drops the record. Only use %.4f for numbers.
        if isinstance(score, (int, float)):
            logger.info("Rank %d | Score: %.4f | Source: %s", i + 1, score, source)
        else:
            logger.info("Rank %d | Score: %s | Source: %s", i + 1, "N/A", source)

    logger.info(
        "Successfully reranked chunks. Returning top %d chunks",
        min(top_k, len(reranked_chunks)),
    )

    # Log which papers the top chunks come from (only when DEBUG is enabled,
    # to avoid the counting work on the normal path)
    if reranked_chunks and logger.isEnabledFor(logging.DEBUG):
        top_papers: dict[str, int] = {}
        for chunk in reranked_chunks[:top_k]:
            paper_id = chunk.metadata.get("paper_id", "unknown")
            top_papers[paper_id] = top_papers.get(paper_id, 0) + 1
        logger.debug("Top %d chunks distribution by paper: %s", top_k, top_papers)

    # Return only top_k chunks (convert to list to match return type)
    return list(reranked_chunks[:top_k])
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Paper loading utilities for managing PDF documents in vector store.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from .batch_processor import add_papers_batch
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def load_all_papers(
    vector_store: Any,  # The Vectorstore instance
    articles: dict[str, Any],
    call_id: str,
    config: Any,
    has_gpu: bool,
) -> None:
    """
    Ensure all papers from article_data are loaded into the Milvus vector store.
    Optimized for GPU/CPU processing.

    Args:
        vector_store: The Vectorstore instance
        articles: Dictionary of article data
        call_id: Call identifier for logging
        config: Configuration object
        has_gpu: Whether GPU is available
    """
    papers_to_load: list[tuple[str, str, Any]] = []
    skipped_papers: list[str] = []
    already_loaded: list[str] = []

    # Classify every article: already in the store, loadable, or missing a PDF.
    for paper_id, info in articles.items():
        if paper_id in vector_store.loaded_papers:
            already_loaded.append(paper_id)
            continue
        url = info.get("pdf_url")
        if url:
            papers_to_load.append((paper_id, url, info))
        else:
            skipped_papers.append(paper_id)

    # Summarize the classification, noting which hardware path will be used.
    hardware_info = f" (GPU acceleration: {'enabled' if has_gpu else 'disabled'})"
    logger.info(
        "%s: Paper loading summary%s - Total: %d, Already loaded: %d, To load: %d, No PDF: %d",
        call_id,
        hardware_info,
        len(articles),
        len(already_loaded),
        len(papers_to_load),
        len(skipped_papers),
    )

    if skipped_papers:
        logger.warning(
            "%s: Skipping %d papers without PDF URLs: %s%s",
            call_id,
            len(skipped_papers),
            skipped_papers[:5],  # only the first few, to keep the log readable
            "..." if len(skipped_papers) > 5 else "",
        )

    if not papers_to_load:
        logger.info("%s: All papers with PDFs are already loaded in Milvus", call_id)
        return

    # Pick parallelism and batch sizing to match the available hardware.
    if has_gpu:
        max_workers = min(12, max(4, len(papers_to_load)))  # GPU tolerates more workers
        batch_size = config.get("embedding_batch_size", 2000)  # and larger batches
        logger.info(
            "%s: Using GPU-optimized loading parameters: %d workers, batch size %d",
            call_id,
            max_workers,
            batch_size,
        )
    else:
        max_workers = min(8, max(3, len(papers_to_load)))  # conservative on CPU
        batch_size = config.get("embedding_batch_size", 1000)
        logger.info(
            "%s: Using CPU-optimized loading parameters: %d workers, batch size %d",
            call_id,
            max_workers,
            batch_size,
        )

    logger.info(
        "%s: Loading %d papers in ONE BATCH using %d parallel workers (batch size: %d, %s)",
        call_id,
        len(papers_to_load),
        max_workers,
        batch_size,
        "GPU accelerated" if has_gpu else "CPU processing",
    )

    # Hand the entire set to the batch processor in a single call.
    add_papers_batch(
        papers_to_add=papers_to_load,
        vector_store=vector_store.vector_store,  # the underlying LangChain vector store
        loaded_papers=vector_store.loaded_papers,
        paper_metadata=vector_store.paper_metadata,
        documents=vector_store.documents,
        config=vector_store.config,
        metadata_fields=vector_store.metadata_fields,
        has_gpu=vector_store.has_gpu,
        max_workers=max_workers,
        batch_size=batch_size,
    )

    logger.info(
        "%s: Successfully completed batch loading of all %d papers with %s",
        call_id,
        len(papers_to_load),
        "GPU acceleration" if has_gpu else "CPU processing",
    )
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RAG pipeline for retrieving and reranking chunks from a vector store.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
# Import our GPU detection utility
|
|
9
|
+
from .nvidia_nim_reranker import rerank_chunks
|
|
10
|
+
from .retrieve_chunks import retrieve_relevant_chunks
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def retrieve_and_rerank_chunks(
    vector_store: Any, query: str, config: Any, call_id: str, has_gpu: bool
) -> list[Any]:
    """
    Traditional RAG pipeline: retrieve chunks from all papers, then rerank.
    Optimized for GPU/CPU hardware.

    Args:
        vector_store: Vector store instance used for the initial retrieval
        query: User query
        config: Configuration object; reads ``initial_retrieval_k`` and
            ``mmr_diversity`` (via ``.get``) and ``top_k_chunks``
        call_id: Call identifier for logging
        has_gpu: Whether GPU acceleration is available

    Returns:
        List of reranked chunks (empty if nothing was retrieved)
    """
    hardware_mode = "GPU-accelerated" if has_gpu else "CPU-optimized"
    logger.info(
        "%s: Starting traditional RAG pipeline - retrieve then rerank (%s)",
        call_id,
        hardware_mode,
    )

    # Step 1: Retrieve chunks from ALL papers (cast wide net).
    # Adjust initial retrieval count based on hardware.
    if has_gpu:
        # GPU can handle larger initial retrieval efficiently
        initial_chunks_count = config.get("initial_retrieval_k", 150)
        mmr_diversity = config.get("mmr_diversity", 0.75)  # slightly more diverse for larger sets
    else:
        # CPU - use conservative settings
        initial_chunks_count = config.get("initial_retrieval_k", 100)
        mmr_diversity = config.get("mmr_diversity", 0.8)

    logger.info(
        "%s: Step 1 - Retrieving top %d chunks from ALL papers (%s mode)",
        call_id,
        initial_chunks_count,
        hardware_mode,
    )

    retrieved_chunks = retrieve_relevant_chunks(
        vector_store,
        query=query,
        paper_ids=None,  # No filter - retrieve from all papers
        top_k=initial_chunks_count,
        mmr_diversity=mmr_diversity,
    )

    if not retrieved_chunks:
        logger.warning("%s: No chunks retrieved from vector store", call_id)
        return []

    logger.info(
        "%s: Retrieved %d chunks from %d unique papers using %s",
        call_id,
        len(retrieved_chunks),
        len({chunk.metadata.get("paper_id", "unknown") for chunk in retrieved_chunks}),
        hardware_mode,
    )

    # Step 2: Rerank the retrieved chunks
    final_chunk_count = config.top_k_chunks
    logger.info(
        "%s: Step 2 - Reranking %d chunks to get top %d",
        call_id,
        len(retrieved_chunks),
        final_chunk_count,
    )

    reranked_chunks = rerank_chunks(
        chunks=retrieved_chunks,
        query=query,
        config=config,
        top_k=final_chunk_count,
    )

    # Log final results with hardware info
    final_papers = len({chunk.metadata.get("paper_id", "unknown") for chunk in reranked_chunks})

    logger.info(
        "%s: Reranking complete using %s. Final %d chunks from %d unique papers",
        call_id,
        hardware_mode,
        len(reranked_chunks),
        final_papers,
    )

    # Log performance insights. retrieved_chunks is guaranteed non-empty here
    # (we returned early above), so the ratio is always well defined and the
    # old `if len(retrieved_chunks) > 0` guard was dead code.
    efficiency = len(reranked_chunks) / len(retrieved_chunks) * 100
    logger.debug(
        "%s: Pipeline efficiency: %.1f%% (%d final / %d initial chunks) - %s",
        call_id,
        efficiency,
        len(reranked_chunks),
        len(retrieved_chunks),
        hardware_mode,
    )

    return reranked_chunks
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Retrieve relevant chunks from a Milvus vector store using MMR (Maximal Marginal Relevance).
|
|
3
|
+
Follows traditional RAG pipeline - retrieve first, then rerank.
|
|
4
|
+
With automatic GPU/CPU search parameter optimization.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
from langchain_core.documents import Document
|
|
11
|
+
|
|
12
|
+
# Set up logging with configurable level.
# Normalize case and fall back to INFO so an unrecognized LOG_LEVEL value
# (e.g. "debug" or a typo) cannot crash module import with an AttributeError.
log_level = os.environ.get("LOG_LEVEL", "INFO")
_resolved_level = getattr(logging, log_level.upper(), logging.INFO)
logging.basicConfig(level=_resolved_level)
logger = logging.getLogger(__name__)
logger.setLevel(_resolved_level)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def retrieve_relevant_chunks(
    vector_store,
    query: str,
    paper_ids: list[str] | None = None,
    top_k: int = 100,  # wide net by default; the reranker narrows it later
    mmr_diversity: float = 0.8,  # trade a little relevance for diversity
) -> list[Document]:
    """
    Retrieve the most relevant chunks for a query using maximal marginal relevance.
    Automatically uses GPU-optimized search parameters if GPU is available.

    In the traditional RAG pipeline, this should retrieve chunks from ALL available papers,
    not just pre-selected ones. The reranker will then select the best chunks.

    Args:
        vector_store: The Milvus vector store instance
        query: Query string
        paper_ids: Optional list of paper IDs to filter by (default: None - search all papers)
        top_k: Number of chunks to retrieve (default: 100 for reranking pipeline)
        mmr_diversity: Diversity parameter for MMR (0=max diversity, 1=max relevance)

    Returns:
        List of document chunks
    """
    if not vector_store:
        logger.error("Vector store is not initialized")
        return []

    # Hardware capability drives both log labels and candidate-pool sizing.
    has_gpu = getattr(vector_store, "has_gpu", False)
    search_mode = "GPU-accelerated" if has_gpu else "CPU"

    # Build a metadata filter only when the caller restricts the paper set.
    filter_dict = {"paper_id": paper_ids} if paper_ids else None
    if filter_dict is not None:
        logger.warning(
            "Paper IDs filter provided. Traditional RAG pipeline typically"
            "retrieves from ALL papers first. "
            "Consider removing paper_ids filter for better results."
        )
        logger.info("Filtering retrieval to papers: %s", paper_ids)
    else:
        logger.info(
            "Retrieving chunks from ALL papers (traditional RAG approach) using %s search",
            search_mode,
        )

    logger.info(
        "Performing %s MMR search with query: '%s', k=%d, diversity=%.2f",
        search_mode,
        query[:50] + "..." if len(query) > 50 else query,
        top_k,
        mmr_diversity,
    )

    # MMR needs a candidate pool larger than top_k to diversify from;
    # a GPU can afford a bigger pool than a CPU.
    if has_gpu:
        candidate_pool = min(top_k * 6, 800)
        logger.debug("Using GPU-optimized fetch_k: %d", candidate_pool)
    else:
        candidate_pool = min(top_k * 4, 500)
        logger.debug("Using CPU-optimized fetch_k: %d", candidate_pool)

    # Report whether hardware-tuned search parameters exist; they are applied
    # internally by the vector store, so we do not pass them to the search call.
    tuned_params = getattr(vector_store, "search_params", None)
    if tuned_params:
        logger.debug("Using hardware-optimized search parameters: %s", tuned_params)
    else:
        logger.debug("Using default search parameters (no hardware optimization)")

    results = vector_store.max_marginal_relevance_search(
        query=query,
        k=top_k,
        fetch_k=candidate_pool,
        lambda_mult=mmr_diversity,
        filter=filter_dict,
    )

    logger.info("Retrieved %d chunks using %s MMR from Milvus", len(results), search_mode)

    # Per-paper distribution is only worth computing when DEBUG logging is on.
    if results and logger.isEnabledFor(logging.DEBUG):
        per_paper: dict[str, int] = {}
        for doc in results:
            pid = doc.metadata.get("paper_id", "unknown")
            per_paper[pid] = per_paper.get(pid, 0) + 1

        logger.debug(
            "%s retrieval - chunks per paper: %s",
            search_mode,
            dict(sorted(per_paper.items(), key=lambda item: item[1], reverse=True)[:10]),
        )
        logger.debug(
            "%s retrieval - total papers represented: %d",
            search_mode,
            len(per_paper),
        )

    return results
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def retrieve_relevant_chunks_with_scores(
    vector_store,
    query: str,
    paper_ids: list[str] | None = None,
    top_k: int = 100,
    score_threshold: float = 0.0,
) -> list[tuple[Document, float]]:
    """
    Retrieve chunks with similarity scores, optimized for GPU/CPU.

    Args:
        vector_store: The Milvus vector store instance
        query: Query string
        paper_ids: Optional list of paper IDs to filter by
        top_k: Number of chunks to retrieve
        score_threshold: Minimum similarity score threshold

    Returns:
        List of (document, score) tuples; empty if the store is uninitialized.

    Raises:
        NotImplementedError: If the vector store does not provide
            ``similarity_search_with_score``.
    """
    if not vector_store:
        logger.error("Vector store is not initialized")
        return []

    has_gpu = getattr(vector_store, "has_gpu", False)
    search_mode = "GPU-accelerated" if has_gpu else "CPU"

    # Prepare filter
    filter_dict = {"paper_id": paper_ids} if paper_ids else None

    logger.info(
        "Performing %s similarity search with scores: query='%s', k=%d, threshold=%.3f",
        search_mode,
        query[:50] + "..." if len(query) > 50 else query,
        top_k,
        score_threshold,
    )

    # FIX: reuse the has_gpu value computed above instead of re-deriving it
    # with a redundant hasattr + attribute access.
    if has_gpu:
        logger.debug("GPU-accelerated similarity search enabled")
    else:
        logger.debug("Standard CPU similarity search")

    # Guard clause: fail fast when the store cannot produce scores.
    if not hasattr(vector_store, "similarity_search_with_score"):
        raise NotImplementedError("Vector store does not support similarity_search_with_score")

    # Don't pass search_params to avoid conflicts; the store applies them internally.
    results = vector_store.similarity_search_with_score(
        query=query,
        k=top_k,
        filter=filter_dict,
    )

    # Filter by score threshold
    filtered_results = [(doc, score) for doc, score in results if score >= score_threshold]

    logger.info(
        "%s search with scores retrieved %d/%d chunks above threshold %.3f",
        search_mode,
        len(filtered_results),
        len(results),
        score_threshold,
    )

    return filtered_results
|