aiagents4pharma 1.40.1__py3-none-any.whl → 1.42.0__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- aiagents4pharma/talk2knowledgegraphs/configs/app/frontend/default.yaml +1 -1
- aiagents4pharma/talk2knowledgegraphs/configs/tools/multimodal_subgraph_extraction/default.yaml +37 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/ols_terms/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/reactome_pathways/default.yaml +3 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/enrichments/uniprot_proteins/default.yaml +6 -0
- aiagents4pharma/talk2knowledgegraphs/configs/utils/pubchem_utils/default.yaml +5 -0
- aiagents4pharma/talk2knowledgegraphs/milvus_data_dump.py +752 -350
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +4 -0
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/default.yaml +44 -4
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker.py +127 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_answer_formatter.py +66 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_batch_processor.py +101 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_collection_manager.py +150 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_document_processor.py +69 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_generate_answer.py +75 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_gpu_detection.py +140 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_paper_loader.py +116 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_rag_pipeline.py +98 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_retrieve_chunks.py +197 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_singleton_manager.py +156 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_normalization.py +121 -0
- aiagents4pharma/talk2scholars/tests/test_pdf_vector_store.py +434 -0
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +89 -509
- aiagents4pharma/talk2scholars/tests/test_tool_helper_utils.py +34 -89
- aiagents4pharma/talk2scholars/tools/paper_download/download_biorxiv_input.py +8 -6
- aiagents4pharma/talk2scholars/tools/paper_download/download_medrxiv_input.py +6 -4
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +74 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/__init__.py +26 -1
- aiagents4pharma/talk2scholars/tools/pdf/utils/answer_formatter.py +62 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/batch_processor.py +200 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/collection_manager.py +172 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/document_processor.py +76 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/generate_answer.py +14 -14
- aiagents4pharma/talk2scholars/tools/pdf/utils/get_vectorstore.py +63 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/gpu_detection.py +154 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/nvidia_nim_reranker.py +60 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/paper_loader.py +123 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/rag_pipeline.py +122 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py +162 -40
- aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py +140 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py +40 -78
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py +159 -0
- aiagents4pharma/talk2scholars/tools/pdf/utils/vector_store.py +277 -96
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +12 -9
- aiagents4pharma/talk2scholars/tools/s2/query_dataframe.py +0 -1
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +9 -8
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +5 -5
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/METADATA +52 -126
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/RECORD +52 -25
- aiagents4pharma/talk2scholars/tests/test_nvidia_nim_reranker_utils.py +0 -28
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/licenses/LICENSE +0 -0
- {aiagents4pharma-1.40.1.dist-info → aiagents4pharma-1.42.0.dist-info}/top_level.txt +0 -0
aiagents4pharma/talk2scholars/tools/pdf/utils/retrieve_chunks.py
@@ -1,14 +1,14 @@
 """
-Retrieve relevant chunks from a vector store using MMR (Maximal Marginal Relevance).
+Retrieve relevant chunks from a Milvus vector store using MMR (Maximal Marginal Relevance).
+Follows traditional RAG pipeline - retrieve first, then rerank.
+With automatic GPU/CPU search parameter optimization.
 """
 
 import logging
 import os
 from typing import List, Optional
 
-import numpy as np
 from langchain_core.documents import Document
-from langchain_core.vectorstores.utils import maximal_marginal_relevance
 
 
 # Set up logging with configurable level
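The lambda_mult parameter used throughout the hunks below controls the Maximal Marginal Relevance trade-off: each successive chunk is picked to maximize lambda * sim(query, doc) - (1 - lambda) * max(sim(doc, s) for already-selected s), so 1 favors pure relevance and 0 favors pure diversity. A compact sketch of that criterion, assuming unit-normalized embeddings; the package delegates the actual computation to Milvus via max_marginal_relevance_search, and mmr_select below is purely illustrative:

import numpy as np

def mmr_select(query_vec, doc_vecs, k=5, lambda_mult=0.8):
    """Greedy MMR: balance relevance to the query against redundancy."""
    doc_vecs = np.asarray(doc_vecs, dtype=np.float32)
    sim_to_query = doc_vecs @ query_vec  # cosine similarity, given unit vectors
    selected, remaining = [], list(range(len(doc_vecs)))
    while remaining and len(selected) < k:
        if selected:
            # Redundancy = similarity to the closest already-selected chunk
            redundancy = (doc_vecs[remaining] @ doc_vecs[selected].T).max(axis=1)
        else:
            redundancy = np.zeros(len(remaining))
        scores = lambda_mult * sim_to_query[remaining] - (1 - lambda_mult) * redundancy
        best = remaining[int(np.argmax(scores))]
        selected.append(best)
        remaining.remove(best)
    return selected  # indices of chosen chunks, relevant but mutually diverse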
@@ -19,65 +19,187 @@ logger.setLevel(getattr(logging, log_level))
 
 
 def retrieve_relevant_chunks(
-    self,
+    vector_store,
     query: str,
     paper_ids: Optional[List[str]] = None,
-    top_k: int =
-    mmr_diversity: float =
+    top_k: int = 100,  # Increased default to cast wider net before reranking
+    mmr_diversity: float = 0.8,  # Slightly reduced for better diversity
 ) -> List[Document]:
     """
     Retrieve the most relevant chunks for a query using maximal marginal relevance.
+    Automatically uses GPU-optimized search parameters if GPU is available.
+
+    In the traditional RAG pipeline, this should retrieve chunks from ALL available papers,
+    not just pre-selected ones. The reranker will then select the best chunks.
 
     Args:
+        vector_store: The Milvus vector store instance
         query: Query string
-        paper_ids: Optional list of paper IDs to filter by
-        top_k: Number of chunks to retrieve
-        mmr_diversity: Diversity parameter for MMR (
+        paper_ids: Optional list of paper IDs to filter by (default: None - search all papers)
+        top_k: Number of chunks to retrieve (default: 100 for reranking pipeline)
+        mmr_diversity: Diversity parameter for MMR (0=max diversity, 1=max relevance)
 
     Returns:
         List of document chunks
     """
-    if not
-        logger.error("
+    if not vector_store:
+        logger.error("Vector store is not initialized")
         return []
 
+    # Check if vector store has GPU capabilities
+    has_gpu = getattr(vector_store, "has_gpu", False)
+    search_mode = "GPU-accelerated" if has_gpu else "CPU"
+
+    # Prepare filter for paper_ids if provided
+    filter_dict = None
     if paper_ids:
+        logger.warning(
+            "Paper IDs filter provided. Traditional RAG pipeline typically"
+            "retrieves from ALL papers first. "
+            "Consider removing paper_ids filter for better results."
+        )
         logger.info("Filtering retrieval to papers: %s", paper_ids)
+        filter_dict = {"paper_id": paper_ids}
+    else:
+        logger.info(
+            "Retrieving chunks from ALL papers (traditional RAG approach) using %s search",
+            search_mode,
+        )
+
+    # Use Milvus's built-in MMR search with optimized parameters
+    logger.info(
+        "Performing %s MMR search with query: '%s', k=%d, diversity=%.2f",
+        search_mode,
+        query[:50] + "..." if len(query) > 50 else query,
+        top_k,
+        mmr_diversity,
+    )
 
-    #
-
-
+    # Fetch more candidates for better MMR results
+    # Adjust fetch_k based on available hardware
+    if has_gpu:
+        # GPU can handle larger candidate sets efficiently
+        fetch_k = min(top_k * 6, 800)  # Increased for GPU
+        logger.debug("Using GPU-optimized fetch_k: %d", fetch_k)
+    else:
+        # CPU - more conservative to avoid performance issues
+        fetch_k = min(top_k * 4, 500)  # Original conservative approach
+        logger.debug("Using CPU-optimized fetch_k: %d", fetch_k)
 
-    #
-
-        doc
-        for doc in self.documents.values()
-        if not paper_ids or doc.metadata["paper_id"] in paper_ids
-    ]
+    # Get search parameters from vector store if available
+    search_params = getattr(vector_store, "search_params", None)
 
-    if
-        logger.
-
+    if search_params:
+        logger.debug("Using hardware-optimized search parameters: %s", search_params)
+    else:
+        logger.debug("Using default search parameters (no hardware optimization)")
 
-    #
-
-
-
-        doc_id = f"{doc.metadata['paper_id']}_{doc.metadata['chunk_id']}"
-        if doc_id not in self.embeddings:
-            logger.info("Embedding missing chunk %s", doc_id)
-            emb = self.embedding_model.embed_documents([doc.page_content])[0]
-            self.embeddings[doc_id] = emb
-        all_embeddings.append(self.embeddings[doc_id])
-
-    # Step 4: Apply MMR
-    mmr_indices = maximal_marginal_relevance(
-        query_embedding,
-        all_embeddings,
+    # Perform MMR search - let the vector store handle search_params internally
+    # Don't pass search_params explicitly to avoid conflicts
+    results = vector_store.max_marginal_relevance_search(
+        query=query,
         k=top_k,
+        fetch_k=fetch_k,
         lambda_mult=mmr_diversity,
+        filter=filter_dict,
     )
 
-
-
+    logger.info(
+        "Retrieved %d chunks using %s MMR from Milvus", len(results), search_mode
+    )
+
+    # Log some details about retrieved chunks for debugging
+    if results and logger.isEnabledFor(logging.DEBUG):
+        paper_counts = {}
+        for doc in results:
+            paper_id = doc.metadata.get("paper_id", "unknown")
+            paper_counts[paper_id] = paper_counts.get(paper_id, 0) + 1
+
+        logger.debug(
+            "%s retrieval - chunks per paper: %s",
+            search_mode,
+            dict(sorted(paper_counts.items(), key=lambda x: x[1], reverse=True)[:10]),
+        )
+        logger.debug(
+            "%s retrieval - total papers represented: %d",
+            search_mode,
+            len(paper_counts),
+        )
+
     return results
+
+
+def retrieve_relevant_chunks_with_scores(
+    vector_store,
+    query: str,
+    paper_ids: Optional[List[str]] = None,
+    top_k: int = 100,
+    score_threshold: float = 0.0,
+) -> List[tuple[Document, float]]:
+    """
+    Retrieve chunks with similarity scores, optimized for GPU/CPU.
+
+    Args:
+        vector_store: The Milvus vector store instance
+        query: Query string
+        paper_ids: Optional list of paper IDs to filter by
+        top_k: Number of chunks to retrieve
+        score_threshold: Minimum similarity score threshold
+
+    Returns:
+        List of (document, score) tuples
+    """
+    if not vector_store:
+        logger.error("Vector store is not initialized")
+        return []
+
+    has_gpu = getattr(vector_store, "has_gpu", False)
+    search_mode = "GPU-accelerated" if has_gpu else "CPU"
+
+    # Prepare filter
+    filter_dict = None
+    if paper_ids:
+        filter_dict = {"paper_id": paper_ids}
+
+    logger.info(
+        "Performing %s similarity search with scores: query='%s', k=%d, threshold=%.3f",
+        search_mode,
+        query[:50] + "..." if len(query) > 50 else query,
+        top_k,
+        score_threshold,
+    )
+
+    # Check hardware optimization status instead of unused search_params
+    has_optimization = hasattr(vector_store, "has_gpu") and vector_store.has_gpu
+
+    if has_optimization:
+        logger.debug("GPU-accelerated similarity search enabled")
+    else:
+        logger.debug("Standard CPU similarity search")
+
+    if hasattr(vector_store, "similarity_search_with_score"):
+        # Don't pass search_params to avoid conflicts
+        results = vector_store.similarity_search_with_score(
+            query=query,
+            k=top_k,
+            filter=filter_dict,
+        )
+
+        # Filter by score threshold
+        filtered_results = [
+            (doc, score) for doc, score in results if score >= score_threshold
+        ]
+
+        logger.info(
+            "%s search with scores retrieved %d/%d chunks above threshold %.3f",
+            search_mode,
+            len(filtered_results),
+            len(results),
+            score_threshold,
+        )
+
+        return filtered_results
+
+    raise NotImplementedError(
+        "Vector store does not support similarity_search_with_score"
+    )
aiagents4pharma/talk2scholars/tools/pdf/utils/singleton_manager.py (new file)
@@ -0,0 +1,140 @@
+"""
+Singleton manager for Milvus connections and vector stores.
+Handles connection reuse, event loops, and GPU detection caching.
+"""
+
+import asyncio
+import logging
+import threading
+from typing import Any, Dict
+
+from langchain_core.embeddings import Embeddings
+from langchain_milvus import Milvus
+from pymilvus import connections, db, utility
+from pymilvus.exceptions import MilvusException
+
+from .gpu_detection import detect_nvidia_gpu
+
+logger = logging.getLogger(__name__)
+
+
+class VectorstoreSingleton:
+    """Singleton manager for Milvus connections and vector stores."""
+
+    _instance = None
+    _lock = threading.Lock()
+    _connections = {}  # Store connections by connection string
+    _vector_stores = {}  # Store vector stores by collection name
+    _event_loops = {}  # Store event loops by thread ID
+    _gpu_detected = None  # Cache GPU detection result
+
+    def __new__(cls):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:
+                    cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def get_event_loop(self) -> asyncio.AbstractEventLoop:
+        """Get or create event loop for current thread."""
+        thread_id = threading.get_ident()
+
+        if thread_id not in self._event_loops:
+            try:
+                loop = asyncio.get_event_loop()
+                if loop.is_closed():
+                    raise RuntimeError("Event loop is closed")
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+            self._event_loops[thread_id] = loop
+            logger.info("Created new event loop for thread %s", thread_id)
+
+        return self._event_loops[thread_id]
+
+    def detect_gpu_once(self) -> bool:
+        """Detect GPU availability once and cache the result."""
+        if self._gpu_detected is None:
+            self._gpu_detected = detect_nvidia_gpu()
+            gpu_status = "available" if self._gpu_detected else "not available"
+            logger.info("GPU detection completed: NVIDIA GPU %s", gpu_status)
+        return self._gpu_detected
+
+    def get_connection(self, host: str, port: int, db_name: str) -> str:
+        """Get or create a Milvus connection."""
+        conn_key = f"{host}:{port}/{db_name}"
+
+        if conn_key not in self._connections:
+            try:
+                # Check if already connected
+                if connections.has_connection("default"):
+                    connections.remove_connection("default")
+
+                # Connect to Milvus
+                connections.connect(
+                    alias="default",
+                    host=host,
+                    port=port,
+                )
+                logger.info("Connected to Milvus at %s:%s", host, port)
+
+                # Check if database exists, create if not
+                existing_dbs = db.list_database()
+                if db_name not in existing_dbs:
+                    db.create_database(db_name)
+                    logger.info("Created database: %s", db_name)
+
+                # Use the database
+                db.using_database(db_name)
+                logger.info("Using database: %s", db_name)
+                logger.debug(
+                    "Milvus DB switched to: %s, available collections: %s",
+                    db_name,
+                    utility.list_collections(),
+                )
+
+                self._connections[conn_key] = "default"
+
+            except MilvusException as e:
+                logger.error("Failed to connect to Milvus: %s", e)
+                raise
+
+        return self._connections[conn_key]
+
+    def get_vector_store(
+        self,
+        collection_name: str,
+        embedding_model: Embeddings,
+        connection_args: Dict[str, Any],
+    ) -> Milvus:
+        """Get or create a vector store for a collection."""
+        if collection_name not in self._vector_stores:
+            # Ensure event loop exists for this thread
+            self.get_event_loop()
+
+            # Create LangChain Milvus instance with explicit URI format
+            # This ensures LangChain uses the correct host
+            milvus_uri = f"http://{connection_args['host']}:{connection_args['port']}"
+
+            vector_store = Milvus(
+                embedding_function=embedding_model,
+                collection_name=collection_name,
+                connection_args={
+                    "uri": milvus_uri,  # Use URI format instead of host/port
+                    "host": connection_args["host"],
+                    "port": connection_args["port"],
+                },
+                text_field="text",
+                auto_id=False,
+                drop_old=False,
+                consistency_level="Strong",
+            )
+
+            self._vector_stores[collection_name] = vector_store
+            logger.info(
+                "Created new vector store for collection: %s with URI: %s",
+                collection_name,
+                milvus_uri,
+            )
+
+        return self._vector_stores[collection_name]
aiagents4pharma/talk2scholars/tools/pdf/utils/tool_helper.py
@@ -1,25 +1,26 @@
 """
-Helper class for
-paper loading, reranking, and answer formatting.
+Helper class for question and answer tool in PDF processing.
 """
 
 import logging
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict
 
-
-from .
-from .vector_store import Vectorstore
+
+from .get_vectorstore import get_vectorstore
 
 logger = logging.getLogger(__name__)
 
 
 class QAToolHelper:
-    """
+    """
+    Encapsulates helper routines for the PDF Question & Answer tool.
+    Enhanced with automatic GPU/CPU detection and optimization.
+    """
 
     def __init__(self) -> None:
-        self.prebuilt_vector_store: Optional[Vectorstore] = None
        self.config: Any = None
        self.call_id: str = ""
+        self.has_gpu: bool = False  # Track GPU availability
        logger.debug("Initialized QAToolHelper")
 
     def start_call(self, config: Any, call_id: str) -> None:
@@ -47,79 +48,40 @@ class QAToolHelper:
            raise ValueError(msg)
        return text_emb, llm, articles
 
-    def init_vector_store(self, emb_model: Any) ->
-        """
-
-
-
-
-
-        self.prebuilt_vector_store = vs
-        return vs
+    def init_vector_store(self, emb_model: Any) -> Any:
+        """Get the singleton Milvus vector store instance with GPU/CPU optimization."""
+        logger.info(
+            "%s: Getting singleton vector store instance with hardware optimization",
+            self.call_id,
+        )
+        vs = get_vectorstore(embedding_model=emb_model, config=self.config)
 
-
-        self,
-
-        articles: Dict[str, Any],
-        candidates: List[str],
-    ) -> None:
-        """Ensure each candidate paper is loaded into the vector store."""
-        for pid in candidates:
-            if pid not in vs.loaded_papers:
-                pdf_url = articles.get(pid, {}).get("pdf_url")
-                if not pdf_url:
-                    continue
-                try:
-                    vs.add_paper(pid, pdf_url, articles[pid])
-                except (IOError, ValueError) as exc:
-                    logger.warning(
-                        "%s: Error loading paper %s: %s", self.call_id, pid, exc
-                    )
+        # Track GPU availability from vector store
+        self.has_gpu = getattr(vs, "has_gpu", False)
+        hardware_type = "GPU-accelerated" if self.has_gpu else "CPU-only"
 
-
-
-
-
-
-
-
-
-
-                vs, query, self.config, top_k=self.config.top_k_papers
-            )
-            logger.info("%s: Papers after NVIDIA reranking: %s", self.call_id, ranked)
-            return [pid for pid in ranked if pid in candidates]
-        except (ValueError, RuntimeError) as exc:
-            logger.error("%s: NVIDIA reranker failed: %s", self.call_id, exc)
+        logger.info(
+            "%s: Vector store initialized (%s mode)",
+            self.call_id,
+            hardware_type,
+        )
+
+        # Log hardware-specific configuration
+        if hasattr(vs, "index_params"):
+            index_type = vs.index_params.get("index_type", "Unknown")
            logger.info(
-                "%s:
+                "%s: Using %s index type for %s processing",
                self.call_id,
-
+                index_type,
+                hardware_type,
            )
-            return candidates
 
-
-
-
-
-
-
-
-
-
-        answer = result.get("output_text", "No answer generated.")
-        titles: Dict[str, str] = {}
-        for pid in result.get("papers_used", []):
-            if pid in articles:
-                titles[pid] = articles[pid].get("Title", "Unknown paper")
-        if titles:
-            srcs = "\n\nSources:\n" + "\n".join(f"- {t}" for t in titles.values())
-        else:
-            srcs = ""
-        logger.info(
-            "%s: Generated answer using %d chunks from %d papers",
-            self.call_id,
-            len(chunks),
-            len(titles),
-        )
-        return f"{answer}{srcs}"
+        return vs
+
+    def get_hardware_stats(self) -> Dict[str, Any]:
+        """Get current hardware configuration stats for monitoring."""
+        return {
+            "gpu_available": self.has_gpu,
+            "hardware_mode": "GPU-accelerated" if self.has_gpu else "CPU-only",
+            "call_id": self.call_id,
+        }
aiagents4pharma/talk2scholars/tools/pdf/utils/vector_normalization.py (new file)
@@ -0,0 +1,159 @@
+"""
+Vector normalization utilities for GPU COSINE similarity support.
+Since GPU indexes don't support COSINE distance, we normalize vectors
+and use IP (Inner Product) distance instead.
+"""
+
+import logging
+from typing import List, Union
+
+import numpy as np
+from langchain_core.embeddings import Embeddings
+
+logger = logging.getLogger(__name__)
+
+
+def normalize_vector(vector: Union[List[float], np.ndarray]) -> List[float]:
+    """
+    Normalize a single vector to unit length.
+
+    Args:
+        vector: Input vector as list or numpy array
+
+    Returns:
+        Normalized vector as list
+    """
+    vector = np.asarray(vector, dtype=np.float32)
+    norm = np.linalg.norm(vector)
+
+    if norm == 0:
+        logger.warning("Zero vector encountered during normalization")
+        return vector.tolist()
+
+    normalized = vector / norm
+    return normalized.tolist()
+
+
+def normalize_vectors_batch(vectors: List[List[float]]) -> List[List[float]]:
+    """
+    Normalize a batch of vectors to unit length.
+
+    Args:
+        vectors: List of vectors
+
+    Returns:
+        List of normalized vectors
+    """
+    if not vectors:
+        return vectors
+
+    # Convert to numpy array for efficient computation
+    vectors_array = np.asarray(vectors, dtype=np.float32)
+
+    # Calculate norms for each vector
+    norms = np.linalg.norm(vectors_array, axis=1, keepdims=True)
+
+    # Handle zero vectors
+    zero_mask = norms.flatten() == 0
+    if np.any(zero_mask):
+        logger.warning(
+            "Found %d zero vectors during batch normalization", np.sum(zero_mask)
+        )
+        norms[zero_mask] = 1.0  # Avoid division by zero
+
+    # Normalize
+    normalized = vectors_array / norms
+
+    return normalized.tolist()
+
+
+class NormalizingEmbeddings(Embeddings):
+    """
+    Wrapper around an embedding model that automatically normalizes outputs.
+    This is needed for GPU indexes when using COSINE similarity.
+    """
+
+    def __init__(self, embedding_model: Embeddings, normalize_for_gpu: bool = True):
+        """
+        Initialize the normalizing wrapper.
+
+        Args:
+            embedding_model: The underlying embedding model
+            normalize_for_gpu: Whether to normalize embeddings (for GPU compatibility)
+        """
+        self.embedding_model = embedding_model
+        self.normalize_for_gpu = normalize_for_gpu
+
+        if normalize_for_gpu:
+            logger.info(
+                "Embedding model wrapped with normalization for GPU compatibility"
+            )
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed documents and optionally normalize."""
+        embeddings = self.embedding_model.embed_documents(texts)
+
+        if self.normalize_for_gpu:
+            embeddings = normalize_vectors_batch(embeddings)
+            logger.debug("Normalized %d document embeddings for GPU", len(embeddings))
+
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed query and optionally normalize."""
+        embedding = self.embedding_model.embed_query(text)
+
+        if self.normalize_for_gpu:
+            embedding = normalize_vector(embedding)
+            logger.debug("Normalized query embedding for GPU")
+
+        return embedding
+
+    def __getattr__(self, name):
+        """Delegate other attributes to the underlying model."""
+        return getattr(self.embedding_model, name)
+
+
+def should_normalize_vectors(has_gpu: bool, use_cosine: bool) -> bool:
+    """
+    Determine if vectors should be normalized based on hardware and similarity metric.
+
+    Args:
+        has_gpu: Whether GPU is being used
+        use_cosine: Whether COSINE similarity is desired
+
+    Returns:
+        True if vectors should be normalized
+    """
+    needs_normalization = has_gpu and use_cosine
+
+    if needs_normalization:
+        logger.info(
+            "Vector normalization ENABLED: GPU detected with COSINE similarity request"
+        )
+    else:
+        logger.info(
+            "Vector normalization DISABLED: GPU=%s, COSINE=%s", has_gpu, use_cosine
+        )
+
+    return needs_normalization
+
+
+def wrap_embedding_model_if_needed(
+    embedding_model: Embeddings, has_gpu: bool, use_cosine: bool = True
+) -> Embeddings:
+    """
+    Wrap embedding model with normalization if needed for GPU compatibility.
+
+    Args:
+        embedding_model: Original embedding model
+        has_gpu: Whether GPU is being used
+        use_cosine: Whether COSINE similarity is desired
+
+    Returns:
+        Original or wrapped embedding model
+    """
+    if should_normalize_vectors(has_gpu, use_cosine):
+        return NormalizingEmbeddings(embedding_model, normalize_for_gpu=True)
+
+    return embedding_model