PyPI - aiagents4pharma - Versions diffs - 0.0.0__py3-none-any.whl - Mend

aiagents4pharma 0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (336) hide show

aiagents4pharma/talk2scholars/tests/test_utils_pdf_rag_pipeline.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""pdf rag pipeline tests."""
+from unittest.mock import MagicMock, patch
+import pytest
+from langchain_core.documents import Document
+from aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline import (
+    retrieve_and_rerank_chunks,
+)
+@pytest.fixture(name="base_config_fixture")
+def _base_config_fixture():
+    """Provides a config-like object for testing."""
+    config = MagicMock()
+    config.get.side_effect = lambda key, default=None: {
+        "initial_retrieval_k": 120,
+        "mmr_diversity": 0.7,
+    }.get(key, default)
+    config.top_k_chunks = 5
+    return config
+@pytest.fixture(name="mock_docs_fixture")
+def _mock_docs_fixture():
+    """Simulates PDF document chunks."""
+    return [
+        Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i % 2}"}) for i in range(10)
+    ]
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks")
+def test_rag_pipeline_gpu_path(mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture):
+    """test RAG pipeline with GPU path."""
+    mock_retrieve.return_value = mock_docs_fixture
+    mock_rerank.return_value = mock_docs_fixture[:5]
+    result = retrieve_and_rerank_chunks(
+        vector_store=MagicMock(),
+        query="Explain AI.",
+        config=base_config_fixture,
+        call_id="gpu_test",
+        has_gpu=True,
+    )
+    assert result == mock_docs_fixture[:5]
+    mock_retrieve.assert_called_once()
+    mock_rerank.assert_called_once()
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks")
+def test_rag_pipeline_cpu_path(mock_retrieve, mock_rerank, base_config_fixture, mock_docs_fixture):
+    """rag pipeline with CPU path."""
+    mock_retrieve.return_value = mock_docs_fixture
+    mock_rerank.return_value = mock_docs_fixture[:5]
+    result = retrieve_and_rerank_chunks(
+        vector_store=MagicMock(),
+        query="Explain quantum physics.",
+        config=base_config_fixture,
+        call_id="cpu_test",
+        has_gpu=False,
+    )
+    assert result == mock_docs_fixture[:5]
+    mock_retrieve.assert_called_once()
+    mock_rerank.assert_called_once()
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.rerank_chunks")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.rag_pipeline.retrieve_relevant_chunks")
+def test_rag_pipeline_empty_results(mock_retrieve, mock_rerank, base_config_fixture):
+    """rag pipeline with no results."""
+    mock_retrieve.return_value = []
+    result = retrieve_and_rerank_chunks(
+        vector_store=MagicMock(),
+        query="No match?",
+        config=base_config_fixture,
+        call_id="empty_test",
+        has_gpu=False,
+    )
+    assert result == []
+    mock_rerank.assert_not_called()

aiagents4pharma/talk2scholars/tests/test_utils_pdf_retrieve_chunks.py ADDED Viewed

@@ -0,0 +1,190 @@
+"""retrieve_chunks for PDF tool tests"""
+from unittest.mock import MagicMock, patch
+import pytest
+from langchain_core.documents import Document
+from aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks import (
+    retrieve_relevant_chunks,
+    retrieve_relevant_chunks_with_scores,
+)
+@pytest.fixture
+def mock_vector_store():
+    """Fixture to simulate a vector store."""
+    return MagicMock()
+@pytest.fixture
+def mock_chunks():
+    """Fixture to simulate PDF chunks."""
+    return [
+        Document(page_content=f"chunk {i}", metadata={"paper_id": f"P{i % 2}"}) for i in range(5)
+    ]
+@pytest.fixture
+def mock_scored_chunks():
+    """Fixture to simulate scored PDF chunks."""
+    return [
+        (Document(page_content=f"chunk {i}", metadata={}), score)
+        for i, score in enumerate([0.9, 0.8, 0.4, 0.95])
+    ]
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_cpu_success(mock_logger, request):
+    """Test retrieve_relevant_chunks with CPU path."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    chunks = request.getfixturevalue("mock_chunks")
+    vector_store.has_gpu = False
+    mock_logger.debug = MagicMock()
+    vector_store.max_marginal_relevance_search.return_value = chunks
+    results = retrieve_relevant_chunks(vector_store, query="AI", top_k=5)
+    assert results == chunks
+    vector_store.max_marginal_relevance_search.assert_called_once()
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_gpu_success(mock_logger, request):
+    """Test retrieve_relevant_chunks with GPU path."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    chunks = request.getfixturevalue("mock_chunks")
+    vector_store.has_gpu = True
+    mock_logger.debug = MagicMock()
+    vector_store.max_marginal_relevance_search.return_value = chunks
+    results = retrieve_relevant_chunks(vector_store, query="AI", top_k=5)
+    assert results == chunks
+    vector_store.max_marginal_relevance_search.assert_called_once()
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_with_filter(mock_logger, request):
+    """Test retrieve_relevant_chunks with paper_id filter."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    chunks = request.getfixturevalue("mock_chunks")
+    vector_store.has_gpu = False
+    mock_logger.debug = MagicMock()
+    vector_store.max_marginal_relevance_search.return_value = chunks
+    results = retrieve_relevant_chunks(vector_store, query="filter test", paper_ids=["P1"], top_k=3)
+    assert results == chunks
+    args, kwargs = vector_store.max_marginal_relevance_search.call_args
+    assert len(args) == 0
+    assert kwargs["filter"] == {"paper_id": ["P1"]}
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_no_vector_store(mock_logger):
+    """Test when vector store is None."""
+    result = retrieve_relevant_chunks(vector_store=None, query="irrelevant")
+    assert result == []
+    mock_logger.error.assert_called_with("Vector store is not initialized")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_with_scores_no_vector_store(mock_logger):
+    """Test retrieve_relevant_chunks_with_scores when vector store is None."""
+    result = retrieve_relevant_chunks_with_scores(vector_store=None, query="none")
+    assert result == []
+    mock_logger.error.assert_called_with("Vector store is not initialized")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_default_search_params(mock_logger, request):
+    """Test default search params used when not defined."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    chunks = request.getfixturevalue("mock_chunks")
+    vector_store.has_gpu = False
+    delattr(vector_store, "search_params")
+    vector_store.max_marginal_relevance_search.return_value = chunks
+    results = retrieve_relevant_chunks(
+        vector_store,
+        query="default search param test",
+        top_k=5,
+    )
+    assert results == chunks
+    mock_logger.debug.assert_any_call("Using default search parameters (no hardware optimization)")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_with_scores_paper_filter(mock_logger, request):
+    """Test retrieve_relevant_chunks_with_scores applies paper_id filter."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    scored_chunks = request.getfixturevalue("mock_scored_chunks")
+    vector_store.similarity_search_with_score.return_value = scored_chunks
+    mock_logger.debug = MagicMock()
+    results = retrieve_relevant_chunks_with_scores(
+        vector_store=vector_store,
+        query="filtered score",
+        paper_ids=["P123"],
+        top_k=5,
+        score_threshold=0.0,
+    )
+    assert isinstance(results, list)
+    assert vector_store.similarity_search_with_score.call_args[1]["filter"] == {
+        "paper_id": ["P123"]
+    }
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_with_scores_gpu_debug(mock_logger, request):
+    """Test GPU debug log and correct return in retrieve_relevant_chunks_with_scores."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    scored_chunks = request.getfixturevalue("mock_scored_chunks")
+    vector_store.has_gpu = True
+    vector_store.similarity_search_with_score.return_value = scored_chunks
+    mock_logger.debug = MagicMock()
+    results = retrieve_relevant_chunks_with_scores(
+        vector_store=vector_store, query="gpu test", top_k=4, score_threshold=0.0
+    )
+    # Should return all scored_chunks since threshold=0.0
+    assert results == scored_chunks
+    mock_logger.debug.assert_called_with("GPU-accelerated similarity search enabled")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_with_scores_cpu_debug(mock_logger, request):
+    """Test CPU debug log and correct return in retrieve_relevant_chunks_with_scores."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    scored_chunks = request.getfixturevalue("mock_scored_chunks")
+    vector_store.has_gpu = False
+    vector_store.similarity_search_with_score.return_value = scored_chunks
+    mock_logger.debug = MagicMock()
+    results = retrieve_relevant_chunks_with_scores(
+        vector_store=vector_store, query="cpu test", top_k=2, score_threshold=0.0
+    )
+    assert results == scored_chunks
+    mock_logger.debug.assert_called_with("Standard CPU similarity search")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.retrieve_chunks.logger")
+def test_retrieve_chunks_with_scores_not_implemented(mock_logger, request):
+    """Test NotImplementedError path when similarity_search_with_score is missing."""
+    vector_store = request.getfixturevalue("mock_vector_store")
+    vector_store.has_gpu = True
+    # Remove the method to trigger NotImplementedError
+    if hasattr(vector_store, "similarity_search_with_score"):
+        delattr(vector_store, "similarity_search_with_score")
+    mock_logger.debug = MagicMock()
+    with pytest.raises(NotImplementedError) as excinfo:
+        retrieve_relevant_chunks_with_scores(
+            vector_store=vector_store, query="fail test", top_k=1, score_threshold=0.0
+        )
+    assert "Vector store does not support similarity_search_with_score" in str(excinfo.value)
+    mock_logger.debug.assert_called_with("GPU-accelerated similarity search enabled")

aiagents4pharma/talk2scholars/tests/test_utils_pdf_singleton_manager.py ADDED Viewed

@@ -0,0 +1,159 @@
+"""
+Tests for singleton_manager: manages vector store connections and event loops.
+"""
+from unittest.mock import MagicMock, patch
+import pytest
+from pymilvus.exceptions import MilvusException
+from aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore import (
+    get_vectorstore,
+)
+from aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager import (
+    VectorstoreSingleton,
+)
+def test_singleton_instance_identity():
+    """Singleton should return the same instance."""
+    a = VectorstoreSingleton()
+    b = VectorstoreSingleton()
+    assert a is b
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.detect_nvidia_gpu")
+def test_detect_gpu_once(mock_detect, monkeypatch):
+    """Ensure GPU detection is cached."""
+    mock_detect.return_value = True
+    singleton = VectorstoreSingleton()
+    # Reset GPU detection cache safely
+    monkeypatch.setattr(VectorstoreSingleton, "_gpu_detected", None, raising=False)
+    result = singleton.detect_gpu_once()
+    assert result is True
+    # Second call should use cached value; detect_nvidia_gpu called only once
+    result2 = singleton.detect_gpu_once()
+    assert result2 is True
+    mock_detect.assert_called_once()
+def test_get_event_loop_reuses_existing():
+    """get_event_loop should return the same loop if it exists."""
+    singleton = VectorstoreSingleton()
+    loop1 = singleton.get_event_loop()
+    loop2 = singleton.get_event_loop()
+    assert loop1 is loop2
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.utility")
+def test_get_connection_creates_connection(_, mock_db, mock_conns):
+    """get_connection should create a new connection if none exists."""
+    singleton = VectorstoreSingleton()
+    mock_conns.has_connection.return_value = True
+    mock_db.list_database.return_value = []
+    conn_key = singleton.get_connection("localhost", 19530, "test_db")
+    assert conn_key == "default"
+    mock_conns.remove_connection.assert_called_once()
+    mock_conns.connect.assert_called_once()
+    mock_db.create_database.assert_called_once_with("test_db")
+    mock_db.using_database.assert_called_once_with("test_db")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.Milvus")
+def test_get_vector_store_creates_if_missing(mock_milvus, monkeypatch):
+    """get_vector_store should create a new vector store if missing."""
+    singleton = VectorstoreSingleton()
+    # Clear caches safely
+    monkeypatch.setattr(VectorstoreSingleton, "_vector_stores", {}, raising=False)
+    monkeypatch.setattr(VectorstoreSingleton, "_event_loops", {}, raising=False)
+    mock_embed = MagicMock()
+    connection_args = {"host": "localhost", "port": 19530}
+    vs = singleton.get_vector_store("collection1", mock_embed, connection_args)
+    assert vs is not None
+    mock_milvus.assert_called_once()
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore.Vectorstore")
+def test_get_vectorstore_factory(mock_vectorstore_cls):
+    """get_vectorstore should reuse or create Vectorstore."""
+    mock_config = MagicMock()
+    mock_config.milvus.collection_name = "demo"
+    mock_config.milvus.embedding_dim = 768
+    mock_embed = MagicMock()
+    result1 = get_vectorstore(mock_embed, mock_config, force_new=True)
+    assert result1 == mock_vectorstore_cls.return_value
+    result2 = get_vectorstore(mock_embed, mock_config)
+    assert result2 == result1
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.get_vectorstore.Vectorstore")
+def test_get_vectorstore_force_new(mock_vectorstore_cls):
+    """get_vectorstore should return a new instance if force_new=True."""
+    mock_vs1 = MagicMock(name="Vectorstore1")
+    mock_vs2 = MagicMock(name="Vectorstore2")
+    mock_vectorstore_cls.side_effect = [mock_vs1, mock_vs2]
+    dummy_config = MagicMock()
+    dummy_config.milvus.collection_name = "my_test_collection"
+    dummy_config.milvus.embedding_dim = 768
+    vs1 = get_vectorstore(mock_vs1, dummy_config)
+    vs2 = get_vectorstore(mock_vs2, dummy_config, force_new=True)
+    assert vs1 is mock_vs1
+    assert vs2 is mock_vs2
+    assert vs1 != vs2
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.connect")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.connections.has_connection")
+@patch("aiagents4pharma.talk2scholars.tools.pdf.utils.singleton_manager.db")
+def test_get_connection_milvus_error(_, mock_has_connection, mock_connect, monkeypatch):
+    """get_connection should raise MilvusException on connection failure."""
+    manager = VectorstoreSingleton()
+    # Reset connections cache safely
+    monkeypatch.setattr(VectorstoreSingleton, "_connections", {}, raising=False)
+    mock_has_connection.return_value = False
+    mock_connect.side_effect = MilvusException("Connection failed")
+    with pytest.raises(MilvusException, match="Connection failed"):
+        manager.get_connection("localhost", 19530, "test_db")
+def test_get_event_loop_creates_new_loop_on_closed(monkeypatch):
+    """Ensure get_event_loop creates a new loop if current one is closed."""
+    manager = VectorstoreSingleton()
+    # Clear event loops safely
+    monkeypatch.setattr(VectorstoreSingleton, "_event_loops", {}, raising=False)
+    mock_loop = MagicMock()
+    mock_loop.is_closed.return_value = True
+    with (
+        patch("asyncio.get_event_loop", return_value=mock_loop),
+        patch("asyncio.new_event_loop") as mock_new_loop,
+        patch("asyncio.set_event_loop") as mock_set_loop,
+    ):
+        new_loop = MagicMock()
+        mock_new_loop.return_value = new_loop
+        result_loop = manager.get_event_loop()
+        mock_new_loop.assert_called_once()
+        mock_set_loop.assert_called_once_with(new_loop)
+        assert result_loop == new_loop

aiagents4pharma/talk2scholars/tests/test_utils_pdf_vector_normalization.py ADDED Viewed

@@ -0,0 +1,121 @@
+"""Unit tests for vector normalization utilities for GPU COSINE support."""
+import logging
+import pytest
+from langchain_core.embeddings import Embeddings
+from aiagents4pharma.talk2scholars.tools.pdf.utils import vector_normalization as vn
+def test_normalize_vector_nonzero():
+    """Test normalizing a non-zero vector."""
+    vec = [3.0, 4.0]
+    result = vn.normalize_vector(vec)
+    expected = [0.6, 0.8]
+    assert pytest.approx(result) == expected
+def test_normalize_vector_zero_logs_warning(caplog):
+    """Test normalizing a zero vector logs a warning."""
+    with caplog.at_level(logging.WARNING):
+        result = vn.normalize_vector([0.0, 0.0])
+        assert result == [0.0, 0.0]
+        assert "Zero vector encountered" in caplog.text
+def test_normalize_vectors_batch_empty():
+    """Test that an empty batch returns unchanged."""
+    result = vn.normalize_vectors_batch([])
+    assert result == []
+def test_normalize_vectors_batch_normal_case():
+    """Test batch normalization of valid vectors with equal dimensions."""
+    vectors = [[3, 4], [6, 8]]
+    result = vn.normalize_vectors_batch(vectors)
+    expected = [
+        [0.6, 0.8],
+        [0.6, 0.8],
+    ]
+    for r, e in zip(result, expected, strict=False):
+        assert pytest.approx(r) == e
+def test_normalize_vectors_batch_with_zero_vector(caplog):
+    """Test that zero vectors are handled and logged."""
+    vectors = [[0.0, 0.0], [1.0, 0.0]]
+    with caplog.at_level(logging.WARNING):
+        result = vn.normalize_vectors_batch(vectors)
+        assert len(result) == 2
+        assert "zero vectors during batch normalization" in caplog.text
+        assert pytest.approx(result[1]) == [1.0, 0.0]
+class DummyEmbedding(Embeddings):
+    """A dummy embedding class for testing normalization wrapper."""
+    def __init__(self):
+        self.test_attr = "test"
+    def embed_documents(self, texts):
+        return [[3.0, 4.0] for _ in texts]
+    def embed_query(self, text):
+        return [3.0, 4.0]
+def test_normalizing_embeddings_embed_documents():
+    """Test that document embeddings are normalized."""
+    model = vn.NormalizingEmbeddings(DummyEmbedding())
+    result = model.embed_documents(["doc1", "doc2"])
+    assert len(result) == 2
+    assert pytest.approx(result[0]) == [0.6, 0.8]
+def test_normalizing_embeddings_embed_query():
+    """Test that query embeddings are normalized."""
+    model = vn.NormalizingEmbeddings(DummyEmbedding())
+    result = model.embed_query("query")
+    assert pytest.approx(result) == [0.6, 0.8]
+def test_normalizing_embeddings_passthrough():
+    """Test attribute delegation to base embedding model."""
+    dummy = DummyEmbedding()
+    model = vn.NormalizingEmbeddings(dummy)
+    assert model.test_attr == "test"
+@pytest.mark.parametrize(
+    "has_gpu,use_cosine,expected_log",
+    [
+        (True, True, "ENABLED"),
+        (False, True, "DISABLED"),
+        (True, False, "DISABLED"),
+        (False, False, "DISABLED"),
+    ],
+)
+def test_should_normalize_vectors_logging(has_gpu, use_cosine, expected_log, caplog):
+    """Test should_normalize_vectors decision logic and logging."""
+    with caplog.at_level(logging.INFO):
+        result = vn.should_normalize_vectors(has_gpu, use_cosine)
+        if has_gpu and use_cosine:
+            assert result is True
+        else:
+            assert result is False
+        assert expected_log in caplog.text
+def test_wrap_embedding_model_if_needed_enabled():
+    """Test that wrapping is applied when needed."""
+    base = DummyEmbedding()
+    wrapped = vn.wrap_embedding_model_if_needed(base, has_gpu=True, use_cosine=True)
+    assert isinstance(wrapped, vn.NormalizingEmbeddings)
+def test_wrap_embedding_model_if_needed_disabled():
+    """Test that original model is returned when normalization not needed."""
+    base = DummyEmbedding()
+    wrapped = vn.wrap_embedding_model_if_needed(base, has_gpu=False, use_cosine=True)
+    assert wrapped is base