aiagents4pharma 1.27.2__py3-none-any.whl → 1.29.0__py3-none-any.whl
This diff compares the contents of publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- aiagents4pharma/talk2scholars/agents/__init__.py +1 -0
- aiagents4pharma/talk2scholars/agents/main_agent.py +35 -209
- aiagents4pharma/talk2scholars/agents/pdf_agent.py +106 -0
- aiagents4pharma/talk2scholars/agents/s2_agent.py +10 -6
- aiagents4pharma/talk2scholars/agents/zotero_agent.py +12 -6
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/__init__.py +1 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/main_agent/default.yaml +2 -48
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/pdf_agent/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/s2_agent/default.yaml +5 -28
- aiagents4pharma/talk2scholars/configs/agents/talk2scholars/zotero_agent/default.yaml +5 -21
- aiagents4pharma/talk2scholars/configs/config.yaml +3 -0
- aiagents4pharma/talk2scholars/configs/tools/__init__.py +2 -0
- aiagents4pharma/talk2scholars/configs/tools/multi_paper_recommendation/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/tools/question_and_answer/__init__.py +3 -0
- aiagents4pharma/talk2scholars/configs/tools/search/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/tools/single_paper_recommendation/default.yaml +1 -1
- aiagents4pharma/talk2scholars/configs/tools/zotero_read/default.yaml +42 -1
- aiagents4pharma/talk2scholars/configs/tools/zotero_write/__inti__.py +3 -0
- aiagents4pharma/talk2scholars/state/state_talk2scholars.py +1 -0
- aiagents4pharma/talk2scholars/tests/test_main_agent.py +186 -111
- aiagents4pharma/talk2scholars/tests/test_pdf_agent.py +126 -0
- aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py +186 -0
- aiagents4pharma/talk2scholars/tests/test_s2_display.py +74 -0
- aiagents4pharma/talk2scholars/tests/test_s2_multi.py +282 -0
- aiagents4pharma/talk2scholars/tests/test_s2_query.py +78 -0
- aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py +65 -0
- aiagents4pharma/talk2scholars/tests/test_s2_search.py +266 -0
- aiagents4pharma/talk2scholars/tests/test_s2_single.py +274 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_path.py +57 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_read.py +412 -0
- aiagents4pharma/talk2scholars/tests/test_zotero_write.py +626 -0
- aiagents4pharma/talk2scholars/tools/__init__.py +1 -0
- aiagents4pharma/talk2scholars/tools/pdf/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/pdf/question_and_answer.py +170 -0
- aiagents4pharma/talk2scholars/tools/s2/multi_paper_rec.py +50 -34
- aiagents4pharma/talk2scholars/tools/s2/query_results.py +1 -1
- aiagents4pharma/talk2scholars/tools/s2/retrieve_semantic_scholar_paper_id.py +8 -8
- aiagents4pharma/talk2scholars/tools/s2/search.py +36 -23
- aiagents4pharma/talk2scholars/tools/s2/single_paper_rec.py +44 -38
- aiagents4pharma/talk2scholars/tools/zotero/__init__.py +2 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/__init__.py +5 -0
- aiagents4pharma/talk2scholars/tools/zotero/utils/zotero_path.py +63 -0
- aiagents4pharma/talk2scholars/tools/zotero/zotero_read.py +64 -19
- aiagents4pharma/talk2scholars/tools/zotero/zotero_write.py +247 -0
- {aiagents4pharma-1.27.2.dist-info → aiagents4pharma-1.29.0.dist-info}/METADATA +6 -5
- {aiagents4pharma-1.27.2.dist-info → aiagents4pharma-1.29.0.dist-info}/RECORD +49 -33
- aiagents4pharma/talk2scholars/tests/test_call_s2.py +0 -100
- aiagents4pharma/talk2scholars/tests/test_call_zotero.py +0 -94
- aiagents4pharma/talk2scholars/tests/test_s2_tools.py +0 -355
- aiagents4pharma/talk2scholars/tests/test_zotero_tool.py +0 -171
- {aiagents4pharma-1.27.2.dist-info → aiagents4pharma-1.29.0.dist-info}/LICENSE +0 -0
- {aiagents4pharma-1.27.2.dist-info → aiagents4pharma-1.29.0.dist-info}/WHEEL +0 -0
- {aiagents4pharma-1.27.2.dist-info → aiagents4pharma-1.29.0.dist-info}/top_level.txt +0 -0
aiagents4pharma/talk2scholars/tests/test_question_and_answer_tool.py
@@ -0,0 +1,186 @@
"""
Unit tests for question_and_answer tool functionality.
"""

from langchain.docstore.document import Document

from ..tools.pdf import question_and_answer
from ..tools.pdf.question_and_answer import (
    extract_text_from_pdf_data,
    question_and_answer_tool,
    generate_answer,
)


def test_extract_text_from_pdf_data():
    """
    Test that extract_text_from_pdf_data returns text containing 'Hello World'.
    """
    extracted_text = extract_text_from_pdf_data(DUMMY_PDF_BYTES)
    assert "Hello World" in extracted_text


DUMMY_PDF_BYTES = (
    b"%PDF-1.4\n"
    b"%\xe2\xe3\xcf\xd3\n"
    b"1 0 obj\n"
    b"<< /Type /Catalog /Pages 2 0 R >>\n"
    b"endobj\n"
    b"2 0 obj\n"
    b"<< /Type /Pages /Count 1 /Kids [3 0 R] >>\n"
    b"endobj\n"
    b"3 0 obj\n"
    b"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents 4 0 R "
    b"/Resources << /Font << /F1 5 0 R >> >> >>\n"
    b"endobj\n"
    b"4 0 obj\n"
    b"<< /Length 44 >>\n"
    b"stream\nBT\n/F1 24 Tf\n72 712 Td\n(Hello World) Tj\nET\nendstream\n"
    b"endobj\n"
    b"5 0 obj\n"
    b"<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>\n"
    b"endobj\n"
    b"xref\n0 6\n0000000000 65535 f \n0000000010 00000 n \n0000000053 00000 n \n"
    b"0000000100 00000 n \n0000000150 00000 n \n0000000200 00000 n \n"
    b"trailer\n<< /Size 6 /Root 1 0 R >>\nstartxref\n250\n%%EOF\n"
)


def fake_generate_answer(question, pdf_bytes, _llm_model):
    """
    Fake generate_answer function to bypass external dependencies.
    """
    return {
        "answer": "Mock answer",
        "question": question,
        "pdf_bytes_length": len(pdf_bytes),
    }


def test_question_and_answer_tool_success(monkeypatch):
    """
    Test that question_and_answer_tool returns the expected result on success.
    """
    monkeypatch.setattr(
        question_and_answer, "generate_answer", fake_generate_answer
    )
    # Create a valid state with pdf_data containing both pdf_object and pdf_url,
    # and include a dummy llm_model.
    state = {
        "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"},
        "llm_model": object(),  # Provide a dummy LLM model instance.
    }
    question = "What is in the PDF?"
    # Call the underlying function directly via .func to bypass the StructuredTool wrapper.
    result = question_and_answer_tool.func(
        question=question, tool_call_id="test_call_id", state=state
    )
    assert result["answer"] == "Mock answer"
    assert result["question"] == question
    assert result["pdf_bytes_length"] == len(DUMMY_PDF_BYTES)


def test_question_and_answer_tool_no_pdf_data():
    """
    Test that an error is returned if the state lacks the 'pdf_data' key.
    """
    state = {}  # pdf_data key is missing.
    question = "Any question?"
    result = question_and_answer_tool.func(
        question=question, tool_call_id="test_call_id", state=state
    )
    messages = result.update["messages"]
    assert any("No pdf_data found in state." in msg.content for msg in messages)


def test_question_and_answer_tool_no_pdf_object():
    """
    Test that an error is returned if the pdf_object is missing within pdf_data.
    """
    state = {"pdf_data": {"pdf_object": None}}
    question = "Any question?"
    result = question_and_answer_tool.func(
        question=question, tool_call_id="test_call_id", state=state
    )
    messages = result.update["messages"]
    assert any(
        "PDF binary data is missing in the pdf_data from state." in msg.content
        for msg in messages
    )


def test_question_and_answer_tool_no_llm_model():
    """
    Test that an error is returned if the LLM model is missing in the state.
    """
    state = {
        "pdf_data": {"pdf_object": DUMMY_PDF_BYTES, "pdf_url": "http://dummy.url"}
        # Note: llm_model is intentionally omitted.
    }
    question = "What is in the PDF?"
    result = question_and_answer_tool.func(
        question=question, tool_call_id="test_call_id", state=state
    )
    assert result == {"error": "No LLM model found in state."}


def test_generate_answer(monkeypatch):
    """
    Test generate_answer function with controlled monkeypatched dependencies.
    """

    def fake_split_text(_self, _text):
        """Fake split_text method that returns controlled chunks."""
        return ["chunk1", "chunk2"]

    monkeypatch.setattr(
        question_and_answer.CharacterTextSplitter, "split_text", fake_split_text
    )

    def fake_annoy_from_documents(_documents, _embeddings):
        """
        Fake Annoy.from_documents function that returns a fake vector store.
        """
        # pylint: disable=too-few-public-methods, unused-argument
        class FakeVectorStore:
            """Fake vector store for similarity search."""
            def similarity_search(self, _question, k):
                """Return a list with a single dummy Document."""
                return [Document(page_content="dummy content")]
        return FakeVectorStore()

    monkeypatch.setattr(
        question_and_answer.Annoy, "from_documents", fake_annoy_from_documents
    )

    def fake_load_qa_chain(_llm, chain_type):  # chain_type matches the keyword argument
        """
        Fake load_qa_chain function that returns a fake QA chain.
        """
        # pylint: disable=too-few-public-methods, unused-argument
        class FakeChain:
            """Fake QA chain for testing generate_answer."""
            def invoke(self, **kwargs):
                """
                Fake invoke method that returns a mock answer.
                """
                input_data = kwargs.get("input")
                return {
                    "answer": "real mock answer",
                    "question": input_data.get("question"),
                }
        return FakeChain()

    monkeypatch.setattr(question_and_answer, "load_qa_chain", fake_load_qa_chain)
    # Set dummy configuration values so that generate_answer can run.
    question_and_answer.cfg.chunk_size = 1000
    question_and_answer.cfg.chunk_overlap = 0
    question_and_answer.cfg.openai_api_key = "dummy_key"
    question_and_answer.cfg.num_retrievals = 1
    question_and_answer.cfg.qa_chain_type = "dummy-chain"

    question = "What is in the PDF?"
    dummy_llm_model = object()  # A dummy model placeholder.
    answer = generate_answer(question, DUMMY_PDF_BYTES, dummy_llm_model)
    assert answer["answer"] == "real mock answer"
    assert answer["question"] == question
aiagents4pharma/talk2scholars/tests/test_s2_display.py
@@ -0,0 +1,74 @@
"""
Unit tests for S2 tools functionality.
"""

# pylint: disable=redefined-outer-name
import pytest
from langgraph.types import Command
from ..tools.s2.display_results import (
    display_results,
    NoPapersFoundError as raised_error,
)


@pytest.fixture
def initial_state():
    """Provides an empty initial state for tests."""
    return {"papers": {}, "multi_papers": {}}


# Fixed test data for deterministic results
MOCK_SEARCH_RESPONSE = {
    "data": [
        {
            "paperId": "123",
            "title": "Machine Learning Basics",
            "abstract": "An introduction to ML",
            "year": 2023,
            "citationCount": 100,
            "url": "https://example.com/paper1",
            "authors": [{"name": "Test Author"}],
        }
    ]
}

MOCK_STATE_PAPER = {
    "123": {
        "Title": "Machine Learning Basics",
        "Abstract": "An introduction to ML",
        "Year": 2023,
        "Citation Count": 100,
        "URL": "https://example.com/paper1",
    }
}


class TestS2Tools:
    """Unit tests for individual S2 tools"""

    def test_display_results_empty_state(self, initial_state):
        """Verifies display_results tool behavior when state is empty and raises an exception"""
        with pytest.raises(
            raised_error,
            match="No papers found. A search/rec needs to be performed first.",
        ):
            display_results.invoke({"state": initial_state, "tool_call_id": "test123"})

    def test_display_results_shows_papers(self, initial_state):
        """Verifies display_results tool correctly returns papers from state"""
        state = initial_state.copy()
        state["last_displayed_papers"] = "papers"
        state["papers"] = MOCK_STATE_PAPER

        result = display_results.invoke(
            input={"state": state, "tool_call_id": "test123"}
        )

        assert isinstance(result, Command)  # Expect a Command object
        assert isinstance(result.update, dict)  # Ensure update is a dictionary
        assert "messages" in result.update
        assert len(result.update["messages"]) == 1
        assert (
            "1 papers found. Papers are attached as an artifact."
            in result.update["messages"][0].content
        )
aiagents4pharma/talk2scholars/tests/test_s2_multi.py
@@ -0,0 +1,282 @@
"""
Unit tests for S2 tools functionality.
"""

import json
from types import SimpleNamespace
import pytest
import requests
from langgraph.types import Command
from langchain_core.messages import ToolMessage
import hydra
from aiagents4pharma.talk2scholars.tools.s2.multi_paper_rec import (
    get_multi_paper_recommendations,
)

# --- Dummy Hydra Config Setup ---


class DummyHydraContext:
    """dummy context manager for mocking Hydra's initialize and compose functions."""

    def __enter__(self):
        """enter function that returns None."""
        return None

    def __exit__(self, exc_type, exc_val, traceback):
        """exit function that does nothing."""
        return None


# Create a dummy configuration that mimics the expected hydra config.
dummy_config = SimpleNamespace(
    tools=SimpleNamespace(
        multi_paper_recommendation=SimpleNamespace(
            api_endpoint="http://dummy.endpoint/multi",
            headers={"Content-Type": "application/json"},
            api_fields=["paperId", "title", "authors"],
            request_timeout=10,
        )
    )
)

# --- Dummy Response Classes and Functions for requests.post ---


class DummyResponse:
    """A dummy response class for mocking HTTP responses."""

    def __init__(self, json_data, status_code=200):
        """Initialize a DummyResponse with the given JSON data and status code."""
        self._json_data = json_data
        self.status_code = status_code

    def json(self):
        """Return the JSON data from the response."""
        return self._json_data

    def raise_for_status(self):
        """raise an HTTP error for status codes >= 400."""
        if self.status_code >= 400:
            raise requests.HTTPError("HTTP Error")


def test_dummy_response_no_error():
    """Test that raise_for_status does not raise an exception for a successful response."""
    # Create a DummyResponse with a successful status code.
    response = DummyResponse({"data": "success"}, status_code=200)
    # Calling raise_for_status should not raise an exception and should return None.
    assert response.raise_for_status() is None


def test_dummy_response_raise_error():
    """Test that raise_for_status raises an exception for a failing response."""
    # Create a DummyResponse with a failing status code.
    response = DummyResponse({"error": "fail"}, status_code=400)
    # Calling raise_for_status should raise an HTTPError.
    with pytest.raises(requests.HTTPError):
        response.raise_for_status()


def dummy_requests_post_success(url, headers, params, data, timeout):
    """dummy_requests_post_success"""
    # Record call parameters for assertions.
    dummy_requests_post_success.called_url = url
    dummy_requests_post_success.called_headers = headers
    dummy_requests_post_success.called_params = params
    dummy_requests_post_success.called_data = data
    dummy_requests_post_success.called_timeout = timeout

    # Simulate a valid API response with three recommended papers;
    # one paper missing authors should be filtered out.
    dummy_data = {
        "recommendedPapers": [
            {
                "paperId": "paperA",
                "title": "Multi Rec Paper A",
                "authors": ["Author X"],
                "year": 2019,
                "citationCount": 12,
                "url": "http://paperA",
                "externalIds": {"ArXiv": "arxivA"},
            },
            {
                "paperId": "paperB",
                "title": "Multi Rec Paper B",
                "authors": ["Author Y"],
                "year": 2020,
                "citationCount": 18,
                "url": "http://paperB",
                "externalIds": {},
            },
            {
                "paperId": "paperC",
                "title": "Multi Rec Paper C",
                "authors": None,  # This one should be filtered out.
                "year": 2021,
                "citationCount": 25,
                "url": "http://paperC",
                "externalIds": {"ArXiv": "arxivC"},
            },
        ]
    }
    return DummyResponse(dummy_data)


def dummy_requests_post_unexpected(url, headers, params, data, timeout):
    """dummy_requests_post_unexpected"""
    dummy_requests_post_unexpected.called_url = url
    dummy_requests_post_unexpected.called_headers = headers
    dummy_requests_post_unexpected.called_params = params
    dummy_requests_post_unexpected.called_data = data
    dummy_requests_post_unexpected.called_timeout = timeout
    # Simulate a response missing the 'recommendedPapers' key.
    return DummyResponse({"error": "Invalid format"})


def dummy_requests_post_no_recs(url, headers, params, data, timeout):
    """dummy_requests_post_no_recs"""
    dummy_requests_post_no_recs.called_url = url
    dummy_requests_post_no_recs.called_headers = headers
    dummy_requests_post_no_recs.called_params = params
    dummy_requests_post_no_recs.called_data = data
    dummy_requests_post_no_recs.called_timeout = timeout
    # Simulate a response with an empty recommendations list.
    return DummyResponse({"recommendedPapers": []})


def dummy_requests_post_exception(url, headers, params, data, timeout):
    """dummy_requests_post_exception"""
    dummy_requests_post_exception.called_url = url
    dummy_requests_post_exception.called_headers = headers
    dummy_requests_post_exception.called_params = params
    dummy_requests_post_exception.called_data = data
    dummy_requests_post_exception.called_timeout = timeout
    # Simulate a network exception.
    raise requests.exceptions.RequestException("Connection error")


# --- Pytest Fixture to Patch Hydra ---
@pytest.fixture(autouse=True)
def patch_hydra(monkeypatch):
    """Patch Hydra's initialize and compose functions to return dummy objects."""
    # Patch hydra.initialize to return our dummy context manager.
    monkeypatch.setattr(
        hydra, "initialize", lambda version_base, config_path: DummyHydraContext()
    )
    # Patch hydra.compose to return our dummy config.
    monkeypatch.setattr(hydra, "compose", lambda config_name, overrides: dummy_config)


# --- Test Cases ---


def test_multi_paper_rec_success(monkeypatch):
    """
    Test that get_multi_paper_recommendations returns a valid Command object
    when the API response is successful. Also, ensure that recommendations missing
    required fields (like authors) are filtered out.
    """
    monkeypatch.setattr(requests, "post", dummy_requests_post_success)

    tool_call_id = "test_tool_call_id"
    input_data = {
        "paper_ids": ["p1", "p2"],
        "tool_call_id": tool_call_id,
        "limit": 2,
        "year": "2020",
    }
    # Call the tool using .run() with a dictionary input.
    result = get_multi_paper_recommendations.run(input_data)

    # Validate that the result is a Command with the expected update structure.
    assert isinstance(result, Command)
    update = result.update
    assert "multi_papers" in update

    papers = update["multi_papers"]
    # Papers with valid 'title' and 'authors' should be included.
    assert "paperA" in papers
    assert "paperB" in papers
    # Paper "paperC" is missing authors and should be filtered out.
    assert "paperC" not in papers

    # Check that a ToolMessage is included in the messages.
    messages = update.get("messages", [])
    assert len(messages) == 1
    msg = messages[0]
    assert isinstance(msg, ToolMessage)
    assert "Recommendations based on multiple papers were successful" in msg.content

    # Verify that the correct parameters were sent to requests.post.
    called_params = dummy_requests_post_success.called_params
    assert called_params["limit"] == 2  # Should be min(limit, 500)
    assert called_params["fields"] == "paperId,title,authors"
    # The year parameter should be present.
    assert called_params["year"] == "2020"

    # Also check the payload sent in the data.
    sent_payload = json.loads(dummy_requests_post_success.called_data)
    assert sent_payload["positivePaperIds"] == ["p1", "p2"]
    assert sent_payload["negativePaperIds"] == []


def test_multi_paper_rec_unexpected_format(monkeypatch):
    """
    Test that get_multi_paper_recommendations raises a RuntimeError when the API
    response does not include the expected 'recommendedPapers' key.
    """
    monkeypatch.setattr(requests, "post", dummy_requests_post_unexpected)
    tool_call_id = "test_tool_call_id"
    input_data = {
        "paper_ids": ["p1", "p2"],
        "tool_call_id": tool_call_id,
    }
    with pytest.raises(
        RuntimeError,
        match=(
            "Unexpected response from Semantic Scholar API. The results could not be "
            "retrieved due to an unexpected format. "
            "Please modify your search query and try again."
        ),
    ):
        get_multi_paper_recommendations.run(input_data)


def test_multi_paper_rec_no_recommendations(monkeypatch):
    """
    Test that get_multi_paper_recommendations raises a RuntimeError when the API
    returns no recommendations.
    """
    monkeypatch.setattr(requests, "post", dummy_requests_post_no_recs)
    tool_call_id = "test_tool_call_id"
    input_data = {
        "paper_ids": ["p1", "p2"],
        "tool_call_id": tool_call_id,
    }
    with pytest.raises(
        RuntimeError,
        match=(
            "No recommendations were found for your query. Consider refining your search "
            "by using more specific keywords or different terms."
        ),
    ):
        get_multi_paper_recommendations.run(input_data)


def test_multi_paper_rec_requests_exception(monkeypatch):
    """
    Test that get_multi_paper_recommendations raises a RuntimeError when requests.post
    throws an exception.
    """
    monkeypatch.setattr(requests, "post", dummy_requests_post_exception)
    tool_call_id = "test_tool_call_id"
    input_data = {
        "paper_ids": ["p1", "p2"],
        "tool_call_id": tool_call_id,
    }
    with pytest.raises(
        RuntimeError,
        match="Failed to connect to Semantic Scholar API. Please retry the same query.",
    ):
        get_multi_paper_recommendations.run(input_data)
aiagents4pharma/talk2scholars/tests/test_s2_query.py
@@ -0,0 +1,78 @@
"""
Unit tests for S2 tools functionality.
"""

# pylint: disable=redefined-outer-name
from unittest.mock import patch
from unittest.mock import MagicMock
import pytest
from ..tools.s2.query_results import query_results, NoPapersFoundError


@pytest.fixture
def initial_state():
    """Provides an empty initial state for tests."""
    return {"papers": {}, "multi_papers": {}}


# Fixed test data for deterministic results
MOCK_SEARCH_RESPONSE = {
    "data": [
        {
            "paperId": "123",
            "title": "Machine Learning Basics",
            "abstract": "An introduction to ML",
            "year": 2023,
            "citationCount": 100,
            "url": "https://example.com/paper1",
            "authors": [{"name": "Test Author"}],
        }
    ]
}

MOCK_STATE_PAPER = {
    "123": {
        "Title": "Machine Learning Basics",
        "Abstract": "An introduction to ML",
        "Year": 2023,
        "Citation Count": 100,
        "URL": "https://example.com/paper1",
    }
}


class TestS2Tools:
    """Unit tests for individual S2 tools"""

    def test_query_results_empty_state(self, initial_state):
        """Tests query_results tool behavior when no papers are found."""
        with pytest.raises(
            NoPapersFoundError,
            match="No papers found. A search needs to be performed first.",
        ):
            query_results.invoke(
                {"question": "List all papers", "state": initial_state}
            )

    @patch(
        "aiagents4pharma.talk2scholars.tools.s2.query_results.create_pandas_dataframe_agent"
    )
    def test_query_results_with_papers(self, mock_create_agent, initial_state):
        """Tests querying papers when data is available."""
        state = initial_state.copy()
        state["last_displayed_papers"] = "papers"
        state["papers"] = MOCK_STATE_PAPER

        # Mock the dataframe agent instead of the LLM
        mock_agent = MagicMock()
        mock_agent.invoke.return_value = {"output": "Mocked response"}

        mock_create_agent.return_value = (
            mock_agent  # Mock the function returning the agent
        )

        # Ensure that the output of query_results is correctly structured
        result = query_results.invoke({"question": "List all papers", "state": state})

        assert isinstance(result, str)  # Ensure output is a string
        assert result == "Mocked response"  # Validate the expected response
aiagents4pharma/talk2scholars/tests/test_s2_retrieve.py
@@ -0,0 +1,65 @@
"""
Unit tests for S2 tools functionality.
"""

# pylint: disable=redefined-outer-name
from unittest.mock import patch
import pytest
from langgraph.types import Command
from ..tools.s2.retrieve_semantic_scholar_paper_id import (
    retrieve_semantic_scholar_paper_id,
)


# Fixed test data for deterministic results
MOCK_SEARCH_RESPONSE = {
    "data": [
        {
            "paperId": "123",
            "title": "Machine Learning Basics",
            "abstract": "An introduction to ML",
            "year": 2023,
            "citationCount": 100,
            "url": "https://example.com/paper1",
            "authors": [{"name": "Test Author"}],
        }
    ]
}

MOCK_STATE_PAPER = {
    "123": {
        "Title": "Machine Learning Basics",
        "Abstract": "An introduction to ML",
        "Year": 2023,
        "Citation Count": 100,
        "URL": "https://example.com/paper1",
    }
}


class TestS2Tools:
    """Unit tests for individual S2 tools"""

    @patch("requests.get")
    def test_retrieve_semantic_scholar_paper_id(self, mock_get):
        """Tests retrieving a paper ID from Semantic Scholar."""
        mock_get.return_value.json.return_value = MOCK_SEARCH_RESPONSE
        mock_get.return_value.status_code = 200

        result = retrieve_semantic_scholar_paper_id.invoke(
            input={"paper_title": "Machine Learning Basics", "tool_call_id": "test123"}
        )

        assert isinstance(result, Command)
        assert "messages" in result.update
        assert (
            "Paper ID for 'Machine Learning Basics' is: 123"
            in result.update["messages"][0].content
        )

    def test_retrieve_semantic_scholar_paper_id_no_results(self):
        """Test retrieving a paper ID when no results are found."""
        with pytest.raises(ValueError, match="No papers found for query: UnknownPaper"):
            retrieve_semantic_scholar_paper_id.invoke(
                input={"paper_title": "UnknownPaper", "tool_call_id": "test123"}
            )