PyPI - cognee - Versions diffs - 0.5.0.dev1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl - Mend

cognee 0.5.0.dev1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

cognee/tests/unit/eval_framework/corpus_builder_test.py CHANGED Viewed

@@ -2,15 +2,38 @@ import pytest
 from cognee.eval_framework.corpus_builder.corpus_builder_executor import CorpusBuilderExecutor
 from cognee.infrastructure.databases.graph import get_graph_engine
 from unittest.mock import AsyncMock, patch
+from cognee.eval_framework.benchmark_adapters.hotpot_qa_adapter import HotpotQAAdapter
 benchmark_options = ["HotPotQA", "Dummy", "TwoWikiMultiHop"]
+MOCK_HOTPOT_CORPUS = [
+    {
+        "_id": "1",
+        "question": "Next to which country is Germany located?",
+        "answer": "Netherlands",
+        # HotpotQA uses "level"; TwoWikiMultiHop uses "type".
+        "level": "easy",
+        "type": "comparison",
+        "context": [
+            ["Germany", ["Germany is in Europe."]],
+            ["Netherlands", ["The Netherlands borders Germany."]],
+        ],
+        "supporting_facts": [["Netherlands", 0]],
+    }
+]
 @pytest.mark.parametrize("benchmark", benchmark_options)
 def test_corpus_builder_load_corpus(benchmark):
     limit = 2
-    corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
-    raw_corpus, questions = corpus_builder.load_corpus(limit=limit)
+    if benchmark in ("HotPotQA", "TwoWikiMultiHop"):
+        with patch.object(HotpotQAAdapter, "_get_raw_corpus", return_value=MOCK_HOTPOT_CORPUS):
+            corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
+            raw_corpus, questions = corpus_builder.load_corpus(limit=limit)
+    else:
+        corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
+        raw_corpus, questions = corpus_builder.load_corpus(limit=limit)
     assert len(raw_corpus) > 0, f"Corpus builder loads empty corpus for {benchmark}"
     assert len(questions) <= 2, (
         f"Corpus builder loads {len(questions)} for {benchmark} when limit is {limit}"
@@ -22,8 +45,14 @@ def test_corpus_builder_load_corpus(benchmark):
 @patch.object(CorpusBuilderExecutor, "run_cognee", new_callable=AsyncMock)
 async def test_corpus_builder_build_corpus(mock_run_cognee, benchmark):
     limit = 2
-    corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
-    questions = await corpus_builder.build_corpus(limit=limit)
+    if benchmark in ("HotPotQA", "TwoWikiMultiHop"):
+        with patch.object(HotpotQAAdapter, "_get_raw_corpus", return_value=MOCK_HOTPOT_CORPUS):
+            corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
+            questions = await corpus_builder.build_corpus(limit=limit)
+    else:
+        corpus_builder = CorpusBuilderExecutor(benchmark, "Default")
+        questions = await corpus_builder.build_corpus(limit=limit)
     assert len(questions) <= 2, (
         f"Corpus builder loads {len(questions)} for {benchmark} when limit is {limit}"
     )

cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py ADDED Viewed

@@ -0,0 +1,69 @@
+import os
+from unittest.mock import patch
+from cognee.infrastructure.databases.relational.config import RelationalConfig
+class TestRelationalConfig:
+    """Test suite for RelationalConfig DATABASE_CONNECT_ARGS parsing."""
+    def test_database_connect_args_valid_json_dict(self):
+        """Test that DATABASE_CONNECT_ARGS is parsed correctly when it's a valid JSON dict."""
+        with patch.dict(
+            os.environ, {"DATABASE_CONNECT_ARGS": '{"timeout": 60, "sslmode": "require"}'}
+        ):
+            config = RelationalConfig()
+            assert config.database_connect_args == {"timeout": 60, "sslmode": "require"}
+    def test_database_connect_args_empty_string(self):
+        """Test that empty DATABASE_CONNECT_ARGS is handled correctly."""
+        with patch.dict(os.environ, {"DATABASE_CONNECT_ARGS": ""}):
+            config = RelationalConfig()
+            assert config.database_connect_args == ""
+    def test_database_connect_args_not_set(self):
+        """Test that missing DATABASE_CONNECT_ARGS results in None."""
+        with patch.dict(os.environ, {}, clear=True):
+            config = RelationalConfig()
+            assert config.database_connect_args is None
+    def test_database_connect_args_invalid_json(self):
+        """Test that invalid JSON in DATABASE_CONNECT_ARGS results in empty dict."""
+        with patch.dict(os.environ, {"DATABASE_CONNECT_ARGS": '{"timeout": 60'}):  # Invalid JSON
+            config = RelationalConfig()
+            assert config.database_connect_args == {}
+    def test_database_connect_args_non_dict_json(self):
+        """Test that non-dict JSON in DATABASE_CONNECT_ARGS results in empty dict."""
+        with patch.dict(os.environ, {"DATABASE_CONNECT_ARGS": '["list", "instead", "of", "dict"]'}):
+            config = RelationalConfig()
+            assert config.database_connect_args == {}
+    def test_database_connect_args_to_dict(self):
+        """Test that database_connect_args is included in to_dict() output."""
+        with patch.dict(os.environ, {"DATABASE_CONNECT_ARGS": '{"timeout": 60}'}):
+            config = RelationalConfig()
+            config_dict = config.to_dict()
+            assert "database_connect_args" in config_dict
+            assert config_dict["database_connect_args"] == {"timeout": 60}
+    def test_database_connect_args_integer_value(self):
+        """Test that DATABASE_CONNECT_ARGS with integer values is parsed correctly."""
+        with patch.dict(os.environ, {"DATABASE_CONNECT_ARGS": '{"connect_timeout": 10}'}):
+            config = RelationalConfig()
+            assert config.database_connect_args == {"connect_timeout": 10}
+    def test_database_connect_args_mixed_types(self):
+        """Test that DATABASE_CONNECT_ARGS with mixed value types is parsed correctly."""
+        with patch.dict(
+            os.environ,
+            {
+                "DATABASE_CONNECT_ARGS": '{"timeout": 60, "sslmode": "require", "retries": 3, "keepalive": true}'
+            },
+        ):
+            config = RelationalConfig()
+            assert config.database_connect_args == {
+                "timeout": 60,
+                "sslmode": "require",
+                "retries": 3,
+                "keepalive": True,
+            }

cognee/tests/unit/modules/retrieval/chunks_retriever_test.py CHANGED Viewed

@@ -1,201 +1,183 @@
-import os
 import pytest
-import pathlib
-from typing import List
-import cognee
-from cognee.low_level import setup
-from cognee.tasks.storage import add_data_points
-from cognee.infrastructure.databases.vector import get_vector_engine
-from cognee.modules.chunking.models import DocumentChunk
-from cognee.modules.data.processing.document_types import TextDocument
-from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from unittest.mock import AsyncMock, patch, MagicMock
 from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
-from cognee.infrastructure.engine import DataPoint
-from cognee.modules.data.processing.document_types import Document
-from cognee.modules.engine.models import Entity
-class DocumentChunkWithEntities(DataPoint):
-    text: str
-    chunk_size: int
-    chunk_index: int
-    cut_type: str
-    is_part_of: Document
-    contains: List[Entity] = None
-    metadata: dict = {"index_fields": ["text"]}
-class TestChunksRetriever:
-    @pytest.mark.asyncio
-    async def test_chunk_context_simple(self):
-        system_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".cognee_system/test_chunk_context_simple"
-        )
-        cognee.config.system_root_directory(system_directory_path)
-        data_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".data_storage/test_chunk_context_simple"
-        )
-        cognee.config.data_root_directory(data_directory_path)
-        await cognee.prune.prune_data()
-        await cognee.prune.prune_system(metadata=True)
-        await setup()
-        document = TextDocument(
-            name="Steve Rodger's career",
-            raw_data_location="somewhere",
-            external_metadata="",
-            mime_type="text/plain",
-        )
-        chunk1 = DocumentChunk(
-            text="Steve Rodger",
-            chunk_size=2,
-            chunk_index=0,
-            cut_type="sentence_end",
-            is_part_of=document,
-            contains=[],
-        )
-        chunk2 = DocumentChunk(
-            text="Mike Broski",
-            chunk_size=2,
-            chunk_index=1,
-            cut_type="sentence_end",
-            is_part_of=document,
-            contains=[],
-        )
-        chunk3 = DocumentChunk(
-            text="Christina Mayer",
-            chunk_size=2,
-            chunk_index=2,
-            cut_type="sentence_end",
-            is_part_of=document,
-            contains=[],
-        )
-        entities = [chunk1, chunk2, chunk3]
-        await add_data_points(entities)
-        retriever = ChunksRetriever()
-        context = await retriever.get_context("Mike")
-        assert context[0]["text"] == "Mike Broski", "Failed to get Mike Broski"
-    @pytest.mark.asyncio
-    async def test_chunk_context_complex(self):
-        system_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".cognee_system/test_chunk_context_complex"
-        )
-        cognee.config.system_root_directory(system_directory_path)
-        data_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".data_storage/test_chunk_context_complex"
-        )
-        cognee.config.data_root_directory(data_directory_path)
-        await cognee.prune.prune_data()
-        await cognee.prune.prune_system(metadata=True)
-        await setup()
-        document1 = TextDocument(
-            name="Employee List",
-            raw_data_location="somewhere",
-            external_metadata="",
-            mime_type="text/plain",
-        )
-        document2 = TextDocument(
-            name="Car List",
-            raw_data_location="somewhere",
-            external_metadata="",
-            mime_type="text/plain",
-        )
-        chunk1 = DocumentChunk(
-            text="Steve Rodger",
-            chunk_size=2,
-            chunk_index=0,
-            cut_type="sentence_end",
-            is_part_of=document1,
-            contains=[],
-        )
-        chunk2 = DocumentChunk(
-            text="Mike Broski",
-            chunk_size=2,
-            chunk_index=1,
-            cut_type="sentence_end",
-            is_part_of=document1,
-            contains=[],
-        )
-        chunk3 = DocumentChunk(
-            text="Christina Mayer",
-            chunk_size=2,
-            chunk_index=2,
-            cut_type="sentence_end",
-            is_part_of=document1,
-            contains=[],
-        )
-        chunk4 = DocumentChunk(
-            text="Range Rover",
-            chunk_size=2,
-            chunk_index=0,
-            cut_type="sentence_end",
-            is_part_of=document2,
-            contains=[],
-        )
-        chunk5 = DocumentChunk(
-            text="Hyundai",
-            chunk_size=2,
-            chunk_index=1,
-            cut_type="sentence_end",
-            is_part_of=document2,
-            contains=[],
-        )
-        chunk6 = DocumentChunk(
-            text="Chrysler",
-            chunk_size=2,
-            chunk_index=2,
-            cut_type="sentence_end",
-            is_part_of=document2,
-            contains=[],
-        )
-        entities = [chunk1, chunk2, chunk3, chunk4, chunk5, chunk6]
-        await add_data_points(entities)
-        retriever = ChunksRetriever(top_k=20)
-        context = await retriever.get_context("Christina")
-        assert context[0]["text"] == "Christina Mayer", "Failed to get Christina Mayer"
-    @pytest.mark.asyncio
-    async def test_chunk_context_on_empty_graph(self):
-        system_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".cognee_system/test_chunk_context_on_empty_graph"
-        )
-        cognee.config.system_root_directory(system_directory_path)
-        data_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".data_storage/test_chunk_context_on_empty_graph"
-        )
-        cognee.config.data_root_directory(data_directory_path)
-        await cognee.prune.prune_data()
-        await cognee.prune.prune_system(metadata=True)
-        retriever = ChunksRetriever()
-        with pytest.raises(NoDataError):
-            await retriever.get_context("Christina Mayer")
-        vector_engine = get_vector_engine()
-        await vector_engine.create_collection(
-            "DocumentChunk_text", payload_schema=DocumentChunkWithEntities
-        )
-        context = await retriever.get_context("Christina Mayer")
-        assert len(context) == 0, "Found chunks when none should exist"
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.infrastructure.databases.vector.exceptions import CollectionNotFoundError
+@pytest.fixture
+def mock_vector_engine():
+    """Create a mock vector engine."""
+    engine = AsyncMock()
+    engine.search = AsyncMock()
+    return engine
+@pytest.mark.asyncio
+async def test_get_context_success(mock_vector_engine):
+    """Test successful retrieval of chunk context."""
+    mock_result1 = MagicMock()
+    mock_result1.payload = {"text": "Steve Rodger", "chunk_index": 0}
+    mock_result2 = MagicMock()
+    mock_result2.payload = {"text": "Mike Broski", "chunk_index": 1}
+    mock_vector_engine.search.return_value = [mock_result1, mock_result2]
+    retriever = ChunksRetriever(top_k=5)
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        context = await retriever.get_context("test query")
+    assert len(context) == 2
+    assert context[0]["text"] == "Steve Rodger"
+    assert context[1]["text"] == "Mike Broski"
+    mock_vector_engine.search.assert_awaited_once_with("DocumentChunk_text", "test query", limit=5)
+@pytest.mark.asyncio
+async def test_get_context_collection_not_found_error(mock_vector_engine):
+    """Test that CollectionNotFoundError is converted to NoDataError."""
+    mock_vector_engine.search.side_effect = CollectionNotFoundError("Collection not found")
+    retriever = ChunksRetriever()
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        with pytest.raises(NoDataError, match="No data found"):
+            await retriever.get_context("test query")
+@pytest.mark.asyncio
+async def test_get_context_empty_results(mock_vector_engine):
+    """Test that empty list is returned when no chunks are found."""
+    mock_vector_engine.search.return_value = []
+    retriever = ChunksRetriever()
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        context = await retriever.get_context("test query")
+    assert context == []
+@pytest.mark.asyncio
+async def test_get_context_top_k_limit(mock_vector_engine):
+    """Test that top_k parameter limits the number of results."""
+    mock_results = [MagicMock() for _ in range(3)]
+    for i, result in enumerate(mock_results):
+        result.payload = {"text": f"Chunk {i}"}
+    mock_vector_engine.search.return_value = mock_results
+    retriever = ChunksRetriever(top_k=3)
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        context = await retriever.get_context("test query")
+    assert len(context) == 3
+    mock_vector_engine.search.assert_awaited_once_with("DocumentChunk_text", "test query", limit=3)
+@pytest.mark.asyncio
+async def test_get_completion_with_context(mock_vector_engine):
+    """Test get_completion returns provided context."""
+    retriever = ChunksRetriever()
+    provided_context = [{"text": "Steve Rodger"}, {"text": "Mike Broski"}]
+    completion = await retriever.get_completion("test query", context=provided_context)
+    assert completion == provided_context
+@pytest.mark.asyncio
+async def test_get_completion_without_context(mock_vector_engine):
+    """Test get_completion retrieves context when not provided."""
+    mock_result = MagicMock()
+    mock_result.payload = {"text": "Steve Rodger"}
+    mock_vector_engine.search.return_value = [mock_result]
+    retriever = ChunksRetriever()
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        completion = await retriever.get_completion("test query")
+    assert len(completion) == 1
+    assert completion[0]["text"] == "Steve Rodger"
+@pytest.mark.asyncio
+async def test_init_defaults():
+    """Test ChunksRetriever initialization with defaults."""
+    retriever = ChunksRetriever()
+    assert retriever.top_k == 5
+@pytest.mark.asyncio
+async def test_init_custom_top_k():
+    """Test ChunksRetriever initialization with custom top_k."""
+    retriever = ChunksRetriever(top_k=10)
+    assert retriever.top_k == 10
+@pytest.mark.asyncio
+async def test_init_none_top_k():
+    """Test ChunksRetriever initialization with None top_k."""
+    retriever = ChunksRetriever(top_k=None)
+    assert retriever.top_k is None
+@pytest.mark.asyncio
+async def test_get_context_empty_payload(mock_vector_engine):
+    """Test get_context handles empty payload."""
+    mock_result = MagicMock()
+    mock_result.payload = {}
+    mock_vector_engine.search.return_value = [mock_result]
+    retriever = ChunksRetriever()
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        context = await retriever.get_context("test query")
+    assert len(context) == 1
+    assert context[0] == {}
+@pytest.mark.asyncio
+async def test_get_completion_with_session_id(mock_vector_engine):
+    """Test get_completion with session_id parameter."""
+    mock_result = MagicMock()
+    mock_result.payload = {"text": "Steve Rodger"}
+    mock_vector_engine.search.return_value = [mock_result]
+    retriever = ChunksRetriever()
+    with patch(
+        "cognee.modules.retrieval.chunks_retriever.get_vector_engine",
+        return_value=mock_vector_engine,
+    ):
+        completion = await retriever.get_completion("test query", session_id="test_session")
+    assert len(completion) == 1
+    assert completion[0]["text"] == "Steve Rodger"

cognee 0.5.0.dev1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl

cognee 0.5.0.dev1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl