PyPI - cognee - Versions diffs - 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl - Mend

cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py ADDED Viewed

@@ -0,0 +1,175 @@
+import sys
+import pytest
+from unittest.mock import AsyncMock, MagicMock, patch
+from cognee.tasks.memify.extract_user_sessions import extract_user_sessions
+from cognee.exceptions import CogneeSystemError
+from cognee.modules.users.models import User
+# Get the actual module object (not the function) for patching.
+# The import above binds the name `extract_user_sessions` to the function, so the
+# module is looked up in sys.modules instead; the tests below call patch.object on it
+# to replace its `session_user` and `get_cache_engine` attributes.
+extract_user_sessions_module = sys.modules["cognee.tasks.memify.extract_user_sessions"]
+@pytest.fixture
+def mock_user():
+    """Provide a mock constrained to the ``User`` spec with a fixed test ``id``."""
+    fake_user = MagicMock(spec=User)
+    fake_user.id = "test-user-123"
+    return fake_user
+@pytest.fixture
+def mock_qa_data():
+    """Provide two question/context/answer/time records used as cached session content."""
+    first_entry = {
+        "question": "What is cognee?",
+        "context": "context about cognee",
+        "answer": "Cognee is a knowledge graph solution",
+        "time": "2025-01-01T12:00:00",
+    }
+    second_entry = {
+        "question": "How does it work?",
+        "context": "how it works context",
+        "answer": "It processes data and creates graphs",
+        "time": "2025-01-01T12:05:00",
+    }
+    return [first_entry, second_entry]
+@pytest.mark.asyncio
+async def test_extract_user_sessions_success(mock_user, mock_qa_data):
+    """One session ID yields one text block holding the ID and every cached Q&A pair."""
+    cache_engine = AsyncMock()
+    cache_engine.get_all_qas.return_value = mock_qa_data
+    with (
+        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
+        patch.object(
+            extract_user_sessions_module, "get_cache_engine", return_value=cache_engine
+        ),
+    ):
+        session_user_mock.get.return_value = mock_user
+        extracted = [
+            item async for item in extract_user_sessions([{}], session_ids=["test_session"])
+        ]
+        assert len(extracted) == 1
+        session_text = extracted[0]
+        # Session header first, then each question/answer pair from the fixture.
+        for fragment in (
+            "Session ID: test_session",
+            "Question: What is cognee?",
+            "Answer: Cognee is a knowledge graph solution",
+            "Question: How does it work?",
+            "Answer: It processes data and creates graphs",
+        ):
+            assert fragment in session_text
+@pytest.mark.asyncio
+async def test_extract_user_sessions_multiple_sessions(mock_user, mock_qa_data):
+    """Two session IDs yield two results and trigger one cache lookup each."""
+    cache_engine = AsyncMock()
+    cache_engine.get_all_qas.return_value = mock_qa_data
+    with (
+        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
+        patch.object(
+            extract_user_sessions_module, "get_cache_engine", return_value=cache_engine
+        ),
+    ):
+        session_user_mock.get.return_value = mock_user
+        requested_ids = ["session1", "session2"]
+        extracted = [
+            item async for item in extract_user_sessions([{}], session_ids=requested_ids)
+        ]
+        assert len(extracted) == 2
+        assert cache_engine.get_all_qas.call_count == 2
+@pytest.mark.asyncio
+async def test_extract_user_sessions_no_data(mock_user, mock_qa_data):
+    """Passing ``None`` as the data argument still yields the requested session."""
+    cache_engine = AsyncMock()
+    cache_engine.get_all_qas.return_value = mock_qa_data
+    with (
+        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
+        patch.object(
+            extract_user_sessions_module, "get_cache_engine", return_value=cache_engine
+        ),
+    ):
+        session_user_mock.get.return_value = mock_user
+        extracted = [
+            item async for item in extract_user_sessions(None, session_ids=["test_session"])
+        ]
+        assert len(extracted) == 1
+@pytest.mark.asyncio
+async def test_extract_user_sessions_no_session_ids(mock_user):
+    """With ``session_ids=None`` nothing is yielded and the cache is never queried."""
+    cache_engine = AsyncMock()
+    with (
+        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
+        patch.object(
+            extract_user_sessions_module, "get_cache_engine", return_value=cache_engine
+        ),
+    ):
+        session_user_mock.get.return_value = mock_user
+        extracted = [item async for item in extract_user_sessions([{}], session_ids=None)]
+        assert extracted == []
+        cache_engine.get_all_qas.assert_not_called()
+@pytest.mark.asyncio
+async def test_extract_user_sessions_empty_qa_data(mock_user):
+    """A session whose cache lookup returns an empty list produces no output."""
+    cache_engine = AsyncMock()
+    cache_engine.get_all_qas.return_value = []
+    with (
+        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
+        patch.object(
+            extract_user_sessions_module, "get_cache_engine", return_value=cache_engine
+        ),
+    ):
+        session_user_mock.get.return_value = mock_user
+        extracted = [
+            item async for item in extract_user_sessions([{}], session_ids=["empty_session"])
+        ]
+        assert extracted == []
+@pytest.mark.asyncio
+async def test_extract_user_sessions_cache_error_handling(mock_user, mock_qa_data):
+    """A failing cache lookup for one session must not stop extraction of the others."""
+    # Outcomes of successive get_all_qas calls: data, an exception, then data again.
+    lookup_outcomes = [
+        mock_qa_data,
+        Exception("Cache error"),
+        mock_qa_data,
+    ]
+    cache_engine = AsyncMock()
+    cache_engine.get_all_qas.side_effect = lookup_outcomes
+    with (
+        patch.object(extract_user_sessions_module, "session_user") as session_user_mock,
+        patch.object(
+            extract_user_sessions_module, "get_cache_engine", return_value=cache_engine
+        ),
+    ):
+        session_user_mock.get.return_value = mock_user
+        requested_ids = ["session1", "session2", "session3"]
+        extracted = [
+            item async for item in extract_user_sessions([{}], session_ids=requested_ids)
+        ]
+        # Three sessions requested, one lookup raised, so two results remain.
+        assert len(extracted) == 2

cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os
 import pytest
 import pathlib
 from typing import Optional, Union
-from pydantic import BaseModel
 import cognee
 from cognee.low_level import setup, DataPoint
@@ -11,11 +10,6 @@ from cognee.tasks.storage import add_data_points
 from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
-class TestAnswer(BaseModel):
-    answer: str
-    explanation: str
 class TestGraphCompletionCoTRetriever:
     @pytest.mark.asyncio
     async def test_graph_completion_cot_context_simple(self):
@@ -174,48 +168,3 @@ class TestGraphCompletionCoTRetriever:
         assert all(isinstance(item, str) and item.strip() for item in answer), (
             "Answer must contain only non-empty strings"
         )
-    @pytest.mark.asyncio
-    async def test_get_structured_completion(self):
-        system_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".cognee_system/test_get_structured_completion"
-        )
-        cognee.config.system_root_directory(system_directory_path)
-        data_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".data_storage/test_get_structured_completion"
-        )
-        cognee.config.data_root_directory(data_directory_path)
-        await cognee.prune.prune_data()
-        await cognee.prune.prune_system(metadata=True)
-        await setup()
-        class Company(DataPoint):
-            name: str
-        class Person(DataPoint):
-            name: str
-            works_for: Company
-        company1 = Company(name="Figma")
-        person1 = Person(name="Steve Rodger", works_for=company1)
-        entities = [company1, person1]
-        await add_data_points(entities)
-        retriever = GraphCompletionCotRetriever()
-        # Test with string response model (default)
-        string_answer = await retriever.get_structured_completion("Who works at Figma?")
-        assert isinstance(string_answer, str), f"Expected str, got {type(string_answer).__name__}"
-        assert string_answer.strip(), "Answer should not be empty"
-        # Test with structured response model
-        structured_answer = await retriever.get_structured_completion(
-            "Who works at Figma?", response_model=TestAnswer
-        )
-        assert isinstance(structured_answer, TestAnswer), (
-            f"Expected TestAnswer, got {type(structured_answer).__name__}"
-        )
-        assert structured_answer.answer.strip(), "Answer field should not be empty"
-        assert structured_answer.explanation.strip(), "Explanation field should not be empty"

cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import List
 import pytest
 import pathlib
 import cognee
 from cognee.low_level import setup
 from cognee.tasks.storage import add_data_points
 from cognee.infrastructure.databases.vector import get_vector_engine

cognee/tests/unit/modules/retrieval/structured_output_test.py ADDED Viewed

@@ -0,0 +1,204 @@
+import asyncio
+import pytest
+import cognee
+import pathlib
+import os
+from pydantic import BaseModel
+from cognee.low_level import setup, DataPoint
+from cognee.tasks.storage import add_data_points
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.data.processing.document_types import TextDocument
+from cognee.modules.engine.models import Entity, EntityType
+from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor
+from cognee.modules.retrieval.context_providers.DummyContextProvider import DummyContextProvider
+from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
+from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
+from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
+    GraphCompletionContextExtensionRetriever,
+)
+from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever
+from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
+from cognee.modules.retrieval.completion_retriever import CompletionRetriever
+# Response model passed as `response_model=` to the retrievers exercised in this file.
+# NOTE(review): the `Test` prefix may lead pytest to try collecting this class — confirm.
+class TestAnswer(BaseModel):
+    answer: str  # must be non-blank; verified by _assert_structured_answer
+    explanation: str  # must be non-blank; verified by _assert_structured_answer
+def _assert_string_answer(answer: list[str]):
+    """Assert that ``answer`` is a list whose items are all non-blank strings.
+    Raises AssertionError naming the first violated condition: container type,
+    item type, or blank item.
+    """
+    # The container check reports "list", matching what is actually tested.
+    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
+    # Type check only, so a blank string is reported by the next assertion instead.
+    assert all(isinstance(item, str) for item in answer), "Items should be strings"
+    assert all(item.strip() for item in answer), "Items should not be empty"
+def _assert_structured_answer(answer: list[TestAnswer]):
+    """Assert that ``answer`` is a list of TestAnswer objects with non-blank fields."""
+    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
+    # Three separate passes: every item's type is verified before any field is read.
+    for item in answer:
+        assert isinstance(item, TestAnswer), "Items should be TestAnswer"
+    for item in answer:
+        assert item.answer.strip(), "Answer text should not be empty"
+    for item in answer:
+        assert item.explanation.strip(), "Explanation should not be empty"
+async def _test_get_structured_graph_completion_cot():
+    """Check GraphCompletionCotRetriever with the default and the TestAnswer response model."""
+    cot_retriever = GraphCompletionCotRetriever()
+    # Default response model: a list of non-blank strings is expected.
+    plain_result = await cot_retriever.get_completion("Who works at Figma?")
+    _assert_string_answer(plain_result)
+    # Explicit response model: a list of populated TestAnswer objects is expected.
+    typed_result = await cot_retriever.get_completion(
+        "Who works at Figma?", response_model=TestAnswer
+    )
+    _assert_structured_answer(typed_result)
+async def _test_get_structured_graph_completion():
+    """Check GraphCompletionRetriever with the default and the TestAnswer response model."""
+    graph_retriever = GraphCompletionRetriever()
+    # Default response model: a list of non-blank strings is expected.
+    plain_result = await graph_retriever.get_completion("Who works at Figma?")
+    _assert_string_answer(plain_result)
+    # Explicit response model: a list of populated TestAnswer objects is expected.
+    typed_result = await graph_retriever.get_completion(
+        "Who works at Figma?", response_model=TestAnswer
+    )
+    _assert_structured_answer(typed_result)
+async def _test_get_structured_graph_completion_temporal():
+    """Check TemporalRetriever with the default and the TestAnswer response model."""
+    retriever = TemporalRetriever()
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion("When did Steve start working at Figma?")
+    _assert_string_answer(string_answer)
+    # Test with structured response model; same question as above (stray second "?" removed)
+    structured_answer = await retriever.get_completion(
+        "When did Steve start working at Figma?", response_model=TestAnswer
+    )
+    _assert_structured_answer(structured_answer)
+async def _test_get_structured_graph_completion_rag():
+    """Check CompletionRetriever with the default and the TestAnswer response model."""
+    rag_retriever = CompletionRetriever()
+    # Default response model: a list of non-blank strings is expected.
+    plain_result = await rag_retriever.get_completion("Where does Steve work?")
+    _assert_string_answer(plain_result)
+    # Explicit response model: a list of populated TestAnswer objects is expected.
+    typed_result = await rag_retriever.get_completion(
+        "Where does Steve work?", response_model=TestAnswer
+    )
+    _assert_structured_answer(typed_result)
+async def _test_get_structured_graph_completion_context_extension():
+    """Check GraphCompletionContextExtensionRetriever with default and TestAnswer models."""
+    extension_retriever = GraphCompletionContextExtensionRetriever()
+    # Default response model: a list of non-blank strings is expected.
+    plain_result = await extension_retriever.get_completion("Who works at Figma?")
+    _assert_string_answer(plain_result)
+    # Explicit response model: a list of populated TestAnswer objects is expected.
+    typed_result = await extension_retriever.get_completion(
+        "Who works at Figma?", response_model=TestAnswer
+    )
+    _assert_structured_answer(typed_result)
+async def _test_get_structured_entity_completion():
+    """Check EntityCompletionRetriever with the default and the TestAnswer response model."""
+    # Built from the dummy extractor and dummy context provider imported by this module.
+    extractor = DummyEntityExtractor()
+    context_provider = DummyContextProvider()
+    entity_retriever = EntityCompletionRetriever(extractor, context_provider)
+    # Default response model: a list of non-blank strings is expected.
+    plain_result = await entity_retriever.get_completion("Who is Albert Einstein?")
+    _assert_string_answer(plain_result)
+    # Explicit response model: a list of populated TestAnswer objects is expected.
+    typed_result = await entity_retriever.get_completion(
+        "Who is Albert Einstein?", response_model=TestAnswer
+    )
+    _assert_structured_answer(typed_result)
+class TestStructuredOutputCompletion:
+    """Structured-output checks for the retrievers imported by this module."""
+    @pytest.mark.asyncio
+    async def test_get_structured_completion(self):
+        """Seed data once, then run each retriever's string/structured check in sequence."""
+        # Point cognee's system and data roots at directories next to this test file.
+        system_directory_path = os.path.join(
+            pathlib.Path(__file__).parent, ".cognee_system/test_get_structured_completion"
+        )
+        cognee.config.system_root_directory(system_directory_path)
+        data_directory_path = os.path.join(
+            pathlib.Path(__file__).parent, ".data_storage/test_get_structured_completion"
+        )
+        cognee.config.data_root_directory(data_directory_path)
+        # Prune existing data and system state (including metadata), then run setup().
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+        await setup()
+        # Data points for a company and a person linked to it via `works_for`.
+        class Company(DataPoint):
+            name: str
+        class Person(DataPoint):
+            name: str
+            works_for: Company
+            # Set to 2015 below; presumably the start year the temporal question asks
+            # about — TODO confirm.
+            works_since: int
+        company1 = Company(name="Figma")
+        person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015)
+        entities = [company1, person1]
+        await add_data_points(entities)
+        # One text document with three chunks, each holding a single person's name.
+        document = TextDocument(
+            name="Steve Rodger's career",
+            raw_data_location="somewhere",
+            external_metadata="",
+            mime_type="text/plain",
+        )
+        chunk1 = DocumentChunk(
+            text="Steve Rodger",
+            chunk_size=2,
+            chunk_index=0,
+            cut_type="sentence_end",
+            is_part_of=document,
+            contains=[],
+        )
+        chunk2 = DocumentChunk(
+            text="Mike Broski",
+            chunk_size=2,
+            chunk_index=1,
+            cut_type="sentence_end",
+            is_part_of=document,
+            contains=[],
+        )
+        chunk3 = DocumentChunk(
+            text="Christina Mayer",
+            chunk_size=2,
+            chunk_index=2,
+            cut_type="sentence_end",
+            is_part_of=document,
+            contains=[],
+        )
+        # `entities` is rebound for each batch handed to add_data_points.
+        entities = [chunk1, chunk2, chunk3]
+        await add_data_points(entities)
+        # Entity named in the question asked by _test_get_structured_entity_completion.
+        entity_type = EntityType(name="Person", description="A human individual")
+        entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist")
+        entities = [entity]
+        await add_data_points(entities)
+        # Run every retriever check one after another, after all seeding above.
+        await _test_get_structured_graph_completion_cot()
+        await _test_get_structured_graph_completion()
+        await _test_get_structured_graph_completion_temporal()
+        await _test_get_structured_graph_completion_rag()
+        await _test_get_structured_graph_completion_context_extension()
+        await _test_get_structured_entity_completion()

cognee/tests/unit/modules/retrieval/summaries_retriever_test.py CHANGED Viewed

@@ -13,7 +13,7 @@ from cognee.modules.retrieval.exceptions.exceptions import NoDataError
 from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
-class TextSummariesRetriever:
+class TestSummariesRetriever:
     @pytest.mark.asyncio
     async def test_chunk_context(self):
         system_directory_path = os.path.join(

cognee/tests/unit/modules/retrieval/temporal_retriever_test.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import asyncio
 from types import SimpleNamespace
 import pytest

cognee/tests/unit/modules/users/test_conditional_authentication.py CHANGED Viewed

@@ -107,29 +107,10 @@ class TestConditionalAuthenticationIntegration:
         # REQUIRE_AUTHENTICATION should be a boolean
         assert isinstance(REQUIRE_AUTHENTICATION, bool)
-        # Currently should be False (optional authentication)
-        assert not REQUIRE_AUTHENTICATION
 class TestConditionalAuthenticationEnvironmentVariables:
     """Test environment variable handling."""
-    def test_require_authentication_default_false(self):
-        """Test that REQUIRE_AUTHENTICATION defaults to false when imported with no env vars."""
-        with patch.dict(os.environ, {}, clear=True):
-            # Remove module from cache to force fresh import
-            module_name = "cognee.modules.users.methods.get_authenticated_user"
-            if module_name in sys.modules:
-                del sys.modules[module_name]
-            # Import after patching environment - module will see empty environment
-            from cognee.modules.users.methods.get_authenticated_user import (
-                REQUIRE_AUTHENTICATION,
-            )
-            importlib.invalidate_caches()
-            assert not REQUIRE_AUTHENTICATION
     def test_require_authentication_true(self):
         """Test that REQUIRE_AUTHENTICATION=true is parsed correctly when imported."""
         with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "true"}):
@@ -145,50 +126,6 @@ class TestConditionalAuthenticationEnvironmentVariables:
             assert REQUIRE_AUTHENTICATION
-    def test_require_authentication_false_explicit(self):
-        """Test that REQUIRE_AUTHENTICATION=false is parsed correctly when imported."""
-        with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": "false"}):
-            # Remove module from cache to force fresh import
-            module_name = "cognee.modules.users.methods.get_authenticated_user"
-            if module_name in sys.modules:
-                del sys.modules[module_name]
-            # Import after patching environment - module will see REQUIRE_AUTHENTICATION=false
-            from cognee.modules.users.methods.get_authenticated_user import (
-                REQUIRE_AUTHENTICATION,
-            )
-            assert not REQUIRE_AUTHENTICATION
-    def test_require_authentication_case_insensitive(self):
-        """Test that environment variable parsing is case insensitive when imported."""
-        test_cases = ["TRUE", "True", "tRuE", "FALSE", "False", "fAlSe"]
-        for case in test_cases:
-            with patch.dict(os.environ, {"REQUIRE_AUTHENTICATION": case}):
-                # Remove module from cache to force fresh import
-                module_name = "cognee.modules.users.methods.get_authenticated_user"
-                if module_name in sys.modules:
-                    del sys.modules[module_name]
-                # Import after patching environment
-                from cognee.modules.users.methods.get_authenticated_user import (
-                    REQUIRE_AUTHENTICATION,
-                )
-                expected = case.lower() == "true"
-                assert REQUIRE_AUTHENTICATION == expected, f"Failed for case: {case}"
-    def test_current_require_authentication_value(self):
-        """Test that the current REQUIRE_AUTHENTICATION module value is as expected."""
-        from cognee.modules.users.methods.get_authenticated_user import (
-            REQUIRE_AUTHENTICATION,
-        )
-        # The module-level variable should currently be False (set at import time)
-        assert isinstance(REQUIRE_AUTHENTICATION, bool)
-        assert not REQUIRE_AUTHENTICATION
 class TestConditionalAuthenticationEdgeCases:
     """Test edge cases and error scenarios."""

cognee/tests/unit/processing/chunks/chunk_by_row_test.py ADDED Viewed

@@ -0,0 +1,52 @@
+from itertools import product
+import numpy as np
+import pytest
+from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
+from cognee.tasks.chunks import chunk_by_row
+# A single comma-separated "key: value" string (despite the plural name); it is the
+# only input text the parametrized tests below receive.
+INPUT_TEXTS = "name: John, age: 30, city: New York, country: USA"
+# Chunk-size limits the tests are parametrized over; test_row_chunk_length compares
+# them against tokenizer token counts.
+max_chunk_size_vals = [8, 32]
+@pytest.mark.parametrize(
+    "input_text,max_chunk_size",
+    list(product([INPUT_TEXTS], max_chunk_size_vals)),
+)
+def test_chunk_by_row_isomorphism(input_text, max_chunk_size):
+    """Joining the chunk texts with ", " must reproduce the original input."""
+    chunk_texts = [piece["text"] for piece in chunk_by_row(input_text, max_chunk_size)]
+    reconstructed_text = ", ".join(chunk_texts)
+    assert reconstructed_text == input_text, (
+        f"texts are not identical: {len(input_text) = }, {len(reconstructed_text) = }"
+    )
+@pytest.mark.parametrize(
+    "input_text,max_chunk_size",
+    list(product([INPUT_TEXTS], max_chunk_size_vals)),
+)
+def test_row_chunk_length(input_text, max_chunk_size):
+    """Every chunk's token count must stay within max_chunk_size."""
+    row_chunks = list(chunk_by_row(data=input_text, max_chunk_size=max_chunk_size))
+    engine = get_embedding_engine()
+    # Token count per chunk, measured with the embedding engine's tokenizer.
+    token_counts = np.array(
+        [engine.tokenizer.count_tokens(piece["text"]) for piece in row_chunks]
+    )
+    oversize_mask = token_counts > max_chunk_size
+    larger_chunks = token_counts[oversize_mask]
+    assert np.all(token_counts <= max_chunk_size), (
+        f"{max_chunk_size = }: {larger_chunks} are too large"
+    )
+@pytest.mark.parametrize(
+    "input_text,max_chunk_size",
+    list(product([INPUT_TEXTS], max_chunk_size_vals)),
+)
+def test_chunk_by_row_chunk_numbering(input_text, max_chunk_size):
+    """Chunk indices must be exactly 0, 1, 2, ... in emission order."""
+    row_chunks = chunk_by_row(data=input_text, max_chunk_size=max_chunk_size)
+    chunk_indices = np.array([piece["chunk_index"] for piece in row_chunks])
+    expected_indices = np.arange(len(chunk_indices))
+    assert np.all(chunk_indices == expected_indices), (
+        f"{chunk_indices = } are not monotonically increasing"
+    )

cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

cognee 0.4.1__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl