PyPI - cognee - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (265) hide show

cognee/tests/integration/retrieval/test_rag_completion_retriever.py ADDED Viewed

@@ -0,0 +1,319 @@
+import os
+from typing import List
+import pytest
+import pathlib
+import pytest_asyncio
+import cognee
+from cognee.low_level import setup
+from cognee.tasks.storage import add_data_points
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.data.processing.document_types import TextDocument
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.modules.retrieval.completion_retriever import CompletionRetriever
+from cognee.infrastructure.engine import DataPoint
+from cognee.modules.data.processing.document_types import Document
+from cognee.modules.engine.models import Entity
+class DocumentChunkWithEntities(DataPoint):
+    # Chunk-shaped DataPoint used in this module as the payload schema when the
+    # empty-graph test creates the "DocumentChunk_text" collection by hand.
+    text: str
+    chunk_size: int
+    chunk_index: int
+    cut_type: str
+    is_part_of: Document
+    # NOTE(review): annotated as List[Entity] but defaults to None; presumably an
+    # optional list is intended — confirm before tightening the annotation.
+    contains: List[Entity] = None
+    # Lists "text" under "index_fields"; presumably this marks the field that
+    # gets vector-indexed — TODO confirm against DataPoint.
+    metadata: dict = {"index_fields": ["text"]}
+@pytest_asyncio.fixture
+async def setup_test_environment_with_chunks_simple():
+    """Set up a clean test environment with simple chunks."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_rag_completion_context_simple")
+    data_directory_path = str(base_dir / ".data_storage/test_rag_completion_context_simple")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    document = TextDocument(
+        name="Steve Rodger's career",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    entities = [chunk1, chunk2, chunk3]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+        from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
+        from cognee.infrastructure.databases.vector.create_vector_engine import (
+            _create_vector_engine,
+        )
+        from cognee.infrastructure.databases.relational.create_relational_engine import (
+            create_relational_engine,
+        )
+        _create_graph_engine.cache_clear()
+        _create_vector_engine.cache_clear()
+        create_relational_engine.cache_clear()
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_with_chunks_complex():
+    """Set up a clean test environment with complex chunks."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_rag_completion_context_complex")
+    data_directory_path = str(base_dir / ".data_storage/test_rag_completion_context_complex")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    document1 = TextDocument(
+        name="Employee List",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    document2 = TextDocument(
+        name="Car List",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk4 = DocumentChunk(
+        text="Range Rover",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk5 = DocumentChunk(
+        text="Hyundai",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk6 = DocumentChunk(
+        text="Chrysler",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    entities = [chunk1, chunk2, chunk3, chunk4, chunk5, chunk6]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+        from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
+        from cognee.infrastructure.databases.vector.create_vector_engine import (
+            _create_vector_engine,
+        )
+        from cognee.infrastructure.databases.relational.create_relational_engine import (
+            create_relational_engine,
+        )
+        _create_graph_engine.cache_clear()
+        _create_vector_engine.cache_clear()
+        create_relational_engine.cache_clear()
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_empty():
+    """Set up a clean test environment without chunks."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(
+        base_dir / ".cognee_system/test_get_rag_completion_context_on_empty_graph"
+    )
+    data_directory_path = str(
+        base_dir / ".data_storage/test_get_rag_completion_context_on_empty_graph"
+    )
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
+    from cognee.infrastructure.databases.vector.create_vector_engine import _create_vector_engine
+    from cognee.infrastructure.databases.relational.create_relational_engine import (
+        create_relational_engine,
+    )
+    _create_graph_engine.cache_clear()
+    _create_vector_engine.cache_clear()
+    create_relational_engine.cache_clear()
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+        from cognee.infrastructure.databases.graph.get_graph_engine import _create_graph_engine
+        from cognee.infrastructure.databases.vector.create_vector_engine import (
+            _create_vector_engine,
+        )
+        from cognee.infrastructure.databases.relational.create_relational_engine import (
+            create_relational_engine,
+        )
+        _create_graph_engine.cache_clear()
+        _create_vector_engine.cache_clear()
+        create_relational_engine.cache_clear()
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_rag_completion_context_simple(setup_test_environment_with_chunks_simple):
+    """Integration test: verify CompletionRetriever can retrieve context (simple)."""
+    retriever = CompletionRetriever()
+    query = "Mike"
+    retrieved_objects = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(
+        query=query, retrieved_objects=retrieved_objects
+    )
+    assert isinstance(context, str), "Context should be a string"
+    assert "Mike Broski" in context, "Failed to get Mike Broski"
+@pytest.mark.asyncio
+async def test_rag_completion_context_multiple_chunks(setup_test_environment_with_chunks_simple):
+    """Integration test: verify CompletionRetriever can retrieve context from multiple chunks."""
+    retriever = CompletionRetriever()
+    query = "Steve"
+    retrieved_objects = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(
+        query=query, retrieved_objects=retrieved_objects
+    )
+    assert isinstance(context, str), "Context should be a string"
+    assert "Steve Rodger" in context, "Failed to get Steve Rodger"
+@pytest.mark.asyncio
+async def test_rag_completion_context_complex(setup_test_environment_with_chunks_complex):
+    """Integration test: verify CompletionRetriever can retrieve context (complex)."""
+    # TODO: top_k doesn't affect the output, it should be fixed.
+    retriever = CompletionRetriever(top_k=20)
+    query = "Christina"
+    retrieved_objects = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(
+        query=query, retrieved_objects=retrieved_objects
+    )
+    assert context[0:15] == "Christina Mayer", "Failed to get Christina Mayer"
+@pytest.mark.asyncio
+async def test_get_rag_completion_context_on_empty_graph(setup_test_environment_empty):
+    """Integration test: verify CompletionRetriever handles empty graph correctly."""
+    retriever = CompletionRetriever()
+    query = "Christina Mayer"
+    with pytest.raises(NoDataError):
+        await retriever.get_retrieved_objects(query)
+    vector_engine = get_vector_engine()
+    await vector_engine.create_collection(
+        "DocumentChunk_text", payload_schema=DocumentChunkWithEntities
+    )
+    retrieved_objects = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(
+        query=query, retrieved_objects=retrieved_objects
+    )
+    assert context == "", "Returned context should be empty on an empty graph"

cognee/tests/integration/retrieval/test_structured_output.py ADDED Viewed

@@ -0,0 +1,258 @@
+import asyncio
+import os
+import pytest
+import pathlib
+import pytest_asyncio
+import cognee
+from pydantic import BaseModel
+from cognee.low_level import setup, DataPoint
+from cognee.tasks.storage import add_data_points
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.data.processing.document_types import TextDocument
+from cognee.modules.engine.models import Entity, EntityType
+from cognee.modules.retrieval.entity_extractors.DummyEntityExtractor import DummyEntityExtractor
+from cognee.modules.retrieval.context_providers.DummyContextProvider import DummyContextProvider
+from cognee.modules.retrieval.graph_completion_cot_retriever import GraphCompletionCotRetriever
+from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
+from cognee.modules.retrieval.graph_completion_context_extension_retriever import (
+    GraphCompletionContextExtensionRetriever,
+)
+from cognee.modules.retrieval.EntityCompletionRetriever import EntityCompletionRetriever
+from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
+from cognee.modules.retrieval.completion_retriever import CompletionRetriever
+class TestAnswer(BaseModel):
+    # Structured response model assigned to ``retriever.response_model`` by the
+    # helpers in this module; both fields are asserted to be non-blank.
+    # NOTE(review): the "Test" prefix may make pytest try to collect this class
+    # and emit a collection warning — confirm, and consider ``__test__ = False``.
+    answer: str
+    explanation: str
+def _assert_string_answer(answer: list[str]):
+    assert isinstance(answer, list), f"Expected str, got {type(answer).__name__}"
+    assert all(isinstance(item, str) and item.strip() for item in answer), "Items should be strings"
+    assert all(item.strip() for item in answer), "Items should not be empty"
+def _assert_structured_answer(answer: list[TestAnswer]):
+    assert isinstance(answer, list), f"Expected list, got {type(answer).__name__}"
+    assert all(isinstance(x, TestAnswer) for x in answer), "Items should be TestAnswer"
+    assert all(x.answer.strip() for x in answer), "Answer text should not be empty"
+    assert all(x.explanation.strip() for x in answer), "Explanation should not be empty"
+async def _test_get_structured_graph_completion_cot():
+    retriever = GraphCompletionCotRetriever()
+    query = "Who works at Figma?"
+    triplets = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_string_answer(string_answer)
+    retriever.response_model = TestAnswer
+    # Test with structured response model
+    triplets = await retriever.get_retrieved_objects(query)
+    structured_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_structured_answer(structured_answer)
+async def _test_get_structured_graph_completion():
+    retriever = GraphCompletionRetriever()
+    query = "Who works at Figma?"
+    triplets = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_string_answer(string_answer)
+    retriever.response_model = TestAnswer
+    # Test with structured response model
+    structured_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_structured_answer(structured_answer)
+async def _test_get_structured_graph_completion_temporal():
+    retriever = TemporalRetriever()
+    query = "When did Steve start working at Figma?"
+    # Test with string response model (default)
+    triplets = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_string_answer(string_answer)
+    retriever.response_model = TestAnswer
+    # Test with structured response model
+    structured_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_structured_answer(structured_answer)
+async def _test_get_structured_graph_completion_rag():
+    retriever = CompletionRetriever()
+    query = "Where does Steve work?"
+    triplets = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_string_answer(string_answer)
+    retriever.response_model = TestAnswer
+    # Test with structured response model
+    structured_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_structured_answer(structured_answer)
+async def _test_get_structured_graph_completion_context_extension():
+    retriever = GraphCompletionContextExtensionRetriever()
+    query = "Who works at Figma?"
+    triplets = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(query=query, retrieved_objects=triplets)
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_string_answer(string_answer)
+    retriever.response_model = TestAnswer
+    # Test with structured response model
+    structured_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=triplets, context=context
+    )
+    _assert_structured_answer(structured_answer)
+async def _test_get_structured_entity_completion():
+    retriever = EntityCompletionRetriever(DummyEntityExtractor(), DummyContextProvider())
+    query = "Who is Albert Einstein?"
+    entities = await retriever.get_retrieved_objects(query)
+    context = await retriever.get_context_from_objects(query=query, retrieved_objects=entities)
+    # Test with string response model (default)
+    string_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=entities, context=context
+    )
+    _assert_string_answer(string_answer)
+    retriever.response_model = TestAnswer
+    # Test with structured response model
+    structured_answer = await retriever.get_completion_from_context(
+        query=query, retrieved_objects=entities, context=context
+    )
+    _assert_structured_answer(structured_answer)
+@pytest_asyncio.fixture
+async def setup_test_environment():
+    """Set up a clean test environment with graph and document data."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_get_structured_completion")
+    data_directory_path = str(base_dir / ".data_storage/test_get_structured_completion")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    class Company(DataPoint):
+        name: str
+    class Person(DataPoint):
+        name: str
+        works_for: Company
+        works_since: int
+    company1 = Company(name="Figma")
+    person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015)
+    entities = [company1, person1]
+    await add_data_points(entities)
+    document = TextDocument(
+        name="Steve Rodger's career",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    entities = [chunk1, chunk2, chunk3]
+    await add_data_points(entities)
+    entity_type = EntityType(name="Person", description="A human individual")
+    entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist")
+    entities = [entity]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_get_structured_completion(setup_test_environment):
+    """Integration test: verify structured output completion for all retrievers."""
+    await _test_get_structured_graph_completion_cot()
+    await _test_get_structured_graph_completion()
+    await _test_get_structured_graph_completion_temporal()
+    await _test_get_structured_graph_completion_rag()
+    await _test_get_structured_graph_completion_context_extension()
+    await _test_get_structured_entity_completion()

cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl

cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl