PyPI - cognee - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl - Mend

cognee 0.5.1py3-none-any.whl → 0.5.1.dev0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

cognee/tests/integration/retrieval/test_chunks_retriever.py ADDED Viewed

@@ -0,0 +1,252 @@
+import os
+import pytest
+import pathlib
+import pytest_asyncio
+from typing import List
+import cognee
+from cognee.low_level import setup
+from cognee.tasks.storage import add_data_points
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.data.processing.document_types import TextDocument
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
+from cognee.infrastructure.engine import DataPoint
+from cognee.modules.data.processing.document_types import Document
+from cognee.modules.engine.models import Entity
+class DocumentChunkWithEntities(DataPoint):
+    text: str
+    chunk_size: int
+    chunk_index: int
+    cut_type: str
+    is_part_of: Document
+    contains: List[Entity] = None
+    metadata: dict = {"index_fields": ["text"]}
+@pytest_asyncio.fixture
+async def setup_test_environment_with_chunks_simple():
+    """Set up a clean test environment with simple chunks."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_chunks_retriever_context_simple")
+    data_directory_path = str(base_dir / ".data_storage/test_chunks_retriever_context_simple")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    document = TextDocument(
+        name="Steve Rodger's career",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    entities = [chunk1, chunk2, chunk3]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_with_chunks_complex():
+    """Set up a clean test environment with complex chunks."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_chunks_retriever_context_complex")
+    data_directory_path = str(base_dir / ".data_storage/test_chunks_retriever_context_complex")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    document1 = TextDocument(
+        name="Employee List",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    document2 = TextDocument(
+        name="Car List",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk4 = DocumentChunk(
+        text="Range Rover",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk5 = DocumentChunk(
+        text="Hyundai",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk6 = DocumentChunk(
+        text="Chrysler",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    entities = [chunk1, chunk2, chunk3, chunk4, chunk5, chunk6]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_empty():
+    """Set up a clean test environment without chunks."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_chunks_retriever_context_empty")
+    data_directory_path = str(base_dir / ".data_storage/test_chunks_retriever_context_empty")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_chunks_retriever_context_multiple_chunks(setup_test_environment_with_chunks_simple):
+    """Integration test: verify ChunksRetriever can retrieve multiple chunks."""
+    retriever = ChunksRetriever()
+    context = await retriever.get_context("Steve")
+    assert isinstance(context, list), "Context should be a list"
+    assert len(context) > 0, "Context should not be empty"
+    assert any(chunk["text"] == "Steve Rodger" for chunk in context), (
+        "Failed to get Steve Rodger chunk"
+    )
+@pytest.mark.asyncio
+async def test_chunks_retriever_top_k_limit(setup_test_environment_with_chunks_complex):
+    """Integration test: verify ChunksRetriever respects top_k parameter."""
+    retriever = ChunksRetriever(top_k=2)
+    context = await retriever.get_context("Employee")
+    assert isinstance(context, list), "Context should be a list"
+    assert len(context) <= 2, "Should respect top_k limit"
+@pytest.mark.asyncio
+async def test_chunks_retriever_context_complex(setup_test_environment_with_chunks_complex):
+    """Integration test: verify ChunksRetriever can retrieve chunk context (complex)."""
+    retriever = ChunksRetriever(top_k=20)
+    context = await retriever.get_context("Christina")
+    assert context[0]["text"] == "Christina Mayer", "Failed to get Christina Mayer"
+@pytest.mark.asyncio
+async def test_chunks_retriever_context_on_empty_graph(setup_test_environment_empty):
+    """Integration test: verify ChunksRetriever handles empty graph correctly."""
+    retriever = ChunksRetriever()
+    with pytest.raises(NoDataError):
+        await retriever.get_context("Christina Mayer")
+    vector_engine = get_vector_engine()
+    await vector_engine.create_collection(
+        "DocumentChunk_text", payload_schema=DocumentChunkWithEntities
+    )
+    context = await retriever.get_context("Christina Mayer")
+    assert len(context) == 0, "Found chunks when none should exist"

cognee/tests/integration/retrieval/test_graph_completion_retriever.py ADDED Viewed

@@ -0,0 +1,268 @@
+import os
+import pytest
+import pathlib
+import pytest_asyncio
+from typing import Optional, Union
+import cognee
+from cognee.low_level import setup, DataPoint
+from cognee.modules.graph.utils import resolve_edges_to_text
+from cognee.tasks.storage import add_data_points
+from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
+@pytest_asyncio.fixture
+async def setup_test_environment_simple():
+    """Set up a clean test environment with simple graph data."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_graph_completion_context_simple")
+    data_directory_path = str(base_dir / ".data_storage/test_graph_completion_context_simple")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    class Company(DataPoint):
+        name: str
+        description: str
+    class Person(DataPoint):
+        name: str
+        description: str
+        works_for: Company
+    company1 = Company(name="Figma", description="Figma is a company")
+    company2 = Company(name="Canva", description="Canvas is a company")
+    person1 = Person(
+        name="Steve Rodger",
+        description="This is description about Steve Rodger",
+        works_for=company1,
+    )
+    person2 = Person(
+        name="Ike Loma", description="This is description about Ike Loma", works_for=company1
+    )
+    person3 = Person(
+        name="Jason Statham",
+        description="This is description about Jason Statham",
+        works_for=company1,
+    )
+    person4 = Person(
+        name="Mike Broski",
+        description="This is description about Mike Broski",
+        works_for=company2,
+    )
+    person5 = Person(
+        name="Christina Mayer",
+        description="This is description about Christina Mayer",
+        works_for=company2,
+    )
+    entities = [company1, company2, person1, person2, person3, person4, person5]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_complex():
+    """Set up a clean test environment with complex graph data."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_graph_completion_context_complex")
+    data_directory_path = str(base_dir / ".data_storage/test_graph_completion_context_complex")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    class Company(DataPoint):
+        name: str
+        metadata: dict = {"index_fields": ["name"]}
+    class Car(DataPoint):
+        brand: str
+        model: str
+        year: int
+    class Location(DataPoint):
+        country: str
+        city: str
+    class Home(DataPoint):
+        location: Location
+        rooms: int
+        sqm: int
+    class Person(DataPoint):
+        name: str
+        works_for: Company
+        owns: Optional[list[Union[Car, Home]]] = None
+    company1 = Company(name="Figma")
+    company2 = Company(name="Canva")
+    person1 = Person(name="Mike Rodger", works_for=company1)
+    person1.owns = [Car(brand="Toyota", model="Camry", year=2020)]
+    person2 = Person(name="Ike Loma", works_for=company1)
+    person2.owns = [
+        Car(brand="Tesla", model="Model S", year=2021),
+        Home(location=Location(country="USA", city="New York"), sqm=80, rooms=4),
+    ]
+    person3 = Person(name="Jason Statham", works_for=company1)
+    person4 = Person(name="Mike Broski", works_for=company2)
+    person4.owns = [Car(brand="Ford", model="Mustang", year=1978)]
+    person5 = Person(name="Christina Mayer", works_for=company2)
+    person5.owns = [Car(brand="Honda", model="Civic", year=2023)]
+    entities = [company1, company2, person1, person2, person3, person4, person5]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_empty():
+    """Set up a clean test environment without graph data."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(
+        base_dir / ".cognee_system/test_get_graph_completion_context_on_empty_graph"
+    )
+    data_directory_path = str(
+        base_dir / ".data_storage/test_get_graph_completion_context_on_empty_graph"
+    )
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_graph_completion_context_simple(setup_test_environment_simple):
+    """Integration test: verify GraphCompletionRetriever can retrieve context (simple)."""
+    retriever = GraphCompletionRetriever()
+    context = await resolve_edges_to_text(await retriever.get_context("Who works at Canva?"))
+    # Ensure the top-level sections are present
+    assert "Nodes:" in context, "Missing 'Nodes:' section in context"
+    assert "Connections:" in context, "Missing 'Connections:' section in context"
+    # --- Nodes headers ---
+    assert "Node: Steve Rodger" in context, "Missing node header for Steve Rodger"
+    assert "Node: Figma" in context, "Missing node header for Figma"
+    assert "Node: Ike Loma" in context, "Missing node header for Ike Loma"
+    assert "Node: Jason Statham" in context, "Missing node header for Jason Statham"
+    assert "Node: Mike Broski" in context, "Missing node header for Mike Broski"
+    assert "Node: Canva" in context, "Missing node header for Canva"
+    assert "Node: Christina Mayer" in context, "Missing node header for Christina Mayer"
+    # --- Node contents ---
+    assert (
+        "__node_content_start__\nThis is description about Steve Rodger\n__node_content_end__"
+        in context
+    ), "Description block for Steve Rodger altered"
+    assert "__node_content_start__\nFigma is a company\n__node_content_end__" in context, (
+        "Description block for Figma altered"
+    )
+    assert (
+        "__node_content_start__\nThis is description about Ike Loma\n__node_content_end__"
+        in context
+    ), "Description block for Ike Loma altered"
+    assert (
+        "__node_content_start__\nThis is description about Jason Statham\n__node_content_end__"
+        in context
+    ), "Description block for Jason Statham altered"
+    assert (
+        "__node_content_start__\nThis is description about Mike Broski\n__node_content_end__"
+        in context
+    ), "Description block for Mike Broski altered"
+    assert "__node_content_start__\nCanvas is a company\n__node_content_end__" in context, (
+        "Description block for Canva altered"
+    )
+    assert (
+        "__node_content_start__\nThis is description about Christina Mayer\n__node_content_end__"
+        in context
+    ), "Description block for Christina Mayer altered"
+    # --- Connections ---
+    assert "Steve Rodger --[works_for]--> Figma" in context, (
+        "Connection Steve Rodger→Figma missing or changed"
+    )
+    assert "Ike Loma --[works_for]--> Figma" in context, (
+        "Connection Ike Loma→Figma missing or changed"
+    )
+    assert "Jason Statham --[works_for]--> Figma" in context, (
+        "Connection Jason Statham→Figma missing or changed"
+    )
+    assert "Mike Broski --[works_for]--> Canva" in context, (
+        "Connection Mike Broski→Canva missing or changed"
+    )
+    assert "Christina Mayer --[works_for]--> Canva" in context, (
+        "Connection Christina Mayer→Canva missing or changed"
+    )
+@pytest.mark.asyncio
+async def test_graph_completion_context_complex(setup_test_environment_complex):
+    """Integration test: verify GraphCompletionRetriever can retrieve context (complex)."""
+    retriever = GraphCompletionRetriever(top_k=20)
+    context = await resolve_edges_to_text(await retriever.get_context("Who works at Figma?"))
+    assert "Mike Rodger --[works_for]--> Figma" in context, "Failed to get Mike Rodger"
+    assert "Ike Loma --[works_for]--> Figma" in context, "Failed to get Ike Loma"
+    assert "Jason Statham --[works_for]--> Figma" in context, "Failed to get Jason Statham"
+@pytest.mark.asyncio
+async def test_get_graph_completion_context_on_empty_graph(setup_test_environment_empty):
+    """Integration test: verify GraphCompletionRetriever handles empty graph correctly."""
+    retriever = GraphCompletionRetriever()
+    context = await retriever.get_context("Who works at Figma?")
+    assert context == [], "Context should be empty on an empty graph"
+@pytest.mark.asyncio
+async def test_graph_completion_get_triplets_empty(setup_test_environment_empty):
+    """Integration test: verify GraphCompletionRetriever get_triplets handles empty graph."""
+    retriever = GraphCompletionRetriever()
+    triplets = await retriever.get_triplets("Who works at Figma?")
+    assert isinstance(triplets, list), "Triplets should be a list"
+    assert len(triplets) == 0, "Should return empty list on empty graph"

cognee 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl

cognee 0.5.1py3-none-any.whl → 0.5.1.dev0py3-none-any.whl