PyPI - cognee - Versions diffs - 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl - Mend

cognee 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

cognee/tests/{unit/modules/retrieval/structured_output_test.py → integration/retrieval/test_structured_output.py} RENAMED Viewed

@@ -1,9 +1,9 @@
 import asyncio
+import os
 import pytest
-import cognee
 import pathlib
-import os
+import pytest_asyncio
+import cognee
 from pydantic import BaseModel
 from cognee.low_level import setup, DataPoint
@@ -125,80 +125,90 @@ async def _test_get_structured_entity_completion():
     _assert_structured_answer(structured_answer)
-class TestStructuredOutputCompletion:
-    @pytest.mark.asyncio
-    async def test_get_structured_completion(self):
-        system_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".cognee_system/test_get_structured_completion"
-        )
-        cognee.config.system_root_directory(system_directory_path)
-        data_directory_path = os.path.join(
-            pathlib.Path(__file__).parent, ".data_storage/test_get_structured_completion"
-        )
-        cognee.config.data_root_directory(data_directory_path)
+@pytest_asyncio.fixture
+async def setup_test_environment():
+    """Set up a clean test environment with graph and document data."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_get_structured_completion")
+    data_directory_path = str(base_dir / ".data_storage/test_get_structured_completion")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    class Company(DataPoint):
+        name: str
+    class Person(DataPoint):
+        name: str
+        works_for: Company
+        works_since: int
+    company1 = Company(name="Figma")
+    person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015)
+    entities = [company1, person1]
+    await add_data_points(entities)
+    document = TextDocument(
+        name="Steve Rodger's career",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document,
+        contains=[],
+    )
+    entities = [chunk1, chunk2, chunk3]
+    await add_data_points(entities)
+    entity_type = EntityType(name="Person", description="A human individual")
+    entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist")
+    entities = [entity]
+    await add_data_points(entities)
+    yield
+    try:
         await cognee.prune.prune_data()
         await cognee.prune.prune_system(metadata=True)
-        await setup()
-        class Company(DataPoint):
-            name: str
-        class Person(DataPoint):
-            name: str
-            works_for: Company
-            works_since: int
-        company1 = Company(name="Figma")
-        person1 = Person(name="Steve Rodger", works_for=company1, works_since=2015)
-        entities = [company1, person1]
-        await add_data_points(entities)
-        document = TextDocument(
-            name="Steve Rodger's career",
-            raw_data_location="somewhere",
-            external_metadata="",
-            mime_type="text/plain",
-        )
-        chunk1 = DocumentChunk(
-            text="Steve Rodger",
-            chunk_size=2,
-            chunk_index=0,
-            cut_type="sentence_end",
-            is_part_of=document,
-            contains=[],
-        )
-        chunk2 = DocumentChunk(
-            text="Mike Broski",
-            chunk_size=2,
-            chunk_index=1,
-            cut_type="sentence_end",
-            is_part_of=document,
-            contains=[],
-        )
-        chunk3 = DocumentChunk(
-            text="Christina Mayer",
-            chunk_size=2,
-            chunk_index=2,
-            cut_type="sentence_end",
-            is_part_of=document,
-            contains=[],
-        )
-        entities = [chunk1, chunk2, chunk3]
-        await add_data_points(entities)
-        entity_type = EntityType(name="Person", description="A human individual")
-        entity = Entity(name="Albert Einstein", is_a=entity_type, description="A famous physicist")
-        entities = [entity]
-        await add_data_points(entities)
-        await _test_get_structured_graph_completion_cot()
-        await _test_get_structured_graph_completion()
-        await _test_get_structured_graph_completion_temporal()
-        await _test_get_structured_graph_completion_rag()
-        await _test_get_structured_graph_completion_context_extension()
-        await _test_get_structured_entity_completion()
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_get_structured_completion(setup_test_environment):
+    """Integration test: verify structured output completion for all retrievers."""
+    await _test_get_structured_graph_completion_cot()
+    await _test_get_structured_graph_completion()
+    await _test_get_structured_graph_completion_temporal()
+    await _test_get_structured_graph_completion_rag()
+    await _test_get_structured_graph_completion_context_extension()
+    await _test_get_structured_entity_completion()

cognee/tests/integration/retrieval/test_summaries_retriever.py ADDED Viewed

@@ -0,0 +1,184 @@
+import os
+import pytest
+import pathlib
+import pytest_asyncio
+import cognee
+from cognee.low_level import setup
+from cognee.tasks.storage import add_data_points
+from cognee.infrastructure.databases.vector import get_vector_engine
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.tasks.summarization.models import TextSummary
+from cognee.modules.data.processing.document_types import TextDocument
+from cognee.modules.retrieval.exceptions.exceptions import NoDataError
+from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
+@pytest_asyncio.fixture
+async def setup_test_environment_with_summaries():
+    """Set up a clean test environment with summaries."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_summaries_retriever_context")
+    data_directory_path = str(base_dir / ".data_storage/test_summaries_retriever_context")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    document1 = TextDocument(
+        name="Employee List",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    document2 = TextDocument(
+        name="Car List",
+        raw_data_location="somewhere",
+        external_metadata="",
+        mime_type="text/plain",
+    )
+    chunk1 = DocumentChunk(
+        text="Steve Rodger",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk1_summary = TextSummary(
+        text="S.R.",
+        made_from=chunk1,
+    )
+    chunk2 = DocumentChunk(
+        text="Mike Broski",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk2_summary = TextSummary(
+        text="M.B.",
+        made_from=chunk2,
+    )
+    chunk3 = DocumentChunk(
+        text="Christina Mayer",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document1,
+        contains=[],
+    )
+    chunk3_summary = TextSummary(
+        text="C.M.",
+        made_from=chunk3,
+    )
+    chunk4 = DocumentChunk(
+        text="Range Rover",
+        chunk_size=2,
+        chunk_index=0,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk4_summary = TextSummary(
+        text="R.R.",
+        made_from=chunk4,
+    )
+    chunk5 = DocumentChunk(
+        text="Hyundai",
+        chunk_size=2,
+        chunk_index=1,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk5_summary = TextSummary(
+        text="H.Y.",
+        made_from=chunk5,
+    )
+    chunk6 = DocumentChunk(
+        text="Chrysler",
+        chunk_size=2,
+        chunk_index=2,
+        cut_type="sentence_end",
+        is_part_of=document2,
+        contains=[],
+    )
+    chunk6_summary = TextSummary(
+        text="C.H.",
+        made_from=chunk6,
+    )
+    entities = [
+        chunk1_summary,
+        chunk2_summary,
+        chunk3_summary,
+        chunk4_summary,
+        chunk5_summary,
+        chunk6_summary,
+    ]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_empty():
+    """Set up a clean test environment without summaries."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_summaries_retriever_context_empty")
+    data_directory_path = str(base_dir / ".data_storage/test_summaries_retriever_context_empty")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_summaries_retriever_context(setup_test_environment_with_summaries):
+    """Integration test: verify SummariesRetriever can retrieve summary context."""
+    retriever = SummariesRetriever(top_k=20)
+    context = await retriever.get_context("Christina")
+    assert isinstance(context, list), "Context should be a list"
+    assert len(context) > 0, "Context should not be empty"
+    assert context[0]["text"] == "C.M.", "Failed to get Christina Mayer"
+@pytest.mark.asyncio
+async def test_summaries_retriever_context_on_empty_graph(setup_test_environment_empty):
+    """Integration test: verify SummariesRetriever handles empty graph correctly."""
+    retriever = SummariesRetriever()
+    with pytest.raises(NoDataError):
+        await retriever.get_context("Christina Mayer")
+    vector_engine = get_vector_engine()
+    await vector_engine.create_collection("TextSummary_text", payload_schema=TextSummary)
+    context = await retriever.get_context("Christina Mayer")
+    assert context == [], "Returned context should be empty on an empty graph"

cognee/tests/integration/retrieval/test_temporal_retriever.py ADDED Viewed

@@ -0,0 +1,306 @@
+import os
+import pytest
+import pathlib
+import pytest_asyncio
+import cognee
+from cognee.low_level import setup, DataPoint
+from cognee.tasks.storage import add_data_points
+from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
+from cognee.modules.engine.models.Event import Event
+from cognee.modules.engine.models.Timestamp import Timestamp
+from cognee.modules.engine.models.Interval import Interval
+@pytest_asyncio.fixture
+async def setup_test_environment_with_events():
+    """Set up a clean test environment with temporal events."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_temporal_retriever_with_events")
+    data_directory_path = str(base_dir / ".data_storage/test_temporal_retriever_with_events")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    # Create timestamps for events
+    timestamp1 = Timestamp(
+        time_at=1609459200,  # 2021-01-01 00:00:00
+        year=2021,
+        month=1,
+        day=1,
+        hour=0,
+        minute=0,
+        second=0,
+        timestamp_str="2021-01-01T00:00:00",
+    )
+    timestamp2 = Timestamp(
+        time_at=1612137600,  # 2021-02-01 00:00:00
+        year=2021,
+        month=2,
+        day=1,
+        hour=0,
+        minute=0,
+        second=0,
+        timestamp_str="2021-02-01T00:00:00",
+    )
+    timestamp3 = Timestamp(
+        time_at=1614556800,  # 2021-03-01 00:00:00
+        year=2021,
+        month=3,
+        day=1,
+        hour=0,
+        minute=0,
+        second=0,
+        timestamp_str="2021-03-01T00:00:00",
+    )
+    timestamp4 = Timestamp(
+        time_at=1625097600,  # 2021-07-01 00:00:00
+        year=2021,
+        month=7,
+        day=1,
+        hour=0,
+        minute=0,
+        second=0,
+        timestamp_str="2021-07-01T00:00:00",
+    )
+    timestamp5 = Timestamp(
+        time_at=1633046400,  # 2021-10-01 00:00:00
+        year=2021,
+        month=10,
+        day=1,
+        hour=0,
+        minute=0,
+        second=0,
+        timestamp_str="2021-10-01T00:00:00",
+    )
+    # Create interval for event spanning multiple timestamps
+    interval1 = Interval(time_from=timestamp2, time_to=timestamp3)
+    # Create events with timestamps
+    event1 = Event(
+        name="Project Alpha Launch",
+        description="Launched Project Alpha at the beginning of 2021",
+        at=timestamp1,
+        location="San Francisco",
+    )
+    event2 = Event(
+        name="Team Meeting",
+        description="Monthly team meeting discussing Q1 goals",
+        during=interval1,
+        location="New York",
+    )
+    event3 = Event(
+        name="Product Release",
+        description="Released new product features in July",
+        at=timestamp4,
+        location="Remote",
+    )
+    event4 = Event(
+        name="Company Retreat",
+        description="Annual company retreat in October",
+        at=timestamp5,
+        location="Lake Tahoe",
+    )
+    entities = [event1, event2, event3, event4]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_with_graph_data():
+    """Set up a clean test environment with graph data (for fallback to triplets)."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_temporal_retriever_with_graph")
+    data_directory_path = str(base_dir / ".data_storage/test_temporal_retriever_with_graph")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    class Company(DataPoint):
+        name: str
+        description: str
+    class Person(DataPoint):
+        name: str
+        description: str
+        works_for: Company
+    company1 = Company(name="Figma", description="Figma is a company")
+    person1 = Person(
+        name="Steve Rodger",
+        description="This is description about Steve Rodger",
+        works_for=company1,
+    )
+    entities = [company1, person1]
+    await add_data_points(entities)
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest_asyncio.fixture
+async def setup_test_environment_empty():
+    """Set up a clean test environment without data."""
+    base_dir = pathlib.Path(__file__).parent.parent.parent.parent
+    system_directory_path = str(base_dir / ".cognee_system/test_temporal_retriever_empty")
+    data_directory_path = str(base_dir / ".data_storage/test_temporal_retriever_empty")
+    cognee.config.system_root_directory(system_directory_path)
+    cognee.config.data_root_directory(data_directory_path)
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+    yield
+    try:
+        await cognee.prune.prune_data()
+        await cognee.prune.prune_system(metadata=True)
+    except Exception:
+        pass
+@pytest.mark.asyncio
+async def test_temporal_retriever_context_with_time_range(setup_test_environment_with_events):
+    """Integration test: verify TemporalRetriever can retrieve events within time range."""
+    retriever = TemporalRetriever(top_k=5)
+    context = await retriever.get_context("What happened in January 2021?")
+    assert isinstance(context, str), "Context should be a string"
+    assert len(context) > 0, "Context should not be empty"
+    assert "Project Alpha" in context or "Launch" in context, (
+        "Should retrieve Project Alpha Launch event from January 2021"
+    )
+@pytest.mark.asyncio
+async def test_temporal_retriever_context_with_single_time(setup_test_environment_with_events):
+    """Integration test: verify TemporalRetriever can retrieve events at specific time."""
+    retriever = TemporalRetriever(top_k=5)
+    context = await retriever.get_context("What happened in July 2021?")
+    assert isinstance(context, str), "Context should be a string"
+    assert len(context) > 0, "Context should not be empty"
+    assert "Product Release" in context or "July" in context, (
+        "Should retrieve Product Release event from July 2021"
+    )
+@pytest.mark.asyncio
+async def test_temporal_retriever_context_fallback_to_triplets(
+    setup_test_environment_with_graph_data,
+):
+    """Integration test: verify TemporalRetriever falls back to triplets when no time extracted."""
+    retriever = TemporalRetriever(top_k=5)
+    context = await retriever.get_context("Who works at Figma?")
+    assert isinstance(context, str), "Context should be a string"
+    assert len(context) > 0, "Context should not be empty"
+    assert "Steve" in context or "Figma" in context, (
+        "Should retrieve graph data via triplet search fallback"
+    )
+@pytest.mark.asyncio
+async def test_temporal_retriever_context_empty_graph(setup_test_environment_empty):
+    """Integration test: verify TemporalRetriever handles empty graph correctly."""
+    retriever = TemporalRetriever()
+    context = await retriever.get_context("What happened?")
+    assert isinstance(context, str), "Context should be a string"
+    assert len(context) >= 0, "Context should be a string (possibly empty)"
+@pytest.mark.asyncio
+async def test_temporal_retriever_get_completion(setup_test_environment_with_events):
+    """Integration test: verify TemporalRetriever can generate completions."""
+    retriever = TemporalRetriever()
+    completion = await retriever.get_completion("What happened in January 2021?")
+    assert isinstance(completion, list), "Completion should be a list"
+    assert len(completion) > 0, "Completion should not be empty"
+    assert all(isinstance(item, str) and item.strip() for item in completion), (
+        "Completion items should be non-empty strings"
+    )
+@pytest.mark.asyncio
+async def test_temporal_retriever_get_completion_fallback(setup_test_environment_with_graph_data):
+    """Integration test: verify TemporalRetriever get_completion works with triplet fallback."""
+    retriever = TemporalRetriever()
+    completion = await retriever.get_completion("Who works at Figma?")
+    assert isinstance(completion, list), "Completion should be a list"
+    assert len(completion) > 0, "Completion should not be empty"
+    assert all(isinstance(item, str) and item.strip() for item in completion), (
+        "Completion items should be non-empty strings"
+    )
+@pytest.mark.asyncio
+async def test_temporal_retriever_top_k_limit(setup_test_environment_with_events):
+    """Integration test: verify TemporalRetriever respects top_k parameter."""
+    retriever = TemporalRetriever(top_k=2)
+    context = await retriever.get_context("What happened in 2021?")
+    assert isinstance(context, str), "Context should be a string"
+    separator_count = context.count("#####################")
+    assert separator_count <= 1, "Should respect top_k limit of 2 events"
+@pytest.mark.asyncio
+async def test_temporal_retriever_multiple_events(setup_test_environment_with_events):
+    """Integration test: verify TemporalRetriever can retrieve multiple events."""
+    retriever = TemporalRetriever(top_k=10)
+    context = await retriever.get_context("What events occurred in 2021?")
+    assert isinstance(context, str), "Context should be a string"
+    assert len(context) > 0, "Context should not be empty"
+    assert (
+        "Project Alpha" in context
+        or "Team Meeting" in context
+        or "Product Release" in context
+        or "Company Retreat" in context
+    ), "Should retrieve at least one event from 2021"

cognee 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl

cognee 0.5.1__py3-none-any.whl → 0.5.1.dev0__py3-none-any.whl