cognee 0.3.4.dev4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/RECORD +173 -159
- distributed/pyproject.toml +0 -1
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
cognee/tasks/storage/index_graph_edges.py CHANGED
```diff
@@ -9,7 +9,7 @@ from cognee.modules.graph.models.EdgeType import EdgeType
 logger = get_logger(level=ERROR)
 
 
-async def index_graph_edges(batch_size: int = 1024):
+async def index_graph_edges():
     """
     Indexes graph edges by creating and managing vector indexes for relationship types.
 
@@ -72,6 +72,8 @@ async def index_graph_edges(batch_size: int = 1024):
     for index_name, indexable_points in index_points.items():
         index_name, field_name = index_name.split(".")
 
+        # Get maximum batch size for embedding model
+        batch_size = vector_engine.embedding_engine.get_batch_size()
         # We save the data in batches of {batch_size} to not put a lot of pressure on the database
         for start in range(0, len(indexable_points), batch_size):
            batch = indexable_points[start : start + batch_size]
```
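The hunk above drops the `batch_size` parameter and instead derives it from the embedding engine (`get_batch_size()` is also added to the embedding engines elsewhere in this release). A minimal, self-contained sketch of the pattern, with a stand-in engine and stand-in data:

```python
# Stand-in engine: in cognee the value comes from
# vector_engine.embedding_engine.get_batch_size().
class FakeEmbeddingEngine:
    def get_batch_size(self) -> int:
        return 1024  # illustrative value only


embedding_engine = FakeEmbeddingEngine()
indexable_points = list(range(3000))  # stand-in for relationship datapoints

# Batch size now tracks the embedding model's limit instead of a caller-supplied default.
batch_size = embedding_engine.get_batch_size()
for start in range(0, len(indexable_points), batch_size):
    batch = indexable_points[start : start + batch_size]
    print(f"indexing {len(batch)} points starting at {start}")
```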
cognee/tasks/summarization/summarize_code.py CHANGED
```diff
@@ -3,7 +3,7 @@ from typing import AsyncGenerator, Union
 from uuid import uuid5
 
 from cognee.infrastructure.engine import DataPoint
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_code_summary
 from .models import CodeSummary
 
 
@@ -16,7 +16,7 @@ async def summarize_code(
     code_data_points = [file for file in code_graph_nodes if hasattr(file, "source_code")]
 
     file_summaries = await asyncio.gather(
-        *[
+        *[extract_code_summary(file.source_code) for file in code_data_points]
     )
 
     file_summaries_map = {
```
cognee/tasks/summarization/summarize_text.py CHANGED
```diff
@@ -5,7 +5,7 @@ from pydantic import BaseModel
 
 from cognee.tasks.summarization.exceptions import InvalidSummaryInputsError
 from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_summary
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.tasks.summarization.models import TextSummary
 
@@ -50,7 +50,7 @@ async def summarize_text(
     summarization_model = cognee_config.summarization_model
 
     chunk_summaries = await asyncio.gather(
-        *[
+        *[extract_summary(chunk.text, summarization_model) for chunk in data_chunks]
     )
 
     summaries = [
```
cognee/tasks/temporal_graph/enrich_events.py CHANGED
```diff
@@ -1,6 +1,6 @@
 from typing import List
 
-from cognee.infrastructure.llm import
+from cognee.infrastructure.llm.extraction import extract_event_entities
 from cognee.modules.engine.models import Event
 from cognee.tasks.temporal_graph.models import EventWithEntities, EventEntityList
 
@@ -29,6 +29,6 @@ async def enrich_events(events: List[Event]) -> List[EventWithEntities]:
     events_json_str = json.dumps(events_json)
 
     # Extract entities from events
-    entity_result = await
+    entity_result = await extract_event_entities(events_json_str, EventEntityList)
 
     return entity_result.events
```
cognee/tasks/temporal_graph/extract_events_and_entities.py CHANGED
```diff
@@ -1,6 +1,6 @@
 import asyncio
 from typing import Type, List
-from cognee.infrastructure.llm.
+from cognee.infrastructure.llm.extraction import extract_event_graph
 from cognee.modules.chunking.models import DocumentChunk
 from cognee.tasks.temporal_graph.models import EventList
 from cognee.modules.engine.utils.generate_event_datapoint import generate_event_datapoint
@@ -21,7 +21,7 @@ async def extract_events_and_timestamps(data_chunks: List[DocumentChunk]) -> Lis
         List[DocumentChunk]: The same list of document chunks, enriched with extracted Event datapoints.
     """
     events = await asyncio.gather(
-        *[
+        *[extract_event_graph(chunk.text, EventList) for chunk in data_chunks]
     )
 
     for data_chunk, event_list in zip(data_chunks, events):
```
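The four hunks above all follow the same module move: the extraction helpers now live under `cognee.infrastructure.llm.extraction` (see the renames in the file list, formerly `structured_output_framework/litellm_instructor/extraction`). With cognee 0.3.5 installed, the relocated helpers import as:

```python
from cognee.infrastructure.llm.extraction import (
    extract_code_summary,
    extract_event_entities,
    extract_event_graph,
    extract_summary,
)
```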
cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py CHANGED
```diff
@@ -12,7 +12,8 @@ from cognee.cli.commands.search_command import SearchCommand
 from cognee.cli.commands.cognify_command import CognifyCommand
 from cognee.cli.commands.delete_command import DeleteCommand
 from cognee.cli.commands.config_command import ConfigCommand
-from cognee.cli.exceptions import CliCommandException
+from cognee.cli.exceptions import CliCommandException
+from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview
 
 
 # Mock asyncio.run to properly handle coroutines
@@ -282,13 +283,18 @@ class TestDeleteCommand:
         assert "all" in actions
         assert "force" in actions
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
     @patch("cognee.cli.commands.delete_command.asyncio.run", side_effect=_mock_run)
-    def test_execute_delete_dataset_with_confirmation(
+    def test_execute_delete_dataset_with_confirmation(
+        self, mock_asyncio_run, mock_confirm, mock_get_deletion_counts
+    ):
         """Test execute delete dataset with user confirmation"""
         # Mock the cognee module
         mock_cognee = MagicMock()
         mock_cognee.delete = AsyncMock()
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
 
         with patch.dict(sys.modules, {"cognee": mock_cognee}):
             command = DeleteCommand()
@@ -301,13 +307,16 @@ class TestDeleteCommand:
             command.execute(args)
 
             mock_confirm.assert_called_once_with(f"Delete dataset '{args.dataset_name}'?")
-            mock_asyncio_run.
+            assert mock_asyncio_run.call_count == 2
             assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
             mock_cognee.delete.assert_awaited_once_with(dataset_name="test_dataset", user_id=None)
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
-    def test_execute_delete_cancelled(self, mock_confirm):
+    def test_execute_delete_cancelled(self, mock_confirm, mock_get_deletion_counts):
         """Test execute when user cancels deletion"""
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
         command = DeleteCommand()
         args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)
 
```
cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py CHANGED
```diff
@@ -13,6 +13,7 @@ from cognee.cli.commands.cognify_command import CognifyCommand
 from cognee.cli.commands.delete_command import DeleteCommand
 from cognee.cli.commands.config_command import ConfigCommand
 from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
+from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview
 
 
 # Mock asyncio.run to properly handle coroutines
@@ -378,13 +379,18 @@ class TestCognifyCommandEdgeCases:
 class TestDeleteCommandEdgeCases:
     """Test edge cases for DeleteCommand"""
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
     @patch("cognee.cli.commands.delete_command.asyncio.run", side_effect=_mock_run)
-    def test_delete_all_with_user_id(
+    def test_delete_all_with_user_id(
+        self, mock_asyncio_run, mock_confirm, mock_get_deletion_counts
+    ):
         """Test delete command with both --all and --user-id"""
         # Mock the cognee module
         mock_cognee = MagicMock()
         mock_cognee.delete = AsyncMock()
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
 
         with patch.dict(sys.modules, {"cognee": mock_cognee}):
             command = DeleteCommand()
@@ -396,13 +402,17 @@ class TestDeleteCommandEdgeCases:
             command.execute(args)
 
             mock_confirm.assert_called_once_with("Delete ALL data from cognee?")
-            mock_asyncio_run.
+            assert mock_asyncio_run.call_count == 2
             assert asyncio.iscoroutine(mock_asyncio_run.call_args[0][0])
             mock_cognee.delete.assert_awaited_once_with(dataset_name=None, user_id="test_user")
 
+    @patch("cognee.cli.commands.delete_command.get_deletion_counts")
     @patch("cognee.cli.commands.delete_command.fmt.confirm")
-    def test_delete_confirmation_keyboard_interrupt(self, mock_confirm):
+    def test_delete_confirmation_keyboard_interrupt(self, mock_confirm, mock_get_deletion_counts):
         """Test delete command when user interrupts confirmation"""
+        mock_get_deletion_counts = AsyncMock()
+        mock_get_deletion_counts.return_value = DeletionCountsPreview()
+
         command = DeleteCommand()
         args = argparse.Namespace(dataset_name="test_dataset", user_id=None, all=False, force=False)
 
```
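Both test modules stub the same new dependency: DeleteCommand now previews deletion counts before confirming. A minimal sketch of that setup for downstream test suites, assuming only what the hunks show (`DeletionCountsPreview` constructs with defaults):

```python
from unittest.mock import AsyncMock, patch

from cognee.modules.data.methods.get_deletion_counts import DeletionCountsPreview

with patch(
    "cognee.cli.commands.delete_command.get_deletion_counts",
    new_callable=AsyncMock,
) as mock_counts:
    mock_counts.return_value = DeletionCountsPreview()
    # ... drive DeleteCommand().execute(args) here, as in the tests above
```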
cognee/tests/test_advanced_pdf_loader.py ADDED
```diff
@@ -0,0 +1,141 @@
+import sys
+from unittest.mock import patch, MagicMock, AsyncMock, mock_open
+import pytest
+
+from cognee.infrastructure.loaders.external.advanced_pdf_loader import AdvancedPdfLoader
+
+advanced_pdf_loader_module = sys.modules.get(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader"
+)
+
+
+class MockElement:
+    def __init__(self, category, text, metadata):
+        self.category = category
+        self.text = text
+        self.metadata = metadata
+
+    def to_dict(self):
+        return {
+            "type": self.category,
+            "text": self.text,
+            "metadata": self.metadata,
+        }
+
+
+@pytest.fixture
+def loader():
+    return AdvancedPdfLoader()
+
+
+@pytest.mark.parametrize(
+    "extension, mime_type, expected",
+    [
+        ("pdf", "application/pdf", True),
+        ("txt", "text/plain", False),
+        ("pdf", "text/plain", False),
+        ("doc", "application/pdf", False),
+    ],
+)
+def test_can_handle(loader, extension, mime_type, expected):
+    """Test can_handle method can correctly identify PDF files"""
+    assert loader.can_handle(extension, mime_type) == expected
+
+
+@pytest.mark.asyncio
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.open", new_callable=mock_open)
+@patch(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_metadata",
+    new_callable=AsyncMock,
+)
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_storage_config")
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_storage")
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf")
+async def test_load_success_with_unstructured(
+    mock_partition_pdf,
+    mock_pypdf_loader,
+    mock_get_file_storage,
+    mock_get_storage_config,
+    mock_get_file_metadata,
+    mock_open,
+    loader,
+):
+    """Test the main flow of using unstructured to successfully process PDF"""
+    # Prepare Mock data and objects
+    mock_elements = [
+        MockElement(
+            category="Title", text="Attention Is All You Need", metadata={"page_number": 1}
+        ),
+        MockElement(
+            category="NarrativeText",
+            text="The dominant sequence transduction models are based on complex recurrent or convolutional neural networks.",
+            metadata={"page_number": 1},
+        ),
+        MockElement(
+            category="Table",
+            text="This is a table.",
+            metadata={"page_number": 2, "text_as_html": "<table><tr><td>Data</td></tr></table>"},
+        ),
+    ]
+    mock_pypdf_loader.return_value.load = AsyncMock(return_value="/fake/path/fallback.txt")
+    mock_partition_pdf.return_value = mock_elements
+    mock_get_file_metadata.return_value = {"content_hash": "abc123def456"}
+
+    mock_storage_instance = MagicMock()
+    mock_storage_instance.store = AsyncMock(return_value="/stored/text_abc123def456.txt")
+    mock_get_file_storage.return_value = mock_storage_instance
+
+    mock_get_storage_config.return_value = {"data_root_directory": "/fake/data/root"}
+    test_file_path = "/fake/path/document.pdf"
+
+    # Run
+
+    result_path = await loader.load(test_file_path)
+
+    # Assert
+    assert result_path == "/stored/text_abc123def456.txt"
+
+    # Verify partition_pdf is called with the correct parameters
+    mock_partition_pdf.assert_called_once()
+    call_args, call_kwargs = mock_partition_pdf.call_args
+    assert call_kwargs.get("filename") == test_file_path
+    assert call_kwargs.get("strategy") == "auto"  # Default strategy
+
+    # Verify the stored content is correct
+    expected_content = "Page 1:\nAttention Is All You Need\n\nThe dominant sequence transduction models are based on complex recurrent or convolutional neural networks.\n\nPage 2:\n<table><tr><td>Data</td></tr></table>\n"
+    mock_storage_instance.store.assert_awaited_once_with("text_abc123def456.txt", expected_content)
+
+    # Verify fallback is not called
+    mock_pypdf_loader.assert_not_called()
+
+
+@pytest.mark.asyncio
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.open", new_callable=mock_open)
+@patch(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader.get_file_metadata",
+    new_callable=AsyncMock,
+)
+@patch("cognee.infrastructure.loaders.external.advanced_pdf_loader.PyPdfLoader")
+@patch(
+    "cognee.infrastructure.loaders.external.advanced_pdf_loader.partition_pdf",
+    side_effect=Exception("Unstructured failed!"),
+)
+async def test_load_fallback_on_unstructured_exception(
+    mock_partition_pdf, mock_pypdf_loader, mock_get_file_metadata, mock_open, loader
+):
+    """Test fallback to PyPdfLoader when unstructured throws an exception"""
+    # Prepare Mock
+    mock_fallback_instance = MagicMock()
+    mock_fallback_instance.load = AsyncMock(return_value="/fake/path/fallback.txt")
+    mock_pypdf_loader.return_value = mock_fallback_instance
+    mock_get_file_metadata.return_value = {"content_hash": "anyhash"}
+    test_file_path = "/fake/path/document.pdf"
+
+    # Run
+    result_path = await loader.load(test_file_path)
+
+    # Assert
+    assert result_path == "/fake/path/fallback.txt"
+    mock_partition_pdf.assert_called_once()  # Verify partition_pdf is called
+    mock_fallback_instance.load.assert_awaited_once_with(test_file_path)
```
cognee/tests/test_chromadb.py CHANGED
```diff
@@ -67,6 +67,44 @@ async def test_getting_of_documents(dataset_name_1):
     )
 
 
+async def test_vector_engine_search_none_limit():
+    file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )
+
+    file_path_nlp = os.path.join(
+        pathlib.Path(__file__).parent,
+        "test_data/Natural_language_processing.txt",
+    )
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    await cognee.add(file_path_quantum)
+
+    await cognee.add(file_path_nlp)
+
+    await cognee.cognify()
+
+    query_text = "Tell me about Quantum computers"
+
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
+    vector_engine = get_vector_engine()
+
+    collection_name = "Entity_name"
+
+    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
+
+    result = await vector_engine.search(
+        collection_name=collection_name, query_vector=query_vector, limit=None
+    )
+
+    # Check that we did not accidentally use any default value for limit
+    # in vector search along the way (like 5, 10, or 15)
+    assert len(result) > 15
+
+
 async def main():
     cognee.config.set_vector_db_config(
         {
@@ -165,6 +203,8 @@ async def main():
     tables_in_database = await vector_engine.get_collection_names()
     assert len(tables_in_database) == 0, "ChromaDB database is not empty"
 
+    await test_vector_engine_search_none_limit()
+
 
 if __name__ == "__main__":
     import asyncio
```
cognee/tests/test_cognee_server_start.py CHANGED
```diff
@@ -41,7 +41,12 @@ class TestCogneeServerStart(unittest.TestCase):
     def tearDownClass(cls):
         # Terminate the server process
         if hasattr(cls, "server_process") and cls.server_process:
-
+            if hasattr(os, "killpg"):
+                # Unix-like systems: Use process groups
+                os.killpg(os.getpgid(cls.server_process.pid), signal.SIGTERM)
+            else:
+                # Windows: Just terminate the main process
+                cls.server_process.terminate()
             cls.server_process.wait()
 
     def test_server_is_running(self):
```
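os.killpg only reaches the whole server tree when the child was started in its own process group. setUpClass is not part of this hunk, so the pairing below is an assumed sketch with a stand-in child process, not the test's actual startup code:

```python
import os
import signal
import subprocess
import sys

posix = hasattr(os, "killpg")
proc = subprocess.Popen(
    [sys.executable, "-c", "import time; time.sleep(60)"],  # stand-in server
    start_new_session=posix,  # POSIX: place the child in its own process group
)
if posix:
    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)  # signal the whole group
else:
    proc.terminate()  # Windows: terminate the main process only
proc.wait()
```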
cognee/tests/test_data/Quantum_computers.txt ADDED
```diff
@@ -0,0 +1,9 @@
+A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
+Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
+The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
+Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
+In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
+Emerging error-correcting codes aim to mitigate decoherence effects and are expected to pave the way for fault-tolerant quantum processors. Laboratories across the globe are investigating diverse qubit implementations, such as superconducting circuits, trapped ions, neutral atoms, and photonic systems. Significant government funding and private investment have created an ecosystem of startups and consortia focused on accelerating quantum hardware and software development. Universities are meanwhile launching interdisciplinary programs that teach physics, computer science, and engineering concepts necessary for tomorrow's quantum workforce. Establishing reliable benchmarking standards will be essential for objectively comparing devices and charting realistic milestones toward practical quantum advantage.
+Industry roadmaps anticipate that achieving error rates below the threshold for surface codes will require millions of physical qubits per logical qubit, highlighting daunting scale challenges. Researchers are therefore exploring hardware–software co-design strategies, where algorithmic breakthroughs and device engineering progress hand in hand to minimize overhead. Hybrid quantum-classical workflows, exemplified by variational algorithms running on near-term devices, offer a pragmatic path to extracting value before full fault tolerance arrives. Meanwhile, cryptographers are advancing post-quantum encryption schemes to safeguard information in a future where Shor’s algorithm becomes practical. The interplay between theoretical advances, experimental ingenuity, and policy considerations will ultimately determine how transformative quantum computing becomes for science, industry, and society.
+Collaborative open-source toolkits are lowering the barrier to entry for developers eager to prototype quantum algorithms and simulate small devices on classical hardware. As these software frameworks mature, they will foster standardization of gate libraries, circuit optimization passes, and error-mitigation techniques. At the same time, advances in cryogenic engineering, vacuum systems, and photonics are steadily improving the stability and manufacturability of next-generation qubit platforms. Policymakers are beginning to craft export controls and ethical guidelines aimed at preventing misuse while encouraging international collaboration in fundamental research. Ultimately, the success of quantum technology will hinge on integrating robust hardware, intelligent software, and a skilled workforce within an environment of responsible governance.
```
cognee/tests/test_lancedb.py ADDED
```diff
@@ -0,0 +1,211 @@
+import os
+import pathlib
+
+import cognee
+from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.files.storage import get_storage_config
+from cognee.modules.data.models import Data
+from cognee.modules.users.methods import get_default_user
+from cognee.modules.search.types import SearchType
+from cognee.modules.search.operations import get_history
+
+logger = get_logger()
+
+
+async def test_local_file_deletion(data_text, file_location):
+    from sqlalchemy import select
+    import hashlib
+    from cognee.infrastructure.databases.relational import get_relational_engine
+
+    engine = get_relational_engine()
+
+    async with engine.get_async_session() as session:
+        # Get hash of data contents
+        encoded_text = data_text.encode("utf-8")
+        data_hash = hashlib.md5(encoded_text).hexdigest()
+        # Get data entry from database based on hash contents
+        data = (await session.scalars(select(Data).where(Data.content_hash == data_hash))).one()
+        assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
+            f"Data location doesn't exist: {data.raw_data_location}"
+        )
+        # Test deletion of data along with local files created by cognee
+        await engine.delete_data_entity(data.id)
+        assert not os.path.exists(data.raw_data_location.replace("file://", "")), (
+            f"Data location still exists after deletion: {data.raw_data_location}"
+        )
+
+    async with engine.get_async_session() as session:
+        # Get data entry from database based on file path
+        data = (
+            await session.scalars(select(Data).where(Data.raw_data_location == file_location))
+        ).one()
+        assert os.path.isfile(data.raw_data_location.replace("file://", "")), (
+            f"Data location doesn't exist: {data.raw_data_location}"
+        )
+        # Test local files not created by cognee won't get deleted
+        await engine.delete_data_entity(data.id)
+        assert os.path.exists(data.raw_data_location.replace("file://", "")), (
+            f"Data location doesn't exists: {data.raw_data_location}"
+        )
+
+
+async def test_getting_of_documents(dataset_name_1):
+    # Test getting of documents for search per dataset
+    from cognee.modules.users.permissions.methods import get_document_ids_for_user
+
+    user = await get_default_user()
+    document_ids = await get_document_ids_for_user(user.id, [dataset_name_1])
+    assert len(document_ids) == 1, (
+        f"Number of expected documents doesn't match {len(document_ids)} != 1"
+    )
+
+    # Test getting of documents for search when no dataset is provided
+    user = await get_default_user()
+    document_ids = await get_document_ids_for_user(user.id)
+    assert len(document_ids) == 2, (
+        f"Number of expected documents doesn't match {len(document_ids)} != 2"
+    )
+
+
+async def test_vector_engine_search_none_limit():
+    file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )
+
+    file_path_nlp = os.path.join(
+        pathlib.Path(__file__).parent,
+        "test_data/Natural_language_processing.txt",
+    )
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    await cognee.add(file_path_quantum)
+
+    await cognee.add(file_path_nlp)
+
+    await cognee.cognify()
+
+    query_text = "Tell me about Quantum computers"
+
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
+    vector_engine = get_vector_engine()
+
+    collection_name = "Entity_name"
+
+    query_vector = (await vector_engine.embedding_engine.embed_text([query_text]))[0]
+
+    result = await vector_engine.search(
+        collection_name=collection_name, query_vector=query_vector, limit=None
+    )
+
+    # Check that we did not accidentally use any default value for limit
+    # in vector search along the way (like 5, 10, or 15)
+    assert len(result) > 15
+
+
+async def main():
+    cognee.config.set_vector_db_config(
+        {
+            "vector_db_provider": "lancedb",
+        }
+    )
+
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_lancedb")
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_lancedb")
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    dataset_name_1 = "natural_language"
+    dataset_name_2 = "quantum"
+
+    explanation_file_path = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
+    )
+    await cognee.add([explanation_file_path], dataset_name_1)
+
+    text = """A quantum computer is a computer that takes advantage of quantum mechanical phenomena.
+    At small scales, physical matter exhibits properties of both particles and waves, and quantum computing leverages this behavior, specifically quantum superposition and entanglement, using specialized hardware that supports the preparation and manipulation of quantum states.
+    Classical physics cannot explain the operation of these quantum devices, and a scalable quantum computer could perform some calculations exponentially faster (with respect to input size scaling) than any modern "classical" computer. In particular, a large-scale quantum computer could break widely used encryption schemes and aid physicists in performing physical simulations; however, the current state of the technology is largely experimental and impractical, with several obstacles to useful applications. Moreover, scalable quantum computers do not hold promise for many practical tasks, and for many important tasks quantum speedups are proven impossible.
+    The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
+    Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
+    In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
+    """
+
+    await cognee.add([text], dataset_name_2)
+
+    await cognee.cognify([dataset_name_2, dataset_name_1])
+
+    from cognee.infrastructure.databases.vector import get_vector_engine
+
+    await test_getting_of_documents(dataset_name_1)
+
+    vector_engine = get_vector_engine()
+    random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
+    random_node_name = random_node.payload["text"]
+
+    search_results = await cognee.search(
+        query_type=SearchType.INSIGHTS, query_text=random_node_name
+    )
+    assert len(search_results) != 0, "The search results list is empty."
+    print("\n\nExtracted sentences are:\n")
+    for result in search_results:
+        print(f"{result}\n")
+
+    search_results = await cognee.search(
+        query_type=SearchType.CHUNKS, query_text=random_node_name, datasets=[dataset_name_2]
+    )
+    assert len(search_results) != 0, "The search results list is empty."
+    print("\n\nExtracted chunks are:\n")
+    for result in search_results:
+        print(f"{result}\n")
+
+    graph_completion = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text=random_node_name,
+        datasets=[dataset_name_2],
+    )
+    assert len(graph_completion) != 0, "Completion result is empty."
+    print("Completion result is:")
+    print(graph_completion)
+
+    search_results = await cognee.search(
+        query_type=SearchType.SUMMARIES, query_text=random_node_name
+    )
+    assert len(search_results) != 0, "Query related summaries don't exist."
+    print("\n\nExtracted summaries are:\n")
+    for result in search_results:
+        print(f"{result}\n")
+
+    user = await get_default_user()
+    history = await get_history(user.id)
+    assert len(history) == 8, "Search history is not correct."
+
+    await cognee.prune.prune_data()
+    data_root_directory = get_storage_config()["data_root_directory"]
+    assert not os.path.isdir(data_root_directory), "Local data files are not deleted"
+
+    await cognee.prune.prune_system(metadata=True)
+    connection = await vector_engine.get_connection()
+    tables_in_database = await connection.table_names()
+    assert len(tables_in_database) == 0, "LanceDB database is not empty"
+
+    await test_vector_engine_search_none_limit()
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
```
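The test_vector_engine_search_none_limit function added to both test_chromadb.py and test_lancedb.py pins down one contract for every adapter: limit=None must mean "return everything", never a silent default. Restated as a toy sketch of those semantics (clamp_results is illustrative, not a cognee API):

```python
from typing import Optional


def clamp_results(results: list, limit: Optional[int]) -> list:
    # None passes everything through; only an explicit int truncates.
    return results if limit is None else results[:limit]


assert len(clamp_results(list(range(20)), None)) == 20
assert len(clamp_results(list(range(20)), 5)) == 5
```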