cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +1 -1
  166. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +1 -1
  167. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +13 -27
  168. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  169. cognee/tests/test_add_docling_document.py +2 -2
  170. cognee/tests/test_cognee_server_start.py +84 -3
  171. cognee/tests/test_conversation_history.py +68 -5
  172. cognee/tests/test_data/example_with_header.csv +3 -0
  173. cognee/tests/test_dataset_database_handler.py +137 -0
  174. cognee/tests/test_dataset_delete.py +76 -0
  175. cognee/tests/test_edge_centered_payload.py +170 -0
  176. cognee/tests/test_edge_ingestion.py +27 -0
  177. cognee/tests/test_feedback_enrichment.py +1 -1
  178. cognee/tests/test_library.py +6 -4
  179. cognee/tests/test_load.py +62 -0
  180. cognee/tests/test_multi_tenancy.py +165 -0
  181. cognee/tests/test_parallel_databases.py +2 -0
  182. cognee/tests/test_pipeline_cache.py +164 -0
  183. cognee/tests/test_relational_db_migration.py +54 -2
  184. cognee/tests/test_search_db.py +44 -2
  185. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  186. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  187. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  188. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  189. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  190. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  191. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  192. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  193. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  194. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  195. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  196. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  197. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  198. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  199. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  200. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  201. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  202. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  203. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  204. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  205. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  206. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  207. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  208. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  209. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  210. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -6
  211. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/RECORD +215 -163
  212. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/WHEEL +1 -1
  213. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  214. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  215. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  216. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  217. cognee/modules/retrieval/code_retriever.py +0 -232
  218. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  219. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  220. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  221. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  222. cognee/tasks/repo_processor/__init__.py +0 -2
  223. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  224. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  225. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  226. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  227. {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -14,9 +14,7 @@ async def test_url_saves_as_html_file():
14
14
  await cognee.prune.prune_system(metadata=True)
15
15
 
16
16
  try:
17
- original_file_path = await save_data_item_to_storage(
18
- "https://en.wikipedia.org/wiki/Large_language_model"
19
- )
17
+ original_file_path = await save_data_item_to_storage("http://example.com/")
20
18
  file_path = get_data_file_path(original_file_path)
21
19
  assert file_path.endswith(".html")
22
20
  file = Path(file_path)
@@ -44,9 +42,7 @@ async def test_saved_html_is_valid():
44
42
  await cognee.prune.prune_system(metadata=True)
45
43
 
46
44
  try:
47
- original_file_path = await save_data_item_to_storage(
48
- "https://en.wikipedia.org/wiki/Large_language_model"
49
- )
45
+ original_file_path = await save_data_item_to_storage("http://example.com/")
50
46
  file_path = get_data_file_path(original_file_path)
51
47
  content = Path(file_path).read_text()
52
48
 
@@ -72,7 +68,7 @@ async def test_add_url():
72
68
  await cognee.prune.prune_data()
73
69
  await cognee.prune.prune_system(metadata=True)
74
70
 
75
- await cognee.add("https://en.wikipedia.org/wiki/Large_language_model")
71
+ await cognee.add("http://example.com/")
76
72
 
77
73
 
78
74
  skip_in_ci = pytest.mark.skipif(
@@ -88,7 +84,7 @@ async def test_add_url_with_tavily():
88
84
  await cognee.prune.prune_data()
89
85
  await cognee.prune.prune_system(metadata=True)
90
86
 
91
- await cognee.add("https://en.wikipedia.org/wiki/Large_language_model")
87
+ await cognee.add("http://example.com/")
92
88
 
93
89
 
94
90
  @pytest.mark.asyncio
@@ -98,7 +94,7 @@ async def test_add_url_without_incremental_loading():
98
94
 
99
95
  try:
100
96
  await cognee.add(
101
- "https://en.wikipedia.org/wiki/Large_language_model",
97
+ "http://example.com/",
102
98
  incremental_loading=False,
103
99
  )
104
100
  except Exception as e:
@@ -112,7 +108,7 @@ async def test_add_url_with_incremental_loading():
112
108
 
113
109
  try:
114
110
  await cognee.add(
115
- "https://en.wikipedia.org/wiki/Large_language_model",
111
+ "http://example.com/",
116
112
  incremental_loading=True,
117
113
  )
118
114
  except Exception as e:
@@ -125,7 +121,7 @@ async def test_add_url_can_define_preferred_loader_as_list_of_str():
125
121
  await cognee.prune.prune_system(metadata=True)
126
122
 
127
123
  await cognee.add(
128
- "https://en.wikipedia.org/wiki/Large_language_model",
124
+ "http://example.com/",
129
125
  preferred_loaders=["beautiful_soup_loader"],
130
126
  )
131
127
 
@@ -144,7 +140,7 @@ async def test_add_url_with_extraction_rules():
144
140
 
145
141
  try:
146
142
  await cognee.add(
147
- "https://en.wikipedia.org/wiki/Large_language_model",
143
+ "http://example.com/",
148
144
  preferred_loaders={"beautiful_soup_loader": {"extraction_rules": extraction_rules}},
149
145
  )
150
146
  except Exception as e:
@@ -163,9 +159,7 @@ async def test_loader_is_none_by_default():
163
159
  }
164
160
 
165
161
  try:
166
- original_file_path = await save_data_item_to_storage(
167
- "https://en.wikipedia.org/wiki/Large_language_model"
168
- )
162
+ original_file_path = await save_data_item_to_storage("http://example.com/")
169
163
  file_path = get_data_file_path(original_file_path)
170
164
  assert file_path.endswith(".html")
171
165
  file = Path(file_path)
@@ -196,9 +190,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov
196
190
  }
197
191
 
198
192
  try:
199
- original_file_path = await save_data_item_to_storage(
200
- "https://en.wikipedia.org/wiki/Large_language_model"
201
- )
193
+ original_file_path = await save_data_item_to_storage("http://example.com/")
202
194
  file_path = get_data_file_path(original_file_path)
203
195
  assert file_path.endswith(".html")
204
196
  file = Path(file_path)
@@ -225,9 +217,7 @@ async def test_beautiful_soup_loader_works_with_and_without_arguments():
225
217
  await cognee.prune.prune_system(metadata=True)
226
218
 
227
219
  try:
228
- original_file_path = await save_data_item_to_storage(
229
- "https://en.wikipedia.org/wiki/Large_language_model"
230
- )
220
+ original_file_path = await save_data_item_to_storage("http://example.com/")
231
221
  file_path = get_data_file_path(original_file_path)
232
222
  assert file_path.endswith(".html")
233
223
  file = Path(file_path)
@@ -263,9 +253,7 @@ async def test_beautiful_soup_loader_successfully_loads_file_if_required_args_pr
263
253
  await cognee.prune.prune_system(metadata=True)
264
254
 
265
255
  try:
266
- original_file_path = await save_data_item_to_storage(
267
- "https://en.wikipedia.org/wiki/Large_language_model"
268
- )
256
+ original_file_path = await save_data_item_to_storage("http://example.com/")
269
257
  file_path = get_data_file_path(original_file_path)
270
258
  assert file_path.endswith(".html")
271
259
  file = Path(file_path)
@@ -302,9 +290,7 @@ async def test_beautiful_soup_loads_file_successfully():
302
290
  }
303
291
 
304
292
  try:
305
- original_file_path = await save_data_item_to_storage(
306
- "https://en.wikipedia.org/wiki/Large_language_model"
307
- )
293
+ original_file_path = await save_data_item_to_storage("http://example.com/")
308
294
  file_path = get_data_file_path(original_file_path)
309
295
  assert file_path.endswith(".html")
310
296
  original_file = Path(file_path)
@@ -55,7 +55,7 @@ async def main():
55
55
  classified_data = ingestion.classify(file)
56
56
 
57
57
  # data_id is the hash of original file contents + owner id to avoid duplicate data
58
- data_id = ingestion.identify(classified_data, await get_default_user())
58
+ data_id = await ingestion.identify(classified_data, await get_default_user())
59
59
 
60
60
  await cognee.add(file_path)
61
61
 
@@ -39,12 +39,12 @@ async def main():
39
39
 
40
40
  answer = await cognee.search("Do programmers change light bulbs?")
41
41
  assert len(answer) != 0
42
- lowercase_answer = answer[0].lower()
42
+ lowercase_answer = answer[0]["search_result"][0].lower()
43
43
  assert ("no" in lowercase_answer) or ("none" in lowercase_answer)
44
44
 
45
45
  answer = await cognee.search("What colours are there in the presentation table?")
46
46
  assert len(answer) != 0
47
- lowercase_answer = answer[0].lower()
47
+ lowercase_answer = answer[0]["search_result"][0].lower()
48
48
  assert (
49
49
  ("red" in lowercase_answer)
50
50
  and ("blue" in lowercase_answer)
@@ -7,6 +7,7 @@ import requests
7
7
  from pathlib import Path
8
8
  import sys
9
9
  import uuid
10
+ import json
10
11
 
11
12
 
12
13
  class TestCogneeServerStart(unittest.TestCase):
@@ -24,8 +25,6 @@ class TestCogneeServerStart(unittest.TestCase):
24
25
  "--port",
25
26
  "8000",
26
27
  ],
27
- stdout=subprocess.PIPE,
28
- stderr=subprocess.PIPE,
29
28
  preexec_fn=os.setsid,
30
29
  )
31
30
  # Give the server some time to start
@@ -90,12 +89,71 @@ class TestCogneeServerStart(unittest.TestCase):
90
89
  )
91
90
  }
92
91
 
93
- payload = {"datasets": [dataset_name]}
92
+ ontology_key = f"test_ontology_{uuid.uuid4().hex[:8]}"
93
+ payload = {"datasets": [dataset_name], "ontology_key": [ontology_key]}
94
94
 
95
95
  add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
96
96
  if add_response.status_code not in [200, 201]:
97
97
  add_response.raise_for_status()
98
98
 
99
+ ontology_content = b"""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
100
+ xmlns:owl="http://www.w3.org/2002/07/owl#"
101
+ xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
102
+ xmlns="http://example.org/ontology#"
103
+ xml:base="http://example.org/ontology">
104
+
105
+ <owl:Ontology rdf:about="http://example.org/ontology"/>
106
+
107
+ <!-- Classes -->
108
+ <owl:Class rdf:ID="Problem"/>
109
+ <owl:Class rdf:ID="HardwareProblem"/>
110
+ <owl:Class rdf:ID="SoftwareProblem"/>
111
+ <owl:Class rdf:ID="Concept"/>
112
+ <owl:Class rdf:ID="Object"/>
113
+ <owl:Class rdf:ID="Joke"/>
114
+ <owl:Class rdf:ID="Image"/>
115
+ <owl:Class rdf:ID="Person"/>
116
+
117
+ <rdf:Description rdf:about="#HardwareProblem">
118
+ <rdfs:subClassOf rdf:resource="#Problem"/>
119
+ <rdfs:comment>A failure caused by physical components.</rdfs:comment>
120
+ </rdf:Description>
121
+
122
+ <rdf:Description rdf:about="#SoftwareProblem">
123
+ <rdfs:subClassOf rdf:resource="#Problem"/>
124
+ <rdfs:comment>An error caused by software logic or configuration.</rdfs:comment>
125
+ </rdf:Description>
126
+
127
+ <rdf:Description rdf:about="#Person">
128
+ <rdfs:comment>A human being or individual.</rdfs:comment>
129
+ </rdf:Description>
130
+
131
+ <!-- Individuals -->
132
+ <Person rdf:ID="programmers">
133
+ <rdfs:label>Programmers</rdfs:label>
134
+ </Person>
135
+
136
+ <Object rdf:ID="light_bulb">
137
+ <rdfs:label>Light Bulb</rdfs:label>
138
+ </Object>
139
+
140
+ <HardwareProblem rdf:ID="hardware_problem">
141
+ <rdfs:label>Hardware Problem</rdfs:label>
142
+ </HardwareProblem>
143
+
144
+ </rdf:RDF>"""
145
+
146
+ ontology_response = requests.post(
147
+ "http://127.0.0.1:8000/api/v1/ontologies",
148
+ headers=headers,
149
+ files=[("ontology_file", ("test.owl", ontology_content, "application/xml"))],
150
+ data={
151
+ "ontology_key": ontology_key,
152
+ "description": "Test ontology",
153
+ },
154
+ )
155
+ self.assertEqual(ontology_response.status_code, 200)
156
+
99
157
  # Cognify request
100
158
  url = "http://127.0.0.1:8000/api/v1/cognify"
101
159
  headers = {
@@ -107,6 +165,29 @@ class TestCogneeServerStart(unittest.TestCase):
107
165
  if cognify_response.status_code not in [200, 201]:
108
166
  cognify_response.raise_for_status()
109
167
 
168
+ datasets_response = requests.get("http://127.0.0.1:8000/api/v1/datasets", headers=headers)
169
+
170
+ datasets = datasets_response.json()
171
+ dataset_id = None
172
+ for dataset in datasets:
173
+ if dataset["name"] == dataset_name:
174
+ dataset_id = dataset["id"]
175
+ break
176
+
177
+ graph_response = requests.get(
178
+ f"http://127.0.0.1:8000/api/v1/datasets/{dataset_id}/graph", headers=headers
179
+ )
180
+ self.assertEqual(graph_response.status_code, 200)
181
+
182
+ graph_data = graph_response.json()
183
+ ontology_nodes = [
184
+ node for node in graph_data.get("nodes") if node.get("properties").get("ontology_valid")
185
+ ]
186
+
187
+ self.assertGreater(
188
+ len(ontology_nodes), 0, "No ontology nodes found - ontology was not integrated"
189
+ )
190
+
110
191
  # TODO: Add test to verify cognify pipeline is complete before testing search
111
192
 
112
193
  # Search request
@@ -8,17 +8,19 @@ Tests all retrievers that save conversation history to Redis cache:
8
8
  4. GRAPH_COMPLETION_CONTEXT_EXTENSION
9
9
  5. GRAPH_SUMMARY_COMPLETION
10
10
  6. TEMPORAL
11
+ 7. TRIPLET_COMPLETION
11
12
  """
12
13
 
13
14
  import os
14
- import shutil
15
15
  import cognee
16
16
  import pathlib
17
17
 
18
18
  from cognee.infrastructure.databases.cache import get_cache_engine
19
+ from cognee.infrastructure.databases.graph import get_graph_engine
19
20
  from cognee.modules.search.types import SearchType
20
21
  from cognee.shared.logging_utils import get_logger
21
22
  from cognee.modules.users.methods import get_default_user
23
+ from collections import Counter
22
24
 
23
25
  logger = get_logger()
24
26
 
@@ -54,13 +56,17 @@ async def main():
54
56
  """DataCo is a data analytics company. They help businesses make sense of their data."""
55
57
  )
56
58
 
57
- await cognee.add(text_1, dataset_name)
58
- await cognee.add(text_2, dataset_name)
59
+ await cognee.add(data=text_1, dataset_name=dataset_name)
60
+ await cognee.add(data=text_2, dataset_name=dataset_name)
59
61
 
60
- await cognee.cognify([dataset_name])
62
+ await cognee.cognify(datasets=[dataset_name])
61
63
 
62
64
  user = await get_default_user()
63
65
 
66
+ from cognee.memify_pipelines.create_triplet_embeddings import create_triplet_embeddings
67
+
68
+ await create_triplet_embeddings(user=user, dataset=dataset_name)
69
+
64
70
  cache_engine = get_cache_engine()
65
71
  assert cache_engine is not None, "Cache engine should be available for testing"
66
72
 
@@ -188,7 +194,6 @@ async def main():
188
194
  f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
189
195
  )
190
196
 
191
- # Verify saved
192
197
  history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
193
198
  our_qa_summary = [
194
199
  h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
@@ -215,6 +220,24 @@ async def main():
215
220
  ]
216
221
  assert len(our_qa_temporal) == 1, "Should find Temporal question in history"
217
222
 
223
+ session_id_triplet = "test_session_triplet"
224
+
225
+ result_triplet = await cognee.search(
226
+ query_type=SearchType.TRIPLET_COMPLETION,
227
+ query_text="What companies are mentioned?",
228
+ session_id=session_id_triplet,
229
+ )
230
+
231
+ assert isinstance(result_triplet, list) and len(result_triplet) > 0, (
232
+ f"TRIPLET_COMPLETION should return non-empty list, got: {result_triplet!r}"
233
+ )
234
+
235
+ history_triplet = await cache_engine.get_latest_qa(str(user.id), session_id_triplet, last_n=10)
236
+ our_qa_triplet = [
237
+ h for h in history_triplet if h["question"] == "What companies are mentioned?"
238
+ ]
239
+ assert len(our_qa_triplet) == 1, "Should find Triplet question in history"
240
+
218
241
  from cognee.modules.retrieval.utils.session_cache import (
219
242
  get_conversation_history,
220
243
  )
@@ -228,6 +251,46 @@ async def main():
228
251
  assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
229
252
  assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"
230
253
 
254
+ from cognee.memify_pipelines.persist_sessions_in_knowledge_graph import (
255
+ persist_sessions_in_knowledge_graph_pipeline,
256
+ )
257
+
258
+ logger.info("Starting persist_sessions_in_knowledge_graph tests")
259
+
260
+ await persist_sessions_in_knowledge_graph_pipeline(
261
+ user=user,
262
+ session_ids=[session_id_1, session_id_2],
263
+ dataset=dataset_name,
264
+ run_in_background=False,
265
+ )
266
+
267
+ graph_engine = await get_graph_engine()
268
+ graph = await graph_engine.get_graph_data()
269
+
270
+ type_counts = Counter(node_data[1].get("type", {}) for node_data in graph[0])
271
+
272
+ "Tests the correct number of NodeSet nodes after session persistence"
273
+ assert type_counts.get("NodeSet", 0) == 1, (
274
+ f"Number of NodeSets in the graph is incorrect, found {type_counts.get('NodeSet', 0)} but there should be exactly 1."
275
+ )
276
+
277
+ "Tests the correct number of DocumentChunk nodes after session persistence"
278
+ assert type_counts.get("DocumentChunk", 0) == 4, (
279
+ f"Number of DocumentChunk ndoes in the graph is incorrect, found {type_counts.get('DocumentChunk', 0)} but there should be exactly 4 (2 original documents, 2 sessions)."
280
+ )
281
+
282
+ from cognee.infrastructure.databases.vector.get_vector_engine import get_vector_engine
283
+
284
+ vector_engine = get_vector_engine()
285
+ collection_size = await vector_engine.search(
286
+ collection_name="DocumentChunk_text",
287
+ query_text="test",
288
+ limit=1000,
289
+ )
290
+ assert len(collection_size) == 4, (
291
+ f"DocumentChunk_text collection should have exactly 4 embeddings, found {len(collection_size)}"
292
+ )
293
+
231
294
  await cognee.prune.prune_data()
232
295
  await cognee.prune.prune_system(metadata=True)
233
296
 
@@ -0,0 +1,3 @@
1
+ id,name,age,city,country
2
+ 1,Eric,30,Beijing,China
3
+ 2,Joe,35,Berlin,Germany
@@ -0,0 +1,137 @@
1
import asyncio
import os

# Register the custom dataset database handlers by name BEFORE importing
# cognee — presumably these env vars are read during cognee module
# initialization (TODO confirm), hence the deliberate placement above the
# cognee imports below.
os.environ["VECTOR_DATASET_DATABASE_HANDLER"] = "custom_lancedb_handler"
os.environ["GRAPH_DATASET_DATABASE_HANDLER"] = "custom_kuzu_handler"

import cognee
from cognee.modules.users.methods import get_default_user
from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
from cognee.shared.logging_utils import setup_logging, ERROR
from cognee.api.v1.search import SearchType
13
+
14
+
15
class LanceDBTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Custom vector-dataset handler used by this test.

    Provisions a LanceDB database file under the test's local
    ``.cognee_system`` directory, namespaced by the requesting user's id,
    and returns the connection settings cognee should use for the dataset.
    """

    @classmethod
    async def create_dataset(cls, dataset_id, user):
        import pathlib

        # Absolute path to the test-local system directory next to this file.
        system_root = str(
            (
                pathlib.Path(__file__).parent / ".cognee_system/test_dataset_database_handler"
            ).resolve()
        )
        per_user_dir = os.path.join(system_root, "databases", str(user.id))
        os.makedirs(per_user_dir, exist_ok=True)

        db_file = "test.lance.db"
        return {
            "vector_dataset_database_handler": "custom_lancedb_handler",
            "vector_database_name": db_file,
            "vector_database_url": os.path.join(per_user_dir, db_file),
            "vector_database_provider": "lancedb",
        }
38
+
39
+
40
class KuzuTestDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """Custom graph-dataset handler used by this test.

    Provisions a Kuzu database file under a per-user ``databases``
    directory and returns the connection settings cognee should use.

    NOTE(review): the path here is relative (so ``os.makedirs`` creates it
    under the current working directory), while the vector handler uses an
    absolute path — presumably cognee resolves ``graph_database_url``
    against its system root; confirm against the handler consumer.
    """

    @classmethod
    async def create_dataset(cls, dataset_id, user):
        per_user_dir = os.path.join("databases", str(user.id))
        os.makedirs(per_user_dir, exist_ok=True)

        db_file = "test.kuzu"
        return {
            "graph_dataset_database_handler": "custom_kuzu_handler",
            "graph_database_name": db_file,
            "graph_database_url": os.path.join(per_user_dir, db_file),
            "graph_database_provider": "kuzu",
        }
53
+
54
+
55
async def main():
    """End-to-end check of custom dataset database handlers.

    Builds a small knowledge graph from one text, runs a search, then
    asserts that the handler-provided Kuzu and LanceDB database files were
    created on disk under the per-user databases directory.
    """
    import pathlib

    def _abs_path(subdir):
        # Resolve a directory that lives next to this test file.
        return str((pathlib.Path(__file__).parent / subdir).resolve())

    data_directory_path = _abs_path(".data_storage/test_dataset_database_handler")
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = _abs_path(".cognee_system/test_dataset_database_handler")
    cognee.config.system_root_directory(cognee_directory_path)

    # Register the custom dataset database handlers under the names the
    # environment variables point at.
    from cognee.infrastructure.databases.dataset_database_handler.use_dataset_database_handler import (
        use_dataset_database_handler,
    )

    use_dataset_database_handler(
        "custom_lancedb_handler", LanceDBTestDatasetDatabaseHandler, "lancedb"
    )
    use_dataset_database_handler("custom_kuzu_handler", KuzuTestDatasetDatabaseHandler, "kuzu")

    # Start from a clean slate: wipe data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph will be built from this text.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """

    print("Adding text to cognee:")
    print(text.strip())

    # Ingest the text so cognify can process it.
    await cognee.add(text)
    print("Text added successfully.\n")

    # Build the knowledge graph with the registered handlers in effect.
    await cognee.cognify()
    print("Cognify process complete.\n")

    query_text = "Tell me about NLP"
    print(f"Searching cognee for insights with query: '{query_text}'")
    search_results = await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION, query_text=query_text
    )

    print("Search results:")
    for result_text in search_results:
        print(result_text)

    default_user = await get_default_user()
    # The custom handlers must have produced exactly these database files.
    assert os.path.exists(
        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.kuzu")
    ), "Graph database file not found."
    assert os.path.exists(
        os.path.join(cognee_directory_path, "databases", str(default_user.id), "test.lance.db")
    ), "Vector database file not found."
128
+
129
+
130
if __name__ == "__main__":
    logger = setup_logging(log_level=ERROR)
    # Run the async test on a dedicated event loop; always drain async
    # generators and close the loop so no resources leak on exit.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()  # fix: the loop was previously left open
@@ -0,0 +1,76 @@
1
+ import os
2
+ import asyncio
3
+ import pathlib
4
+ from uuid import UUID
5
+
6
+ import cognee
7
+ from cognee.shared.logging_utils import setup_logging, ERROR
8
+ from cognee.modules.data.methods.delete_dataset import delete_dataset
9
+ from cognee.modules.data.methods.get_dataset import get_dataset
10
+ from cognee.modules.users.methods import get_default_user
11
+
12
+
13
async def main():
    """Verify per-dataset database cleanup on dataset deletion.

    Ingests two datasets, cognifies them, asserts the per-dataset vector
    (LanceDB) and graph (pickle) database files exist, deletes each
    dataset, and asserts those files are removed.
    """
    base_dir = pathlib.Path(__file__).parent

    # Point cognee's data and system roots at test-local directories.
    data_directory_path = str((base_dir / ".data_storage/test_dataset_delete").resolve())
    cognee.config.data_root_directory(data_directory_path)
    cognee_directory_path = str((base_dir / ".cognee_system/test_dataset_delete").resolve())
    cognee.config.system_root_directory(cognee_directory_path)

    # Start from a clean slate: wipe data and system state.
    print("Resetting cognee data...")
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)
    print("Data reset complete.\n")

    # The knowledge graph will be built from this text.
    text = """
    Natural language processing (NLP) is an interdisciplinary
    subfield of computer science and information retrieval.
    """

    # Two separate datasets so deletion can be exercised per dataset.
    await cognee.add(text, "nlp_dataset")
    await cognee.add("Quantum computing is the study of quantum computers.", "quantum_dataset")

    # Each element of the cognify result stringifies to a dataset UUID.
    ret_val = await cognee.cognify()
    user = await get_default_user()

    databases_dir = os.path.join(cognee_directory_path, "databases", str(user.id))
    for dataset_uuid in ret_val:
        dataset_id = str(dataset_uuid)
        vector_db_path = os.path.join(databases_dir, dataset_id + ".lance.db")
        graph_db_path = os.path.join(databases_dir, dataset_id + ".pkl")

        # Per-dataset databases must exist before deletion...
        assert os.path.exists(graph_db_path), "Graph database file not found."
        assert os.path.exists(vector_db_path), "Vector database file not found."

        dataset = await get_dataset(user_id=user.id, dataset_id=UUID(dataset_id))
        await delete_dataset(dataset)

        # ...and must be gone afterwards.
        assert not os.path.exists(graph_db_path), "Graph database file found."
        assert not os.path.exists(vector_db_path), "Vector database file found."
67
+
68
+
69
if __name__ == "__main__":
    logger = setup_logging(log_level=ERROR)
    # Run the async test on a dedicated event loop; always drain async
    # generators and close the loop so no resources leak on exit.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        loop.run_until_complete(main())
    finally:
        loop.run_until_complete(loop.shutdown_asyncgens())
        loop.close()  # fix: the loop was previously left open