cognee 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- cognee/__init__.py +1 -0
- cognee/api/health.py +2 -12
- cognee/api/v1/add/add.py +46 -6
- cognee/api/v1/add/routers/get_add_router.py +5 -1
- cognee/api/v1/cognify/cognify.py +29 -9
- cognee/api/v1/datasets/datasets.py +11 -0
- cognee/api/v1/responses/default_tools.py +0 -1
- cognee/api/v1/responses/dispatch_function.py +1 -1
- cognee/api/v1/responses/routers/default_tools.py +0 -1
- cognee/api/v1/search/search.py +11 -9
- cognee/api/v1/settings/routers/get_settings_router.py +7 -1
- cognee/api/v1/ui/ui.py +47 -16
- cognee/api/v1/update/routers/get_update_router.py +1 -1
- cognee/api/v1/update/update.py +3 -3
- cognee/cli/_cognee.py +61 -10
- cognee/cli/commands/add_command.py +3 -3
- cognee/cli/commands/cognify_command.py +3 -3
- cognee/cli/commands/config_command.py +9 -7
- cognee/cli/commands/delete_command.py +3 -3
- cognee/cli/commands/search_command.py +3 -7
- cognee/cli/config.py +0 -1
- cognee/context_global_variables.py +5 -0
- cognee/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/cache/__init__.py +2 -0
- cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
- cognee/infrastructure/databases/cache/config.py +44 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
- cognee/infrastructure/databases/exceptions/__init__.py +1 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +67 -44
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
- cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
- cognee/infrastructure/files/exceptions.py +1 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
- cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
- cognee/infrastructure/files/utils/guess_file_type.py +6 -0
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
- cognee/infrastructure/loaders/LoaderEngine.py +27 -7
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
- cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/exceptions/exceptions.py +1 -1
- cognee/modules/data/methods/__init__.py +3 -0
- cognee/modules/data/methods/get_dataset_data.py +4 -1
- cognee/modules/data/methods/has_dataset_data.py +21 -0
- cognee/modules/engine/models/TableRow.py +0 -1
- cognee/modules/ingestion/save_data_to_file.py +9 -2
- cognee/modules/pipelines/exceptions/exceptions.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +12 -1
- cognee/modules/pipelines/operations/run_tasks.py +25 -197
- cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
- cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
- cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
- cognee/modules/retrieval/base_graph_retriever.py +3 -1
- cognee/modules/retrieval/base_retriever.py +3 -1
- cognee/modules/retrieval/chunks_retriever.py +5 -1
- cognee/modules/retrieval/code_retriever.py +20 -2
- cognee/modules/retrieval/completion_retriever.py +50 -9
- cognee/modules/retrieval/cypher_search_retriever.py +11 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
- cognee/modules/retrieval/graph_completion_cot_retriever.py +32 -1
- cognee/modules/retrieval/graph_completion_retriever.py +54 -10
- cognee/modules/retrieval/lexical_retriever.py +20 -2
- cognee/modules/retrieval/natural_language_retriever.py +10 -1
- cognee/modules/retrieval/summaries_retriever.py +5 -1
- cognee/modules/retrieval/temporal_retriever.py +62 -10
- cognee/modules/retrieval/user_qa_feedback.py +3 -2
- cognee/modules/retrieval/utils/completion.py +5 -0
- cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
- cognee/modules/retrieval/utils/session_cache.py +156 -0
- cognee/modules/search/methods/get_search_type_tools.py +0 -5
- cognee/modules/search/methods/no_access_control_search.py +12 -1
- cognee/modules/search/methods/search.py +34 -2
- cognee/modules/search/types/SearchType.py +0 -1
- cognee/modules/settings/get_settings.py +23 -0
- cognee/modules/users/methods/get_authenticated_user.py +3 -1
- cognee/modules/users/methods/get_default_user.py +1 -6
- cognee/modules/users/roles/methods/create_role.py +2 -2
- cognee/modules/users/tenants/methods/create_tenant.py +2 -2
- cognee/shared/exceptions/exceptions.py +1 -1
- cognee/tasks/codingagents/coding_rule_associations.py +1 -2
- cognee/tasks/documents/exceptions/exceptions.py +1 -1
- cognee/tasks/graph/extract_graph_from_data.py +2 -0
- cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +11 -5
- cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
- cognee/tasks/storage/add_data_points.py +3 -10
- cognee/tasks/storage/index_data_points.py +19 -14
- cognee/tasks/storage/index_graph_edges.py +25 -11
- cognee/tasks/web_scraper/__init__.py +34 -0
- cognee/tasks/web_scraper/config.py +26 -0
- cognee/tasks/web_scraper/default_url_crawler.py +446 -0
- cognee/tasks/web_scraper/models.py +46 -0
- cognee/tasks/web_scraper/types.py +4 -0
- cognee/tasks/web_scraper/utils.py +142 -0
- cognee/tasks/web_scraper/web_scraper_task.py +396 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
- cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
- cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
- cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
- cognee/tests/subprocesses/reader.py +25 -0
- cognee/tests/subprocesses/simple_cognify_1.py +31 -0
- cognee/tests/subprocesses/simple_cognify_2.py +31 -0
- cognee/tests/subprocesses/writer.py +32 -0
- cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
- cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
- cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
- cognee/tests/test_add_docling_document.py +56 -0
- cognee/tests/test_chromadb.py +7 -11
- cognee/tests/test_concurrent_subprocess_access.py +76 -0
- cognee/tests/test_conversation_history.py +240 -0
- cognee/tests/test_kuzu.py +27 -15
- cognee/tests/test_lancedb.py +7 -11
- cognee/tests/test_library.py +32 -2
- cognee/tests/test_neo4j.py +24 -16
- cognee/tests/test_neptune_analytics_vector.py +7 -11
- cognee/tests/test_permissions.py +9 -13
- cognee/tests/test_pgvector.py +4 -4
- cognee/tests/test_remote_kuzu.py +8 -11
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +6 -8
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/METADATA +22 -7
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/RECORD +155 -128
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/entry_points.txt +1 -0
- distributed/Dockerfile +0 -3
- distributed/entrypoint.py +21 -9
- distributed/signal.py +5 -0
- distributed/workers/data_point_saving_worker.py +64 -34
- distributed/workers/graph_saving_worker.py +71 -47
- cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
- cognee/modules/retrieval/insights_retriever.py +0 -133
- cognee/tests/test_memgraph.py +0 -109
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
- distributed/poetry.lock +0 -12238
- distributed/pyproject.toml +0 -185
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/WHEEL +0 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/licenses/NOTICE.md +0 -0
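Two of the larger additions above are the cognee/tasks/web_scraper package, which lets cognee.add ingest URLs directly, and the cognee/infrastructure/databases/cache package, which backs the new conversation-history feature with Redis. A minimal sketch of the URL-ingestion flow, distilled from the new web_scraping_test.py shown below — the parameter names come from that test; that the omitted DefaultCrawlerConfig fields have usable defaults is an assumption:

import asyncio
import cognee
from cognee.tasks.web_scraper.config import DefaultCrawlerConfig

async def scrape_and_query():
    # BeautifulSoup-based crawling without a browser; extraction_rules map
    # result keys to CSS selectors, as in the test below.
    config = DefaultCrawlerConfig(
        extraction_rules={"quotes": {"selector": ".quote span.text", "all": True}},
        use_playwright=False,
    )
    await cognee.add(data="https://quotes.toscrape.com/", soup_crawler_config=config)
    await cognee.cognify()
    return await cognee.search(
        "Who is quoted on this page?",
        query_type=cognee.SearchType.GRAPH_COMPLETION,
    )

if __name__ == "__main__":
    print(asyncio.run(scrape_and_query()))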
cognee/tests/tasks/web_scraping/web_scraping_test.py
ADDED
@@ -0,0 +1,172 @@
+import asyncio
+import cognee
+from cognee.tasks.web_scraper.config import DefaultCrawlerConfig
+from cognee.tasks.web_scraper import cron_web_scraper_task
+
+
+async def test_web_scraping_using_bs4():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system()
+
+    url = "https://quotes.toscrape.com/"
+    rules = {
+        "quotes": {"selector": ".quote span.text", "all": True},
+        "authors": {"selector": ".quote small", "all": True},
+    }
+
+    soup_config = DefaultCrawlerConfig(
+        concurrency=5,
+        crawl_delay=0.5,
+        timeout=15.0,
+        max_retries=2,
+        retry_delay_factor=0.5,
+        extraction_rules=rules,
+        use_playwright=False,
+    )
+
+    await cognee.add(
+        data=url,
+        soup_crawler_config=soup_config,
+        incremental_loading=False,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "Albert Einstein" in results[0]
+    print("Test passed! Found Albert Einstein in scraped data.")
+
+
+async def test_web_scraping_using_bs4_and_incremental_loading():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    url = "https://books.toscrape.com/"
+    rules = {"titles": "article.product_pod h3 a", "prices": "article.product_pod p.price_color"}
+
+    soup_config = DefaultCrawlerConfig(
+        concurrency=1,
+        crawl_delay=0.1,
+        timeout=10.0,
+        max_retries=1,
+        retry_delay_factor=0.5,
+        extraction_rules=rules,
+        use_playwright=False,
+        structured=True,
+    )
+
+    await cognee.add(
+        data=url,
+        soup_crawler_config=soup_config,
+        incremental_loading=True,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "What is the price of 'A Light in the Attic' book?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "51.77" in results[0]
+    print("Test passed! Found 'A Light in the Attic' in scraped data.")
+
+
+async def test_web_scraping_using_tavily():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    url = "https://quotes.toscrape.com/"
+
+    await cognee.add(
+        data=url,
+        incremental_loading=False,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "Albert Einstein" in results[0]
+    print("Test passed! Found Albert Einstein in scraped data.")
+
+
+async def test_web_scraping_using_tavily_and_incremental_loading():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    url = "https://quotes.toscrape.com/"
+
+    await cognee.add(
+        data=url,
+        incremental_loading=True,
+    )
+
+    await cognee.cognify()
+
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+    assert "Albert Einstein" in results[0]
+    print("Test passed! Found Albert Einstein in scraped data.")
+
+
+# ---------- cron job tests ----------
+async def test_cron_web_scraper():
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    urls = ["https://quotes.toscrape.com/", "https://books.toscrape.com/"]
+    extraction_rules = {
+        "quotes": ".quote .text",
+        "authors": ".quote .author",
+        "titles": "article.product_pod h3 a",
+        "prices": "article.product_pod p.price_color",
+    }
+
+    # Run cron_web_scraper_task
+    await cron_web_scraper_task(
+        url=urls,
+        job_name="cron_scraping_job",
+        extraction_rules=extraction_rules,
+    )
+    results = await cognee.search(
+        "Who said 'The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking'?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+
+    assert "Albert Einstein" in results[0]
+
+    results_books = await cognee.search(
+        "What is the price of 'A Light in the Attic' book?",
+        query_type=cognee.SearchType.GRAPH_COMPLETION,
+    )
+
+    assert "51.77" in results_books[0]
+
+    print("Cron job web_scraping test passed!")
+
+
+async def main():
+    print("Starting BS4 incremental loading test...")
+    await test_web_scraping_using_bs4_and_incremental_loading()
+
+    print("Starting BS4 normal test...")
+    await test_web_scraping_using_bs4()
+
+    print("Starting Tavily incremental loading test...")
+    await test_web_scraping_using_tavily_and_incremental_loading()
+
+    print("Starting Tavily normal test...")
+    await test_web_scraping_using_tavily()
+
+    print("Starting cron job test...")
+    await test_cron_web_scraper()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
cognee/tests/test_add_docling_document.py
ADDED
@@ -0,0 +1,56 @@
+import asyncio
+import cognee
+
+import os
+
+
+async def main():
+    # Get file path to document to process
+    from pathlib import Path
+
+    current_directory = Path(__file__).resolve().parent
+    file_path_artificial = os.path.join(
+        current_directory, "test_data", "artificial-intelligence.pdf"
+    )
+    file_path_png = os.path.join(current_directory, "test_data", "example_copy.png")
+    file_path_pptx = os.path.join(current_directory, "test_data", "example.pptx")
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # Import necessary converter, and convert file to DoclingDocument format
+    from docling.document_converter import DocumentConverter
+
+    converter = DocumentConverter()
+
+    result = converter.convert(file_path_artificial)
+    await cognee.add(result.document)
+
+    result = converter.convert(file_path_png)
+    await cognee.add(result.document)
+
+    result = converter.convert(file_path_pptx)
+    await cognee.add(result.document)
+
+    await cognee.cognify()
+
+    answer = await cognee.search("Tell me about Artificial Intelligence.")
+    assert len(answer) != 0
+
+    answer = await cognee.search("Do programmers change light bulbs?")
+    assert len(answer) != 0
+    lowercase_answer = answer[0].lower()
+    assert ("no" in lowercase_answer) or ("none" in lowercase_answer)
+
+    answer = await cognee.search("What colours are there in the presentation table?")
+    assert len(answer) != 0
+    lowercase_answer = answer[0].lower()
+    assert (
+        ("red" in lowercase_answer)
+        and ("blue" in lowercase_answer)
+        and ("green" in lowercase_answer)
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
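The test above doubles as a reference for the new Docling ingestion path: cognee.add now accepts a DoclingDocument directly. A condensed sketch of just that step, assuming only what the test shows (the file name is a placeholder):

import asyncio
import cognee
from docling.document_converter import DocumentConverter

async def ingest_with_docling(path: str):
    # Convert any Docling-supported file (PDF, PNG, PPTX, ...) and hand the
    # resulting DoclingDocument straight to cognee.add, as the test above does.
    result = DocumentConverter().convert(path)
    await cognee.add(result.document)
    await cognee.cognify()

if __name__ == "__main__":
    asyncio.run(ingest_with_docling("report.pdf"))  # placeholder path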
cognee/tests/test_chromadb.py
CHANGED
@@ -133,20 +133,16 @@ async def main():
     dataset_name_1 = "natural_language"
     dataset_name_2 = "quantum"

-
+    explanation_file_path_nlp = os.path.join(
         pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
     )
-    await cognee.add([
+    await cognee.add([explanation_file_path_nlp], dataset_name_1)

-
-
-
-    The basic unit of information in quantum computing is the qubit, similar to the bit in traditional digital electronics. Unlike a classical bit, a qubit can exist in a superposition of its two "basis" states. When measuring a qubit, the result is a probabilistic output of a classical bit, therefore making quantum computers nondeterministic in general. If a quantum computer manipulates the qubit in a particular way, wave interference effects can amplify the desired measurement results. The design of quantum algorithms involves creating procedures that allow a quantum computer to perform calculations efficiently and quickly.
-    Physically engineering high-quality qubits has proven challenging. If a physical qubit is not sufficiently isolated from its environment, it suffers from quantum decoherence, introducing noise into calculations. Paradoxically, perfectly isolating qubits is also undesirable because quantum computations typically need to initialize qubits, perform controlled qubit interactions, and measure the resulting quantum states. Each of those operations introduces errors and suffers from noise, and such inaccuracies accumulate.
-    In principle, a non-quantum (classical) computer can solve the same computational problems as a quantum computer, given enough time. Quantum advantage comes in the form of time complexity rather than computability, and quantum complexity theory shows that some quantum algorithms for carefully selected tasks require exponentially fewer computational steps than the best known non-quantum algorithms. Such tasks can in theory be solved on a large-scale quantum computer whereas classical computers would not finish computations in any reasonable amount of time. However, quantum speedup is not universal or even typical across computational tasks, since basic tasks such as sorting are proven to not allow any asymptotic quantum speedup. Claims of quantum supremacy have drawn significant attention to the discipline, but are demonstrated on contrived tasks, while near-term practical use cases remain limited.
-    """
+    explanation_file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )

-    await cognee.add([
+    await cognee.add([explanation_file_path_quantum], dataset_name_2)

     await cognee.cognify([dataset_name_2, dataset_name_1])

@@ -159,7 +155,7 @@ async def main():
     random_node_name = random_node.payload["text"]

     search_results = await cognee.search(
-        query_type=SearchType.
+        query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
     )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
cognee/tests/test_concurrent_subprocess_access.py
ADDED
@@ -0,0 +1,76 @@
+import os
+import asyncio
+import cognee
+import pathlib
+import subprocess
+
+from cognee.shared.logging_utils import get_logger
+
+
+logger = get_logger()
+
+
+async def concurrent_subprocess_access():
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/concurrent_tasks")
+        ).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/concurrent_tasks")
+        ).resolve()
+    )
+
+    subprocess_directory_path = str(
+        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, "subprocesses/")).resolve()
+    )
+
+    writer_path = subprocess_directory_path + "/writer.py"
+    reader_path = subprocess_directory_path + "/reader.py"
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    writer_process = subprocess.Popen([os.sys.executable, str(writer_path)])
+
+    reader_process = subprocess.Popen([os.sys.executable, str(reader_path)])
+
+    # Wait for both processes to complete
+    writer_process.wait()
+    reader_process.wait()
+
+    logger.info("Basic write read subprocess example finished")
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    text = """
+    This is the text of the first cognify subprocess
+    """
+    await cognee.add(text, dataset_name="first_cognify_dataset")
+
+    text = """
+    This is the text of the second cognify subprocess
+    """
+    await cognee.add(text, dataset_name="second_cognify_dataset")
+
+    first_cognify_path = subprocess_directory_path + "/simple_cognify_1.py"
+    second_cognify_path = subprocess_directory_path + "/simple_cognify_2.py"
+
+    first_cognify_process = subprocess.Popen([os.sys.executable, str(first_cognify_path)])
+
+    second_cognify_process = subprocess.Popen([os.sys.executable, str(second_cognify_path)])
+
+    # Wait for both processes to complete
+    first_cognify_process.wait()
+    second_cognify_process.wait()
+
+    logger.info("Database concurrent subprocess example finished")
+
+
+if __name__ == "__main__":
+    asyncio.run(concurrent_subprocess_access())
cognee/tests/test_conversation_history.py
ADDED
@@ -0,0 +1,240 @@
+"""
+End-to-end integration test for conversation history feature.
+
+Tests all retrievers that save conversation history to Redis cache:
+1. GRAPH_COMPLETION
+2. RAG_COMPLETION
+3. GRAPH_COMPLETION_COT
+4. GRAPH_COMPLETION_CONTEXT_EXTENSION
+5. GRAPH_SUMMARY_COMPLETION
+6. TEMPORAL
+"""
+
+import os
+import shutil
+import cognee
+import pathlib
+
+from cognee.infrastructure.databases.cache import get_cache_engine
+from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.users.methods import get_default_user
+
+logger = get_logger()
+
+
+async def main():
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent,
+                ".data_storage/test_conversation_history",
+            )
+        ).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(
+                pathlib.Path(__file__).parent,
+                ".cognee_system/test_conversation_history",
+            )
+        ).resolve()
+    )
+
+    cognee.config.data_root_directory(data_directory_path)
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    dataset_name = "conversation_history_test"
+
+    text_1 = """TechCorp is a technology company based in San Francisco. They specialize in artificial intelligence and machine learning."""
+    text_2 = (
+        """DataCo is a data analytics company. They help businesses make sense of their data."""
+    )
+
+    await cognee.add(text_1, dataset_name)
+    await cognee.add(text_2, dataset_name)
+
+    await cognee.cognify([dataset_name])
+
+    user = await get_default_user()
+
+    cache_engine = get_cache_engine()
+    assert cache_engine is not None, "Cache engine should be available for testing"
+
+    session_id_1 = "test_session_graph"
+
+    await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is TechCorp?",
+        session_id=session_id_1,
+    )
+
+    history1 = await cache_engine.get_latest_qa(str(user.id), session_id_1, last_n=10)
+    assert len(history1) == 1, f"Expected at least 1 Q&A in history, got {len(history1)}"
+    our_qa = [h for h in history1 if h["question"] == "What is TechCorp?"]
+    assert len(our_qa) >= 1, "Expected to find 'What is TechCorp?' in history"
+    assert "answer" in our_qa[0] and "context" in our_qa[0], (
+        "Q&A should contain answer and context fields"
+    )
+
+    result2 = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Tell me more about it",
+        session_id=session_id_1,
+    )
+
+    assert isinstance(result2, list) and len(result2) > 0, (
+        f"Second query should return non-empty list, got: {result2!r}"
+    )
+
+    history2 = await cache_engine.get_latest_qa(str(user.id), session_id_1, last_n=10)
+    our_questions = [
+        h for h in history2 if h["question"] in ["What is TechCorp?", "Tell me more about it"]
+    ]
+    assert len(our_questions) == 2, (
+        f"Expected at least 2 Q&A pairs in history after 2 queries, got {len(our_questions)}"
+    )
+
+    session_id_2 = "test_session_separate"
+
+    result3 = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="What is DataCo?",
+        session_id=session_id_2,
+    )
+
+    assert isinstance(result3, list) and len(result3) > 0, (
+        f"Different session should return non-empty list, got: {result3!r}"
+    )
+
+    history3 = await cache_engine.get_latest_qa(str(user.id), session_id_2, last_n=10)
+    our_qa_session2 = [h for h in history3 if h["question"] == "What is DataCo?"]
+    assert len(our_qa_session2) == 1, "Session 2 should have 'What is DataCo?' question"
+
+    result4 = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="Test default session",
+        session_id=None,
+    )
+
+    assert isinstance(result4, list) and len(result4) > 0, (
+        f"Default session should return non-empty list, got: {result4!r}"
+    )
+
+    history_default = await cache_engine.get_latest_qa(str(user.id), "default_session", last_n=10)
+    our_qa_default = [h for h in history_default if h["question"] == "Test default session"]
+    assert len(our_qa_default) == 1, "Should find 'Test default session' in default_session"
+
+    session_id_rag = "test_session_rag"
+
+    result_rag = await cognee.search(
+        query_type=SearchType.RAG_COMPLETION,
+        query_text="What companies are mentioned?",
+        session_id=session_id_rag,
+    )
+
+    assert isinstance(result_rag, list) and len(result_rag) > 0, (
+        f"RAG_COMPLETION should return non-empty list, got: {result_rag!r}"
+    )
+
+    history_rag = await cache_engine.get_latest_qa(str(user.id), session_id_rag, last_n=10)
+    our_qa_rag = [h for h in history_rag if h["question"] == "What companies are mentioned?"]
+    assert len(our_qa_rag) == 1, "Should find RAG question in history"
+
+    session_id_cot = "test_session_cot"
+
+    result_cot = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION_COT,
+        query_text="What do you know about TechCorp?",
+        session_id=session_id_cot,
+    )
+
+    assert isinstance(result_cot, list) and len(result_cot) > 0, (
+        f"GRAPH_COMPLETION_COT should return non-empty list, got: {result_cot!r}"
+    )
+
+    history_cot = await cache_engine.get_latest_qa(str(user.id), session_id_cot, last_n=10)
+    our_qa_cot = [h for h in history_cot if h["question"] == "What do you know about TechCorp?"]
+    assert len(our_qa_cot) == 1, "Should find CoT question in history"
+
+    session_id_ext = "test_session_ext"
+
+    result_ext = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION_CONTEXT_EXTENSION,
+        query_text="Tell me about DataCo",
+        session_id=session_id_ext,
+    )
+
+    assert isinstance(result_ext, list) and len(result_ext) > 0, (
+        f"GRAPH_COMPLETION_CONTEXT_EXTENSION should return non-empty list, got: {result_ext!r}"
+    )
+
+    history_ext = await cache_engine.get_latest_qa(str(user.id), session_id_ext, last_n=10)
+    our_qa_ext = [h for h in history_ext if h["question"] == "Tell me about DataCo"]
+    assert len(our_qa_ext) == 1, "Should find Context Extension question in history"
+
+    session_id_summary = "test_session_summary"
+
+    result_summary = await cognee.search(
+        query_type=SearchType.GRAPH_SUMMARY_COMPLETION,
+        query_text="What are the key points about TechCorp?",
+        session_id=session_id_summary,
+    )
+
+    assert isinstance(result_summary, list) and len(result_summary) > 0, (
+        f"GRAPH_SUMMARY_COMPLETION should return non-empty list, got: {result_summary!r}"
+    )
+
+    # Verify saved
+    history_summary = await cache_engine.get_latest_qa(str(user.id), session_id_summary, last_n=10)
+    our_qa_summary = [
+        h for h in history_summary if h["question"] == "What are the key points about TechCorp?"
+    ]
+    assert len(our_qa_summary) == 1, "Should find Summary question in history"
+
+    session_id_temporal = "test_session_temporal"
+
+    result_temporal = await cognee.search(
+        query_type=SearchType.TEMPORAL,
+        query_text="Tell me about the companies",
+        session_id=session_id_temporal,
+    )
+
+    assert isinstance(result_temporal, list) and len(result_temporal) > 0, (
+        f"TEMPORAL should return non-empty list, got: {result_temporal!r}"
+    )
+
+    history_temporal = await cache_engine.get_latest_qa(
+        str(user.id), session_id_temporal, last_n=10
+    )
+    our_qa_temporal = [
+        h for h in history_temporal if h["question"] == "Tell me about the companies"
+    ]
+    assert len(our_qa_temporal) == 1, "Should find Temporal question in history"
+
+    from cognee.modules.retrieval.utils.session_cache import (
+        get_conversation_history,
+    )
+
+    formatted_history = await get_conversation_history(session_id=session_id_1)
+
+    assert "Previous conversation:" in formatted_history, (
+        "Formatted history should contain 'Previous conversation:' header"
+    )
+    assert "QUESTION:" in formatted_history, "Formatted history should contain 'QUESTION:' prefix"
+    assert "CONTEXT:" in formatted_history, "Formatted history should contain 'CONTEXT:' prefix"
+    assert "ANSWER:" in formatted_history, "Formatted history should contain 'ANSWER:' prefix"
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    logger.info("All conversation history tests passed successfully")
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main())
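Condensed from the 240-line test above, the session-history round trip added in this release looks roughly like this. The session_id keyword, both history lookups, and all import paths appear verbatim in the test; the session name is a placeholder, and this is a sketch rather than the canonical API reference:

import asyncio
import cognee
from cognee.infrastructure.databases.cache import get_cache_engine
from cognee.modules.retrieval.utils.session_cache import get_conversation_history
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user

async def session_round_trip():
    # Searches that share a session_id accumulate Q&A history in the cache.
    await cognee.search(
        query_type=SearchType.GRAPH_COMPLETION,
        query_text="What is TechCorp?",
        session_id="demo_session",
    )
    user = await get_default_user()
    cache_engine = get_cache_engine()
    # Raw entries are dicts with "question", "answer" and "context" fields.
    history = await cache_engine.get_latest_qa(str(user.id), "demo_session", last_n=10)
    # Prompt-ready rendering with QUESTION:/CONTEXT:/ANSWER: prefixes.
    formatted = await get_conversation_history(session_id="demo_session")
    return history, formatted

if __name__ == "__main__":
    asyncio.run(session_round_trip())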
cognee/tests/test_kuzu.py
CHANGED
@@ -38,22 +38,35 @@ async def main():

     dataset_name = "cs_explanations"

-
+    explanation_file_path_nlp = os.path.join(
         pathlib.Path(__file__).parent, "test_data/Natural_language_processing.txt"
     )
-    await cognee.add([
+    await cognee.add([explanation_file_path_nlp], dataset_name)

-
-
-
-
-
-
-
-
+    explanation_file_path_quantum = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Quantum_computers.txt"
+    )
+
+    from cognee.infrastructure.databases.graph import get_graph_engine
+
+    graph_engine = await get_graph_engine()
+
+    is_empty = await graph_engine.is_empty()
+
+    assert is_empty, "Kuzu graph database is not empty"
+
+    await cognee.add([explanation_file_path_quantum], dataset_name)
+
+    is_empty = await graph_engine.is_empty()
+
+    assert is_empty, "Kuzu graph database should be empty before cognify"

     await cognee.cognify([dataset_name])

+    is_empty = await graph_engine.is_empty()
+
+    assert not is_empty, "Kuzu graph database should not be empty"
+
     from cognee.infrastructure.databases.vector import get_vector_engine

     vector_engine = get_vector_engine()
@@ -61,7 +74,7 @@ async def main():
     random_node_name = random_node.payload["text"]

     search_results = await cognee.search(
-        query_type=SearchType.
+        query_type=SearchType.GRAPH_COMPLETION, query_text=random_node_name
     )
     assert len(search_results) != 0, "The search results list is empty."
     print("\n\nExtracted sentences are:\n")
@@ -117,11 +130,10 @@ async def main():
         assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

         await cognee.prune.prune_system(metadata=True)
-        from cognee.infrastructure.databases.graph import get_graph_engine

-
-
-        assert
+        is_empty = await graph_engine.is_empty()
+
+        assert is_empty, "Kuzu graph database is not empty"

     finally:
         # Ensure cleanup even if tests fail