PyPI - cognee - Versions diffs - 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl - Mend

cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (182) hide show

cognee/__init__.py +1 -0
cognee/api/health.py +2 -12
cognee/api/v1/add/add.py +46 -6
cognee/api/v1/add/routers/get_add_router.py +11 -2
cognee/api/v1/cognify/cognify.py +29 -9
cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
cognee/api/v1/datasets/datasets.py +11 -0
cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
cognee/api/v1/delete/routers/get_delete_router.py +2 -0
cognee/api/v1/memify/routers/get_memify_router.py +2 -1
cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
cognee/api/v1/responses/default_tools.py +0 -1
cognee/api/v1/responses/dispatch_function.py +1 -1
cognee/api/v1/responses/routers/default_tools.py +0 -1
cognee/api/v1/search/routers/get_search_router.py +3 -3
cognee/api/v1/search/search.py +11 -9
cognee/api/v1/settings/routers/get_settings_router.py +7 -1
cognee/api/v1/sync/routers/get_sync_router.py +3 -0
cognee/api/v1/ui/ui.py +45 -16
cognee/api/v1/update/routers/get_update_router.py +3 -1
cognee/api/v1/update/update.py +3 -3
cognee/api/v1/users/routers/get_visualize_router.py +2 -0
cognee/cli/_cognee.py +61 -10
cognee/cli/commands/add_command.py +3 -3
cognee/cli/commands/cognify_command.py +3 -3
cognee/cli/commands/config_command.py +9 -7
cognee/cli/commands/delete_command.py +3 -3
cognee/cli/commands/search_command.py +3 -7
cognee/cli/config.py +0 -1
cognee/context_global_variables.py +5 -0
cognee/exceptions/exceptions.py +1 -1
cognee/infrastructure/databases/cache/__init__.py +2 -0
cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
cognee/infrastructure/databases/cache/config.py +44 -0
cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
cognee/infrastructure/databases/exceptions/__init__.py +1 -0
cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
cognee/infrastructure/files/exceptions.py +1 -1
cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
cognee/infrastructure/files/utils/guess_file_type.py +6 -0
cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
cognee/infrastructure/loaders/LoaderEngine.py +27 -7
cognee/infrastructure/loaders/external/__init__.py +7 -0
cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
cognee/infrastructure/loaders/supported_loaders.py +7 -0
cognee/modules/data/exceptions/exceptions.py +1 -1
cognee/modules/data/methods/__init__.py +3 -0
cognee/modules/data/methods/get_dataset_data.py +4 -1
cognee/modules/data/methods/has_dataset_data.py +21 -0
cognee/modules/engine/models/TableRow.py +0 -1
cognee/modules/ingestion/save_data_to_file.py +9 -2
cognee/modules/pipelines/exceptions/exceptions.py +1 -1
cognee/modules/pipelines/operations/pipeline.py +12 -1
cognee/modules/pipelines/operations/run_tasks.py +25 -197
cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
cognee/modules/retrieval/base_graph_retriever.py +3 -1
cognee/modules/retrieval/base_retriever.py +3 -1
cognee/modules/retrieval/chunks_retriever.py +5 -1
cognee/modules/retrieval/code_retriever.py +20 -2
cognee/modules/retrieval/completion_retriever.py +50 -9
cognee/modules/retrieval/cypher_search_retriever.py +11 -1
cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
cognee/modules/retrieval/graph_completion_retriever.py +54 -10
cognee/modules/retrieval/lexical_retriever.py +20 -2
cognee/modules/retrieval/natural_language_retriever.py +10 -1
cognee/modules/retrieval/summaries_retriever.py +5 -1
cognee/modules/retrieval/temporal_retriever.py +62 -10
cognee/modules/retrieval/user_qa_feedback.py +3 -2
cognee/modules/retrieval/utils/completion.py +30 -4
cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
cognee/modules/retrieval/utils/session_cache.py +156 -0
cognee/modules/search/methods/get_search_type_tools.py +0 -5
cognee/modules/search/methods/no_access_control_search.py +12 -1
cognee/modules/search/methods/search.py +51 -5
cognee/modules/search/types/SearchType.py +0 -1
cognee/modules/settings/get_settings.py +23 -0
cognee/modules/users/methods/get_authenticated_user.py +3 -1
cognee/modules/users/methods/get_default_user.py +1 -6
cognee/modules/users/roles/methods/create_role.py +2 -2
cognee/modules/users/tenants/methods/create_tenant.py +2 -2
cognee/shared/exceptions/exceptions.py +1 -1
cognee/shared/logging_utils.py +18 -11
cognee/shared/utils.py +24 -2
cognee/tasks/codingagents/coding_rule_associations.py +1 -2
cognee/tasks/documents/exceptions/exceptions.py +1 -1
cognee/tasks/feedback/__init__.py +13 -0
cognee/tasks/feedback/create_enrichments.py +84 -0
cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
cognee/tasks/feedback/generate_improved_answers.py +130 -0
cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
cognee/tasks/feedback/models.py +26 -0
cognee/tasks/graph/extract_graph_from_data.py +2 -0
cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
cognee/tasks/ingestion/ingest_data.py +11 -5
cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
cognee/tasks/storage/add_data_points.py +3 -10
cognee/tasks/storage/index_data_points.py +19 -14
cognee/tasks/storage/index_graph_edges.py +25 -11
cognee/tasks/web_scraper/__init__.py +34 -0
cognee/tasks/web_scraper/config.py +26 -0
cognee/tasks/web_scraper/default_url_crawler.py +446 -0
cognee/tasks/web_scraper/models.py +46 -0
cognee/tasks/web_scraper/types.py +4 -0
cognee/tasks/web_scraper/utils.py +142 -0
cognee/tasks/web_scraper/web_scraper_task.py +396 -0
cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
cognee/tests/subprocesses/reader.py +25 -0
cognee/tests/subprocesses/simple_cognify_1.py +31 -0
cognee/tests/subprocesses/simple_cognify_2.py +31 -0
cognee/tests/subprocesses/writer.py +32 -0
cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
cognee/tests/test_add_docling_document.py +56 -0
cognee/tests/test_chromadb.py +7 -11
cognee/tests/test_concurrent_subprocess_access.py +76 -0
cognee/tests/test_conversation_history.py +240 -0
cognee/tests/test_feedback_enrichment.py +174 -0
cognee/tests/test_kuzu.py +27 -15
cognee/tests/test_lancedb.py +7 -11
cognee/tests/test_library.py +32 -2
cognee/tests/test_neo4j.py +24 -16
cognee/tests/test_neptune_analytics_vector.py +7 -11
cognee/tests/test_permissions.py +9 -13
cognee/tests/test_pgvector.py +4 -4
cognee/tests/test_remote_kuzu.py +8 -11
cognee/tests/test_s3_file_storage.py +1 -1
cognee/tests/test_search_db.py +6 -8
cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
distributed/Dockerfile +0 -3
distributed/entrypoint.py +21 -9
distributed/signal.py +5 -0
distributed/workers/data_point_saving_worker.py +64 -34
distributed/workers/graph_saving_worker.py +71 -47
cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
cognee/modules/retrieval/insights_retriever.py +0 -133
cognee/tests/test_memgraph.py +0 -109
cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
{cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0

cognee/modules/retrieval/utils/session_cache.py ADDED Viewed

@@ -0,0 +1,156 @@
+from typing import Optional, List, Dict, Any
+from cognee.context_global_variables import session_user
+from cognee.infrastructure.databases.cache.config import CacheConfig
+from cognee.infrastructure.databases.exceptions import CacheConnectionError
+from cognee.shared.logging_utils import get_logger
+logger = get_logger("session_cache")
+async def save_conversation_history(
+    query: str,
+    context_summary: str,
+    answer: str,
+    session_id: Optional[str] = None,
+) -> bool:
+    """
+    Persist one question/answer exchange in the session cache for the current user.
+    Nothing is stored unless the `session_user` context holds a user with an id and
+    `CacheConfig().caching` is truthy. Every failure is logged and reported through the
+    return value; no exception leaves this function.
+    Parameters:
+    -----------
+        - query (str): The user's query/question.
+        - context_summary (str): Summarized context used for generating the answer.
+        - answer (str): The generated answer/completion.
+        - session_id (Optional[str]): Session identifier; 'default_session' is used when None.
+    Returns:
+    --------
+        - bool: True when the entry was handed to the cache engine, False otherwise.
+    """
+    try:
+        cache_config = CacheConfig()
+        current_user = session_user.get()
+        user_id = getattr(current_user, "id", None)
+        if not user_id or not cache_config.caching:
+            logger.debug("Session caching disabled or user not authenticated")
+            return False
+        session_id = "default_session" if session_id is None else session_id
+        # Resolved at call time, not at module import.
+        from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine
+        engine = get_cache_engine()
+        if engine is None:
+            logger.warning("Cache engine not available, skipping session save")
+            return False
+        user_key = str(user_id)
+        await engine.add_qa(
+            user_key,
+            session_id=session_id,
+            question=query,
+            context=context_summary,
+            answer=answer,
+        )
+        logger.info(
+            f"Successfully saved Q&A to session cache: user_id={user_id}, session_id={session_id}"
+        )
+        return True
+    except CacheConnectionError as e:
+        # The cache being unreachable is tolerated: warn and report "not saved".
+        logger.warning(f"Cache unavailable, continuing without session save: {e.message}")
+        return False
+    except Exception as e:
+        # Saving is best effort, so any other failure is logged and reported as "not saved".
+        logger.error(
+            f"Unexpected error saving to session cache: {type(e).__name__}: {str(e)}. Continuing without caching."
+        )
+        return False
+async def get_conversation_history(
+    session_id: Optional[str] = None,
+) -> str:
+    """
+    Retrieves conversation history from cache and formats it as text.
+    History is only read when the `session_user` context holds a user with an id and
+    `CacheConfig().caching` is truthy. The entries are whatever `get_latest_qa` returns for
+    the user and session; it is called without a count, so the number of entries is decided
+    by the cache engine.
+    Parameters:
+    -----------
+        - session_id (Optional[str]): Session identifier. Defaults to 'default_session' if None.
+    Returns:
+    --------
+        - str: Formatted conversation history string, or empty string if no history or error.
+    Format:
+    -------
+        The text starts with the line "Previous conversation:" followed by an empty line.
+        Each entry is then rendered as four lines followed by an empty line:
+        [<time, or 'Unknown time' when missing>]
+        QUESTION: <question>
+        CONTEXT: <context>
+        ANSWER: <answer>
+    """
+    try:
+        cache_config = CacheConfig()
+        user = session_user.get()
+        user_id = getattr(user, "id", None)
+        if not (user_id and cache_config.caching):
+            logger.debug("Session caching disabled or user not authenticated")
+            return ""
+        if session_id is None:
+            session_id = "default_session"
+        from cognee.infrastructure.databases.cache.get_cache_engine import get_cache_engine
+        cache_engine = get_cache_engine()
+        if cache_engine is None:
+            logger.warning("Cache engine not available, skipping conversation history retrieval")
+            return ""
+        history_entries = await cache_engine.get_latest_qa(str(user_id), session_id)
+        if not history_entries:
+            logger.debug("No conversation history found")
+            return ""
+        # Render each entry once and join at the end instead of growing a string with `+=`.
+        entry_blocks = [
+            f"[{entry.get('time', 'Unknown time')}]\n"
+            f"QUESTION: {entry.get('question', '')}\n"
+            f"CONTEXT: {entry.get('context', '')}\n"
+            f"ANSWER: {entry.get('answer', '')}\n\n"
+            for entry in history_entries
+        ]
+        history_text = "Previous conversation:\n\n" + "".join(entry_blocks)
+        logger.debug(f"Retrieved {len(history_entries)} conversation history entries")
+        return history_text
+    except CacheConnectionError as e:
+        # The cache being unreachable is tolerated: warn and return no history.
+        logger.warning(f"Cache unavailable, continuing without conversation history: {e.message}")
+        return ""
+    except Exception as e:
+        # History is optional, so any other failure also yields an empty string.
+        logger.warning(
+            f"Unexpected error retrieving conversation history: {type(e).__name__}: {str(e)}"
+        )
+        return ""

cognee/modules/search/methods/get_search_type_tools.py CHANGED Viewed

@@ -9,7 +9,6 @@ from cognee.modules.search.exceptions import UnsupportedSearchTypeError
 # Retrievers
 from cognee.modules.retrieval.user_qa_feedback import UserQAFeedback
 from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
-from cognee.modules.retrieval.insights_retriever import InsightsRetriever
 from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
 from cognee.modules.retrieval.completion_retriever import CompletionRetriever
 from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
@@ -44,10 +43,6 @@ async def get_search_type_tools(
             SummariesRetriever(top_k=top_k).get_completion,
             SummariesRetriever(top_k=top_k).get_context,
         ],
-        SearchType.INSIGHTS: [
-            InsightsRetriever(top_k=top_k).get_completion,
-            InsightsRetriever(top_k=top_k).get_context,
-        ],
         SearchType.CHUNKS: [
             ChunksRetriever(top_k=top_k).get_completion,
             ChunksRetriever(top_k=top_k).get_context,

cognee/modules/search/methods/no_access_control_search.py CHANGED Viewed

@@ -1,12 +1,16 @@
 from typing import Any, List, Optional, Tuple, Type, Union
+from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.data.models.Dataset import Dataset
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
 from cognee.modules.search.types import SearchType
+from cognee.shared.logging_utils import get_logger
 from .get_search_type_tools import get_search_type_tools
+logger = get_logger()
 async def no_access_control_search(
     query_type: SearchType,
@@ -19,6 +23,7 @@ async def no_access_control_search(
     save_interaction: bool = False,
     last_k: Optional[int] = None,
     only_context: bool = False,
+    session_id: Optional[str] = None,
 ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
     search_tools = await get_search_type_tools(
         query_type=query_type,
@@ -31,6 +36,12 @@ async def no_access_control_search(
         save_interaction=save_interaction,
         last_k=last_k,
     )
+    graph_engine = await get_graph_engine()
+    is_empty = await graph_engine.is_empty()
+    if is_empty:
+        # TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
+        logger.warning("Search attempt on an empty knowledge graph")
     if len(search_tools) == 2:
         [get_completion, get_context] = search_tools
@@ -38,7 +49,7 @@ async def no_access_control_search(
             return None, await get_context(query_text), []
         context = await get_context(query_text)
-        result = await get_completion(query_text, context)
+        result = await get_completion(query_text, context, session_id=session_id)
     else:
         unknown_tool = search_tools[0]
         result = await unknown_tool(query_text)

cognee/modules/search/methods/search.py CHANGED Viewed

@@ -5,6 +5,8 @@ from uuid import UUID
 from fastapi.encoders import jsonable_encoder
 from typing import Any, List, Optional, Tuple, Type, Union
+from cognee.infrastructure.databases.graph import get_graph_engine
+from cognee.shared.logging_utils import get_logger
 from cognee.shared.utils import send_telemetry
 from cognee.context_global_variables import set_database_global_context_variables
@@ -22,11 +24,13 @@ from cognee.modules.data.models import Dataset
 from cognee.modules.data.methods.get_authorized_existing_datasets import (
     get_authorized_existing_datasets,
 )
+from cognee import __version__ as cognee_version
 from .get_search_type_tools import get_search_type_tools
 from .no_access_control_search import no_access_control_search
 from ..utils.prepare_search_result import prepare_search_result
+logger = get_logger()
 async def search(
     query_text: str,
@@ -42,6 +46,7 @@ async def search(
     last_k: Optional[int] = None,
     only_context: bool = False,
     use_combined_context: bool = False,
+    session_id: Optional[str] = None,
 ) -> Union[CombinedSearchResult, List[SearchResult]]:
     """
@@ -59,7 +64,14 @@ async def search(
         Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
     """
     query = await log_query(query_text, query_type.value, user.id)
-    send_telemetry("cognee.search EXECUTION STARTED", user.id)
+    send_telemetry(
+        "cognee.search EXECUTION STARTED",
+        user.id,
+        additional_properties={
+            "cognee_version": cognee_version,
+            "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
+        },
+    )
     # Use search function filtered by permissions if access control is enabled
     if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
@@ -77,6 +89,7 @@ async def search(
             last_k=last_k,
             only_context=only_context,
             use_combined_context=use_combined_context,
+            session_id=session_id,
         )
     else:
         search_results = [
@@ -91,10 +104,18 @@ async def search(
                 save_interaction=save_interaction,
                 last_k=last_k,
                 only_context=only_context,
+                session_id=session_id,
             )
         ]
-    send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
+    send_telemetry(
+        "cognee.search EXECUTION COMPLETED",
+        user.id,
+        additional_properties={
+            "cognee_version": cognee_version,
+            "tenant_id": str(user.tenant_id) if user.tenant_id else "Single User Tenant",
+        },
+    )
     await log_result(
         query.id,
@@ -195,6 +216,7 @@ async def authorized_search(
     last_k: Optional[int] = None,
     only_context: bool = False,
     use_combined_context: bool = False,
+    session_id: Optional[str] = None,
 ) -> Union[
     Tuple[Any, Union[List[Edge], str], List[Dataset]],
     List[Tuple[Any, Union[List[Edge], str], List[Dataset]]],
@@ -221,6 +243,7 @@ async def authorized_search(
             save_interaction=save_interaction,
             last_k=last_k,
             only_context=True,
+            session_id=session_id,
         )
         context = {}
@@ -263,7 +286,7 @@ async def authorized_search(
             return combined_context
         combined_context = prepare_combined_context(context)
-        completion = await get_completion(query_text, combined_context)
+        completion = await get_completion(query_text, combined_context, session_id=session_id)
         return completion, combined_context, datasets
@@ -280,6 +303,7 @@ async def authorized_search(
         save_interaction=save_interaction,
         last_k=last_k,
         only_context=only_context,
+        session_id=session_id,
     )
     return search_results
@@ -298,6 +322,7 @@ async def search_in_datasets_context(
     last_k: Optional[int] = None,
     only_context: bool = False,
     context: Optional[Any] = None,
+    session_id: Optional[str] = None,
 ) -> List[Tuple[Any, Union[str, List[Edge]], List[Dataset]]]:
     """
     Searches all provided datasets and handles setting up of appropriate database context based on permissions.
@@ -317,10 +342,30 @@ async def search_in_datasets_context(
         last_k: Optional[int] = None,
         only_context: bool = False,
         context: Optional[Any] = None,
+        session_id: Optional[str] = None,
     ) -> Tuple[Any, Union[str, List[Edge]], List[Dataset]]:
         # Set database configuration in async context for each dataset user has access for
         await set_database_global_context_variables(dataset.id, dataset.owner_id)
+        graph_engine = await get_graph_engine()
+        is_empty = await graph_engine.is_empty()
+        if is_empty:
+            # TODO: we can log here, but not all search types use graph. Still keeping this here for reviewer input
+            from cognee.modules.data.methods import get_dataset_data
+            dataset_data = await get_dataset_data(dataset.id)
+            if len(dataset_data) > 0:
+                logger.warning(
+                    f"Dataset '{dataset.name}' has {len(dataset_data)} data item(s) but the knowledge graph is empty. "
+                    "Please run cognify to process the data before searching."
+                )
+            else:
+                logger.warning(
+                    "Search attempt on an empty knowledge graph - no data has been added to this dataset"
+                )
         specific_search_tools = await get_search_type_tools(
             query_type=query_type,
             query_text=query_text,
@@ -340,7 +385,7 @@ async def search_in_datasets_context(
                 return None, await get_context(query_text), [dataset]
             search_context = context or await get_context(query_text)
-            search_result = await get_completion(query_text, search_context)
+            search_result = await get_completion(query_text, search_context, session_id=session_id)
             return search_result, search_context, [dataset]
         else:
@@ -365,6 +410,7 @@ async def search_in_datasets_context(
                 last_k=last_k,
                 only_context=only_context,
                 context=context,
+                session_id=session_id,
             )
         )

cognee/modules/search/types/SearchType.py CHANGED Viewed

@@ -3,7 +3,6 @@ from enum import Enum
 class SearchType(Enum):
     SUMMARIES = "SUMMARIES"
-    INSIGHTS = "INSIGHTS"
     CHUNKS = "CHUNKS"
     RAG_COMPLETION = "RAG_COMPLETION"
     GRAPH_COMPLETION = "GRAPH_COMPLETION"

cognee/modules/settings/get_settings.py CHANGED Viewed

@@ -15,6 +15,7 @@ class ModelName(Enum):
     ollama = "ollama"
     anthropic = "anthropic"
     gemini = "gemini"
+    mistral = "mistral"
 class LLMConfig(BaseModel):
@@ -72,6 +73,10 @@ def get_settings() -> SettingsDict:
             "value": "gemini",
             "label": "Gemini",
         },
+        {
+            "value": "mistral",
+            "label": "Mistral",
+        },
     ]
     return SettingsDict.model_validate(
@@ -134,6 +139,24 @@ def get_settings() -> SettingsDict:
                             "label": "Gemini 2.0 Flash",
                         },
                     ],
+                    "mistral": [
+                        {
+                            "value": "mistral-medium-2508",
+                            "label": "Mistral Medium 3.1",
+                        },
+                        {
+                            "value": "magistral-medium-2509",
+                            "label": "Magistral Medium 1.2",
+                        },
+                        {
+                            "value": "magistral-medium-2507",
+                            "label": "Magistral Medium 1.1",
+                        },
+                        {
+                            "value": "mistral-large-2411",
+                            "label": "Mistral Large 2.1",
+                        },
+                    ],
                 },
             },
             vector_db={

cognee/modules/users/methods/get_authenticated_user.py CHANGED Viewed

@@ -37,6 +37,8 @@ async def get_authenticated_user(
         except Exception as e:
             # Convert any get_default_user failure into a proper HTTP 500 error
             logger.error(f"Failed to create default user: {str(e)}")
-            raise HTTPException(status_code=500, detail=f"Failed to create default user: {str(e)}")
+            raise HTTPException(
+                status_code=500, detail=f"Failed to create default user: {str(e)}"
+            ) from e
     return user

cognee/modules/users/methods/get_default_user.py CHANGED Viewed

@@ -27,12 +27,7 @@ async def get_default_user() -> SimpleNamespace:
             if user is None:
                 return await create_default_user()
-            # We return a SimpleNamespace to have the same user type as our SaaS
-            # SimpleNamespace is just a dictionary which can be accessed through attributes
-            auth_data = SimpleNamespace(
-                id=user.id, email=user.email, tenant_id=user.tenant_id, roles=[]
-            )
-            return auth_data
+            return user
     except Exception as error:
         if "principals" in str(error.args):
             raise DatabaseNotCreatedError() from error

cognee/modules/users/roles/methods/create_role.py CHANGED Viewed

@@ -40,8 +40,8 @@ async def create_role(
             # Add association directly to the association table
             role = Role(name=role_name, tenant_id=tenant.id)
             session.add(role)
-        except IntegrityError:
-            raise EntityAlreadyExistsError(message="Role already exists for tenant.")
+        except IntegrityError as e:
+            raise EntityAlreadyExistsError(message="Role already exists for tenant.") from e
         await session.commit()
         await session.refresh(role)

cognee/modules/users/tenants/methods/create_tenant.py CHANGED Viewed

@@ -35,5 +35,5 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
             await session.merge(user)
             await session.commit()
             return tenant.id
-        except IntegrityError:
-            raise EntityAlreadyExistsError(message="Tenant already exists.")
+        except IntegrityError as e:
+            raise EntityAlreadyExistsError(message="Tenant already exists.") from e

cognee/shared/exceptions/exceptions.py CHANGED Viewed

@@ -7,6 +7,6 @@ class IngestionError(CogneeValidationError):
         self,
         message: str = "Failed to load data.",
         name: str = "IngestionError",
-        status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+        status_code=status.HTTP_422_UNPROCESSABLE_CONTENT,
     ):
         super().__init__(message, name, status_code)

cognee/shared/logging_utils.py CHANGED Viewed

@@ -430,6 +430,15 @@ def setup_logging(log_level=None, name=None):
     stream_handler.setFormatter(console_formatter)
     stream_handler.setLevel(log_level)
+    root_logger = logging.getLogger()
+    if root_logger.hasHandlers():
+        root_logger.handlers.clear()
+    root_logger.addHandler(stream_handler)
+    # Note: root logger needs to be set at NOTSET to allow all messages through and specific stream and file handlers
+    # can define their own levels.
+    root_logger.setLevel(logging.NOTSET)
     # Check if we already have a log file path from the environment
     # NOTE: environment variable must be used here as it allows us to
     # log to a single file with a name based on a timestamp in a multiprocess setting.
@@ -441,17 +450,15 @@ def setup_logging(log_level=None, name=None):
         log_file_path = os.path.join(LOGS_DIR, f"{start_time}.log")
         os.environ["LOG_FILE_NAME"] = log_file_path
-    # Create a file handler that uses our custom PlainFileHandler
-    file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
-    file_handler.setLevel(DEBUG)
-    # Configure root logger
-    root_logger = logging.getLogger()
-    if root_logger.hasHandlers():
-        root_logger.handlers.clear()
-    root_logger.addHandler(stream_handler)
-    root_logger.addHandler(file_handler)
-    root_logger.setLevel(log_level)
+    try:
+        # Create a file handler that uses our custom PlainFileHandler
+        file_handler = PlainFileHandler(log_file_path, encoding="utf-8")
+        file_handler.setLevel(DEBUG)
+        root_logger.addHandler(file_handler)
+    except Exception as e:
+        # Note: Exceptions happen in case of read-only file systems or when the log file path points to a location
+        # without write permission. Logging to file is not mandatory, so we just log a warning to the console.
+        root_logger.warning(f"Warning: Could not create log file handler at {log_file_path}: {e}")
     if log_level > logging.DEBUG:
         import warnings

cognee/shared/utils.py CHANGED Viewed

@@ -8,7 +8,7 @@ import http.server
 import socketserver
 from threading import Thread
 import pathlib
-from uuid import uuid4
+from uuid import uuid4, uuid5, NAMESPACE_OID
 from cognee.base_config import get_base_config
 from cognee.infrastructure.databases.graph import get_graph_engine
@@ -51,6 +51,26 @@ def get_anonymous_id():
     return anonymous_id
+def _sanitize_nested_properties(obj, property_names: list[str]):
+    """
+    Build a sanitized copy of a nested structure of dicts and lists.
+    Inside every dict, a string value stored under a key listed in `property_names`
+    (e.g., ['url', 'path']) is replaced by the string form of its uuid5 hash in
+    NAMESPACE_OID. Dicts and lists are rebuilt recursively; any other value, including a
+    non-string value under a listed key that is not itself a dict or list, is returned
+    unchanged. The input is not modified.
+    """
+    if isinstance(obj, list):
+        return [_sanitize_nested_properties(element, property_names) for element in obj]
+    if not isinstance(obj, dict):
+        return obj
+    return {
+        key: (
+            str(uuid5(NAMESPACE_OID, value))
+            if key in property_names and isinstance(value, str)
+            else _sanitize_nested_properties(value, property_names)
+        )
+        for key, value in obj.items()
+    }
 def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     if os.getenv("TELEMETRY_DISABLED"):
         return
@@ -58,7 +78,9 @@ def send_telemetry(event_name: str, user_id, additional_properties: dict = {}):
     env = os.getenv("ENV")
     if env in ["test", "dev"]:
         return
+    additional_properties = _sanitize_nested_properties(
+        obj=additional_properties, property_names=["url"]
+    )
     current_time = datetime.now(timezone.utc)
     payload = {
         "anonymous_id": str(get_anonymous_id()),

cognee/tasks/codingagents/coding_rule_associations.py CHANGED Viewed

@@ -124,5 +124,4 @@ async def add_rule_associations(
     if len(edges_to_save) > 0:
         await graph_engine.add_edges(edges_to_save)
-    await index_graph_edges()
+        await index_graph_edges(edges_to_save)

cognee/tasks/documents/exceptions/exceptions.py CHANGED Viewed

@@ -12,7 +12,7 @@ class WrongDataDocumentInputError(CogneeValidationError):
         self,
         field: str,
         name: str = "WrongDataDocumentInputError",
-        status_code: int = status.HTTP_422_UNPROCESSABLE_ENTITY,
+        status_code: int = status.HTTP_422_UNPROCESSABLE_CONTENT,
     ):
         message = f"Missing of invalid parameter: '{field}'."
         super().__init__(message, name, status_code)

cognee/tasks/feedback/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from .extract_feedback_interactions import extract_feedback_interactions
+from .generate_improved_answers import generate_improved_answers
+from .create_enrichments import create_enrichments
+from .link_enrichments_to_feedback import link_enrichments_to_feedback
+from .models import FeedbackEnrichment
+__all__ = [
+    "extract_feedback_interactions",
+    "generate_improved_answers",
+    "create_enrichments",
+    "link_enrichments_to_feedback",
+    "FeedbackEnrichment",
+]

cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl