cognee 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/health.py +2 -12
- cognee/api/v1/add/add.py +46 -6
- cognee/api/v1/add/routers/get_add_router.py +5 -1
- cognee/api/v1/cognify/cognify.py +29 -9
- cognee/api/v1/datasets/datasets.py +11 -0
- cognee/api/v1/responses/default_tools.py +0 -1
- cognee/api/v1/responses/dispatch_function.py +1 -1
- cognee/api/v1/responses/routers/default_tools.py +0 -1
- cognee/api/v1/search/search.py +11 -9
- cognee/api/v1/settings/routers/get_settings_router.py +7 -1
- cognee/api/v1/ui/ui.py +47 -16
- cognee/api/v1/update/routers/get_update_router.py +1 -1
- cognee/api/v1/update/update.py +3 -3
- cognee/cli/_cognee.py +61 -10
- cognee/cli/commands/add_command.py +3 -3
- cognee/cli/commands/cognify_command.py +3 -3
- cognee/cli/commands/config_command.py +9 -7
- cognee/cli/commands/delete_command.py +3 -3
- cognee/cli/commands/search_command.py +3 -7
- cognee/cli/config.py +0 -1
- cognee/context_global_variables.py +5 -0
- cognee/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/cache/__init__.py +2 -0
- cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
- cognee/infrastructure/databases/cache/config.py +44 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
- cognee/infrastructure/databases/exceptions/__init__.py +1 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +67 -44
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
- cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
- cognee/infrastructure/files/exceptions.py +1 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
- cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
- cognee/infrastructure/files/utils/guess_file_type.py +6 -0
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
- cognee/infrastructure/loaders/LoaderEngine.py +27 -7
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
- cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/exceptions/exceptions.py +1 -1
- cognee/modules/data/methods/__init__.py +3 -0
- cognee/modules/data/methods/get_dataset_data.py +4 -1
- cognee/modules/data/methods/has_dataset_data.py +21 -0
- cognee/modules/engine/models/TableRow.py +0 -1
- cognee/modules/ingestion/save_data_to_file.py +9 -2
- cognee/modules/pipelines/exceptions/exceptions.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +12 -1
- cognee/modules/pipelines/operations/run_tasks.py +25 -197
- cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
- cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
- cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
- cognee/modules/retrieval/base_graph_retriever.py +3 -1
- cognee/modules/retrieval/base_retriever.py +3 -1
- cognee/modules/retrieval/chunks_retriever.py +5 -1
- cognee/modules/retrieval/code_retriever.py +20 -2
- cognee/modules/retrieval/completion_retriever.py +50 -9
- cognee/modules/retrieval/cypher_search_retriever.py +11 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
- cognee/modules/retrieval/graph_completion_cot_retriever.py +32 -1
- cognee/modules/retrieval/graph_completion_retriever.py +54 -10
- cognee/modules/retrieval/lexical_retriever.py +20 -2
- cognee/modules/retrieval/natural_language_retriever.py +10 -1
- cognee/modules/retrieval/summaries_retriever.py +5 -1
- cognee/modules/retrieval/temporal_retriever.py +62 -10
- cognee/modules/retrieval/user_qa_feedback.py +3 -2
- cognee/modules/retrieval/utils/completion.py +5 -0
- cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
- cognee/modules/retrieval/utils/session_cache.py +156 -0
- cognee/modules/search/methods/get_search_type_tools.py +0 -5
- cognee/modules/search/methods/no_access_control_search.py +12 -1
- cognee/modules/search/methods/search.py +34 -2
- cognee/modules/search/types/SearchType.py +0 -1
- cognee/modules/settings/get_settings.py +23 -0
- cognee/modules/users/methods/get_authenticated_user.py +3 -1
- cognee/modules/users/methods/get_default_user.py +1 -6
- cognee/modules/users/roles/methods/create_role.py +2 -2
- cognee/modules/users/tenants/methods/create_tenant.py +2 -2
- cognee/shared/exceptions/exceptions.py +1 -1
- cognee/tasks/codingagents/coding_rule_associations.py +1 -2
- cognee/tasks/documents/exceptions/exceptions.py +1 -1
- cognee/tasks/graph/extract_graph_from_data.py +2 -0
- cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
- cognee/tasks/ingestion/ingest_data.py +11 -5
- cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
- cognee/tasks/storage/add_data_points.py +3 -10
- cognee/tasks/storage/index_data_points.py +19 -14
- cognee/tasks/storage/index_graph_edges.py +25 -11
- cognee/tasks/web_scraper/__init__.py +34 -0
- cognee/tasks/web_scraper/config.py +26 -0
- cognee/tasks/web_scraper/default_url_crawler.py +446 -0
- cognee/tasks/web_scraper/models.py +46 -0
- cognee/tasks/web_scraper/types.py +4 -0
- cognee/tasks/web_scraper/utils.py +142 -0
- cognee/tasks/web_scraper/web_scraper_task.py +396 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
- cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
- cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
- cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
- cognee/tests/subprocesses/reader.py +25 -0
- cognee/tests/subprocesses/simple_cognify_1.py +31 -0
- cognee/tests/subprocesses/simple_cognify_2.py +31 -0
- cognee/tests/subprocesses/writer.py +32 -0
- cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
- cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
- cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
- cognee/tests/test_add_docling_document.py +56 -0
- cognee/tests/test_chromadb.py +7 -11
- cognee/tests/test_concurrent_subprocess_access.py +76 -0
- cognee/tests/test_conversation_history.py +240 -0
- cognee/tests/test_kuzu.py +27 -15
- cognee/tests/test_lancedb.py +7 -11
- cognee/tests/test_library.py +32 -2
- cognee/tests/test_neo4j.py +24 -16
- cognee/tests/test_neptune_analytics_vector.py +7 -11
- cognee/tests/test_permissions.py +9 -13
- cognee/tests/test_pgvector.py +4 -4
- cognee/tests/test_remote_kuzu.py +8 -11
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +6 -8
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/METADATA +22 -7
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/RECORD +155 -128
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/entry_points.txt +1 -0
- distributed/Dockerfile +0 -3
- distributed/entrypoint.py +21 -9
- distributed/signal.py +5 -0
- distributed/workers/data_point_saving_worker.py +64 -34
- distributed/workers/graph_saving_worker.py +71 -47
- cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
- cognee/modules/retrieval/insights_retriever.py +0 -133
- cognee/tests/test_memgraph.py +0 -109
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
- distributed/poetry.lock +0 -12238
- distributed/pyproject.toml +0 -185
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/WHEEL +0 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.5.dist-info → cognee-0.3.7.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py
CHANGED
@@ -19,6 +19,7 @@ from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
 from .modules.memify import memify
+from .api.v1.update import update
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
cognee/api/health.py
CHANGED
@@ -241,16 +241,6 @@ class HealthChecker:
         """Get comprehensive health status."""
         components = {}
 
-        # Critical services
-        critical_components = [
-            "relational_db",
-            "vector_db",
-            "graph_db",
-            "file_storage",
-            "llm_provider",
-            "embedding_service",
-        ]
-
         critical_checks = [
             ("relational_db", self.check_relational_db()),
             ("vector_db", self.check_vector_db()),
@@ -296,11 +286,11 @@ class HealthChecker:
             else:
                 components[name] = result
 
+        critical_comps = [check[0] for check in critical_checks]
         # Determine overall status
         critical_unhealthy = any(
-            comp.status == HealthStatus.UNHEALTHY
+            comp.status == HealthStatus.UNHEALTHY and name in critical_comps
            for name, comp in components.items()
-            if name in critical_components
        )
 
        has_degraded = any(comp.status == HealthStatus.DEGRADED for comp in components.values())
cognee/api/v1/add/add.py
CHANGED
@@ -1,6 +1,5 @@
 from uuid import UUID
-from typing import Union, BinaryIO, List, Optional
-
+from typing import Union, BinaryIO, List, Optional, Any
 from cognee.modules.users.models import User
 from cognee.modules.pipelines import Task, run_pipeline
 from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
@@ -11,6 +10,9 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
 )
 from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
 
 
 async def add(
@@ -21,14 +23,15 @@ async def add(
     vector_db_config: dict = None,
     graph_db_config: dict = None,
     dataset_id: Optional[UUID] = None,
-    preferred_loaders: List[str] = None,
+    preferred_loaders: Optional[List[Union[str, dict[str, dict[str, Any]]]]] = None,
     incremental_loading: bool = True,
+    data_per_batch: Optional[int] = 20,
 ):
     """
     Add data to Cognee for knowledge graph processing.
 
     This is the first step in the Cognee workflow - it ingests raw data and prepares it
-    for processing. The function accepts various data formats including text, files, and
+    for processing. The function accepts various data formats including text, files, urls and
     binary streams, then stores them in a specified dataset for further processing.
 
     Prerequisites:
@@ -68,6 +71,7 @@ async def add(
             - S3 path: "s3://my-bucket/documents/file.pdf"
             - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
             - Binary file object: open("file.txt", "rb")
+            - url: A web link url (https or http)
         dataset_name: Name of the dataset to store data in. Defaults to "main_dataset".
             Create separate datasets to organize different knowledge domains.
         user: User object for authentication and permissions. Uses default user if None.
@@ -78,6 +82,9 @@ async def add(
         vector_db_config: Optional configuration for vector database (for custom setups).
         graph_db_config: Optional configuration for graph database (for custom setups).
         dataset_id: Optional specific dataset UUID to use instead of dataset_name.
+        extraction_rules: Optional dictionary of rules (e.g., CSS selectors, XPath) for extracting specific content from web pages using BeautifulSoup
+        tavily_config: Optional configuration for Tavily API, including API key and extraction settings
+        soup_crawler_config: Optional configuration for BeautifulSoup crawler, specifying concurrency, crawl delay, and extraction rules.
 
     Returns:
         PipelineRunInfo: Information about the ingestion pipeline execution including:
@@ -126,6 +133,21 @@ async def add(
 
         # Add a single file
         await cognee.add("/home/user/documents/analysis.pdf")
+
+        # Add a single url and bs4 extract ingestion method
+        extraction_rules = {
+            "title": "h1",
+            "description": "p",
+            "more_info": "a[href*='more-info']"
+        }
+        await cognee.add("https://example.com",extraction_rules=extraction_rules)
+
+        # Add a single url and tavily extract ingestion method
+        # Make sure to set TAVILY_API_KEY = YOUR_TAVILY_API_KEY as a environment variable
+        await cognee.add("https://example.com")
+
+        # Add multiple urls
+        await cognee.add(["https://example.com","https://books.toscrape.com"])
         ```
 
     Environment Variables:
@@ -133,17 +155,34 @@ async def add(
         - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
 
         Optional:
-        - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
+        - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
         - LLM_MODEL: Model name (default: "gpt-5-mini")
         - DEFAULT_USER_EMAIL: Custom default user email
         - DEFAULT_USER_PASSWORD: Custom default user password
         - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
         - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
+        - TAVILY_API_KEY: YOUR_TAVILY_API_KEY
 
     """
+    if preferred_loaders is not None:
+        transformed = {}
+        for item in preferred_loaders:
+            if isinstance(item, dict):
+                transformed.update(item)
+            else:
+                transformed[item] = {}
+        preferred_loaders = transformed
+
     tasks = [
         Task(resolve_data_directories, include_subdirectories=True),
-        Task(
+        Task(
+            ingest_data,
+            dataset_name,
+            user,
+            node_set,
+            dataset_id,
+            preferred_loaders,
+        ),
     ]
 
     await setup()
@@ -167,6 +206,7 @@ async def add(
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
+        data_per_batch=data_per_batch,
     ):
         pipeline_run_info = run_info
 
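
The new preferred_loaders handling above accepts both bare loader names and per-loader option dicts, normalizing everything into a single mapping before the pipeline runs. A standalone sketch of that normalization; the loader names used here are illustrative, not necessarily registered loaders:

from typing import Any, Union

def normalize_loaders(preferred_loaders: list) -> dict:
    # Bare strings become loader names with empty option dicts;
    # dict entries are merged in with their options intact.
    transformed: dict[str, dict[str, Any]] = {}
    for item in preferred_loaders:
        if isinstance(item, dict):
            transformed.update(item)
        else:
            transformed[item] = {}
    return transformed

print(normalize_loaders(["text_loader", {"beautiful_soup_loader": {"crawl_delay": 1.0}}]))
# {'text_loader': {}, 'beautiful_soup_loader': {'crawl_delay': 1.0}}
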
cognee/api/v1/add/routers/get_add_router.py
CHANGED
@@ -73,7 +73,11 @@ def get_add_router() -> APIRouter:
 
         try:
             add_run = await cognee_add(
-                data,
+                data,
+                datasetName,
+                user=user,
+                dataset_id=datasetId,
+                node_set=node_set if node_set else None,
             )
 
             if isinstance(add_run, PipelineRunErrored):
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -44,6 +44,7 @@ async def cognify(
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
+    chunks_per_batch: int = None,
     config: Config = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
@@ -51,6 +52,7 @@ async def cognify(
     incremental_loading: bool = True,
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
+    data_per_batch: int = 20,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -105,6 +107,7 @@ async def cognify(
             Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
+        chunks_per_batch: Number of chunks to be processed in a single batch in Cognify tasks.
         vector_db_config: Custom vector database configuration for embeddings storage.
         graph_db_config: Custom graph database configuration for relationship storage.
         run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -148,7 +151,7 @@ async def cognify(
         # 2. Get entity relationships and connections
         relationships = await cognee.search(
             "connections between concepts",
-            query_type=SearchType.
+            query_type=SearchType.GRAPH_COMPLETION
         )
 
         # 3. Find relevant document chunks
@@ -209,10 +212,18 @@ async def cognify(
     }
 
     if temporal_cognify:
-        tasks = await get_temporal_tasks(
+        tasks = await get_temporal_tasks(
+            user=user, chunker=chunker, chunk_size=chunk_size, chunks_per_batch=chunks_per_batch
+        )
     else:
         tasks = await get_default_tasks(
-            user,
+            user=user,
+            graph_model=graph_model,
+            chunker=chunker,
+            chunk_size=chunk_size,
+            config=config,
+            custom_prompt=custom_prompt,
+            chunks_per_batch=chunks_per_batch,
         )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -228,6 +239,7 @@ async def cognify(
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
         pipeline_name="cognify_pipeline",
+        data_per_batch=data_per_batch,
     )
 
 
@@ -238,6 +250,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
+    chunks_per_batch: int = 100,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -256,6 +269,9 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
         }
 
+    if chunks_per_batch is None:
+        chunks_per_batch = 100
+
     default_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -269,20 +285,20 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             graph_model=graph_model,
             config=config,
             custom_prompt=custom_prompt,
-            task_config={"batch_size":
+            task_config={"batch_size": chunks_per_batch},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
-            task_config={"batch_size":
+            task_config={"batch_size": chunks_per_batch},
         ),
-        Task(add_data_points, task_config={"batch_size":
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]
 
     return default_tasks
 
 
 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -299,10 +315,14 @@ async def get_temporal_tasks(
         user (User, optional): The user requesting task execution, used for permission checks.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+        chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
 
     Returns:
         list[Task]: A list of Task objects representing the temporal processing pipeline.
     """
+    if chunks_per_batch is None:
+        chunks_per_batch = 10
+
     temporal_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -311,9 +331,9 @@ async def get_temporal_tasks(
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
             chunker=chunker,
         ),
-        Task(extract_events_and_timestamps, task_config={"
+        Task(extract_events_and_timestamps, task_config={"batch_size": chunks_per_batch}),
         Task(extract_knowledge_graph_from_events),
-        Task(add_data_points, task_config={"batch_size":
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]
 
     return temporal_tasks
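
Both new batching knobs are plumbed through to the task configs above. A hedged usage sketch based on the updated signature; the values are illustrative, not recommendations:

import asyncio
import cognee

async def main():
    await cognee.add("Natural language processing is a field of AI.")
    await cognee.cognify(
        chunks_per_batch=50,  # chunks handled per batch inside cognify tasks
        data_per_batch=10,    # data items handled per pipeline batch
    )

asyncio.run(main())
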
cognee/api/v1/datasets/datasets.py
CHANGED
@@ -1,4 +1,5 @@
 from uuid import UUID
+from cognee.modules.data.methods import has_dataset_data
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.ingestion import discover_directory_datasets
 from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
@@ -26,6 +27,16 @@ class datasets:
 
         return await get_dataset_data(dataset.id)
 
+    @staticmethod
+    async def has_data(dataset_id: str) -> bool:
+        from cognee.modules.data.methods import get_dataset
+
+        user = await get_default_user()
+
+        dataset = await get_dataset(user.id, dataset_id)
+
+        return await has_dataset_data(dataset.id)
+
     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
         return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")
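
The new datasets.has_data helper resolves the dataset for the default user and reports whether it already contains any data, which is handy for skipping redundant ingestion. A minimal sketch, assuming you already hold a dataset id (the UUID below is a placeholder):

import asyncio
import cognee

async def main():
    dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder
    if not await cognee.datasets.has_data(dataset_id):
        await cognee.add("New content", dataset_name="main_dataset")

asyncio.run(main())
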
cognee/api/v1/responses/dispatch_function.py
CHANGED
@@ -59,7 +59,7 @@ async def handle_search(arguments: Dict[str, Any], user) -> list:
     valid_search_types = (
         search_tool["parameters"]["properties"]["search_type"]["enum"]
         if search_tool
-        else ["
+        else ["CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
     )
 
     if search_type_str not in valid_search_types:
cognee/api/v1/search/search.py
CHANGED
@@ -1,6 +1,7 @@
 from uuid import UUID
 from typing import Union, Optional, List, Type
 
+from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.users.models import User
 from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
@@ -8,6 +9,10 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.search.methods import search as search_function
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.exceptions import DatasetNotFoundError
+from cognee.context_global_variables import set_session_user_context_variable
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
 
 
 async def search(
@@ -25,6 +30,7 @@ async def search(
     last_k: Optional[int] = 1,
     only_context: bool = False,
     use_combined_context: bool = False,
+    session_id: Optional[str] = None,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -52,11 +58,6 @@ async def search(
             Best for: Direct document retrieval, specific fact-finding.
             Returns: LLM responses based on relevant text chunks.
 
-        **INSIGHTS**:
-            Structured entity relationships and semantic connections.
-            Best for: Understanding concept relationships, knowledge mapping.
-            Returns: Formatted relationship data and entity connections.
-
         **CHUNKS**:
             Raw text segments that match the query semantically.
             Best for: Finding specific passages, citations, exact content.
@@ -118,15 +119,14 @@ async def search(
 
         save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not
 
+        session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
+
     Returns:
         list: Search results in format determined by query_type:
 
         **GRAPH_COMPLETION/RAG_COMPLETION**:
             [List of conversational AI response strings]
 
-        **INSIGHTS**:
-            [List of formatted relationship descriptions and entity connections]
-
         **CHUNKS**:
             [List of relevant text passages with source metadata]
 
@@ -146,7 +146,6 @@ async def search(
     Performance & Optimization:
         - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
         - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
-        - **INSIGHTS**: Fast, returns structured relationships without LLM processing
         - **CHUNKS**: Fastest, pure vector similarity search without LLM
         - **SUMMARIES**: Fast, returns pre-computed summaries
         - **CODE**: Medium speed, specialized for code understanding
@@ -177,6 +176,8 @@ async def search(
     if user is None:
         user = await get_default_user()
 
+    await set_session_user_context_variable(user)
+
     # Transform string based datasets to UUID - String based datasets can only be found for current user
     if datasets is not None and [all(isinstance(dataset, str) for dataset in datasets)]:
         datasets = await get_authorized_existing_datasets(datasets, "read", user)
@@ -198,6 +199,7 @@ async def search(
         last_k=last_k,
         only_context=only_context,
         use_combined_context=use_combined_context,
+        session_id=session_id,
     )
 
     return filtered_search_results
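
The new session_id parameter threads through to the session cache added in this release (see cognee/modules/retrieval/utils/session_cache.py in the file list), scoping cached Q&A interactions per session. A hedged usage sketch based on the updated signature:

import asyncio
import cognee
from cognee.modules.search.types import SearchType

async def main():
    results = await cognee.search(
        "What are the main concepts?",
        query_type=SearchType.GRAPH_COMPLETION,
        session_id="demo-session",  # falls back to 'default_session' when None
    )
    print(results)

asyncio.run(main())
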
cognee/api/v1/settings/routers/get_settings_router.py
CHANGED
@@ -21,7 +21,13 @@ class SettingsDTO(OutDTO):
 
 
 class LLMConfigInputDTO(InDTO):
-    provider: Union[
+    provider: Union[
+        Literal["openai"],
+        Literal["ollama"],
+        Literal["anthropic"],
+        Literal["gemini"],
+        Literal["mistral"],
+    ]
     model: str
     api_key: str
 
cognee/api/v1/ui/ui.py
CHANGED
@@ -502,22 +502,48 @@ def start_ui(
 
     if start_mcp:
         logger.info("Starting Cognee MCP server with Docker...")
-        cwd = os.getcwd()
-        env_file = os.path.join(cwd, ".env")
         try:
+            image = "cognee/cognee-mcp:feature-standalone-mcp"  # TODO: change to "cognee/cognee-mcp:main" right before merging into main
+            subprocess.run(["docker", "pull", image], check=True)
+
+            import uuid
+
+            container_name = f"cognee-mcp-{uuid.uuid4().hex[:8]}"
+
+            docker_cmd = [
+                "docker",
+                "run",
+                "--name",
+                container_name,
+                "-p",
+                f"{mcp_port}:8000",
+                "--rm",
+                "-e",
+                "TRANSPORT_MODE=sse",
+            ]
+
+            if start_backend:
+                docker_cmd.extend(
+                    [
+                        "-e",
+                        f"API_URL=http://localhost:{backend_port}",
+                    ]
+                )
+                logger.info(
+                    f"Configuring MCP to connect to backend API at http://localhost:{backend_port}"
+                )
+                logger.info("(localhost will be auto-converted to host.docker.internal)")
+            else:
+                cwd = os.getcwd()
+                env_file = os.path.join(cwd, ".env")
+                docker_cmd.extend(["--env-file", env_file])
+
+            docker_cmd.append(
+                image
+            )  # TODO: change to "cognee/cognee-mcp:main" right before merging into main
+
             mcp_process = subprocess.Popen(
-                [
-                    "docker",
-                    "run",
-                    "-p",
-                    f"{mcp_port}:8000",
-                    "--rm",
-                    "--env-file",
-                    env_file,
-                    "-e",
-                    "TRANSPORT_MODE=sse",
-                    "cognee/cognee-mcp:main",
-                ],
+                docker_cmd,
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
                 preexec_fn=os.setsid if hasattr(os, "setsid") else None,
@@ -526,8 +552,13 @@ def start_ui(
             _stream_process_output(mcp_process, "stdout", "[MCP]", "\033[34m")  # Blue
             _stream_process_output(mcp_process, "stderr", "[MCP]", "\033[34m")  # Blue
 
-
-
+            # Pass both PID and container name using a tuple
+            pid_callback((mcp_process.pid, container_name))
+
+            mode_info = "API mode" if start_backend else "direct mode"
+            logger.info(
+                f"✓ Cognee MCP server starting on http://127.0.0.1:{mcp_port}/sse ({mode_info})"
+            )
         except Exception as e:
             logger.error(f"Failed to start MCP server with Docker: {str(e)}")
     # Start backend server if requested
cognee/api/v1/update/update.py
CHANGED
@@ -1,5 +1,5 @@
 from uuid import UUID
-from typing import Union, BinaryIO, List, Optional
+from typing import Union, BinaryIO, List, Optional, Any
 
 from cognee.modules.users.models import User
 from cognee.api.v1.delete import delete
@@ -10,12 +10,12 @@ from cognee.api.v1.cognify import cognify
 
 async def update(
     data_id: UUID,
     data: Union[BinaryIO, list[BinaryIO], str, list[str]],
+    dataset_id: UUID,
     user: User = None,
     node_set: Optional[List[str]] = None,
-    dataset_id: Optional[UUID] = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
-    preferred_loaders:
+    preferred_loaders: dict[str, dict[str, Any]] = None,
     incremental_loading: bool = True,
 ):
     """
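
Note that dataset_id moved from an optional keyword to a required parameter of update(). A hedged call sketch; both UUIDs below are placeholders:

import asyncio
from uuid import UUID
import cognee

async def main():
    await cognee.update(
        data_id=UUID("00000000-0000-0000-0000-000000000001"),    # placeholder
        data="Updated document text",
        dataset_id=UUID("00000000-0000-0000-0000-000000000002"),  # placeholder, now required
    )

asyncio.run(main())
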
cognee/cli/_cognee.py
CHANGED
@@ -175,19 +175,59 @@ def main() -> int:
     # Handle UI flag
     if hasattr(args, "start_ui") and args.start_ui:
         spawned_pids = []
+        docker_container = None
 
         def signal_handler(signum, frame):
             """Handle Ctrl+C and other termination signals"""
-            nonlocal spawned_pids
-            fmt.echo("\nShutting down UI server...")
+            nonlocal spawned_pids, docker_container
 
+            try:
+                fmt.echo("\nShutting down UI server...")
+            except (BrokenPipeError, OSError):
+                pass
+
+            # First, stop Docker container if running
+            if docker_container:
+                try:
+                    result = subprocess.run(
+                        ["docker", "stop", docker_container],
+                        capture_output=True,
+                        timeout=10,
+                        check=False,
+                    )
+                    try:
+                        if result.returncode == 0:
+                            fmt.success(f"✓ Docker container {docker_container} stopped.")
+                        else:
+                            fmt.warning(
+                                f"Could not stop container {docker_container}: {result.stderr.decode()}"
+                            )
+                    except (BrokenPipeError, OSError):
+                        pass
+                except subprocess.TimeoutExpired:
+                    try:
+                        fmt.warning(
+                            f"Timeout stopping container {docker_container}, forcing removal..."
+                        )
+                    except (BrokenPipeError, OSError):
+                        pass
+                    subprocess.run(
+                        ["docker", "rm", "-f", docker_container], capture_output=True, check=False
+                    )
+                except Exception:
+                    pass
+
+            # Then, stop regular processes
             for pid in spawned_pids:
                 try:
                     if hasattr(os, "killpg"):
                         # Unix-like systems: Use process groups
                         pgid = os.getpgid(pid)
                         os.killpg(pgid, signal.SIGTERM)
-
+                        try:
+                            fmt.success(f"✓ Process group {pgid} (PID {pid}) terminated.")
+                        except (BrokenPipeError, OSError):
+                            pass
                     else:
                         # Windows: Use taskkill to terminate process and its children
                         subprocess.run(
@@ -195,24 +235,35 @@ def main() -> int:
                             capture_output=True,
                             check=False,
                         )
-
-
-
+                        try:
+                            fmt.success(f"✓ Process {pid} and its children terminated.")
+                        except (BrokenPipeError, OSError):
+                            pass
+                except (OSError, ProcessLookupError, subprocess.SubprocessError):
+                    pass
 
             sys.exit(0)
 
         signal.signal(signal.SIGINT, signal_handler)  # Ctrl+C
         signal.signal(signal.SIGTERM, signal_handler)  # Termination request
+        if hasattr(signal, "SIGHUP"):
+            signal.signal(signal.SIGHUP, signal_handler)
 
         try:
            from cognee import start_ui
 
            fmt.echo("Starting cognee UI...")
 
-            # Callback to capture PIDs of all spawned processes
-            def pid_callback(
-                nonlocal spawned_pids
-
+            # Callback to capture PIDs and Docker container of all spawned processes
+            def pid_callback(pid_or_tuple):
+                nonlocal spawned_pids, docker_container
+                # Handle both regular PIDs and (PID, container_name) tuples
+                if isinstance(pid_or_tuple, tuple):
+                    pid, container_name = pid_or_tuple
+                    spawned_pids.append(pid)
+                    docker_container = container_name
+                else:
+                    spawned_pids.append(pid_or_tuple)
 
             frontend_port = 3000
             start_backend, backend_port = True, 8000
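
The updated pid_callback accepts either a bare PID or a (PID, container_name) tuple, which is how start_ui now reports the Docker-backed MCP server so the signal handler can docker-stop it on shutdown. A standalone sketch of that small protocol; the PIDs and container name are made-up values:

spawned_pids = []
docker_container = None

def pid_callback(pid_or_tuple):
    global docker_container
    # Plain ints are subprocess PIDs; tuples carry (pid, docker container name).
    if isinstance(pid_or_tuple, tuple):
        pid, container_name = pid_or_tuple
        spawned_pids.append(pid)
        docker_container = container_name
    else:
        spawned_pids.append(pid_or_tuple)

pid_callback(4242)                           # regular child process
pid_callback((4243, "cognee-mcp-ab12cd34"))  # Docker-backed MCP server
assert spawned_pids == [4242, 4243]
assert docker_container == "cognee-mcp-ab12cd34"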