cognee 0.3.6__py3-none-any.whl → 0.3.7.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. cognee/__init__.py +1 -0
  2. cognee/api/health.py +2 -12
  3. cognee/api/v1/add/add.py +46 -6
  4. cognee/api/v1/add/routers/get_add_router.py +11 -2
  5. cognee/api/v1/cognify/cognify.py +29 -9
  6. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  7. cognee/api/v1/datasets/datasets.py +11 -0
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +8 -0
  9. cognee/api/v1/delete/routers/get_delete_router.py +2 -0
  10. cognee/api/v1/memify/routers/get_memify_router.py +2 -1
  11. cognee/api/v1/permissions/routers/get_permissions_router.py +6 -0
  12. cognee/api/v1/responses/default_tools.py +0 -1
  13. cognee/api/v1/responses/dispatch_function.py +1 -1
  14. cognee/api/v1/responses/routers/default_tools.py +0 -1
  15. cognee/api/v1/search/routers/get_search_router.py +3 -3
  16. cognee/api/v1/search/search.py +11 -9
  17. cognee/api/v1/settings/routers/get_settings_router.py +7 -1
  18. cognee/api/v1/sync/routers/get_sync_router.py +3 -0
  19. cognee/api/v1/ui/ui.py +45 -16
  20. cognee/api/v1/update/routers/get_update_router.py +3 -1
  21. cognee/api/v1/update/update.py +3 -3
  22. cognee/api/v1/users/routers/get_visualize_router.py +2 -0
  23. cognee/cli/_cognee.py +61 -10
  24. cognee/cli/commands/add_command.py +3 -3
  25. cognee/cli/commands/cognify_command.py +3 -3
  26. cognee/cli/commands/config_command.py +9 -7
  27. cognee/cli/commands/delete_command.py +3 -3
  28. cognee/cli/commands/search_command.py +3 -7
  29. cognee/cli/config.py +0 -1
  30. cognee/context_global_variables.py +5 -0
  31. cognee/exceptions/exceptions.py +1 -1
  32. cognee/infrastructure/databases/cache/__init__.py +2 -0
  33. cognee/infrastructure/databases/cache/cache_db_interface.py +79 -0
  34. cognee/infrastructure/databases/cache/config.py +44 -0
  35. cognee/infrastructure/databases/cache/get_cache_engine.py +67 -0
  36. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +243 -0
  37. cognee/infrastructure/databases/exceptions/__init__.py +1 -0
  38. cognee/infrastructure/databases/exceptions/exceptions.py +18 -2
  39. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -1
  40. cognee/infrastructure/databases/graph/graph_db_interface.py +5 -0
  41. cognee/infrastructure/databases/graph/kuzu/adapter.py +76 -47
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +13 -3
  43. cognee/infrastructure/databases/graph/neo4j_driver/deadlock_retry.py +1 -1
  44. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +1 -1
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -1
  46. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +21 -3
  47. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -10
  48. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +17 -4
  49. cognee/infrastructure/databases/vector/embeddings/config.py +2 -3
  50. cognee/infrastructure/databases/vector/exceptions/exceptions.py +1 -1
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -1
  52. cognee/infrastructure/files/exceptions.py +1 -1
  53. cognee/infrastructure/files/storage/LocalFileStorage.py +9 -9
  54. cognee/infrastructure/files/storage/S3FileStorage.py +11 -11
  55. cognee/infrastructure/files/utils/guess_file_type.py +6 -0
  56. cognee/infrastructure/llm/prompts/feedback_reaction_prompt.txt +14 -0
  57. cognee/infrastructure/llm/prompts/feedback_report_prompt.txt +13 -0
  58. cognee/infrastructure/llm/prompts/feedback_user_context_prompt.txt +5 -0
  59. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +0 -5
  60. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +19 -9
  61. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +17 -5
  62. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +17 -5
  63. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +32 -0
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/__init__.py +0 -0
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +109 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +33 -8
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +40 -18
  68. cognee/infrastructure/loaders/LoaderEngine.py +27 -7
  69. cognee/infrastructure/loaders/external/__init__.py +7 -0
  70. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +2 -8
  71. cognee/infrastructure/loaders/external/beautiful_soup_loader.py +310 -0
  72. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  73. cognee/modules/data/exceptions/exceptions.py +1 -1
  74. cognee/modules/data/methods/__init__.py +3 -0
  75. cognee/modules/data/methods/get_dataset_data.py +4 -1
  76. cognee/modules/data/methods/has_dataset_data.py +21 -0
  77. cognee/modules/engine/models/TableRow.py +0 -1
  78. cognee/modules/ingestion/save_data_to_file.py +9 -2
  79. cognee/modules/pipelines/exceptions/exceptions.py +1 -1
  80. cognee/modules/pipelines/operations/pipeline.py +12 -1
  81. cognee/modules/pipelines/operations/run_tasks.py +25 -197
  82. cognee/modules/pipelines/operations/run_tasks_base.py +7 -0
  83. cognee/modules/pipelines/operations/run_tasks_data_item.py +260 -0
  84. cognee/modules/pipelines/operations/run_tasks_distributed.py +121 -38
  85. cognee/modules/pipelines/operations/run_tasks_with_telemetry.py +9 -1
  86. cognee/modules/retrieval/EntityCompletionRetriever.py +48 -8
  87. cognee/modules/retrieval/base_graph_retriever.py +3 -1
  88. cognee/modules/retrieval/base_retriever.py +3 -1
  89. cognee/modules/retrieval/chunks_retriever.py +5 -1
  90. cognee/modules/retrieval/code_retriever.py +20 -2
  91. cognee/modules/retrieval/completion_retriever.py +50 -9
  92. cognee/modules/retrieval/cypher_search_retriever.py +11 -1
  93. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +47 -8
  94. cognee/modules/retrieval/graph_completion_cot_retriever.py +152 -22
  95. cognee/modules/retrieval/graph_completion_retriever.py +54 -10
  96. cognee/modules/retrieval/lexical_retriever.py +20 -2
  97. cognee/modules/retrieval/natural_language_retriever.py +10 -1
  98. cognee/modules/retrieval/summaries_retriever.py +5 -1
  99. cognee/modules/retrieval/temporal_retriever.py +62 -10
  100. cognee/modules/retrieval/user_qa_feedback.py +3 -2
  101. cognee/modules/retrieval/utils/completion.py +30 -4
  102. cognee/modules/retrieval/utils/description_to_codepart_search.py +1 -1
  103. cognee/modules/retrieval/utils/session_cache.py +156 -0
  104. cognee/modules/search/methods/get_search_type_tools.py +0 -5
  105. cognee/modules/search/methods/no_access_control_search.py +12 -1
  106. cognee/modules/search/methods/search.py +51 -5
  107. cognee/modules/search/types/SearchType.py +0 -1
  108. cognee/modules/settings/get_settings.py +23 -0
  109. cognee/modules/users/methods/get_authenticated_user.py +3 -1
  110. cognee/modules/users/methods/get_default_user.py +1 -6
  111. cognee/modules/users/roles/methods/create_role.py +2 -2
  112. cognee/modules/users/tenants/methods/create_tenant.py +2 -2
  113. cognee/shared/exceptions/exceptions.py +1 -1
  114. cognee/shared/logging_utils.py +18 -11
  115. cognee/shared/utils.py +24 -2
  116. cognee/tasks/codingagents/coding_rule_associations.py +1 -2
  117. cognee/tasks/documents/exceptions/exceptions.py +1 -1
  118. cognee/tasks/feedback/__init__.py +13 -0
  119. cognee/tasks/feedback/create_enrichments.py +84 -0
  120. cognee/tasks/feedback/extract_feedback_interactions.py +230 -0
  121. cognee/tasks/feedback/generate_improved_answers.py +130 -0
  122. cognee/tasks/feedback/link_enrichments_to_feedback.py +67 -0
  123. cognee/tasks/feedback/models.py +26 -0
  124. cognee/tasks/graph/extract_graph_from_data.py +2 -0
  125. cognee/tasks/ingestion/data_item_to_text_file.py +3 -3
  126. cognee/tasks/ingestion/ingest_data.py +11 -5
  127. cognee/tasks/ingestion/save_data_item_to_storage.py +12 -1
  128. cognee/tasks/storage/add_data_points.py +3 -10
  129. cognee/tasks/storage/index_data_points.py +19 -14
  130. cognee/tasks/storage/index_graph_edges.py +25 -11
  131. cognee/tasks/web_scraper/__init__.py +34 -0
  132. cognee/tasks/web_scraper/config.py +26 -0
  133. cognee/tasks/web_scraper/default_url_crawler.py +446 -0
  134. cognee/tasks/web_scraper/models.py +46 -0
  135. cognee/tasks/web_scraper/types.py +4 -0
  136. cognee/tasks/web_scraper/utils.py +142 -0
  137. cognee/tasks/web_scraper/web_scraper_task.py +396 -0
  138. cognee/tests/cli_tests/cli_unit_tests/test_cli_utils.py +0 -1
  139. cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +13 -0
  140. cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +19 -0
  141. cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +344 -0
  142. cognee/tests/subprocesses/reader.py +25 -0
  143. cognee/tests/subprocesses/simple_cognify_1.py +31 -0
  144. cognee/tests/subprocesses/simple_cognify_2.py +31 -0
  145. cognee/tests/subprocesses/writer.py +32 -0
  146. cognee/tests/tasks/descriptive_metrics/metrics_test_utils.py +0 -2
  147. cognee/tests/tasks/descriptive_metrics/neo4j_metrics_test.py +8 -3
  148. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +89 -0
  149. cognee/tests/tasks/web_scraping/web_scraping_test.py +172 -0
  150. cognee/tests/test_add_docling_document.py +56 -0
  151. cognee/tests/test_chromadb.py +7 -11
  152. cognee/tests/test_concurrent_subprocess_access.py +76 -0
  153. cognee/tests/test_conversation_history.py +240 -0
  154. cognee/tests/test_feedback_enrichment.py +174 -0
  155. cognee/tests/test_kuzu.py +27 -15
  156. cognee/tests/test_lancedb.py +7 -11
  157. cognee/tests/test_library.py +32 -2
  158. cognee/tests/test_neo4j.py +24 -16
  159. cognee/tests/test_neptune_analytics_vector.py +7 -11
  160. cognee/tests/test_permissions.py +9 -13
  161. cognee/tests/test_pgvector.py +4 -4
  162. cognee/tests/test_remote_kuzu.py +8 -11
  163. cognee/tests/test_s3_file_storage.py +1 -1
  164. cognee/tests/test_search_db.py +6 -8
  165. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +89 -0
  166. cognee/tests/unit/modules/retrieval/conversation_history_test.py +154 -0
  167. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +51 -0
  168. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/METADATA +21 -6
  169. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/RECORD +178 -139
  170. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/entry_points.txt +1 -0
  171. distributed/Dockerfile +0 -3
  172. distributed/entrypoint.py +21 -9
  173. distributed/signal.py +5 -0
  174. distributed/workers/data_point_saving_worker.py +64 -34
  175. distributed/workers/graph_saving_worker.py +71 -47
  176. cognee/infrastructure/databases/graph/memgraph/memgraph_adapter.py +0 -1116
  177. cognee/modules/retrieval/insights_retriever.py +0 -133
  178. cognee/tests/test_memgraph.py +0 -109
  179. cognee/tests/unit/modules/retrieval/insights_retriever_test.py +0 -251
  180. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/LICENSE +0 -0
  182. {cognee-0.3.6.dist-info → cognee-0.3.7.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py CHANGED
@@ -19,6 +19,7 @@ from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
 from .modules.memify import memify
+from .api.v1.update import update
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
cognee/api/health.py CHANGED
@@ -241,16 +241,6 @@ class HealthChecker:
         """Get comprehensive health status."""
         components = {}

-        # Critical services
-        critical_components = [
-            "relational_db",
-            "vector_db",
-            "graph_db",
-            "file_storage",
-            "llm_provider",
-            "embedding_service",
-        ]
-
         critical_checks = [
             ("relational_db", self.check_relational_db()),
             ("vector_db", self.check_vector_db()),
@@ -296,11 +286,11 @@
             else:
                 components[name] = result

+        critical_comps = [check[0] for check in critical_checks]
         # Determine overall status
         critical_unhealthy = any(
-            comp.status == HealthStatus.UNHEALTHY
+            comp.status == HealthStatus.UNHEALTHY and name in critical_comps
            for name, comp in components.items()
-            if name in critical_components
        )

        has_degraded = any(comp.status == HealthStatus.DEGRADED for comp in components.values())
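
The second hunk derives the critical-component names from `critical_checks` itself, so the name list can no longer drift out of sync with the checks. A standalone sketch of that pattern, with made-up check coroutines (not cognee's actual health checks):

```
import asyncio

async def ok() -> bool:
    return True

# (name, coroutine) pairs act as the single source of truth.
critical_checks = [("relational_db", ok()), ("vector_db", ok())]
critical_comps = [check[0] for check in critical_checks]  # derived, never drifts

async def main():
    results = await asyncio.gather(*(check for _, check in critical_checks))
    print(dict(zip(critical_comps, results)))  # {'relational_db': True, 'vector_db': True}

asyncio.run(main())
```
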
cognee/api/v1/add/add.py CHANGED
@@ -1,6 +1,5 @@
 from uuid import UUID
-from typing import Union, BinaryIO, List, Optional
-
+from typing import Union, BinaryIO, List, Optional, Any
 from cognee.modules.users.models import User
 from cognee.modules.pipelines import Task, run_pipeline
 from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
@@ -11,6 +10,9 @@ from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
 )
 from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()


 async def add(
@@ -21,14 +23,15 @@ async def add(
     vector_db_config: dict = None,
     graph_db_config: dict = None,
     dataset_id: Optional[UUID] = None,
-    preferred_loaders: List[str] = None,
+    preferred_loaders: Optional[List[Union[str, dict[str, dict[str, Any]]]]] = None,
     incremental_loading: bool = True,
+    data_per_batch: Optional[int] = 20,
 ):
     """
     Add data to Cognee for knowledge graph processing.

     This is the first step in the Cognee workflow - it ingests raw data and prepares it
-    for processing. The function accepts various data formats including text, files, and
+    for processing. The function accepts various data formats including text, files, URLs, and
     binary streams, then stores them in a specified dataset for further processing.

     Prerequisites:
@@ -68,6 +71,7 @@ async def add(
            - S3 path: "s3://my-bucket/documents/file.pdf"
            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
            - Binary file object: open("file.txt", "rb")
+           - URL: a web link (http or https)
        dataset_name: Name of the dataset to store data in. Defaults to "main_dataset".
            Create separate datasets to organize different knowledge domains.
        user: User object for authentication and permissions. Uses default user if None.
@@ -78,6 +82,9 @@
        vector_db_config: Optional configuration for vector database (for custom setups).
        graph_db_config: Optional configuration for graph database (for custom setups).
        dataset_id: Optional specific dataset UUID to use instead of dataset_name.
+       extraction_rules: Optional dictionary of rules (e.g., CSS selectors, XPath) for extracting specific content from web pages using BeautifulSoup.
+       tavily_config: Optional configuration for the Tavily API, including API key and extraction settings.
+       soup_crawler_config: Optional configuration for the BeautifulSoup crawler, specifying concurrency, crawl delay, and extraction rules.

    Returns:
        PipelineRunInfo: Information about the ingestion pipeline execution including:
@@ -126,6 +133,21 @@

        # Add a single file
        await cognee.add("/home/user/documents/analysis.pdf")
+
+       # Add a single URL, extracted with the BeautifulSoup ingestion method
+       extraction_rules = {
+           "title": "h1",
+           "description": "p",
+           "more_info": "a[href*='more-info']"
+       }
+       await cognee.add("https://example.com", extraction_rules=extraction_rules)
+
+       # Add a single URL, extracted with the Tavily ingestion method
+       # (make sure TAVILY_API_KEY is set as an environment variable)
+       await cognee.add("https://example.com")
+
+       # Add multiple URLs
+       await cognee.add(["https://example.com", "https://books.toscrape.com"])
    ```

    Environment Variables:
@@ -133,17 +155,34 @@
        - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)

        Optional:
-       - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
+       - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
        - LLM_MODEL: Model name (default: "gpt-5-mini")
        - DEFAULT_USER_EMAIL: Custom default user email
        - DEFAULT_USER_PASSWORD: Custom default user password
        - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
        - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
+       - TAVILY_API_KEY: API key for the Tavily web extraction service

    """
+    if preferred_loaders is not None:
+        transformed = {}
+        for item in preferred_loaders:
+            if isinstance(item, dict):
+                transformed.update(item)
+            else:
+                transformed[item] = {}
+        preferred_loaders = transformed
+
     tasks = [
         Task(resolve_data_directories, include_subdirectories=True),
-        Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
+        Task(
+            ingest_data,
+            dataset_name,
+            user,
+            node_set,
+            dataset_id,
+            preferred_loaders,
+        ),
     ]

     await setup()
@@ -167,6 +206,7 @@
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
        incremental_loading=incremental_loading,
+       data_per_batch=data_per_batch,
    ):
        pipeline_run_info = run_info

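A note on the new `preferred_loaders` shape: the normalization above turns plain strings into `{name: {}}` entries and merges single-entry dicts as-is, so both forms can be mixed in one call. A minimal sketch; the loader names shown are illustrative, not necessarily ones that ship with cognee:

```
import asyncio
import cognee

async def main():
    await cognee.add(
        "https://example.com",
        preferred_loaders=[
            "text_loader",  # bare name -> normalized to {"text_loader": {}}
            {"beautiful_soup_loader": {"crawl_delay": 1.0}},  # name with config, merged as-is
        ],
    )

asyncio.run(main())
```
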
cognee/api/v1/add/routers/get_add_router.py CHANGED
@@ -10,6 +10,7 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -63,7 +64,11 @@ def get_add_router() -> APIRouter:
         send_telemetry(
             "Add API Endpoint Invoked",
             user.id,
-            additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
+            additional_properties={
+                "endpoint": "POST /v1/add",
+                "node_set": node_set,
+                "cognee_version": cognee_version,
+            },
         )

         from cognee.api.v1.add import add as cognee_add
@@ -73,7 +78,11 @@ def get_add_router() -> APIRouter:

         try:
             add_run = await cognee_add(
-                data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
+                data,
+                datasetName,
+                user=user,
+                dataset_id=datasetId,
+                node_set=node_set if node_set else None,
             )

             if isinstance(add_run, PipelineRunErrored):
cognee/api/v1/cognify/cognify.py CHANGED
@@ -44,6 +44,7 @@ async def cognify(
     graph_model: BaseModel = KnowledgeGraph,
     chunker=TextChunker,
     chunk_size: int = None,
+    chunks_per_batch: int = None,
     config: Config = None,
     vector_db_config: dict = None,
     graph_db_config: dict = None,
@@ -51,6 +52,7 @@
     incremental_loading: bool = True,
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
+    data_per_batch: int = 20,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -105,6 +107,7 @@
            Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
            Default limits: ~512-8192 tokens depending on models.
            Smaller chunks = more granular but potentially fragmented knowledge.
+       chunks_per_batch: Number of chunks to process in a single batch in Cognify tasks.
        vector_db_config: Custom vector database configuration for embeddings storage.
        graph_db_config: Custom graph database configuration for relationship storage.
        run_in_background: If True, starts processing asynchronously and returns immediately.
@@ -148,7 +151,7 @@
        # 2. Get entity relationships and connections
        relationships = await cognee.search(
            "connections between concepts",
-           query_type=SearchType.INSIGHTS
+           query_type=SearchType.GRAPH_COMPLETION
        )

        # 3. Find relevant document chunks
@@ -209,10 +212,18 @@
     }

     if temporal_cognify:
-        tasks = await get_temporal_tasks(user, chunker, chunk_size)
+        tasks = await get_temporal_tasks(
+            user=user, chunker=chunker, chunk_size=chunk_size, chunks_per_batch=chunks_per_batch
+        )
     else:
         tasks = await get_default_tasks(
-            user, graph_model, chunker, chunk_size, config, custom_prompt
+            user=user,
+            graph_model=graph_model,
+            chunker=chunker,
+            chunk_size=chunk_size,
+            config=config,
+            custom_prompt=custom_prompt,
+            chunks_per_batch=chunks_per_batch,
         )

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -228,6 +239,7 @@
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
         pipeline_name="cognify_pipeline",
+        data_per_batch=data_per_batch,
     )


@@ -238,6 +250,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunk_size: int = None,
     config: Config = None,
     custom_prompt: Optional[str] = None,
+    chunks_per_batch: int = 100,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -256,6 +269,9 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
            "ontology_config": {"ontology_resolver": get_default_ontology_resolver()}
        }

+    if chunks_per_batch is None:
+        chunks_per_batch = 100
+
     default_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -269,20 +285,20 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             graph_model=graph_model,
             config=config,
             custom_prompt=custom_prompt,
-            task_config={"batch_size": 10},
+            task_config={"batch_size": chunks_per_batch},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
-            task_config={"batch_size": 10},
+            task_config={"batch_size": chunks_per_batch},
         ),
-        Task(add_data_points, task_config={"batch_size": 10}),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]

     return default_tasks


 async def get_temporal_tasks(
-    user: User = None, chunker=TextChunker, chunk_size: int = None
+    user: User = None, chunker=TextChunker, chunk_size: int = None, chunks_per_batch: int = 10
 ) -> list[Task]:
     """
     Builds and returns a list of temporal processing tasks to be executed in sequence.
@@ -299,10 +315,14 @@ async def get_temporal_tasks(
        user (User, optional): The user requesting task execution, used for permission checks.
        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+       chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify.

    Returns:
        list[Task]: A list of Task objects representing the temporal processing pipeline.
    """
+    if chunks_per_batch is None:
+        chunks_per_batch = 10
+
     temporal_tasks = [
         Task(classify_documents),
         Task(check_permissions_on_dataset, user=user, permissions=["write"]),
@@ -311,9 +331,9 @@
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
             chunker=chunker,
         ),
-        Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
+        Task(extract_events_and_timestamps, task_config={"batch_size": chunks_per_batch}),
         Task(extract_knowledge_graph_from_events),
-        Task(add_data_points, task_config={"batch_size": 10}),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]

     return temporal_tasks
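
Per the defaults above, the standard pipeline batches 100 chunks per task invocation and the temporal pipeline 10, while `data_per_batch` caps how many data items run through the pipeline together. A hedged sketch of tuning both knobs from the public API:

```
import asyncio
import cognee

async def main():
    await cognee.add("Some text about events in 1969.")
    # Larger chunk batches mean fewer, bigger task invocations;
    # data_per_batch limits how many data items are processed per pipeline batch.
    await cognee.cognify(chunks_per_batch=50, data_per_batch=10)

asyncio.run(main())
```
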
cognee/api/v1/cognify/routers/get_cognify_router.py CHANGED
@@ -29,7 +29,7 @@ from cognee.modules.pipelines.queues.pipeline_run_info_queues import (
 )
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.utils import send_telemetry
-
+from cognee import __version__ as cognee_version

 logger = get_logger("api.cognify")

@@ -98,6 +98,7 @@ def get_cognify_router() -> APIRouter:
             user.id,
             additional_properties={
                 "endpoint": "POST /v1/cognify",
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/datasets/datasets.py CHANGED
@@ -1,4 +1,5 @@
 from uuid import UUID
+from cognee.modules.data.methods import has_dataset_data
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.ingestion import discover_directory_datasets
 from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
@@ -26,6 +27,16 @@ class datasets:

         return await get_dataset_data(dataset.id)

+    @staticmethod
+    async def has_data(dataset_id: str) -> bool:
+        from cognee.modules.data.methods import get_dataset
+
+        user = await get_default_user()
+
+        dataset = await get_dataset(user.id, dataset_id)
+
+        return await has_dataset_data(dataset.id)
+
     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
         return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")
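
The new `datasets.has_data` helper makes it cheap to guard against re-ingestion. A minimal usage sketch, assuming a dataset already exists (the UUID string is a placeholder):

```
import asyncio
from uuid import UUID

import cognee

async def main(dataset_id: str):
    # True once the dataset contains at least one ingested data item.
    if await cognee.datasets.has_data(dataset_id):
        print("Dataset already populated; skipping add().")
    else:
        await cognee.add("fresh content", dataset_id=UUID(dataset_id))

asyncio.run(main("00000000-0000-0000-0000-000000000000"))  # placeholder UUID
```
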
cognee/api/v1/datasets/routers/get_datasets_router.py CHANGED
@@ -24,6 +24,7 @@ from cognee.modules.users.permissions.methods import (
 from cognee.modules.graph.methods import get_formatted_graph_data
 from cognee.modules.pipelines.models import PipelineRunStatus
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -100,6 +101,7 @@ def get_datasets_router() -> APIRouter:
             user.id,
             additional_properties={
                 "endpoint": "GET /v1/datasets",
+                "cognee_version": cognee_version,
             },
         )

@@ -147,6 +149,7 @@
             user.id,
             additional_properties={
                 "endpoint": "POST /v1/datasets",
+                "cognee_version": cognee_version,
             },
         )

@@ -201,6 +204,7 @@
             additional_properties={
                 "endpoint": f"DELETE /v1/datasets/{str(dataset_id)}",
                 "dataset_id": str(dataset_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -246,6 +250,7 @@
                 "endpoint": f"DELETE /v1/datasets/{str(dataset_id)}/data/{str(data_id)}",
                 "dataset_id": str(dataset_id),
                 "data_id": str(data_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -327,6 +332,7 @@
             additional_properties={
                 "endpoint": f"GET /v1/datasets/{str(dataset_id)}/data",
                 "dataset_id": str(dataset_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -387,6 +393,7 @@
             additional_properties={
                 "endpoint": "GET /v1/datasets/status",
                 "datasets": [str(dataset_id) for dataset_id in datasets],
+                "cognee_version": cognee_version,
             },
         )

@@ -433,6 +440,7 @@
                 "endpoint": f"GET /v1/datasets/{str(dataset_id)}/data/{str(data_id)}/raw",
                 "dataset_id": str(dataset_id),
                 "data_id": str(data_id),
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/delete/routers/get_delete_router.py CHANGED
@@ -6,6 +6,7 @@ from cognee.shared.logging_utils import get_logger
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -39,6 +40,7 @@ def get_delete_router() -> APIRouter:
                 "endpoint": "DELETE /v1/delete",
                 "dataset_id": str(dataset_id),
                 "data_id": str(data_id),
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/memify/routers/get_memify_router.py CHANGED
@@ -12,6 +12,7 @@ from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
 from cognee.modules.pipelines.models import PipelineRunErrored
 from cognee.shared.logging_utils import get_logger
+from cognee import __version__ as cognee_version

 logger = get_logger()

@@ -73,7 +74,7 @@ def get_memify_router() -> APIRouter:
         send_telemetry(
             "Memify API Endpoint Invoked",
             user.id,
-            additional_properties={"endpoint": "POST /v1/memify"},
+            additional_properties={"endpoint": "POST /v1/memify", "cognee_version": cognee_version},
         )

         if not payload.dataset_id and not payload.dataset_name:
cognee/api/v1/permissions/routers/get_permissions_router.py CHANGED
@@ -7,6 +7,7 @@ from fastapi.responses import JSONResponse
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version


 def get_permissions_router() -> APIRouter:
@@ -48,6 +49,7 @@ def get_permissions_router() -> APIRouter:
                 "endpoint": f"POST /v1/permissions/datasets/{str(principal_id)}",
                 "dataset_ids": str(dataset_ids),
                 "principal_id": str(principal_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -89,6 +91,7 @@
             additional_properties={
                 "endpoint": "POST /v1/permissions/roles",
                 "role_name": role_name,
+                "cognee_version": cognee_version,
             },
         )

@@ -133,6 +136,7 @@
                 "endpoint": f"POST /v1/permissions/users/{str(user_id)}/roles",
                 "user_id": str(user_id),
                 "role_id": str(role_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -175,6 +179,7 @@
                 "endpoint": f"POST /v1/permissions/users/{str(user_id)}/tenants",
                 "user_id": str(user_id),
                 "tenant_id": str(tenant_id),
+                "cognee_version": cognee_version,
             },
         )

@@ -209,6 +214,7 @@
             additional_properties={
                 "endpoint": "POST /v1/permissions/tenants",
                 "tenant_name": tenant_name,
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/responses/default_tools.py CHANGED
@@ -14,7 +14,6 @@ DEFAULT_TOOLS = [
                 "type": "string",
                 "description": "Type of search to perform",
                 "enum": [
-                    "INSIGHTS",
                     "CODE",
                     "GRAPH_COMPLETION",
                     "NATURAL_LANGUAGE",
cognee/api/v1/responses/dispatch_function.py CHANGED
@@ -59,7 +59,7 @@ async def handle_search(arguments: Dict[str, Any], user) -> list:
     valid_search_types = (
         search_tool["parameters"]["properties"]["search_type"]["enum"]
         if search_tool
-        else ["INSIGHTS", "CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
+        else ["CODE", "GRAPH_COMPLETION", "NATURAL_LANGUAGE"]
     )

     if search_type_str not in valid_search_types:
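
Since `INSIGHTS` is no longer an accepted search type anywhere in this release (the cognify docstring now points to `GRAPH_COMPLETION` instead), older callers need a mapping step. A small illustrative client-side shim, not part of the package:

```
# Hypothetical shim for callers that still send "INSIGHTS".
def migrate_search_type(requested: str) -> str:
    # GRAPH_COMPLETION is the closest surviving type per this release's docs.
    return "GRAPH_COMPLETION" if requested == "INSIGHTS" else requested

assert migrate_search_type("INSIGHTS") == "GRAPH_COMPLETION"
assert migrate_search_type("CODE") == "CODE"
```
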
cognee/api/v1/responses/routers/default_tools.py CHANGED
@@ -14,7 +14,6 @@ DEFAULT_TOOLS = [
                 "type": "string",
                 "description": "Type of search to perform",
                 "enum": [
-                    "INSIGHTS",
                     "CODE",
                     "GRAPH_COMPLETION",
                     "NATURAL_LANGUAGE",
cognee/api/v1/search/routers/get_search_router.py CHANGED
@@ -13,6 +13,7 @@ from cognee.modules.users.models import User
 from cognee.modules.search.operations import get_history
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.shared.utils import send_telemetry
+from cognee import __version__ as cognee_version


 # Note: Datasets sent by name will only map to datasets owned by the request sender
@@ -61,9 +62,7 @@ def get_search_router() -> APIRouter:
         send_telemetry(
             "Search API Endpoint Invoked",
             user.id,
-            additional_properties={
-                "endpoint": "GET /v1/search",
-            },
+            additional_properties={"endpoint": "GET /v1/search", "cognee_version": cognee_version},
         )

         try:
@@ -118,6 +117,7 @@
                 "top_k": payload.top_k,
                 "only_context": payload.only_context,
                 "use_combined_context": payload.use_combined_context,
+                "cognee_version": cognee_version,
             },
         )

cognee/api/v1/search/search.py CHANGED
@@ -1,6 +1,7 @@
 from uuid import UUID
 from typing import Union, Optional, List, Type

+from cognee.infrastructure.databases.graph import get_graph_engine
 from cognee.modules.engine.models.node_set import NodeSet
 from cognee.modules.users.models import User
 from cognee.modules.search.types import SearchResult, SearchType, CombinedSearchResult
@@ -8,6 +9,10 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.search.methods import search as search_function
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.exceptions import DatasetNotFoundError
+from cognee.context_global_variables import set_session_user_context_variable
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()


 async def search(
@@ -25,6 +30,7 @@ async def search(
     last_k: Optional[int] = 1,
     only_context: bool = False,
     use_combined_context: bool = False,
+    session_id: Optional[str] = None,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -52,11 +58,6 @@
            Best for: Direct document retrieval, specific fact-finding.
            Returns: LLM responses based on relevant text chunks.

-       **INSIGHTS**:
-           Structured entity relationships and semantic connections.
-           Best for: Understanding concept relationships, knowledge mapping.
-           Returns: Formatted relationship data and entity connections.
-
        **CHUNKS**:
            Raw text segments that match the query semantically.
            Best for: Finding specific passages, citations, exact content.
@@ -118,15 +119,14 @@

        save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not

+       session_id: Optional session identifier for caching Q&A interactions. Defaults to 'default_session' if None.
+
    Returns:
        list: Search results in format determined by query_type:

        **GRAPH_COMPLETION/RAG_COMPLETION**:
            [List of conversational AI response strings]

-       **INSIGHTS**:
-           [List of formatted relationship descriptions and entity connections]
-
        **CHUNKS**:
            [List of relevant text passages with source metadata]

@@ -146,7 +146,6 @@
    Performance & Optimization:
        - **GRAPH_COMPLETION**: Slower but most intelligent, uses LLM + graph context
        - **RAG_COMPLETION**: Medium speed, uses LLM + document chunks (no graph traversal)
-       - **INSIGHTS**: Fast, returns structured relationships without LLM processing
        - **CHUNKS**: Fastest, pure vector similarity search without LLM
        - **SUMMARIES**: Fast, returns pre-computed summaries
        - **CODE**: Medium speed, specialized for code understanding
@@ -177,6 +176,8 @@
     if user is None:
         user = await get_default_user()

+    await set_session_user_context_variable(user)
+
     # Transform string based datasets to UUID - String based datasets can only be found for current user
     if datasets is not None and [all(isinstance(dataset, str) for dataset in datasets)]:
         datasets = await get_authorized_existing_datasets(datasets, "read", user)
@@ -198,6 +199,7 @@
         last_k=last_k,
         only_context=only_context,
         use_combined_context=use_combined_context,
+        session_id=session_id,
     )

     return filtered_search_results
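
With `session_id` now threaded through to the search layer (alongside the new session cache in `cognee/modules/retrieval/utils/session_cache.py`), repeated questions in one session can share cached Q&A context. A minimal usage sketch; the session identifier is illustrative:

```
import asyncio
import cognee
from cognee.modules.search.types import SearchType

async def main():
    # Interactions sharing a session_id are cached together;
    # omitting it falls back to 'default_session' per the docstring above.
    results = await cognee.search(
        "What are the main topics?",
        query_type=SearchType.GRAPH_COMPLETION,
        session_id="support-chat-42",
    )
    print(results)

asyncio.run(main())
```
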
cognee/api/v1/settings/routers/get_settings_router.py CHANGED
@@ -21,7 +21,13 @@ class SettingsDTO(OutDTO):


 class LLMConfigInputDTO(InDTO):
-    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"], Literal["gemini"]]
+    provider: Union[
+        Literal["openai"],
+        Literal["ollama"],
+        Literal["anthropic"],
+        Literal["gemini"],
+        Literal["mistral"],
+    ]
     model: str
     api_key: str
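
For reference, a request body the settings endpoint should now accept with the widened provider union; the field names simply mirror `LLMConfigInputDTO`, and the model name is illustrative:

```
# Hypothetical payload for the LLM settings endpoint after this change.
payload = {
    "provider": "mistral",            # newly accepted literal
    "model": "mistral-large-latest",  # illustrative model name
    "api_key": "YOUR_MISTRAL_API_KEY",
}
```
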