cognee 0.2.3.dev0__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__main__.py +4 -0
- cognee/api/v1/add/add.py +18 -11
- cognee/api/v1/cognify/code_graph_pipeline.py +7 -1
- cognee/api/v1/cognify/cognify.py +22 -115
- cognee/api/v1/cognify/routers/get_cognify_router.py +11 -3
- cognee/api/v1/config/config.py +5 -13
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -2
- cognee/api/v1/delete/delete.py +1 -1
- cognee/api/v1/exceptions/__init__.py +13 -0
- cognee/api/v1/{delete → exceptions}/exceptions.py +15 -12
- cognee/api/v1/responses/default_tools.py +4 -0
- cognee/api/v1/responses/dispatch_function.py +6 -1
- cognee/api/v1/responses/models.py +1 -1
- cognee/api/v1/search/search.py +6 -7
- cognee/cli/__init__.py +10 -0
- cognee/cli/_cognee.py +180 -0
- cognee/cli/commands/__init__.py +1 -0
- cognee/cli/commands/add_command.py +80 -0
- cognee/cli/commands/cognify_command.py +128 -0
- cognee/cli/commands/config_command.py +225 -0
- cognee/cli/commands/delete_command.py +80 -0
- cognee/cli/commands/search_command.py +149 -0
- cognee/cli/config.py +33 -0
- cognee/cli/debug.py +21 -0
- cognee/cli/echo.py +45 -0
- cognee/cli/exceptions.py +23 -0
- cognee/cli/minimal_cli.py +97 -0
- cognee/cli/reference.py +26 -0
- cognee/cli/suppress_logging.py +12 -0
- cognee/eval_framework/corpus_builder/corpus_builder_executor.py +2 -2
- cognee/eval_framework/eval_config.py +1 -1
- cognee/exceptions/__init__.py +5 -5
- cognee/exceptions/exceptions.py +37 -17
- cognee/infrastructure/data/exceptions/__init__.py +7 -0
- cognee/infrastructure/data/exceptions/exceptions.py +22 -0
- cognee/infrastructure/data/utils/extract_keywords.py +3 -3
- cognee/infrastructure/databases/exceptions/__init__.py +3 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +57 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -9
- cognee/infrastructure/databases/graph/kuzu/adapter.py +64 -2
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +49 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +15 -10
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +2 -2
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +4 -5
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +5 -3
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +17 -8
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +5 -5
- cognee/infrastructure/databases/vector/embeddings/config.py +2 -2
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +6 -6
- cognee/infrastructure/databases/vector/exceptions/exceptions.py +3 -3
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -2
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +4 -3
- cognee/infrastructure/files/utils/get_data_file_path.py +14 -9
- cognee/infrastructure/files/utils/get_file_metadata.py +2 -1
- cognee/infrastructure/llm/LLMGateway.py +14 -5
- cognee/infrastructure/llm/config.py +5 -5
- cognee/infrastructure/llm/exceptions.py +30 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +16 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_content_graph.py +19 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +5 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +6 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -15
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +6 -4
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +9 -7
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +2 -2
- cognee/infrastructure/llm/tokenizer/HuggingFace/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/Mistral/adapter.py +3 -3
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +6 -6
- cognee/infrastructure/llm/utils.py +7 -7
- cognee/modules/data/exceptions/exceptions.py +18 -5
- cognee/modules/data/methods/__init__.py +2 -0
- cognee/modules/data/methods/create_authorized_dataset.py +19 -0
- cognee/modules/data/methods/delete_data.py +2 -4
- cognee/modules/data/methods/get_authorized_dataset.py +11 -5
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +16 -0
- cognee/modules/data/methods/load_or_create_datasets.py +2 -20
- cognee/modules/data/processing/document_types/exceptions/exceptions.py +2 -2
- cognee/modules/graph/cognee_graph/CogneeGraph.py +6 -4
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +5 -10
- cognee/modules/graph/exceptions/__init__.py +2 -0
- cognee/modules/graph/exceptions/exceptions.py +25 -3
- cognee/modules/graph/methods/get_formatted_graph_data.py +3 -2
- cognee/modules/ingestion/exceptions/exceptions.py +2 -2
- cognee/modules/ontology/exceptions/exceptions.py +4 -4
- cognee/modules/pipelines/__init__.py +1 -1
- cognee/modules/pipelines/exceptions/exceptions.py +2 -2
- cognee/modules/pipelines/exceptions/tasks.py +18 -0
- cognee/modules/pipelines/layers/__init__.py +1 -0
- cognee/modules/pipelines/layers/check_pipeline_run_qualification.py +59 -0
- cognee/modules/pipelines/layers/pipeline_execution_mode.py +127 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +12 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +34 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +55 -0
- cognee/modules/pipelines/layers/setup_and_check_environment.py +41 -0
- cognee/modules/pipelines/layers/validate_pipeline_tasks.py +20 -0
- cognee/modules/pipelines/methods/__init__.py +2 -0
- cognee/modules/pipelines/methods/get_pipeline_runs_by_dataset.py +34 -0
- cognee/modules/pipelines/methods/reset_pipeline_run_status.py +16 -0
- cognee/modules/pipelines/operations/__init__.py +0 -1
- cognee/modules/pipelines/operations/log_pipeline_run_initiated.py +1 -1
- cognee/modules/pipelines/operations/pipeline.py +23 -138
- cognee/modules/retrieval/base_feedback.py +11 -0
- cognee/modules/retrieval/cypher_search_retriever.py +1 -9
- cognee/modules/retrieval/exceptions/exceptions.py +12 -6
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +9 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +13 -6
- cognee/modules/retrieval/graph_completion_retriever.py +89 -5
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/natural_language_retriever.py +0 -4
- cognee/modules/retrieval/user_qa_feedback.py +83 -0
- cognee/modules/retrieval/utils/extract_uuid_from_node.py +18 -0
- cognee/modules/retrieval/utils/models.py +40 -0
- cognee/modules/search/exceptions/__init__.py +7 -0
- cognee/modules/search/exceptions/exceptions.py +15 -0
- cognee/modules/search/methods/search.py +47 -7
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +2 -2
- cognee/modules/users/exceptions/exceptions.py +6 -6
- cognee/shared/CodeGraphEntities.py +1 -0
- cognee/shared/exceptions/exceptions.py +2 -2
- cognee/shared/logging_utils.py +142 -31
- cognee/shared/utils.py +0 -1
- cognee/tasks/completion/exceptions/exceptions.py +3 -3
- cognee/tasks/documents/classify_documents.py +4 -0
- cognee/tasks/documents/exceptions/__init__.py +11 -0
- cognee/tasks/documents/exceptions/exceptions.py +36 -0
- cognee/tasks/documents/extract_chunks_from_documents.py +8 -2
- cognee/tasks/graph/exceptions/__init__.py +12 -0
- cognee/tasks/graph/exceptions/exceptions.py +41 -0
- cognee/tasks/graph/extract_graph_from_data.py +34 -2
- cognee/tasks/ingestion/exceptions/__init__.py +8 -0
- cognee/tasks/ingestion/exceptions/exceptions.py +12 -0
- cognee/tasks/ingestion/resolve_data_directories.py +5 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +2 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +120 -48
- cognee/tasks/storage/add_data_points.py +41 -3
- cognee/tasks/storage/exceptions/__init__.py +9 -0
- cognee/tasks/storage/exceptions/exceptions.py +13 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/summarization/exceptions/__init__.py +9 -0
- cognee/tasks/summarization/exceptions/exceptions.py +14 -0
- cognee/tasks/summarization/summarize_text.py +8 -1
- cognee/tests/integration/cli/__init__.py +3 -0
- cognee/tests/integration/cli/test_cli_integration.py +331 -0
- cognee/tests/integration/documents/PdfDocument_test.py +2 -2
- cognee/tests/integration/documents/TextDocument_test.py +2 -4
- cognee/tests/integration/documents/UnstructuredDocument_test.py +5 -8
- cognee/tests/test_delete_by_id.py +1 -1
- cognee/tests/{test_deletion.py → test_delete_hard.py} +0 -37
- cognee/tests/test_delete_soft.py +85 -0
- cognee/tests/test_kuzu.py +2 -2
- cognee/tests/test_neo4j.py +2 -2
- cognee/tests/test_search_db.py +126 -7
- cognee/tests/unit/cli/__init__.py +3 -0
- cognee/tests/unit/cli/test_cli_commands.py +483 -0
- cognee/tests/unit/cli/test_cli_edge_cases.py +625 -0
- cognee/tests/unit/cli/test_cli_main.py +173 -0
- cognee/tests/unit/cli/test_cli_runner.py +62 -0
- cognee/tests/unit/cli/test_cli_utils.py +127 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +5 -5
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +3 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +3 -3
- cognee/tests/unit/modules/search/search_methods_test.py +4 -2
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/METADATA +7 -5
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/RECORD +172 -121
- cognee-0.2.4.dist-info/entry_points.txt +2 -0
- cognee/infrastructure/databases/exceptions/EmbeddingException.py +0 -20
- cognee/infrastructure/databases/graph/networkx/__init__.py +0 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +0 -1017
- cognee/infrastructure/pipeline/models/Operation.py +0 -60
- cognee/infrastructure/pipeline/models/__init__.py +0 -0
- cognee/notebooks/github_analysis_step_by_step.ipynb +0 -37
- cognee/tests/tasks/descriptive_metrics/networkx_metrics_test.py +0 -7
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/WHEEL +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.3.dev0.dist-info → cognee-0.2.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/__main__.py
ADDED
cognee/api/v1/add/add.py
CHANGED
@@ -1,9 +1,15 @@
 from uuid import UUID
 from typing import Union, BinaryIO, List, Optional

-from cognee.modules.pipelines import Task
 from cognee.modules.users.models import User
-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import Task, run_pipeline
+from cognee.modules.pipelines.layers.resolve_authorized_user_dataset import (
+    resolve_authorized_user_dataset,
+)
+from cognee.modules.pipelines.layers.reset_dataset_pipeline_run_status import (
+    reset_dataset_pipeline_run_status,
+)
+from cognee.modules.engine.operations.setup import setup
 from cognee.tasks.ingestion import ingest_data, resolve_data_directories

@@ -128,28 +134,29 @@ async def add(

     Optional:
     - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama"
-    - LLM_MODEL: Model name (default: "gpt-
+    - LLM_MODEL: Model name (default: "gpt-5-mini")
     - DEFAULT_USER_EMAIL: Custom default user email
     - DEFAULT_USER_PASSWORD: Custom default user password
     - VECTOR_DB_PROVIDER: "lancedb" (default), "chromadb", "pgvector"
-    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"
+    - GRAPH_DATABASE_PROVIDER: "kuzu" (default), "neo4j"

-    Raises:
-        FileNotFoundError: If specified file paths don't exist
-        PermissionError: If user lacks access to files or dataset
-        UnsupportedFileTypeError: If file format cannot be processed
-        InvalidValueError: If LLM_API_KEY is not set or invalid
     """
     tasks = [
         Task(resolve_data_directories, include_subdirectories=True),
         Task(ingest_data, dataset_name, user, node_set, dataset_id, preferred_loaders),
     ]

+    await setup()
+
+    user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)
+
+    await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
+
     pipeline_run_info = None

-    async for run_info in cognee_pipeline(
+    async for run_info in run_pipeline(
         tasks=tasks,
-        datasets=
+        datasets=[authorized_dataset.id],
         data=data,
         user=user,
         pipeline_name="add_pipeline",
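For reference, a minimal usage sketch of the reworked add() flow above; the text and dataset name are example values. The point of the change is that setup, dataset authorization, and pipeline-run status reset now happen inside add() before run_pipeline starts.

```python
# Hedged sketch of calling the reworked add(); "demo_dataset" is arbitrary.
# Inside add(), setup() runs, the dataset is resolved and authorized, its
# pipeline-run status is reset, and only then does run_pipeline ingest data.
import asyncio
import cognee

async def main():
    await cognee.add(
        "Knowledge graphs connect entities through typed relationships.",
        dataset_name="demo_dataset",
    )

asyncio.run(main())
```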
cognee/api/v1/cognify/code_graph_pipeline.py
CHANGED

@@ -40,8 +40,14 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True

+    # Multi-language support: allow passing supported_languages
+    supported_languages = None  # defer to task defaults
     tasks = [
-        Task(
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            supported_languages=supported_languages,
+        ),
         # Task(summarize_code, task_config={"batch_size": 500}),  # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
     ]
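A hedged sketch of driving this pipeline; the repository path is a placeholder, and supported_languages is left to the task defaults, matching the diff above.

```python
# Sketch: run_code_graph_pipeline streams pipeline-run info, so it is
# consumed with async for; "/path/to/repo" is a placeholder.
import asyncio
from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline

async def main():
    async for run_info in run_code_graph_pipeline("/path/to/repo", include_docs=False):
        print(run_info)

asyncio.run(main())
```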
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -7,12 +7,10 @@ from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens

-from cognee.modules.pipelines import cognee_pipeline
+from cognee.modules.pipelines import run_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
-from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
-from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
 from cognee.modules.users.models import User

 from cognee.tasks.documents import (

@@ -23,6 +21,7 @@ from cognee.tasks.documents import (
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
+from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor

 logger = get_logger("cognify")

@@ -40,6 +39,7 @@ async def cognify(
     graph_db_config: dict = None,
     run_in_background: bool = False,
     incremental_loading: bool = True,
+    custom_prompt: Optional[str] = None,
 ):
     """
     Transform ingested data into a structured knowledge graph.

@@ -91,7 +91,7 @@
             - LangchainChunker: Recursive character splitting with overlap
             Determines how documents are segmented for processing.
         chunk_size: Maximum tokens per chunk. Auto-calculated based on LLM if None.
-            Formula: min(
+            Formula: min(embedding_max_completion_tokens, llm_max_completion_tokens // 2)
             Default limits: ~512-8192 tokens depending on models.
             Smaller chunks = more granular but potentially fragmented knowledge.
         ontology_file_path: Path to RDF/OWL ontology file for domain-specific entity types.

@@ -102,6 +102,10 @@
             If False, waits for completion before returning.
             Background mode recommended for large datasets (>100MB).
             Use pipeline_run_id from return value to monitor progress.
+        custom_prompt: Optional custom prompt string to use for entity extraction and graph generation.
+            If provided, this prompt will be used instead of the default prompts for
+            knowledge graph extraction. The prompt should guide the LLM on how to
+            extract entities and relationships from the text content.

     Returns:
         Union[dict, list[PipelineRunInfo]]:

@@ -177,124 +181,25 @@
         - LLM_PROVIDER, LLM_MODEL, VECTOR_DB_PROVIDER, GRAPH_DATABASE_PROVIDER
         - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
         - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
-
-    Raises:
-        DatasetNotFoundError: If specified datasets don't exist
-        PermissionError: If user lacks processing rights
-        InvalidValueError: If LLM_API_KEY is not set
-        OntologyParsingError: If ontology file is malformed
-        ValueError: If chunks exceed max token limits (reduce chunk_size)
-        DatabaseNotCreatedError: If databases are not properly initialized
     """
-    tasks = await get_default_tasks(
-
-
-        return await run_cognify_as_background_process(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
-    else:
-        return await run_cognify_blocking(
-            tasks=tasks,
-            user=user,
-            datasets=datasets,
-            vector_db_config=vector_db_config,
-            graph_db_config=graph_db_config,
-            incremental_loading=incremental_loading,
-        )
-
+    tasks = await get_default_tasks(
+        user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+    )

-
-
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    total_run_info = {}
+    # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
+    pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)

-
+    # Run the run_pipeline in the background or blocking based on executor
+    return await pipeline_executor_func(
+        pipeline=run_pipeline,
         tasks=tasks,
-        datasets=datasets,
         user=user,
-
-        graph_db_config=graph_db_config,
+        datasets=datasets,
         vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
-
-
-            total_run_info[run_info.dataset_id] = run_info
-        else:
-            total_run_info = run_info
-
-    return total_run_info
-
-
-async def run_cognify_as_background_process(
-    tasks,
-    user,
-    datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
-    incremental_loading: bool = True,
-):
-    # Convert dataset to list if it's a string
-    if isinstance(datasets, str):
-        datasets = [datasets]
-
-    # Store pipeline status for all pipelines
-    pipeline_run_started_info = {}
-
-    async def handle_rest_of_the_run(pipeline_list):
-        # Execute all provided pipelines one by one to avoid database write conflicts
-        # TODO: Convert to async gather task instead of for loop when Queue mechanism for database is created
-        for pipeline in pipeline_list:
-            while True:
-                try:
-                    pipeline_run_info = await anext(pipeline)
-
-                    push_to_queue(pipeline_run_info.pipeline_run_id, pipeline_run_info)
-
-                    if isinstance(pipeline_run_info, PipelineRunCompleted) or isinstance(
-                        pipeline_run_info, PipelineRunErrored
-                    ):
-                        break
-                except StopAsyncIteration:
-                    break
-
-    # Start all pipelines to get started status
-    pipeline_list = []
-    for dataset in datasets:
-        pipeline_run = cognee_pipeline(
-            tasks=tasks,
-            user=user,
-            datasets=dataset,
-            pipeline_name="cognify_pipeline",
-            graph_db_config=graph_db_config,
-            vector_db_config=vector_db_config,
-            incremental_loading=incremental_loading,
-        )
-
-        # Save dataset Pipeline run started info
-        run_info = await anext(pipeline_run)
-        pipeline_run_started_info[run_info.dataset_id] = run_info
-
-        if pipeline_run_started_info[run_info.dataset_id].payload:
-            # Remove payload info to avoid serialization
-            # TODO: Handle payload serialization
-            pipeline_run_started_info[run_info.dataset_id].payload = []
-
-        pipeline_list.append(pipeline_run)
-
-    # Send all started pipelines to execute one by one in background
-    asyncio.create_task(handle_rest_of_the_run(pipeline_list=pipeline_list))
-
-    return pipeline_run_started_info
+        pipeline_name="cognify_pipeline",
+    )


 async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's comment)

@@ -303,6 +208,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     chunker=TextChunker,
     chunk_size: int = None,
     ontology_file_path: Optional[str] = None,
+    custom_prompt: Optional[str] = None,
 ) -> list[Task]:
     default_tasks = [
         Task(classify_documents),

@@ -316,6 +222,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             extract_graph_from_data,
             graph_model=graph_model,
             ontology_adapter=OntologyResolver(ontology_file=ontology_file_path),
+            custom_prompt=custom_prompt,
             task_config={"batch_size": 10},
         ),  # Generate knowledge graphs from the document chunks.
         Task(
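A minimal sketch of the slimmed-down cognify() call path, assuming data was added beforehand; run_in_background picks the executor returned by get_pipeline_executor(), and custom_prompt replaces the default graph-extraction prompts.

```python
# Hedged sketch of the new cognify() surface; dataset name and prompt are
# example values. run_in_background=False waits for completion, True returns
# started-run info while the pipeline continues in the background.
import asyncio
import cognee

async def main():
    await cognee.add("GraphRAG combines knowledge graphs with retrieval.", dataset_name="demo")
    run_info = await cognee.cognify(
        datasets=["demo"],
        run_in_background=False,
        custom_prompt="Extract entities focusing on technical concepts and their relationships.",
    )
    print(run_info)

asyncio.run(main())
```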
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED

@@ -37,6 +37,9 @@ class CognifyPayloadDTO(InDTO):
     datasets: Optional[List[str]] = Field(default=None)
     dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
     run_in_background: Optional[bool] = Field(default=False)
+    custom_prompt: Optional[str] = Field(
+        default=None, description="Custom prompt for entity extraction and graph generation"
+    )


 def get_cognify_router() -> APIRouter:

@@ -63,6 +66,7 @@ def get_cognify_router() -> APIRouter:
     - **datasets** (Optional[List[str]]): List of dataset names to process. Dataset names are resolved to datasets owned by the authenticated user.
     - **dataset_ids** (Optional[List[UUID]]): List of existing dataset UUIDs to process. UUIDs allow processing of datasets not owned by the user (if permitted).
     - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
+    - **custom_prompt** (Optional[str]): Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts for knowledge graph extraction.

     ## Response
     - **Blocking execution**: Complete pipeline run information with entity counts, processing duration, and success/failure status

@@ -76,7 +80,8 @@ def get_cognify_router() -> APIRouter:
     ```json
     {
         "datasets": ["research_papers", "documentation"],
-        "run_in_background": false
+        "run_in_background": false,
+        "custom_prompt": "Extract entities focusing on technical concepts and their relationships. Identify key technologies, methodologies, and their interconnections."
     }
     ```

@@ -106,7 +111,10 @@ def get_cognify_router() -> APIRouter:
         datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets

         cognify_run = await cognee_cognify(
-            datasets,
+            datasets,
+            user,
+            run_in_background=payload.run_in_background,
+            custom_prompt=payload.custom_prompt,
         )

         # If any cognify run errored return JSONResponse with proper error status code

@@ -164,7 +172,7 @@ def get_cognify_router() -> APIRouter:
                 {
                     "pipeline_run_id": str(pipeline_run_info.pipeline_run_id),
                     "status": pipeline_run_info.status,
-                    "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user
+                    "payload": await get_formatted_graph_data(pipeline_run.dataset_id, user),
                 }
             )
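A hedged example of exercising this endpoint with the new field; the base URL, route prefix, and bearer token are assumptions for a locally running cognee API server.

```python
# Assumed local server and token; the JSON body mirrors the request example
# in the router docstring above.
import requests

response = requests.post(
    "http://localhost:8000/api/v1/cognify",
    headers={"Authorization": "Bearer <token>"},
    json={
        "datasets": ["research_papers"],
        "run_in_background": False,
        "custom_prompt": "Extract entities focusing on technical concepts.",
    },
)
print(response.status_code, response.json())
```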
cognee/api/v1/config/config.py
CHANGED
@@ -2,7 +2,6 @@

 import os
 from cognee.base_config import get_base_config
-from cognee.exceptions import InvalidValueError, InvalidAttributeError
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.databases.vector import get_vectordb_config

@@ -11,6 +10,7 @@ from cognee.infrastructure.llm.config import (
     get_llm_config,
 )
 from cognee.infrastructure.databases.relational import get_relational_config, get_migration_config
+from cognee.api.v1.exceptions.exceptions import InvalidConfigAttributeError


 class config:

@@ -92,9 +92,7 @@ class config:
         if hasattr(llm_config, key):
             object.__setattr__(llm_config, key, value)
         else:
-            raise
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            raise InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_chunk_strategy(chunk_strategy: object):

@@ -131,9 +129,7 @@ class config:
         if hasattr(relational_db_config, key):
             object.__setattr__(relational_db_config, key, value)
         else:
-            raise
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            raise InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_migration_db_config(config_dict: dict):

@@ -145,9 +141,7 @@ class config:
         if hasattr(migration_db_config, key):
             object.__setattr__(migration_db_config, key, value)
         else:
-            raise
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            raise InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_graph_db_config(config_dict: dict) -> None:

@@ -171,9 +165,7 @@ class config:
         if hasattr(vector_db_config, key):
             object.__setattr__(vector_db_config, key, value)
         else:
-
-                message=f"'{key}' is not a valid attribute of the config."
-            )
+            InvalidConfigAttributeError(attribute=key)

     @staticmethod
     def set_vector_db_key(db_key: str):
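The setter pattern repeated above condenses to one sketch; the helper name is illustrative, not part of cognee.

```python
# Sketch of the shared validation pattern: unknown keys now raise
# InvalidConfigAttributeError (HTTP 400) instead of the removed
# InvalidValueError/InvalidAttributeError pair. apply_config is illustrative.
from cognee.api.v1.exceptions import InvalidConfigAttributeError

def apply_config(config_obj, config_dict: dict) -> None:
    for key, value in config_dict.items():
        if hasattr(config_obj, key):
            object.__setattr__(config_obj, key, value)
        else:
            raise InvalidConfigAttributeError(attribute=key)
```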
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED

@@ -13,7 +13,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.modules.data.methods import create_dataset, get_datasets_by_name
 from cognee.shared.logging_utils import get_logger
-from cognee.api.v1.
+from cognee.api.v1.exceptions import DataNotFoundError, DatasetNotFoundError
 from cognee.modules.users.models import User
 from cognee.modules.users.methods import get_authenticated_user
 from cognee.modules.users.permissions.methods import (

@@ -284,7 +284,7 @@ def get_datasets_router() -> APIRouter:
     - **500 Internal Server Error**: Error retrieving graph data
     """

-    graph_data = await get_formatted_graph_data(dataset_id, user
+    graph_data = await get_formatted_graph_data(dataset_id, user)

     return graph_data
cognee/api/v1/delete/delete.py
CHANGED
@@ -16,7 +16,7 @@ from cognee.modules.users.methods import get_default_user
 from cognee.modules.data.methods import get_authorized_existing_datasets
 from cognee.context_global_variables import set_database_global_context_variables

-from cognee.api.v1.
+from cognee.api.v1.exceptions import (
     DocumentNotFoundError,
     DatasetNotFoundError,
     DocumentSubgraphNotFoundError,
cognee/api/v1/exceptions/__init__.py
ADDED

@@ -0,0 +1,13 @@
+"""
+Custom exceptions for the Cognee API.
+
+This module defines a set of exceptions for handling various data errors
+"""
+
+from .exceptions import (
+    InvalidConfigAttributeError,
+    DocumentNotFoundError,
+    DatasetNotFoundError,
+    DataNotFoundError,
+    DocumentSubgraphNotFoundError,
+)
cognee/api/v1/{delete → exceptions}/exceptions.py
CHANGED

@@ -1,10 +1,19 @@
-from cognee.exceptions import CogneeApiError
+from cognee.exceptions import CogneeConfigurationError, CogneeValidationError
 from fastapi import status


-class DocumentNotFoundError(CogneeApiError):
-
+class InvalidConfigAttributeError(CogneeConfigurationError):
+    def __init__(
+        self,
+        attribute: str,
+        name: str = "InvalidConfigAttributeError",
+        status_code: int = status.HTTP_400_BAD_REQUEST,
+    ):
+        message = f"'{attribute}' is not a valid attribute of the configuration."
+        super().__init__(message, name, status_code)

+
+class DocumentNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Document not found in database.",

@@ -14,9 +23,7 @@ class DocumentNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)


-class DatasetNotFoundError(CogneeApiError):
-    """Raised when a dataset cannot be found."""
-
+class DatasetNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Dataset not found.",

@@ -26,9 +33,7 @@ class DatasetNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)


-class DataNotFoundError(CogneeApiError):
-    """Raised when a dataset cannot be found."""
-
+class DataNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Data not found.",

@@ -38,9 +43,7 @@ class DataNotFoundError(CogneeApiError):
         super().__init__(message, name, status_code)


-class DocumentSubgraphNotFoundError(CogneeApiError):
-    """Raised when a document's subgraph cannot be found in the graph database."""
-
+class DocumentSubgraphNotFoundError(CogneeValidationError):
     def __init__(
         self,
         message: str = "Document subgraph not found in graph database.",
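A sketch of raising the reworked exceptions: each class now carries a default message, name, and HTTP status code that the API layer can surface directly. The lookup helper here is hypothetical.

```python
# Hedged sketch: find_dataset is a hypothetical lookup callable; the message
# override is optional because DatasetNotFoundError ships defaults for
# message, name, and status_code.
from cognee.api.v1.exceptions import DatasetNotFoundError

async def require_dataset(dataset_id, find_dataset):
    dataset = await find_dataset(dataset_id)
    if dataset is None:
        raise DatasetNotFoundError(message=f"Dataset '{dataset_id}' not found.")
    return dataset
```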
cognee/api/v1/responses/default_tools.py
CHANGED

@@ -49,6 +49,10 @@ DEFAULT_TOOLS = [
                     "type": "string",
                     "description": "Path to a custom ontology file",
                 },
+                "custom_prompt": {
+                    "type": "string",
+                    "description": "Custom prompt for entity extraction and graph generation. If provided, this prompt will be used instead of the default prompts.",
+                },
             },
             "required": ["text"],
         },
cognee/api/v1/responses/dispatch_function.py
CHANGED

@@ -88,11 +88,16 @@ async def handle_cognify(arguments: Dict[str, Any], user) -> str:
     """Handle cognify function call"""
     text = arguments.get("text")
     ontology_file_path = arguments.get("ontology_file_path")
+    custom_prompt = arguments.get("custom_prompt")

     if text:
         await add(data=text, user=user)

-    await cognify(
+    await cognify(
+        user=user,
+        ontology_file_path=ontology_file_path if ontology_file_path else None,
+        custom_prompt=custom_prompt,
+    )

     return (
         "Text successfully converted into knowledge graph."
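A hedged sketch of the tool-call path above: the dispatcher pulls optional keys from the arguments dict (matching the DEFAULT_TOOLS schema) and forwards them to cognify().

```python
# Sketch of invoking handle_cognify() directly; the argument keys follow the
# DEFAULT_TOOLS schema, and the default user is used for simplicity.
import asyncio
from cognee.api.v1.responses.dispatch_function import handle_cognify
from cognee.modules.users.methods import get_default_user

async def main():
    user = await get_default_user()
    result = await handle_cognify(
        {
            "text": "Cognee builds knowledge graphs from documents.",
            "custom_prompt": "Focus on technologies and their relationships.",
        },
        user,
    )
    print(result)

asyncio.run(main())
```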
cognee/api/v1/responses/models.py
CHANGED

@@ -70,7 +70,7 @@ class ResponseRequest(InDTO):
     tool_choice: Optional[Union[str, Dict[str, Any]]] = "auto"
     user: Optional[str] = None
     temperature: Optional[float] = 1.0
-
+    max_completion_tokens: Optional[int] = None


 class ToolCallOutput(BaseModel):
cognee/api/v1/search/search.py
CHANGED
@@ -19,6 +19,8 @@ async def search(
     top_k: int = 10,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
+    save_interaction: bool = False,
+    last_k: Optional[int] = None,
 ) -> list:
     """
     Search and query the knowledge graph for insights, information, and connections.

@@ -107,6 +109,8 @@ async def search(

         node_name: Filter results to specific named entities (for targeted search).

+        save_interaction: Save interaction (query, context, answer connected to triplet endpoints) results into the graph or not
+
     Returns:
         list: Search results in format determined by query_type:

@@ -158,13 +162,6 @@ async def search(
         - VECTOR_DB_PROVIDER: Must match what was used during cognify
         - GRAPH_DATABASE_PROVIDER: Must match what was used during cognify

-    Raises:
-        DatasetNotFoundError: If specified datasets don't exist or aren't accessible
-        PermissionDeniedError: If user lacks read access to requested datasets
-        NoDataError: If no relevant data found for the search query
-        InvalidValueError: If LLM_API_KEY is not set (for LLM-based search types)
-        ValueError: If query_text is empty or search parameters are invalid
-        CollectionNotFoundError: If vector collection not found (data not processed)
     """
     # We use lists from now on for datasets
     if isinstance(datasets, UUID) or isinstance(datasets, str):

@@ -189,6 +186,8 @@ async def search(
         top_k=top_k,
         node_type=node_type,
         node_name=node_name,
+        save_interaction=save_interaction,
+        last_k=last_k,
     )

     return filtered_search_results
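A minimal sketch of the extended search() call: save_interaction persists the query, context, and answer back into the graph, while last_k (left undocumented in this diff, assumed to bound recent-interaction history) is simply passed through to the underlying search method.

```python
# Hedged sketch of search() with the new parameters; the query text is an
# example, and SearchType.GRAPH_COMPLETION is one of the existing modes.
import asyncio
import cognee
from cognee.modules.search.types import SearchType

async def main():
    results = await cognee.search(
        query_text="How do the ingested documents relate to each other?",
        query_type=SearchType.GRAPH_COMPLETION,
        save_interaction=True,
        last_k=5,
    )
    print(results)

asyncio.run(main())
```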
cognee/cli/__init__.py
ADDED