cognee 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/api/client.py +28 -3
- cognee/api/health.py +10 -13
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/add/routers/get_add_router.py +12 -37
- cognee/api/v1/cloud/routers/__init__.py +1 -0
- cognee/api/v1/cloud/routers/get_checks_router.py +23 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +9 -4
- cognee/api/v1/cognify/cognify.py +50 -3
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +15 -4
- cognee/api/v1/memify/__init__.py +0 -0
- cognee/api/v1/memify/routers/__init__.py +1 -0
- cognee/api/v1/memify/routers/get_memify_router.py +100 -0
- cognee/api/v1/notebooks/routers/__init__.py +1 -0
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +96 -0
- cognee/api/v1/search/routers/get_search_router.py +20 -1
- cognee/api/v1/search/search.py +11 -4
- cognee/api/v1/sync/__init__.py +17 -0
- cognee/api/v1/sync/routers/__init__.py +3 -0
- cognee/api/v1/sync/routers/get_sync_router.py +241 -0
- cognee/api/v1/sync/sync.py +877 -0
- cognee/api/v1/ui/__init__.py +1 -0
- cognee/api/v1/ui/ui.py +529 -0
- cognee/api/v1/users/routers/get_auth_router.py +13 -1
- cognee/base_config.py +10 -1
- cognee/cli/_cognee.py +93 -0
- cognee/infrastructure/databases/graph/config.py +10 -4
- cognee/infrastructure/databases/graph/kuzu/adapter.py +135 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +89 -0
- cognee/infrastructure/databases/relational/__init__.py +2 -0
- cognee/infrastructure/databases/relational/get_async_session.py +15 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +6 -1
- cognee/infrastructure/databases/relational/with_async_session.py +25 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +1 -1
- cognee/infrastructure/databases/vector/config.py +13 -6
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +2 -6
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +4 -1
- cognee/infrastructure/files/storage/LocalFileStorage.py +9 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +5 -0
- cognee/infrastructure/files/storage/StorageManager.py +7 -1
- cognee/infrastructure/files/storage/storage.py +16 -0
- cognee/infrastructure/llm/LLMGateway.py +18 -0
- cognee/infrastructure/llm/config.py +4 -2
- cognee/infrastructure/llm/prompts/extract_query_time.txt +15 -0
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +25 -0
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +30 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +2 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_event_entities.py +44 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/knowledge_graph/extract_event_graph.py +46 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -1
- cognee/infrastructure/utils/run_sync.py +8 -1
- cognee/modules/chunking/models/DocumentChunk.py +4 -3
- cognee/modules/cloud/exceptions/CloudApiKeyMissingError.py +15 -0
- cognee/modules/cloud/exceptions/CloudConnectionError.py +15 -0
- cognee/modules/cloud/exceptions/__init__.py +2 -0
- cognee/modules/cloud/operations/__init__.py +1 -0
- cognee/modules/cloud/operations/check_api_key.py +25 -0
- cognee/modules/data/deletion/prune_system.py +1 -1
- cognee/modules/data/methods/check_dataset_name.py +1 -1
- cognee/modules/data/methods/get_dataset_data.py +1 -1
- cognee/modules/data/methods/load_or_create_datasets.py +1 -1
- cognee/modules/engine/models/Event.py +16 -0
- cognee/modules/engine/models/Interval.py +8 -0
- cognee/modules/engine/models/Timestamp.py +13 -0
- cognee/modules/engine/models/__init__.py +3 -0
- cognee/modules/engine/utils/__init__.py +2 -0
- cognee/modules/engine/utils/generate_event_datapoint.py +46 -0
- cognee/modules/engine/utils/generate_timestamp_datapoint.py +51 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +2 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/resolve_edges_to_text.py +71 -0
- cognee/modules/memify/__init__.py +1 -0
- cognee/modules/memify/memify.py +118 -0
- cognee/modules/notebooks/methods/__init__.py +5 -0
- cognee/modules/notebooks/methods/create_notebook.py +26 -0
- cognee/modules/notebooks/methods/delete_notebook.py +13 -0
- cognee/modules/notebooks/methods/get_notebook.py +21 -0
- cognee/modules/notebooks/methods/get_notebooks.py +18 -0
- cognee/modules/notebooks/methods/update_notebook.py +17 -0
- cognee/modules/notebooks/models/Notebook.py +53 -0
- cognee/modules/notebooks/models/__init__.py +1 -0
- cognee/modules/notebooks/operations/__init__.py +1 -0
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +55 -0
- cognee/modules/pipelines/layers/reset_dataset_pipeline_run_status.py +19 -3
- cognee/modules/pipelines/operations/pipeline.py +1 -0
- cognee/modules/pipelines/operations/run_tasks.py +17 -41
- cognee/modules/retrieval/base_graph_retriever.py +18 -0
- cognee/modules/retrieval/base_retriever.py +1 -1
- cognee/modules/retrieval/code_retriever.py +8 -0
- cognee/modules/retrieval/coding_rules_retriever.py +31 -0
- cognee/modules/retrieval/completion_retriever.py +9 -3
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +23 -14
- cognee/modules/retrieval/graph_completion_cot_retriever.py +21 -11
- cognee/modules/retrieval/graph_completion_retriever.py +32 -65
- cognee/modules/retrieval/graph_summary_completion_retriever.py +3 -1
- cognee/modules/retrieval/insights_retriever.py +14 -3
- cognee/modules/retrieval/summaries_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +152 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +7 -32
- cognee/modules/retrieval/utils/completion.py +10 -3
- cognee/modules/search/methods/get_search_type_tools.py +168 -0
- cognee/modules/search/methods/no_access_control_search.py +47 -0
- cognee/modules/search/methods/search.py +219 -139
- cognee/modules/search/types/SearchResult.py +21 -0
- cognee/modules/search/types/SearchType.py +2 -0
- cognee/modules/search/types/__init__.py +1 -0
- cognee/modules/search/utils/__init__.py +2 -0
- cognee/modules/search/utils/prepare_search_result.py +41 -0
- cognee/modules/search/utils/transform_context_to_graph.py +38 -0
- cognee/modules/sync/__init__.py +1 -0
- cognee/modules/sync/methods/__init__.py +23 -0
- cognee/modules/sync/methods/create_sync_operation.py +53 -0
- cognee/modules/sync/methods/get_sync_operation.py +107 -0
- cognee/modules/sync/methods/update_sync_operation.py +248 -0
- cognee/modules/sync/models/SyncOperation.py +142 -0
- cognee/modules/sync/models/__init__.py +3 -0
- cognee/modules/users/__init__.py +0 -1
- cognee/modules/users/methods/__init__.py +4 -1
- cognee/modules/users/methods/create_user.py +26 -1
- cognee/modules/users/methods/get_authenticated_user.py +36 -42
- cognee/modules/users/methods/get_default_user.py +3 -1
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +2 -1
- cognee/root_dir.py +19 -0
- cognee/shared/logging_utils.py +1 -1
- cognee/tasks/codingagents/__init__.py +0 -0
- cognee/tasks/codingagents/coding_rule_associations.py +127 -0
- cognee/tasks/ingestion/save_data_item_to_storage.py +23 -0
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/extract_subgraph.py +7 -0
- cognee/tasks/memify/extract_subgraph_chunks.py +11 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +52 -27
- cognee/tasks/temporal_graph/__init__.py +1 -0
- cognee/tasks/temporal_graph/add_entities_to_event.py +85 -0
- cognee/tasks/temporal_graph/enrich_events.py +34 -0
- cognee/tasks/temporal_graph/extract_events_and_entities.py +32 -0
- cognee/tasks/temporal_graph/extract_knowledge_graph_from_events.py +41 -0
- cognee/tasks/temporal_graph/models.py +49 -0
- cognee/tests/test_kuzu.py +4 -4
- cognee/tests/test_neo4j.py +4 -4
- cognee/tests/test_permissions.py +3 -3
- cognee/tests/test_relational_db_migration.py +7 -5
- cognee/tests/test_search_db.py +18 -24
- cognee/tests/test_temporal_graph.py +167 -0
- cognee/tests/unit/api/__init__.py +1 -0
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +246 -0
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +13 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +11 -16
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +5 -4
- cognee/tests/unit/modules/retrieval/insights_retriever_test.py +4 -2
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +18 -2
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +225 -0
- cognee/tests/unit/modules/users/__init__.py +1 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +277 -0
- cognee/tests/unit/processing/utils/utils_test.py +20 -1
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/METADATA +8 -6
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/RECORD +165 -90
- cognee/tests/unit/modules/search/search_methods_test.py +0 -225
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/WHEEL +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/entry_points.txt +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.4.dist-info → cognee-0.3.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/__init__.py
CHANGED
@@ -18,6 +18,7 @@ logger = setup_logging()
 from .api.v1.add import add
 from .api.v1.delete import delete
 from .api.v1.cognify import cognify
+from .modules.memify import memify
 from .api.v1.config.config import config
 from .api.v1.datasets.datasets import datasets
 from .api.v1.prune import prune
@@ -26,6 +27,7 @@ from .api.v1.visualize import visualize_graph, start_visualization_server
 from cognee.modules.visualization.cognee_network_visualization import (
     cognee_network_visualization,
 )
+from .api.v1.ui import start_ui

 # Pipelines
 from .modules import pipelines
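The two new imports surface memify and start_ui at the package root. A minimal usage sketch; the dataset name is a placeholder, and since this diff does not show the signatures of either function, the argument-free start_ui call and the dataset keyword are assumptions:

    import asyncio
    import cognee

    async def main():
        # memify is re-exported from cognee.modules.memify (hypothetical defaults)
        await cognee.memify(dataset="my_dataset")

    asyncio.run(main())

    # start_ui comes from cognee.api.v1.ui and serves the bundled frontend
    cognee.start_ui()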
cognee/api/client.py
CHANGED
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
 from fastapi import Request
 from fastapi import FastAPI, status
 from fastapi.encoders import jsonable_encoder
-from fastapi.responses import JSONResponse
+from fastapi.responses import JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.exceptions import RequestValidationError
 from fastapi.openapi.utils import get_openapi
@@ -17,14 +17,18 @@ from fastapi.openapi.utils import get_openapi
 from cognee.exceptions import CogneeApiError
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.api.health import health_checker, HealthStatus
+from cognee.api.v1.cloud.routers import get_checks_router
+from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
 from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.memify.routers import get_memify_router
 from cognee.api.v1.add.routers import get_add_router
 from cognee.api.v1.delete.routers import get_delete_router
 from cognee.api.v1.responses.routers import get_responses_router
+from cognee.api.v1.sync.routers import get_sync_router
 from cognee.api.v1.users.routers import (
     get_auth_router,
     get_register_router,
@@ -33,6 +37,7 @@ from cognee.api.v1.users.routers import (
     get_users_router,
     get_visualize_router,
 )
+from cognee.modules.users.methods.get_authenticated_user import REQUIRE_AUTHENTICATION

 logger = get_logger()

@@ -83,7 +88,7 @@ app.add_middleware(
     CORSMiddleware,
     allow_origins=allowed_origins,  # Now controlled by env var
     allow_credentials=True,
-    allow_methods=["OPTIONS", "GET", "POST", "DELETE"],
+    allow_methods=["OPTIONS", "GET", "PUT", "POST", "DELETE"],
     allow_headers=["*"],
 )
 # To allow origins, set CORS_ALLOWED_ORIGINS env variable to a comma-separated list, e.g.:
@@ -110,7 +115,11 @@ def custom_openapi():
         },
     }

-
+    if REQUIRE_AUTHENTICATION:
+        openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]
+
+    # Remove global security requirement - let individual endpoints specify their own security
+    # openapi_schema["security"] = [{"BearerAuth": []}, {"CookieAuth": []}]

     app.openapi_schema = openapi_schema

@@ -230,6 +239,8 @@ app.include_router(get_add_router(), prefix="/api/v1/add", tags=["add"])

 app.include_router(get_cognify_router(), prefix="/api/v1/cognify", tags=["cognify"])

+app.include_router(get_memify_router(), prefix="/api/v1/memify", tags=["memify"])
+
 app.include_router(get_search_router(), prefix="/api/v1/search", tags=["search"])

 app.include_router(
@@ -248,6 +259,8 @@ app.include_router(get_delete_router(), prefix="/api/v1/delete", tags=["delete"]

 app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["responses"])

+app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
+
 codegraph_routes = get_code_pipeline_router()
 if codegraph_routes:
     app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
@@ -258,6 +271,18 @@ app.include_router(
     tags=["users"],
 )

+app.include_router(
+    get_notebooks_router(),
+    prefix="/api/v1/notebooks",
+    tags=["notebooks"],
+)
+
+app.include_router(
+    get_checks_router(),
+    prefix="/api/v1/checks",
+    tags=["checks"],
+)
+

 def start_api_server(host: str = "0.0.0.0", port: int = 8000):
     """
cognee/api/health.py
CHANGED
@@ -1,9 +1,10 @@
 """Health check system for cognee API."""

+from io import BytesIO
 import time
 import asyncio
 from datetime import datetime, timezone
-from typing import Dict
+from typing import Dict
 from enum import Enum
 from pydantic import BaseModel

@@ -53,7 +54,7 @@ class HealthChecker:
             # Test connection by creating a session
             session = engine.get_session()
             if session:
-
+                session.close()

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -117,12 +118,9 @@ class HealthChecker:
             engine = await get_graph_engine()

             # Test basic operation with actual graph query
-            if hasattr(engine, "execute"):
-                # For SQL-like graph DBs (Neo4j, Memgraph)
-                await engine.execute("MATCH () RETURN count(*) LIMIT 1")
-            elif hasattr(engine, "query"):
+            if hasattr(engine, "query"):
                 # For other graph engines
-                engine.query("MATCH () RETURN count(*) LIMIT 1", {})
+                await engine.query("MATCH () RETURN count(*) LIMIT 1", {})
             # If engine exists but no test method, consider it healthy

             response_time = int((time.time() - start_time) * 1000)
@@ -167,8 +165,8 @@ class HealthChecker:
             else:
                 # For S3, test basic operations
                 test_path = "health_check_test"
-                await storage.store(test_path, b"test")
-                await storage.
+                await storage.store(test_path, BytesIO(b"test"))
+                await storage.remove(test_path)

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -190,14 +188,13 @@ class HealthChecker:
         """Check LLM provider health (non-critical)."""
         start_time = time.time()
         try:
-            from cognee.infrastructure.llm.get_llm_client import get_llm_client
             from cognee.infrastructure.llm.config import get_llm_config
+            from cognee.infrastructure.llm import LLMGateway

             config = get_llm_config()

             # Test actual API connection with minimal request
-            client = get_llm_client()
-            await client.show_prompt("test", "test")
+            LLMGateway.show_prompt("test", "test")

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
@@ -226,7 +223,7 @@ class HealthChecker:

             # Test actual embedding generation with minimal text
             engine = get_embedding_engine()
-            await engine.embed_text("test")
+            await engine.embed_text(["test"])

             response_time = int((time.time() - start_time) * 1000)
             return ComponentHealth(
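The last hunk corrects the embedding probe to pass a list, matching the batch-oriented embed_text signature. A sketch of the corrected call, assuming the import path from the module layout in the file list above and assuming embed_text returns one vector per input string:

    from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
        get_embedding_engine,
    )

    async def probe_embeddings():
        engine = get_embedding_engine()
        # embed_text takes a list of strings, so even a single probe is wrapped
        vectors = await engine.embed_text(["test"])
        assert len(vectors) == 1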
cognee/api/v1/add/add.py
CHANGED
@@ -150,7 +150,9 @@ async def add(

     user, authorized_dataset = await resolve_authorized_user_dataset(dataset_id, dataset_name, user)

-    await reset_dataset_pipeline_run_status(authorized_dataset.id, user)
+    await reset_dataset_pipeline_run_status(
+        authorized_dataset.id, user, pipeline_names=["add_pipeline", "cognify_pipeline"]
+    )

     pipeline_run_info = None

cognee/api/v1/add/routers/get_add_router.py
CHANGED
@@ -1,6 +1,3 @@
-import os
-import requests
-import subprocess
 from uuid import UUID

 from fastapi import APIRouter
@@ -24,7 +21,9 @@ def get_add_router() -> APIRouter:
     async def add(
         data: List[UploadFile] = File(default=None),
         datasetName: Optional[str] = Form(default=None),
+        # Note: Literal is needed for Swagger use
         datasetId: Union[UUID, Literal[""], None] = Form(default=None, examples=[""]),
+        node_set: Optional[List[str]] = Form(default=[""], example=[""]),
         user: User = Depends(get_authenticated_user),
     ):
         """
@@ -41,6 +40,8 @@ def get_add_router() -> APIRouter:
         - Regular file uploads
         - **datasetName** (Optional[str]): Name of the dataset to add data to
         - **datasetId** (Optional[UUID]): UUID of an already existing dataset
+        - **node_set** Optional[list[str]]: List of node identifiers for graph organization and access control.
+          Used for grouping related data points in the knowledge graph.

         Either datasetName or datasetId must be provided.

@@ -57,17 +58,12 @@ def get_add_router() -> APIRouter:

         ## Notes
         - To add data to datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
-        - GitHub repositories are cloned and all files are processed
-        - HTTP URLs are fetched and their content is processed
-        - The ALLOW_HTTP_REQUESTS environment variable controls URL processing
         - datasetId value can only be the UUID of an already existing dataset
         """
         send_telemetry(
             "Add API Endpoint Invoked",
             user.id,
-            additional_properties={
-                "endpoint": "POST /v1/add",
-            },
+            additional_properties={"endpoint": "POST /v1/add", "node_set": node_set},
         )

         from cognee.api.v1.add import add as cognee_add
@@ -76,34 +72,13 @@ def get_add_router() -> APIRouter:
             raise ValueError("Either datasetId or datasetName must be provided.")

         try:
-            if (
-                os.getenv("ALLOW_HTTP_REQUESTS", "true").lower() == "true"
-                and isinstance(data, str)
-                and (data.startswith("http://") or data.startswith("https://"))
-            ):
-                if "github" in data:
-                    # Perform git clone if the URL is from GitHub
-                    repo_name = data.split("/")[-1].replace(".git", "")
-                    subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
-                    # TODO: Update add call with dataset info
-                    await cognee_add(
-                        "data://.data/",
-                        f"{repo_name}",
-                    )
-                else:
-                    # Fetch and store the data from other types of URL using curl
-                    response = requests.get(data)
-                    response.raise_for_status()
-
-                    file_data = await response.content()
-                    # TODO: Update add call with dataset info
-                    return await cognee_add(file_data)
-            else:
-                add_run = await cognee_add(data, datasetName, user=user, dataset_id=datasetId)
-
-                if isinstance(add_run, PipelineRunErrored):
-                    return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
-                return add_run.model_dump()
+            add_run = await cognee_add(
+                data, datasetName, user=user, dataset_id=datasetId, node_set=node_set
+            )
+
+            if isinstance(add_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=add_run.model_dump(mode="json"))
+            return add_run.model_dump()
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})
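A sketch of calling the updated endpoint with the new form field; the local URL, file content, and dataset name are placeholders. requests encodes a list value as repeated form fields, which matches the List[str] Form parameter:

    import requests

    response = requests.post(
        "http://localhost:8000/api/v1/add",
        files={"data": ("notes.txt", b"Alan Turing was born in 1912.")},
        data={"datasetName": "my_dataset", "node_set": ["project-x", "biographies"]},
    )
    print(response.status_code, response.json())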
cognee/api/v1/cloud/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_checks_router import get_checks_router
cognee/api/v1/cloud/routers/get_checks_router.py
ADDED
@@ -0,0 +1,23 @@
+from fastapi import APIRouter, Depends, Request
+
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.modules.cloud.operations import check_api_key
+from cognee.modules.cloud.exceptions import CloudApiKeyMissingError
+
+
+def get_checks_router():
+    router = APIRouter()
+
+    @router.post("/connection")
+    async def get_connection_check_endpoint(
+        request: Request, user: User = Depends(get_authenticated_user)
+    ):
+        api_token = request.headers.get("X-Api-Key")
+
+        if api_token is None:
+            return CloudApiKeyMissingError()
+
+        return await check_api_key(api_token)
+
+    return router
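A sketch of probing the new connection check; the key value is a placeholder. The handler reads the key from the X-Api-Key header and short-circuits with CloudApiKeyMissingError when the header is absent:

    import requests

    response = requests.post(
        "http://localhost:8000/api/v1/checks/connection",
        headers={"X-Api-Key": "your-cognee-cloud-key"},
    )
    print(response.status_code)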
cognee/api/v1/cognify/code_graph_pipeline.py
CHANGED
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import asyncio
+from typing import Optional
 from cognee.shared.logging_utils import get_logger, setup_logging
 from cognee.modules.observability.get_observe import get_observe

@@ -28,7 +29,12 @@ logger = get_logger("code_graph_pipeline")


 @observe
-async def run_code_graph_pipeline(repo_path, include_docs=False):
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
     import cognee
     from cognee.low_level import setup
@@ -40,13 +46,12 @@ async def run_code_graph_pipeline(repo_path, include_docs=False):
     user = await get_default_user()
     detailed_extraction = True

-    # Multi-language support: allow passing supported_languages
-    supported_languages = None  # defer to task defaults
     tasks = [
         Task(
             get_repo_file_dependencies,
             detailed_extraction=detailed_extraction,
             supported_languages=supported_languages,
+            excluded_paths=excluded_paths,
         ),
         # Task(summarize_code, task_config={"batch_size": 500}),  # This task takes a long time to complete
         Task(add_data_points, task_config={"batch_size": 30}),
@@ -95,7 +100,7 @@ if __name__ == "__main__":

     async def main():
         async for run_status in run_code_graph_pipeline("REPO_PATH"):
-            print(f"{run_status.
+            print(f"{run_status.pipeline_run_id}: {run_status.status}")

     file_path = os.path.join(
         pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
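A sketch of driving the extended pipeline; the repo path is a placeholder, and the exact value formats for excluded_paths and supported_languages are assumptions, since the diff only shows both as optional lists of strings:

    import asyncio
    from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline

    async def build_code_graph():
        async for run_status in run_code_graph_pipeline(
            "path/to/repo",
            include_docs=False,
            excluded_paths=["tests", "docs"],
            supported_languages=["python", "typescript"],
        ):
            print(run_status.pipeline_run_id, run_status.status)

    asyncio.run(build_code_graph())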
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -22,6 +22,11 @@ from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.tasks.summarization import summarize_text
 from cognee.modules.pipelines.layers.pipeline_execution_mode import get_pipeline_executor
+from cognee.tasks.temporal_graph.extract_events_and_entities import extract_events_and_timestamps
+from cognee.tasks.temporal_graph.extract_knowledge_graph_from_events import (
+    extract_knowledge_graph_from_events,
+)
+

 logger = get_logger("cognify")

@@ -40,6 +45,7 @@ async def cognify(
     run_in_background: bool = False,
     incremental_loading: bool = True,
     custom_prompt: Optional[str] = None,
+    temporal_cognify: bool = False,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -182,9 +188,12 @@ async def cognify(
     - LLM_RATE_LIMIT_ENABLED: Enable rate limiting (default: False)
     - LLM_RATE_LIMIT_REQUESTS: Max requests per interval (default: 60)
     """
-    tasks = await get_default_tasks(
-        user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
-    )
+    if temporal_cognify:
+        tasks = await get_temporal_tasks(user, chunker, chunk_size)
+    else:
+        tasks = await get_default_tasks(
+            user, graph_model, chunker, chunk_size, ontology_file_path, custom_prompt
+        )

     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
     pipeline_executor_func = get_pipeline_executor(run_in_background=run_in_background)
@@ -233,3 +242,41 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     ]

     return default_tasks
+
+
+async def get_temporal_tasks(
+    user: User = None, chunker=TextChunker, chunk_size: int = None
+) -> list[Task]:
+    """
+    Builds and returns a list of temporal processing tasks to be executed in sequence.
+
+    The pipeline includes:
+    1. Document classification.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.
+
+    Args:
+        user (User, optional): The user requesting task execution, used for permission checks.
+        chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
+        chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
+
+    Returns:
+        list[Task]: A list of Task objects representing the temporal processing pipeline.
+    """
+    temporal_tasks = [
+        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
+        Task(
+            extract_chunks_from_documents,
+            max_chunk_size=chunk_size or get_max_chunk_tokens(),
+            chunker=chunker,
+        ),
+        Task(extract_events_and_timestamps, task_config={"chunk_size": 10}),
+        Task(extract_knowledge_graph_from_events),
+        Task(add_data_points, task_config={"batch_size": 10}),
+    ]
+
+    return temporal_tasks
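A sketch of opting into the temporal pipeline; the dataset name and text are placeholders, and the positional dataset argument is assumed to follow the usual add/cognify calling convention:

    import cognee

    async def run_temporal():
        await cognee.add("On 23 June 1912, Alan Turing was born in London.", "history")
        # temporal_cognify=True swaps get_default_tasks for get_temporal_tasks,
        # so events and timestamps are extracted before graph construction
        await cognee.cognify(["history"], temporal_cognify=True)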
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
@@ -38,7 +38,7 @@ class CognifyPayloadDTO(InDTO):
     dataset_ids: Optional[List[UUID]] = Field(default=None, examples=[[]])
     run_in_background: Optional[bool] = Field(default=False)
     custom_prompt: Optional[str] = Field(
-        default=
+        default="", description="Custom prompt for entity extraction and graph generation"
     )

cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED
@@ -5,6 +5,7 @@ from typing import List, Optional
 from typing_extensions import Annotated
 from fastapi import status
 from fastapi import APIRouter
+from fastapi.encoders import jsonable_encoder
 from fastapi import HTTPException, Query, Depends
 from fastapi.responses import JSONResponse, FileResponse

@@ -47,6 +48,7 @@ class DataDTO(OutDTO):
     extension: str
     mime_type: str
     raw_data_location: str
+    dataset_id: UUID


 class GraphNodeDTO(OutDTO):
@@ -114,7 +116,8 @@ def get_datasets_router() -> APIRouter:

     @router.post("", response_model=DatasetDTO)
     async def create_new_dataset(
-        dataset_data: DatasetCreationPayload,
+        dataset_data: DatasetCreationPayload,
+        user: User = Depends(get_authenticated_user),
     ):
         """
         Create a new dataset or return existing dataset with the same name.
@@ -327,7 +330,7 @@ def get_datasets_router() -> APIRouter:
             },
         )

-        from cognee.modules.data.methods import get_dataset_data
+        from cognee.modules.data.methods import get_dataset_data

         # Verify user has permission to read dataset
         dataset = await get_authorized_existing_datasets([dataset_id], "read", user)
@@ -338,12 +341,20 @@ def get_datasets_router() -> APIRouter:
                 content=ErrorResponseDTO(f"Dataset ({str(dataset_id)}) not found."),
             )

-        dataset_data = await get_dataset_data(dataset_id=dataset[0].id)
+        dataset_id = dataset[0].id
+
+        dataset_data = await get_dataset_data(dataset_id=dataset_id)

         if dataset_data is None:
             return []

-        return dataset_data
+        return [
+            dict(
+                **jsonable_encoder(data),
+                dataset_id=dataset_id,
+            )
+            for data in dataset_data
+        ]

     @router.get("/status", response_model=dict[str, PipelineRunStatus])
     async def get_dataset_status(
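A sketch of reading the enriched response. The exact route path is not shown in this diff, so /api/v1/datasets/{dataset_id}/data is assumed from the router prefix and handler behavior; auth setup is omitted, and whether keys come out camelCased depends on OutDTO's serialization, so both spellings are handled defensively:

    import requests

    dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder
    response = requests.get(f"http://localhost:8000/api/v1/datasets/{dataset_id}/data")
    for item in response.json():
        # every data item now reports the dataset it was fetched through
        print(item["name"], item.get("dataset_id") or item.get("datasetId"))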
cognee/api/v1/memify/__init__.py
File without changes
cognee/api/v1/memify/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_memify_router import get_memify_router
cognee/api/v1/memify/routers/get_memify_router.py
ADDED
@@ -0,0 +1,100 @@
+from uuid import UUID
+
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+from fastapi import Depends
+from pydantic import Field
+from typing import List, Optional, Union, Literal
+
+from cognee.api.DTO import InDTO
+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+from cognee.shared.utils import send_telemetry
+from cognee.modules.pipelines.models import PipelineRunErrored
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger()
+
+
+class MemifyPayloadDTO(InDTO):
+    extraction_tasks: Optional[List[str]] = Field(
+        default=None,
+        examples=[[]],
+    )
+    enrichment_tasks: Optional[List[str]] = Field(default=None, examples=[[]])
+    data: Optional[str] = Field(default="")
+    dataset_name: Optional[str] = Field(default=None)
+    # Note: Literal is needed for Swagger use
+    dataset_id: Union[UUID, Literal[""], None] = Field(default=None, examples=[""])
+    node_name: Optional[List[str]] = Field(default=None, examples=[[]])
+    run_in_background: Optional[bool] = Field(default=False)
+
+
+def get_memify_router() -> APIRouter:
+    router = APIRouter()
+
+    @router.post("", response_model=dict)
+    async def memify(payload: MemifyPayloadDTO, user: User = Depends(get_authenticated_user)):
+        """
+        Enrichment pipeline in Cognee, can work with already built graphs. If no data is provided existing knowledge graph will be used as data,
+        custom data can also be provided instead which can be processed with provided extraction and enrichment tasks.
+
+        Provided tasks and data will be arranged to run the Cognee pipeline and execute graph enrichment/creation.
+
+        ## Request Parameters
+        - **extractionTasks** Optional[List[str]]: List of available Cognee Tasks to execute for graph/data extraction.
+        - **enrichmentTasks** Optional[List[str]]: List of available Cognee Tasks to handle enrichment of provided graph/data from extraction tasks.
+        - **data** Optional[List[str]]: The data to ingest. Can be any text data when custom extraction and enrichment tasks are used.
+          Data provided here will be forwarded to the first extraction task in the pipeline as input.
+          If no data is provided the whole graph (or subgraph if node_name/node_type is specified) will be forwarded
+        - **dataset_name** (Optional[str]): Name of the datasets to memify
+        - **dataset_id** (Optional[UUID]): List of UUIDs of an already existing dataset
+        - **node_name** (Optional[List[str]]): Filter graph to specific named entities (for targeted search). Used when no data is provided.
+        - **run_in_background** (Optional[bool]): Whether to execute processing asynchronously. Defaults to False (blocking).
+
+        Either datasetName or datasetId must be provided.
+
+        ## Response
+        Returns information about the add operation containing:
+        - Status of the operation
+        - Details about the processed data
+        - Any relevant metadata from the ingestion process
+
+        ## Error Codes
+        - **400 Bad Request**: Neither datasetId nor datasetName provided
+        - **409 Conflict**: Error during memify operation
+        - **403 Forbidden**: User doesn't have permission to use dataset
+
+        ## Notes
+        - To memify datasets not owned by the user, use dataset_id (when ENABLE_BACKEND_ACCESS_CONTROL is set to True)
+        - datasetId value can only be the UUID of an already existing dataset
+        """
+
+        send_telemetry(
+            "Memify API Endpoint Invoked",
+            user.id,
+            additional_properties={"endpoint": "POST /v1/memify"},
+        )
+
+        if not payload.dataset_id and not payload.dataset_name:
+            raise ValueError("Either datasetId or datasetName must be provided.")
+
+        try:
+            from cognee.modules.memify import memify as cognee_memify
+
+            memify_run = await cognee_memify(
+                extraction_tasks=payload.extraction_tasks,
+                enrichment_tasks=payload.enrichment_tasks,
+                data=payload.data,
+                dataset=payload.dataset_id if payload.dataset_id else payload.dataset_name,
+                node_name=payload.node_name,
+                user=user,
+            )
+
+            if isinstance(memify_run, PipelineRunErrored):
+                return JSONResponse(status_code=420, content=memify_run)
+            return memify_run
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    return router
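A sketch of invoking the endpoint; the camelCase keys follow the docstring's extractionTasks/enrichmentTasks spelling, and the dataset name is a placeholder:

    import requests

    payload = {
        "datasetName": "my_dataset",
        "extractionTasks": [],   # optional (default None)
        "enrichmentTasks": [],   # optional (default None)
    }
    response = requests.post("http://localhost:8000/api/v1/memify", json=payload)
    if response.status_code == 420:
        print("pipeline errored:", response.json())
    else:
        print(response.json())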
cognee/api/v1/notebooks/routers/__init__.py
ADDED
@@ -0,0 +1 @@
+from .get_notebooks_router import get_notebooks_router