cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py
CHANGED
@@ -21,7 +21,7 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
-from cognee.api.v1.cognify.routers import get_cognify_router
+from cognee.api.v1.cognify.routers import get_code_pipeline_router, get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
 from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
@@ -278,6 +278,10 @@ app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re
 
 app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
 
+codegraph_routes = get_code_pipeline_router()
+if codegraph_routes:
+    app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
+
 app.include_router(
     get_users_router(),
     prefix="/api/v1/users",
cognee/api/v1/add/add.py
CHANGED
@@ -155,7 +155,7 @@ async def add(
         - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
 
     Optional:
-        - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
+        - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
         - LLM_MODEL: Model name (default: "gpt-5-mini")
         - DEFAULT_USER_EMAIL: Custom default user email
         - DEFAULT_USER_PASSWORD: Custom default user password
@@ -205,7 +205,6 @@ async def add(
         pipeline_name="add_pipeline",
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
-        use_pipeline_cache=True,
         incremental_loading=incremental_loading,
         data_per_batch=data_per_batch,
     ):
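
For reference, the environment variables named in the add() docstring above can be set before calling the public API. A minimal sketch, assuming cognee.add is the standard entry point; the key, model, and ingested text are placeholders:

    import os
    import asyncio

    # Environment knobs documented in the add() docstring; values are placeholders.
    os.environ["LLM_API_KEY"] = "sk-..."
    os.environ["LLM_PROVIDER"] = "openai"   # or "anthropic", "gemini", "ollama", "mistral"
    os.environ["LLM_MODEL"] = "gpt-5-mini"

    import cognee


    async def main():
        # Ingest a small piece of text with the default settings.
        await cognee.add("Some text to ingest.")


    asyncio.run(main())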
cognee/api/v1/cognify/code_graph_pipeline.py
ADDED
@@ -0,0 +1,119 @@
+import os
+import pathlib
+import asyncio
+from typing import Optional
+from cognee.shared.logging_utils import get_logger, setup_logging
+from cognee.modules.observability.get_observe import get_observe
+
+from cognee.api.v1.search import SearchType, search
+from cognee.api.v1.visualize.visualize import visualize_graph
+from cognee.modules.cognify.config import get_cognify_config
+from cognee.modules.pipelines import run_tasks
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.users.methods import get_default_user
+from cognee.shared.data_models import KnowledgeGraph
+from cognee.modules.data.methods import create_dataset
+from cognee.tasks.documents import classify_documents, extract_chunks_from_documents
+from cognee.tasks.graph import extract_graph_from_data
+from cognee.tasks.ingestion import ingest_data
+from cognee.tasks.repo_processor import get_non_py_files, get_repo_file_dependencies
+
+from cognee.tasks.storage import add_data_points
+from cognee.tasks.summarization import summarize_text
+from cognee.infrastructure.llm import get_max_chunk_tokens
+from cognee.infrastructure.databases.relational import get_relational_engine
+
+observe = get_observe()
+
+logger = get_logger("code_graph_pipeline")
+
+
+@observe
+async def run_code_graph_pipeline(
+    repo_path,
+    include_docs=False,
+    excluded_paths: Optional[list[str]] = None,
+    supported_languages: Optional[list[str]] = None,
+):
+    import cognee
+    from cognee.low_level import setup
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await setup()
+
+    cognee_config = get_cognify_config()
+    user = await get_default_user()
+    detailed_extraction = True
+
+    tasks = [
+        Task(
+            get_repo_file_dependencies,
+            detailed_extraction=detailed_extraction,
+            supported_languages=supported_languages,
+            excluded_paths=excluded_paths,
+        ),
+        # Task(summarize_code, task_config={"batch_size": 500}), # This task takes a long time to complete
+        Task(add_data_points, task_config={"batch_size": 30}),
+    ]
+
+    if include_docs:
+        # This tasks take a long time to complete
+        non_code_tasks = [
+            Task(get_non_py_files, task_config={"batch_size": 50}),
+            Task(ingest_data, dataset_name="repo_docs", user=user),
+            Task(classify_documents),
+            Task(extract_chunks_from_documents, max_chunk_size=get_max_chunk_tokens()),
+            Task(
+                extract_graph_from_data,
+                graph_model=KnowledgeGraph,
+                task_config={"batch_size": 50},
+            ),
+            Task(
+                summarize_text,
+                summarization_model=cognee_config.summarization_model,
+                task_config={"batch_size": 50},
+            ),
+        ]
+
+    dataset_name = "codebase"
+
+    # Save dataset to database
+    db_engine = get_relational_engine()
+    async with db_engine.get_async_session() as session:
+        dataset = await create_dataset(dataset_name, user, session)
+
+    if include_docs:
+        non_code_pipeline_run = run_tasks(
+            non_code_tasks, dataset.id, repo_path, user, "cognify_pipeline"
+        )
+        async for run_status in non_code_pipeline_run:
+            yield run_status
+
+    async for run_status in run_tasks(
+        tasks, dataset.id, repo_path, user, "cognify_code_pipeline", incremental_loading=False
+    ):
+        yield run_status
+
+
+if __name__ == "__main__":
+
+    async def main():
+        async for run_status in run_code_graph_pipeline("REPO_PATH"):
+            print(f"{run_status.pipeline_run_id}: {run_status.status}")
+
+        file_path = os.path.join(
+            pathlib.Path(__file__).parent, ".artifacts", "graph_visualization.html"
+        )
+        await visualize_graph(file_path)
+
+        search_results = await search(
+            query_type=SearchType.CODE,
+            query_text="How is Relationship weight calculated?",
+        )
+
+        for file in search_results:
+            print(file["name"])
+
+    logger = setup_logging(name="code_graph_pipeline")
+    asyncio.run(main())
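
Since run_code_graph_pipeline is an async generator, callers iterate over the pipeline-run statuses it yields. A minimal consumption sketch based on the __main__ block above; the repository path is a placeholder:

    import asyncio

    from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline


    async def index_repo(repo_path: str) -> None:
        # Each yielded item is a pipeline run status, as printed in the __main__ block above.
        async for run_status in run_code_graph_pipeline(repo_path, include_docs=False):
            print(f"{run_status.pipeline_run_id}: {run_status.status}")


    asyncio.run(index_repo("/path/to/your/repo"))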
cognee/api/v1/cognify/cognify.py
CHANGED
@@ -3,7 +3,6 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID
 
-from cognee.modules.cognify.config import get_cognify_config
 from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
@@ -20,6 +19,7 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -53,7 +53,6 @@ async def cognify(
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
     data_per_batch: int = 20,
-    **kwargs,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -79,11 +78,12 @@
 
     Processing Pipeline:
     1. **Document Classification**: Identifies document types and structures
-    2. **
-    3. **
-    4. **
-    5. **
-    6. **
+    2. **Permission Validation**: Ensures user has processing rights
+    3. **Text Chunking**: Breaks content into semantically meaningful segments
+    4. **Entity Extraction**: Identifies key concepts, people, places, organizations
+    5. **Relationship Detection**: Discovers connections between entities
+    6. **Graph Construction**: Builds semantic knowledge graph with embeddings
+    7. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
     The `graph_model` parameter allows custom knowledge structures:
@@ -224,7 +224,6 @@ async def cognify(
         config=config,
         custom_prompt=custom_prompt,
         chunks_per_batch=chunks_per_batch,
-        **kwargs,
     )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -239,7 +238,6 @@ async def cognify(
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
-        use_pipeline_cache=True,
         pipeline_name="cognify_pipeline",
         data_per_batch=data_per_batch,
     )
@@ -253,7 +251,6 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     config: Config = None,
     custom_prompt: Optional[str] = None,
     chunks_per_batch: int = 100,
-    **kwargs,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -275,11 +272,9 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     if chunks_per_batch is None:
         chunks_per_batch = 100
 
-    cognify_config = get_cognify_config()
-    embed_triplets = cognify_config.triplet_embedding
-
     default_tasks = [
         Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -291,17 +286,12 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": chunks_per_batch},
-            **kwargs,
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
             task_config={"batch_size": chunks_per_batch},
         ),
-        Task(
-            add_data_points,
-            embed_triplets=embed_triplets,
-            task_config={"batch_size": chunks_per_batch},
-        ),
+        Task(add_data_points, task_config={"batch_size": chunks_per_batch}),
     ]
 
     return default_tasks
@@ -315,13 +305,14 @@ async def get_temporal_tasks(
 
     The pipeline includes:
     1. Document classification.
-    2.
-    3.
-    4.
-    5.
+    2. Dataset permission checks (requires "write" access).
+    3. Document chunking with a specified or default chunk size.
+    4. Event and timestamp extraction from chunks.
+    5. Knowledge graph extraction from events.
+    6. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution.
+        user (User, optional): The user requesting task execution, used for permission checks.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -334,6 +325,7 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
        Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
        Task(
            extract_chunks_from_documents,
            max_chunk_size=chunk_size or get_max_chunk_tokens(),
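
The removal of the **kwargs catch-all means unrecognized keyword arguments to cognify() now fail fast instead of being silently forwarded to the task builders. A rough usage sketch, assuming the standard cognee.add / cognee.cognify entry points; the dataset name and text are placeholders, and the datasets= keyword is assumed here rather than taken from this diff:

    import asyncio

    import cognee


    async def main():
        # Ingest into a placeholder dataset.
        await cognee.add(
            "Knowledge graphs connect entities through typed relationships.",
            dataset_name="demo",
        )

        # cognify() no longer accepts a **kwargs catch-all, so only its
        # documented parameters (e.g. temporal_cognify, data_per_batch) are valid.
        await cognee.cognify(datasets=["demo"], temporal_cognify=False)


    asyncio.run(main())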
cognee/api/v1/cognify/routers/get_code_pipeline_router.py
ADDED
@@ -0,0 +1,90 @@
+import json
+from cognee.shared.logging_utils import get_logger
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+from cognee.api.DTO import InDTO
+from cognee.modules.retrieval.code_retriever import CodeRetriever
+from cognee.modules.storage.utils import JSONEncoder
+
+
+logger = get_logger()
+
+
+class CodePipelineIndexPayloadDTO(InDTO):
+    repo_path: str
+    include_docs: bool = False
+
+
+class CodePipelineRetrievePayloadDTO(InDTO):
+    query: str
+    full_input: str
+
+
+def get_code_pipeline_router() -> APIRouter:
+    try:
+        import cognee.api.v1.cognify.code_graph_pipeline
+    except ModuleNotFoundError:
+        logger.error("codegraph dependencies not found. Skipping codegraph API routes.")
+        return None
+
+    router = APIRouter()
+
+    @router.post("/index", response_model=None)
+    async def code_pipeline_index(payload: CodePipelineIndexPayloadDTO):
+        """
+        Run indexation on a code repository.
+
+        This endpoint processes a code repository to create a knowledge graph
+        of the codebase structure, dependencies, and relationships.
+
+        ## Request Parameters
+        - **repo_path** (str): Path to the code repository
+        - **include_docs** (bool): Whether to include documentation files (default: false)
+
+        ## Response
+        No content returned. Processing results are logged.
+
+        ## Error Codes
+        - **409 Conflict**: Error during indexation process
+        """
+        from cognee.api.v1.cognify.code_graph_pipeline import run_code_graph_pipeline
+
+        try:
+            async for result in run_code_graph_pipeline(payload.repo_path, payload.include_docs):
+                logger.info(result)
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    @router.post("/retrieve", response_model=list[dict])
+    async def code_pipeline_retrieve(payload: CodePipelineRetrievePayloadDTO):
+        """
+        Retrieve context from the code knowledge graph.
+
+        This endpoint searches the indexed code repository to find relevant
+        context based on the provided query.
+
+        ## Request Parameters
+        - **query** (str): Search query for code context
+        - **full_input** (str): Full input text for processing
+
+        ## Response
+        Returns a list of relevant code files and context as JSON.
+
+        ## Error Codes
+        - **409 Conflict**: Error during retrieval process
+        """
+        try:
+            query = (
+                payload.full_input.replace("cognee ", "")
+                if payload.full_input.startswith("cognee ")
+                else payload.full_input
+            )
+
+            retriever = CodeRetriever()
+            retrieved_files = await retriever.get_context(query)
+
+            return json.dumps(retrieved_files, cls=JSONEncoder)
+        except Exception as error:
+            return JSONResponse(status_code=409, content={"error": str(error)})
+
+    return router
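
Assuming the server mounts these routes under /api/v1/code-pipeline (as in the client.py change earlier in this diff), a client could call them roughly as below. httpx, the base URL, and the example queries are illustrative only, and the snake_case field names follow the DTO definitions above; the InDTO layer may also accept other casings:

    import httpx

    BASE_URL = "http://localhost:8000"  # assumed local cognee server

    with httpx.Client(base_url=BASE_URL, timeout=None) as client:
        # Kick off repository indexation (long-running; results are logged server-side).
        client.post(
            "/api/v1/code-pipeline/index",
            json={"repo_path": "/path/to/your/repo", "include_docs": False},
        )

        # Retrieve code context for a query.
        response = client.post(
            "/api/v1/code-pipeline/retrieve",
            json={
                "query": "how are embeddings configured",
                "full_input": "cognee how are embeddings configured",
            },
        )
        print(response.json())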
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED
@@ -42,9 +42,7 @@ class CognifyPayloadDTO(InDTO):
         default="", description="Custom prompt for entity extraction and graph generation"
     )
     ontology_key: Optional[List[str]] = Field(
-        default=None,
-        examples=[[]],
-        description="Reference to one or more previously uploaded ontologies",
+        default=None, description="Reference to one or more previously uploaded ontologies"
     )
 
 
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED
@@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter:
             },
         )
 
-        from cognee.modules.data.methods import delete_dataset
+        from cognee.modules.data.methods import get_dataset, delete_dataset
 
-        dataset = await
+        dataset = await get_dataset(user.id, dataset_id)
 
         if dataset is None:
             raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
 
-        await delete_dataset(dataset
+        await delete_dataset(dataset)
 
     @router.delete(
         "/{dataset_id}/data/{data_id}",
cognee/api/v1/ontologies/ontologies.py
CHANGED
@@ -5,7 +5,6 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import Optional, List
 from dataclasses import dataclass
-from fastapi import UploadFile
 
 
 @dataclass
@@ -46,10 +45,8 @@ class OntologyService:
             json.dump(metadata, f, indent=2)
 
     async def upload_ontology(
-        self, ontology_key: str, file
+        self, ontology_key: str, file, user, description: Optional[str] = None
     ) -> OntologyMetadata:
-        if not file.filename:
-            raise ValueError("File must have a filename")
         if not file.filename.lower().endswith(".owl"):
             raise ValueError("File must be in .owl format")
 
@@ -60,6 +57,8 @@ class OntologyService:
             raise ValueError(f"Ontology key '{ontology_key}' already exists")
 
         content = await file.read()
+        if len(content) > 10 * 1024 * 1024:
+            raise ValueError("File size exceeds 10MB limit")
 
         file_path = user_dir / f"{ontology_key}.owl"
         with open(file_path, "wb") as f:
@@ -83,11 +82,7 @@ class OntologyService:
         )
 
     async def upload_ontologies(
-        self,
-        ontology_key: List[str],
-        files: List[UploadFile],
-        user,
-        descriptions: Optional[List[str]] = None,
+        self, ontology_key: List[str], files: List, user, descriptions: Optional[List[str]] = None
     ) -> List[OntologyMetadata]:
         """
         Upload ontology files with their respective keys.
@@ -110,17 +105,47 @@ class OntologyService:
         if len(set(ontology_key)) != len(ontology_key):
             raise ValueError("Duplicate ontology keys not allowed")
 
+        if descriptions and len(descriptions) != len(files):
+            raise ValueError("Number of descriptions must match number of files")
+
         results = []
+        user_dir = self._get_user_dir(str(user.id))
+        metadata = self._load_metadata(user_dir)
 
         for i, (key, file) in enumerate(zip(ontology_key, files)):
+            if key in metadata:
+                raise ValueError(f"Ontology key '{key}' already exists")
+
+            if not file.filename.lower().endswith(".owl"):
+                raise ValueError(f"File '{file.filename}' must be in .owl format")
+
+            content = await file.read()
+            if len(content) > 10 * 1024 * 1024:
+                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
+
+            file_path = user_dir / f"{key}.owl"
+            with open(file_path, "wb") as f:
+                f.write(content)
+
+            ontology_metadata = {
+                "filename": file.filename,
+                "size_bytes": len(content),
+                "uploaded_at": datetime.now(timezone.utc).isoformat(),
+                "description": descriptions[i] if descriptions else None,
+            }
+            metadata[key] = ontology_metadata
+
             results.append(
-
+                OntologyMetadata(
                     ontology_key=key,
-
-
+                    filename=file.filename,
+                    size_bytes=len(content),
+                    uploaded_at=ontology_metadata["uploaded_at"],
                     description=descriptions[i] if descriptions else None,
                 )
             )
+
+        self._save_metadata(user_dir, metadata)
         return results
 
     def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
cognee/api/v1/ontologies/routers/get_ontology_router.py
CHANGED
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, File, Form, UploadFile, Depends,
+from fastapi import APIRouter, File, Form, UploadFile, Depends, HTTPException
 from fastapi.responses import JSONResponse
 from typing import Optional, List
 
@@ -15,25 +15,28 @@ def get_ontology_router() -> APIRouter:
 
     @router.post("", response_model=dict)
     async def upload_ontology(
-        request: Request,
         ontology_key: str = Form(...),
-        ontology_file: UploadFile = File(...),
-
+        ontology_file: List[UploadFile] = File(...),
+        descriptions: Optional[str] = Form(None),
         user: User = Depends(get_authenticated_user),
     ):
         """
-        Upload
+        Upload ontology files with their respective keys for later use in cognify operations.
+
+        Supports both single and multiple file uploads:
+        - Single file: ontology_key=["key"], ontology_file=[file]
+        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
 
         ## Request Parameters
-        - **ontology_key** (str):
-        - **ontology_file** (UploadFile):
-        - **
+        - **ontology_key** (str): JSON array string of user-defined identifiers for the ontologies
+        - **ontology_file** (List[UploadFile]): OWL format ontology files
+        - **descriptions** (Optional[str]): JSON array string of optional descriptions
 
         ## Response
-        Returns metadata about
+        Returns metadata about uploaded ontologies including keys, filenames, sizes, and upload timestamps.
 
         ## Error Codes
-        - **400 Bad Request**: Invalid file format, duplicate
+        - **400 Bad Request**: Invalid file format, duplicate keys, array length mismatches, file size exceeded
         - **500 Internal Server Error**: File system or processing errors
         """
         send_telemetry(
@@ -46,22 +49,16 @@ def get_ontology_router() -> APIRouter:
         )
 
         try:
-
-
-
-
-
-
-
-
-
-
-
-            result = await ontology_service.upload_ontology(
-                ontology_key=ontology_key,
-                file=ontology_file,
-                user=user,
-                description=description,
+            import json
+
+            ontology_keys = json.loads(ontology_key)
+            description_list = json.loads(descriptions) if descriptions else None
+
+            if not isinstance(ontology_keys, list):
+                raise ValueError("ontology_key must be a JSON array")
+
+            results = await ontology_service.upload_ontologies(
+                ontology_keys, ontology_file, user, description_list
             )
 
            return {
@@ -73,9 +70,10 @@ def get_ontology_router() -> APIRouter:
                        "uploaded_at": result.uploaded_at,
                        "description": result.description,
                    }
+                    for result in results
                ]
            }
-        except ValueError as e:
+        except (json.JSONDecodeError, ValueError) as e:
            return JSONResponse(status_code=400, content={"error": str(e)})
        except Exception as e:
            return JSONResponse(status_code=500, content={"error": str(e)})
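
On the client side, the reworked endpoint expects ontology_key (and optional descriptions) as JSON-array strings in the multipart form, with one or more .owl files under the repeated ontology_file field. A rough sketch using requests; the mount point, server URL, file names, and the absence of authentication headers are assumptions for illustration:

    import json

    import requests

    # Two .owl files uploaded under the same repeated form field name.
    files = [
        ("ontology_file", ("animals.owl", open("animals.owl", "rb"), "application/rdf+xml")),
        ("ontology_file", ("vehicles.owl", open("vehicles.owl", "rb"), "application/rdf+xml")),
    ]

    # Keys and descriptions are sent as JSON-array strings, matching the router's parsing.
    data = {
        "ontology_key": json.dumps(["animals", "vehicles"]),
        "descriptions": json.dumps(["Animal taxonomy", "Vehicle taxonomy"]),
    }

    # Assumed mount point; adjust to wherever the ontology router is included in your deployment.
    response = requests.post("http://localhost:8000/api/v1/ontologies", data=data, files=files)
    print(response.status_code, response.json())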
cognee/api/v1/search/search.py
CHANGED
@@ -31,8 +31,6 @@ async def search(
     only_context: bool = False,
     use_combined_context: bool = False,
     session_id: Optional[str] = None,
-    wide_search_top_k: Optional[int] = 100,
-    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -202,8 +200,6 @@
         only_context=only_context,
         use_combined_context=use_combined_context,
         session_id=session_id,
-        wide_search_top_k=wide_search_top_k,
-        triplet_distance_penalty=triplet_distance_penalty,
     )
 
     return filtered_search_results