cognee 0.5.0.dev0__py3-none-any.whl → 0.5.0.dev1__py3-none-any.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- cognee/api/client.py +1 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +12 -37
- cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/context_global_variables.py +61 -16
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/graph/config.py +3 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +2 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +35 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
- cognee/modules/retrieval/graph_completion_retriever.py +10 -0
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +4 -0
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +21 -0
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +15 -3
- cognee/shared/logging_utils.py +4 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/test_cognee_server_start.py +2 -4
- cognee/tests/test_conversation_history.py +23 -1
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_search_db.py +37 -1
- cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/METADATA +76 -89
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/RECORD +118 -97
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/WHEEL +1 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- cognee/tests/test_delete_bmw_example.py +0 -60
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/client.py
CHANGED

```diff
@@ -21,7 +21,7 @@ from cognee.api.v1.notebooks.routers import get_notebooks_router
 from cognee.api.v1.permissions.routers import get_permissions_router
 from cognee.api.v1.settings.routers import get_settings_router
 from cognee.api.v1.datasets.routers import get_datasets_router
-from cognee.api.v1.cognify.routers import
+from cognee.api.v1.cognify.routers import get_cognify_router
 from cognee.api.v1.search.routers import get_search_router
 from cognee.api.v1.ontologies.routers.get_ontology_router import get_ontology_router
 from cognee.api.v1.memify.routers import get_memify_router
@@ -278,10 +278,6 @@ app.include_router(get_responses_router(), prefix="/api/v1/responses", tags=["re
 
 app.include_router(get_sync_router(), prefix="/api/v1/sync", tags=["sync"])
 
-codegraph_routes = get_code_pipeline_router()
-if codegraph_routes:
-    app.include_router(codegraph_routes, prefix="/api/v1/code-pipeline", tags=["code-pipeline"])
-
 app.include_router(
     get_users_router(),
     prefix="/api/v1/users",
```
cognee/api/v1/add/add.py
CHANGED

```diff
@@ -155,7 +155,7 @@ async def add(
     - LLM_API_KEY: API key for your LLM provider (OpenAI, Anthropic, etc.)
 
     Optional:
-    - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral"
+    - LLM_PROVIDER: "openai" (default), "anthropic", "gemini", "ollama", "mistral", "bedrock"
     - LLM_MODEL: Model name (default: "gpt-5-mini")
     - DEFAULT_USER_EMAIL: Custom default user email
     - DEFAULT_USER_PASSWORD: Custom default user password
@@ -205,6 +205,7 @@ async def add(
         pipeline_name="add_pipeline",
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
+        use_pipeline_cache=True,
         incremental_loading=incremental_loading,
         data_per_batch=data_per_batch,
     ):
```
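Two user-visible effects of this change: Bedrock joins the supported LLM providers, and the add pipeline now always opts into the pipeline cache. A minimal sketch of selecting the new provider via environment variables, as the docstring above describes; the Bedrock model identifier is a hypothetical example, not taken from this diff:

```python
import os
import asyncio
import cognee

# Select the newly supported Bedrock provider via environment variables.
# The model identifier below is a hypothetical example; use one your
# AWS account can actually invoke.
os.environ["LLM_PROVIDER"] = "bedrock"
os.environ["LLM_MODEL"] = "anthropic.claude-3-haiku-20240307-v1:0"

async def main():
    # add() now runs with use_pipeline_cache=True internally, so
    # re-adding identical data should hit the cache instead of re-ingesting.
    await cognee.add("Natural language processing is a subfield of AI.")

asyncio.run(main())
```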
cognee/api/v1/cognify/cognify.py
CHANGED

```diff
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from typing import Union, Optional
 from uuid import UUID
 
+from cognee.modules.cognify.config import get_cognify_config
 from cognee.modules.ontology.ontology_env_config import get_ontology_env_config
 from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
@@ -19,7 +20,6 @@ from cognee.modules.ontology.get_default_ontology_resolver import (
 from cognee.modules.users.models import User
 
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -53,6 +53,7 @@ async def cognify(
     custom_prompt: Optional[str] = None,
     temporal_cognify: bool = False,
     data_per_batch: int = 20,
+    **kwargs,
 ):
     """
     Transform ingested data into a structured knowledge graph.
@@ -78,12 +79,11 @@
 
     Processing Pipeline:
     1. **Document Classification**: Identifies document types and structures
-    2. **
-    3. **
-    4. **
-    5. **
-    6. **
-    7. **Content Summarization**: Creates hierarchical summaries for navigation
+    2. **Text Chunking**: Breaks content into semantically meaningful segments
+    3. **Entity Extraction**: Identifies key concepts, people, places, organizations
+    4. **Relationship Detection**: Discovers connections between entities
+    5. **Graph Construction**: Builds semantic knowledge graph with embeddings
+    6. **Content Summarization**: Creates hierarchical summaries for navigation
 
     Graph Model Customization:
     The `graph_model` parameter allows custom knowledge structures:
@@ -224,6 +224,7 @@
         config=config,
         custom_prompt=custom_prompt,
         chunks_per_batch=chunks_per_batch,
+        **kwargs,
     )
 
     # By calling get pipeline executor we get a function that will have the run_pipeline run in the background or a function that we will need to wait for
@@ -238,6 +239,7 @@
         vector_db_config=vector_db_config,
         graph_db_config=graph_db_config,
         incremental_loading=incremental_loading,
+        use_pipeline_cache=True,
         pipeline_name="cognify_pipeline",
         data_per_batch=data_per_batch,
     )
@@ -251,6 +253,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     config: Config = None,
     custom_prompt: Optional[str] = None,
     chunks_per_batch: int = 100,
+    **kwargs,
 ) -> list[Task]:
     if config is None:
         ontology_config = get_ontology_env_config()
@@ -272,9 +275,11 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
     if chunks_per_batch is None:
         chunks_per_batch = 100
 
+    cognify_config = get_cognify_config()
+    embed_triplets = cognify_config.triplet_embedding
+
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
@@ -286,12 +291,17 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's
             config=config,
             custom_prompt=custom_prompt,
             task_config={"batch_size": chunks_per_batch},
+            **kwargs,
         ),  # Generate knowledge graphs from the document chunks.
         Task(
             summarize_text,
             task_config={"batch_size": chunks_per_batch},
         ),
-        Task(
+        Task(
+            add_data_points,
+            embed_triplets=embed_triplets,
+            task_config={"batch_size": chunks_per_batch},
+        ),
     ]
 
     return default_tasks
@@ -305,14 +315,13 @@ async def get_temporal_tasks(
 
     The pipeline includes:
     1. Document classification.
-    2.
-    3.
-    4.
-    5.
-    6. Batched insertion of data points.
+    2. Document chunking with a specified or default chunk size.
+    3. Event and timestamp extraction from chunks.
+    4. Knowledge graph extraction from events.
+    5. Batched insertion of data points.
 
     Args:
-        user (User, optional): The user requesting task execution
+        user (User, optional): The user requesting task execution.
         chunker (Callable, optional): A text chunking function/class to split documents. Defaults to TextChunker.
         chunk_size (int, optional): Maximum token size per chunk. If not provided, uses system default.
         chunks_per_batch (int, optional): Number of chunks to process in a single batch in Cognify
@@ -325,7 +334,6 @@ async def get_temporal_tasks(
 
     temporal_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),
```
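The new `triplet_embedding` flag flows from the cognify config into the final `add_data_points` task, and `cognify()` now forwards `**kwargs` down to the graph-extraction task. A hedged sketch of inspecting the flag and running the pipeline; which extra kwargs the extraction task accepts is version-specific and not fully shown in this diff:

```python
import asyncio
import cognee
from cognee.modules.cognify.config import get_cognify_config

async def main():
    # The default task list reads this switch and passes it to
    # add_data_points as embed_triplets (see the diff above).
    print("Triplet embedding enabled:", get_cognify_config().triplet_embedding)

    await cognee.add("Alan Turing proposed the Turing test in 1950.")
    # Extra keyword arguments given to cognify() are now forwarded
    # through get_default_tasks() into the graph-extraction task.
    await cognee.cognify()

asyncio.run(main())
```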
cognee/api/v1/cognify/routers/get_cognify_router.py
CHANGED

```diff
@@ -42,7 +42,9 @@ class CognifyPayloadDTO(InDTO):
         default="", description="Custom prompt for entity extraction and graph generation"
     )
     ontology_key: Optional[List[str]] = Field(
-        default=None,
+        default=None,
+        examples=[[]],
+        description="Reference to one or more previously uploaded ontologies",
     )
 
 
```
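With `ontology_key` now documented on the payload DTO, a cognify request can reference previously uploaded ontologies by key. A hedged sketch using httpx; the endpoint path, the `datasets` field, and the bearer token follow cognee's usual API layout but are assumptions here, not shown in this diff:

```python
import httpx

# Hypothetical request body: /api/v1/cognify, "datasets", and the token
# are placeholders assumed from cognee's API conventions.
payload = {
    "datasets": ["my_dataset"],
    "ontology_key": ["medical_terms", "org_chart"],  # keys from prior uploads
}

response = httpx.post(
    "http://localhost:8000/api/v1/cognify",
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code, response.json())
```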
cognee/api/v1/datasets/routers/get_datasets_router.py
CHANGED

```diff
@@ -208,14 +208,14 @@ def get_datasets_router() -> APIRouter:
             },
         )
 
-        from cognee.modules.data.methods import
+        from cognee.modules.data.methods import delete_dataset
 
-        dataset = await
+        dataset = await get_authorized_existing_datasets([dataset_id], "delete", user)
 
         if dataset is None:
             raise DatasetNotFoundError(message=f"Dataset ({str(dataset_id)}) not found.")
 
-        await delete_dataset(dataset)
+        await delete_dataset(dataset[0])
 
     @router.delete(
         "/{dataset_id}/data/{data_id}",
```
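The fix unwraps the first element because `get_authorized_existing_datasets` returns a list of authorized datasets rather than a single object. From a client's point of view the endpoint is unchanged; a hedged sketch of the call, with the base URL, UUID, and token as placeholders:

```python
import httpx

dataset_id = "00000000-0000-0000-0000-000000000000"  # placeholder UUID

# The server now authorizes via get_authorized_existing_datasets() and
# deletes dataset[0]; the HTTP contract stays the same.
response = httpx.delete(
    f"http://localhost:8000/api/v1/datasets/{dataset_id}",
    headers={"Authorization": "Bearer <token>"},
)
print(response.status_code)
```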
cognee/api/v1/ontologies/ontologies.py
CHANGED

```diff
@@ -5,6 +5,7 @@ from pathlib import Path
 from datetime import datetime, timezone
 from typing import Optional, List
 from dataclasses import dataclass
+from fastapi import UploadFile
 
 
 @dataclass
@@ -45,8 +46,10 @@ class OntologyService:
             json.dump(metadata, f, indent=2)
 
     async def upload_ontology(
-        self, ontology_key: str, file, user, description: Optional[str] = None
+        self, ontology_key: str, file: UploadFile, user, description: Optional[str] = None
     ) -> OntologyMetadata:
+        if not file.filename:
+            raise ValueError("File must have a filename")
         if not file.filename.lower().endswith(".owl"):
             raise ValueError("File must be in .owl format")
 
@@ -57,8 +60,6 @@ class OntologyService:
             raise ValueError(f"Ontology key '{ontology_key}' already exists")
 
         content = await file.read()
-        if len(content) > 10 * 1024 * 1024:
-            raise ValueError("File size exceeds 10MB limit")
 
         file_path = user_dir / f"{ontology_key}.owl"
         with open(file_path, "wb") as f:
@@ -82,7 +83,11 @@ class OntologyService:
         )
 
     async def upload_ontologies(
-        self,
+        self,
+        ontology_key: List[str],
+        files: List[UploadFile],
+        user,
+        descriptions: Optional[List[str]] = None,
     ) -> List[OntologyMetadata]:
         """
         Upload ontology files with their respective keys.
@@ -105,47 +110,17 @@ class OntologyService:
         if len(set(ontology_key)) != len(ontology_key):
             raise ValueError("Duplicate ontology keys not allowed")
 
-        if descriptions and len(descriptions) != len(files):
-            raise ValueError("Number of descriptions must match number of files")
-
         results = []
-        user_dir = self._get_user_dir(str(user.id))
-        metadata = self._load_metadata(user_dir)
 
         for i, (key, file) in enumerate(zip(ontology_key, files)):
-            if key in metadata:
-                raise ValueError(f"Ontology key '{key}' already exists")
-
-            if not file.filename.lower().endswith(".owl"):
-                raise ValueError(f"File '{file.filename}' must be in .owl format")
-
-            content = await file.read()
-            if len(content) > 10 * 1024 * 1024:
-                raise ValueError(f"File '{file.filename}' exceeds 10MB limit")
-
-            file_path = user_dir / f"{key}.owl"
-            with open(file_path, "wb") as f:
-                f.write(content)
-
-            ontology_metadata = {
-                "filename": file.filename,
-                "size_bytes": len(content),
-                "uploaded_at": datetime.now(timezone.utc).isoformat(),
-                "description": descriptions[i] if descriptions else None,
-            }
-            metadata[key] = ontology_metadata
-
             results.append(
-
+                await self.upload_ontology(
                     ontology_key=key,
-
-
-                    uploaded_at=ontology_metadata["uploaded_at"],
+                    file=file,
+                    user=user,
                     description=descriptions[i] if descriptions else None,
                 )
             )
-
-        self._save_metadata(user_dir, metadata)
         return results
 
     def get_ontology_contents(self, ontology_key: List[str], user) -> List[str]:
```
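`upload_ontologies` now delegates per-file validation and persistence to `upload_ontology` instead of duplicating that logic inline. A hedged sketch of calling the batch method directly; constructing `UploadFile` objects by hand is only for illustration, since in the running API FastAPI builds them from the multipart request:

```python
import io
from fastapi import UploadFile

async def demo(ontology_service, user):
    # Illustrative only: "ontology_service" and "user" are assumed to
    # exist in scope; the OWL payloads here are stand-in bytes.
    files = [
        UploadFile(file=io.BytesIO(b"<owl/>"), filename="medical_terms.owl"),
        UploadFile(file=io.BytesIO(b"<owl/>"), filename="org_chart.owl"),
    ]
    results = await ontology_service.upload_ontologies(
        ontology_key=["medical_terms", "org_chart"],
        files=files,
        user=user,
        descriptions=["Medical vocabulary", "Company structure"],
    )
    # Each entry is an OntologyMetadata produced by one upload_ontology call.
    return results
```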
cognee/api/v1/ontologies/routers/get_ontology_router.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-from fastapi import APIRouter, File, Form, UploadFile, Depends,
+from fastapi import APIRouter, File, Form, UploadFile, Depends, Request
 from fastapi.responses import JSONResponse
 from typing import Optional, List
 
@@ -15,28 +15,25 @@ def get_ontology_router() -> APIRouter:
 
     @router.post("", response_model=dict)
     async def upload_ontology(
+        request: Request,
         ontology_key: str = Form(...),
-        ontology_file:
-
+        ontology_file: UploadFile = File(...),
+        description: Optional[str] = Form(None),
         user: User = Depends(get_authenticated_user),
     ):
         """
-        Upload
-
-        Supports both single and multiple file uploads:
-        - Single file: ontology_key=["key"], ontology_file=[file]
-        - Multiple files: ontology_key=["key1", "key2"], ontology_file=[file1, file2]
+        Upload a single ontology file for later use in cognify operations.
 
         ## Request Parameters
-        - **ontology_key** (str):
-        - **ontology_file** (
-        - **
+        - **ontology_key** (str): User-defined identifier for the ontology.
+        - **ontology_file** (UploadFile): Single OWL format ontology file
+        - **description** (Optional[str]): Optional description for the ontology.
 
         ## Response
-        Returns metadata about uploaded
+        Returns metadata about the uploaded ontology including key, filename, size, and upload timestamp.
 
         ## Error Codes
-        - **400 Bad Request**: Invalid file format, duplicate
+        - **400 Bad Request**: Invalid file format, duplicate key, multiple files uploaded
         - **500 Internal Server Error**: File system or processing errors
         """
         send_telemetry(
@@ -49,16 +46,22 @@ def get_ontology_router() -> APIRouter:
         )
 
         try:
-
-
-
-
-
-
-
-
-
+            # Enforce: exactly one uploaded file for "ontology_file"
+            form = await request.form()
+            uploaded_files = form.getlist("ontology_file")
+            if len(uploaded_files) != 1:
+                raise ValueError("Only one ontology_file is allowed")
+
+            if ontology_key.strip().startswith(("[", "{")):
+                raise ValueError("ontology_key must be a string")
+            if description is not None and description.strip().startswith(("[", "{")):
+                raise ValueError("description must be a string")
+
+            result = await ontology_service.upload_ontology(
+                ontology_key=ontology_key,
+                file=ontology_file,
+                user=user,
+                description=description,
             )
 
             return {
@@ -70,10 +73,9 @@ def get_ontology_router() -> APIRouter:
                 "uploaded_at": result.uploaded_at,
                 "description": result.description,
             }
-            for result in results
             ]
         }
-        except
+        except ValueError as e:
             return JSONResponse(status_code=400, content={"error": str(e)})
         except Exception as e:
             return JSONResponse(status_code=500, content={"error": str(e)})
```
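Given the stricter handler (exactly one file, plain-string key and description), a hedged multipart upload sketch with httpx; the base URL, route prefix, content type, and token are assumptions rather than values shown in this diff:

```python
import httpx

# Hypothetical client call; base URL, route prefix, and token are placeholders.
with open("medical_terms.owl", "rb") as owl_file:
    response = httpx.post(
        "http://localhost:8000/api/v1/ontologies",
        data={"ontology_key": "medical_terms", "description": "Medical vocabulary"},
        files={"ontology_file": ("medical_terms.owl", owl_file, "application/rdf+xml")},
        headers={"Authorization": "Bearer <token>"},
    )
print(response.status_code, response.json())
```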
cognee/api/v1/search/search.py
CHANGED

```diff
@@ -31,6 +31,8 @@ async def search(
     only_context: bool = False,
     use_combined_context: bool = False,
     session_id: Optional[str] = None,
+    wide_search_top_k: Optional[int] = 100,
+    triplet_distance_penalty: Optional[float] = 3.5,
 ) -> Union[List[SearchResult], CombinedSearchResult]:
     """
     Search and query the knowledge graph for insights, information, and connections.
@@ -200,6 +202,8 @@ async def search(
         only_context=only_context,
         use_combined_context=use_combined_context,
         session_id=session_id,
+        wide_search_top_k=wide_search_top_k,
+        triplet_distance_penalty=triplet_distance_penalty,
     )
 
     return filtered_search_results
```