cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/ui/ui.py
CHANGED
@@ -15,8 +15,6 @@ import shutil
 
 from cognee.shared.logging_utils import get_logger
 from cognee.version import get_cognee_version
-from .node_setup import check_node_npm, get_nvm_dir, get_nvm_sh_path
-from .npm_utils import run_npm_command
 
 logger = get_logger()
 
@@ -287,6 +285,48 @@ def find_frontend_path() -> Optional[Path]:
     return None
 
 
+def check_node_npm() -> tuple[bool, str]:
+    """
+    Check if Node.js and npm are available.
+    Returns (is_available, error_message)
+    """
+
+    try:
+        # Check Node.js
+        result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)
+        if result.returncode != 0:
+            return False, "Node.js is not installed or not in PATH"
+
+        node_version = result.stdout.strip()
+        logger.debug(f"Found Node.js version: {node_version}")
+
+        # Check npm - handle Windows PowerShell scripts
+        if platform.system() == "Windows":
+            # On Windows, npm might be a PowerShell script, so we need to use shell=True
+            result = subprocess.run(
+                ["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
+            )
+        else:
+            result = subprocess.run(
+                ["npm", "--version"], capture_output=True, text=True, timeout=10
+            )
+
+        if result.returncode != 0:
+            return False, "npm is not installed or not in PATH"
+
+        npm_version = result.stdout.strip()
+        logger.debug(f"Found npm version: {npm_version}")
+
+        return True, f"Node.js {node_version}, npm {npm_version}"
+
+    except subprocess.TimeoutExpired:
+        return False, "Timeout checking Node.js/npm installation"
+    except FileNotFoundError:
+        return False, "Node.js/npm not found. Please install Node.js from https://nodejs.org/"
+    except Exception as e:
+        return False, f"Error checking Node.js/npm: {str(e)}"
+
+
 def install_frontend_dependencies(frontend_path: Path) -> bool:
     """
     Install frontend dependencies if node_modules doesn't exist.
@@ -301,7 +341,24 @@ def install_frontend_dependencies(frontend_path: Path) -> bool:
     logger.info("Installing frontend dependencies (this may take a few minutes)...")
 
     try:
-
+        # Use shell=True on Windows for npm commands
+        if platform.system() == "Windows":
+            result = subprocess.run(
+                ["npm", "install"],
+                cwd=frontend_path,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minutes timeout
+                shell=True,
+            )
+        else:
+            result = subprocess.run(
+                ["npm", "install"],
+                cwd=frontend_path,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minutes timeout
+            )
 
         if result.returncode == 0:
             logger.info("Frontend dependencies installed successfully")
@@ -585,21 +642,6 @@ def start_ui(
     env["HOST"] = "localhost"
     env["PORT"] = str(port)
 
-    # If nvm is installed, ensure it's available in the environment
-    nvm_path = get_nvm_sh_path()
-    if platform.system() != "Windows" and nvm_path.exists():
-        # Add nvm to PATH for the subprocess
-        nvm_dir = get_nvm_dir()
-        # Find the latest Node.js version installed via nvm
-        nvm_versions = nvm_dir / "versions" / "node"
-        if nvm_versions.exists():
-            versions = sorted(nvm_versions.iterdir(), reverse=True)
-            if versions:
-                latest_node_bin = versions[0] / "bin"
-                if latest_node_bin.exists():
-                    current_path = env.get("PATH", "")
-                    env["PATH"] = f"{latest_node_bin}:{current_path}"
-
     # Start the development server
     logger.info(f"Starting frontend server at http://localhost:{port}")
     logger.info("This may take a moment to compile and start...")
@@ -617,26 +659,14 @@ def start_ui(
                 shell=True,
             )
         else:
-
-
-
-
-
-
-
-
-                stderr=subprocess.PIPE,
-                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
-            )
-        else:
-            process = subprocess.Popen(
-                ["npm", "run", "dev"],
-                cwd=frontend_path,
-                env=env,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
-            )
+            process = subprocess.Popen(
+                ["npm", "run", "dev"],
+                cwd=frontend_path,
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
+            )
 
         # Start threads to stream frontend output with prefix
         _stream_process_output(process, "stdout", "[FRONTEND]", "\033[33m")  # Yellow
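
Note: the helper inlined above is self-contained and easy to smoke-test. A minimal sketch, assuming check_node_npm is importable from cognee.api.v1.ui.ui as this diff suggests:

# Sketch only: the import path is inferred from this diff, not confirmed.
from cognee.api.v1.ui.ui import check_node_npm

available, message = check_node_npm()
if not available:
    raise RuntimeError(f"Cannot start the UI: {message}")
print(message)  # e.g. "Node.js v20.11.0, npm 10.2.4"
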
cognee/context_global_variables.py
CHANGED
@@ -4,10 +4,9 @@ from typing import Union
 from uuid import UUID
 
 from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.vector.config import
-from cognee.infrastructure.databases.graph.config import
+from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
+from cognee.infrastructure.databases.graph.config import get_graph_context_config
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
-from cognee.infrastructure.databases.utils import resolve_dataset_database_connection_info
 from cognee.infrastructure.files.storage.config import file_storage_config
 from cognee.modules.users.methods import get_user
 
@@ -17,59 +16,22 @@ vector_db_config = ContextVar("vector_db_config", default=None)
 graph_db_config = ContextVar("graph_db_config", default=None)
 session_user = ContextVar("session_user", default=None)
 
+VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
+GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]
+
 
 async def set_session_user_context_variable(user):
     session_user.set(user)
 
 
 def multi_user_support_possible():
-    graph_db_config =
-    vector_db_config =
-
-
-
-    from cognee.infrastructure.databases.dataset_database_handler import (
-        supported_dataset_database_handlers,
+    graph_db_config = get_graph_context_config()
+    vector_db_config = get_vectordb_context_config()
+    return (
+        graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
+        and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
     )
 
-    if graph_handler not in supported_dataset_database_handlers:
-        raise EnvironmentError(
-            "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected graph dataset to database handler: {graph_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if vector_handler not in supported_dataset_database_handlers:
-        raise EnvironmentError(
-            "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected vector dataset to database handler: {vector_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if (
-        supported_dataset_database_handlers[graph_handler]["handler_provider"]
-        != graph_db_config.graph_database_provider
-    ):
-        raise EnvironmentError(
-            "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected graph database provider: {graph_db_config.graph_database_provider}\n"
-            f"Selected graph dataset to database handler: {graph_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if (
-        supported_dataset_database_handlers[vector_handler]["handler_provider"]
-        != vector_db_config.vector_db_provider
-    ):
-        raise EnvironmentError(
-            "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected vector database provider: {vector_db_config.vector_db_provider}\n"
-            f"Selected vector dataset to database handler: {vector_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    return True
-
 
 def backend_access_control_enabled():
     backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
@@ -79,7 +41,12 @@ def backend_access_control_enabled():
         return multi_user_support_possible()
     elif backend_access_control.lower() == "true":
         # If enabled, ensure that the current graph and vector DBs can support it
-
+        multi_user_support = multi_user_support_possible()
+        if not multi_user_support:
+            raise EnvironmentError(
+                "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
+            )
+        return True
     return False
 
 
@@ -109,8 +76,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
 
     # To ensure permissions are enforced properly all datasets will have their own databases
     dataset_database = await get_or_create_dataset_database(dataset, user)
-    # Ensure that all connection info is resolved properly
-    dataset_database = await resolve_dataset_database_connection_info(dataset_database)
 
     base_config = get_base_config()
     data_root_directory = os.path.join(
@@ -121,8 +86,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
     )
 
     # Set vector and graph database configuration based on dataset database information
-    # TODO: Add better handling of vector and graph config accross Cognee.
-    # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
     vector_config = {
        "vector_db_provider": dataset_database.vector_database_provider,
        "vector_db_url": dataset_database.vector_database_url,
@@ -138,14 +101,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
         "graph_file_path": os.path.join(
             databases_directory_path, dataset_database.graph_database_name
         ),
-        "graph_database_username": dataset_database.graph_database_connection_info.get(
-            "graph_database_username", ""
-        ),
-        "graph_database_password": dataset_database.graph_database_connection_info.get(
-            "graph_database_password", ""
-        ),
-        "graph_dataset_database_handler": "",
-        "graph_database_port": "",
     }
 
     storage_config = {
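
Note: after this change the multi-user gate reduces to two membership checks. A standalone sketch of that logic (the packaged function takes no parameters and reads the context configs instead):

VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]


def multi_user_support_possible(graph_provider: str, vector_provider: str) -> bool:
    # Both engines must support per-dataset isolation for access control to work.
    return (
        graph_provider in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
        and vector_provider in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
    )


assert multi_user_support_possible("kuzu", "lancedb") is True
assert multi_user_support_possible("neo4j", "lancedb") is False
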
cognee/eval_framework/answer_generation/answer_generation_executor.py
CHANGED
@@ -35,16 +35,6 @@ class AnswerGeneratorExecutor:
         retrieval_context = await retriever.get_context(query_text)
         search_results = await retriever.get_completion(query_text, retrieval_context)
 
-        ############
-        #:TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
-        if isinstance(retrieval_context, list):
-            retrieval_context = await retriever.convert_retrieved_objects_to_context(
-                triplets=retrieval_context
-            )
-
-        if isinstance(search_results, str):
-            search_results = [search_results]
-        #############
         answer = {
             "question": query_text,
             "answer": search_results[0],
cognee/eval_framework/answer_generation/run_question_answering_module.py
CHANGED
@@ -35,7 +35,7 @@ async def create_and_insert_answers_table(questions_payload):
 
 
 async def run_question_answering(
-    params: dict, system_prompt="
+    params: dict, system_prompt="answer_simple_question.txt", top_k: Optional[int] = None
 ) -> List[dict]:
     if params.get("answering_questions"):
         logger.info("Question answering started...")
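
Note: the restored signature makes top_k optional alongside a pinned default prompt file. A hedged usage sketch (must run inside an async context; params fields beyond answering_questions are omitted here):

params = {"answering_questions": True}
# top_k=None preserves the previous behavior; an int caps retrieved results.
answers = await run_question_answering(params, top_k=5)
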
cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
CHANGED
@@ -8,6 +8,7 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -30,6 +31,7 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
CHANGED
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
 
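
Note: the index bump is purely positional. The default task list gained check_permissions_on_dataset at position 1, so getters that previously selected [0, 1] must now select [0, 1, 2] to keep both classify and extract_chunks. A toy illustration with task names only:

old_default = ["classify_documents", "extract_chunks_from_documents"]
new_default = [
    "classify_documents",
    "check_permissions_on_dataset",
    "extract_chunks_from_documents",
]

# On the new list, [0, 1] would silently drop chunk extraction.
assert [new_default[i] for i in [0, 1, 2]] == new_default
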
cognee/eval_framework/eval_config.py
CHANGED
@@ -14,7 +14,7 @@ class EvalConfig(BaseSettings):
 
     # Question answering params
     answering_questions: bool = True
-    qa_engine: str = "
+    qa_engine: str = "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
 
     # Evaluation params
     evaluating_answers: bool = True
@@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
         "EM",
         "f1",
     ]  # Use only 'correctness' for DirectLLM
-    deepeval_model: str = "gpt-
+    deepeval_model: str = "gpt-5-mini"
 
     # Metrics params
     calculate_metrics: bool = True
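
Note: EvalConfig subclasses pydantic's BaseSettings, so the restored defaults can still be overridden per run. A sketch assuming standard BaseSettings behavior (constructor kwargs or matching environment variables take precedence over defaults):

from cognee.eval_framework.eval_config import EvalConfig

config = EvalConfig(qa_engine="cognee_graph_completion", deepeval_model="gpt-5-mini")
print(config.qa_engine)  # "cognee_graph_completion"
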
cognee/eval_framework/modal_run_eval.py
CHANGED
@@ -2,6 +2,7 @@ import modal
 import os
 import asyncio
 import datetime
+import hashlib
 import json
 from cognee.shared.logging_utils import get_logger
 from cognee.eval_framework.eval_config import EvalConfig
@@ -9,9 +10,6 @@ from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_b
 from cognee.eval_framework.answer_generation.run_question_answering_module import (
     run_question_answering,
 )
-import pathlib
-from os import path
-from modal import Image
 from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
 from cognee.eval_framework.metrics_dashboard import create_dashboard
 
@@ -40,19 +38,22 @@ def read_and_combine_metrics(eval_params: dict) -> dict:
 
 app = modal.App("modal-run-eval")
 
-image =
-
-
-
+image = (
+    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
+    .copy_local_file("pyproject.toml", "pyproject.toml")
+    .copy_local_file("poetry.lock", "poetry.lock")
+    .env(
+        {
+            "ENV": os.getenv("ENV"),
+            "LLM_API_KEY": os.getenv("LLM_API_KEY"),
+            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
+        }
+    )
+    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
+)
 
 
-@app.function(
-    image=image,
-    max_containers=10,
-    timeout=86400,
-    volumes={"/data": vol},
-    secrets=[modal.Secret.from_name("eval_secrets")],
-)
+@app.function(image=image, concurrency_limit=10, timeout=86400, volumes={"/data": vol})
 async def modal_run_eval(eval_params=None):
     """Runs evaluation pipeline and returns combined metrics results."""
     if eval_params is None:
@@ -104,7 +105,18 @@ async def main():
     configs = [
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=
+            number_of_samples_in_corpus=10,
+            benchmark="HotPotQA",
+            qa_engine="cognee_graph_completion",
+            building_corpus_from_scratch=True,
+            answering_questions=True,
+            evaluating_answers=True,
+            calculate_metrics=True,
+            dashboard=True,
+        ),
+        EvalConfig(
+            task_getter_type="Default",
+            number_of_samples_in_corpus=10,
             benchmark="TwoWikiMultiHop",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
@@ -115,7 +127,7 @@ async def main():
         ),
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=
+            number_of_samples_in_corpus=10,
             benchmark="Musique",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
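
Note: with the image and decorator restored, main() can fan the configs out across containers. A sketch assuming Modal's async call API (.remote.aio) and pydantic v2's model_dump(), neither of which appears in this diff; asyncio is already imported by the module:

async def run_all(configs):
    # Each config runs in its own Modal container, up to the concurrency limit.
    return await asyncio.gather(
        *[modal_run_eval.remote.aio(eval_params=config.model_dump()) for config in configs]
    )
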
cognee/infrastructure/databases/graph/config.py
CHANGED
@@ -47,7 +47,6 @@ class GraphConfig(BaseSettings):
     graph_filename: str = ""
     graph_model: object = KnowledgeGraph
     graph_topology: object = KnowledgeGraph
-    graph_dataset_database_handler: str = "kuzu"
     model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
 
     # Model validator updates graph_filename and path dynamically after class creation based on current database provider
@@ -98,7 +97,6 @@ class GraphConfig(BaseSettings):
             "graph_model": self.graph_model,
             "graph_topology": self.graph_topology,
             "model_config": self.model_config,
-            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
     def to_hashable_dict(self) -> dict:
@@ -123,7 +121,6 @@ class GraphConfig(BaseSettings):
             "graph_database_port": self.graph_database_port,
             "graph_database_key": self.graph_database_key,
             "graph_file_path": self.graph_file_path,
-            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
 
cognee/infrastructure/databases/graph/graph_db_interface.py
CHANGED
@@ -398,18 +398,3 @@ class GraphDBInterface(ABC):
         - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
         """
         raise NotImplementedError
-
-    @abstractmethod
-    async def get_filtered_graph_data(
-        self, attribute_filters: List[Dict[str, List[Union[str, int]]]]
-    ) -> Tuple[List[Node], List[EdgeData]]:
-        """
-        Retrieve nodes and edges filtered by the provided attribute criteria.
-
-        Parameters:
-        -----------
-
-        - attribute_filters: A list of dictionaries where keys are attribute names and values
-          are lists of attribute values to filter by.
-        """
-        raise NotImplementedError