cognee 0.5.0.dev0__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. cognee/api/client.py +1 -5
  2. cognee/api/v1/add/add.py +2 -1
  3. cognee/api/v1/cognify/cognify.py +24 -16
  4. cognee/api/v1/cognify/routers/__init__.py +0 -1
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
  6. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  7. cognee/api/v1/ontologies/ontologies.py +12 -37
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
  9. cognee/api/v1/search/search.py +8 -0
  10. cognee/api/v1/ui/node_setup.py +360 -0
  11. cognee/api/v1/ui/npm_utils.py +50 -0
  12. cognee/api/v1/ui/ui.py +38 -68
  13. cognee/context_global_variables.py +61 -16
  14. cognee/eval_framework/Dockerfile +29 -0
  15. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  16. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  17. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  18. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  19. cognee/eval_framework/eval_config.py +2 -2
  20. cognee/eval_framework/modal_run_eval.py +16 -28
  21. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  22. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  23. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  24. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  25. cognee/infrastructure/databases/graph/config.py +3 -0
  26. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
  27. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  28. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  29. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  30. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  31. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  32. cognee/infrastructure/databases/utils/__init__.py +3 -0
  33. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  34. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
  35. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  36. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  37. cognee/infrastructure/databases/vector/config.py +2 -0
  38. cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
  39. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  40. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  41. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  42. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  43. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  44. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  45. cognee/infrastructure/files/storage/s3_config.py +2 -0
  46. cognee/infrastructure/llm/LLMGateway.py +5 -2
  47. cognee/infrastructure/llm/config.py +35 -0
  48. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  49. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
  51. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  52. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  53. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
  54. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
  55. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
  56. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
  57. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
  58. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
  59. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  60. cognee/modules/cognify/config.py +2 -0
  61. cognee/modules/data/deletion/prune_system.py +52 -2
  62. cognee/modules/data/methods/delete_dataset.py +26 -0
  63. cognee/modules/engine/models/Triplet.py +9 -0
  64. cognee/modules/engine/models/__init__.py +1 -0
  65. cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
  66. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  67. cognee/modules/memify/memify.py +1 -7
  68. cognee/modules/pipelines/operations/pipeline.py +18 -2
  69. cognee/modules/retrieval/__init__.py +1 -1
  70. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
  71. cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
  72. cognee/modules/retrieval/graph_completion_retriever.py +10 -0
  73. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  74. cognee/modules/retrieval/register_retriever.py +10 -0
  75. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  76. cognee/modules/retrieval/temporal_retriever.py +4 -0
  77. cognee/modules/retrieval/triplet_retriever.py +182 -0
  78. cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
  79. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
  80. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  81. cognee/modules/search/methods/no_access_control_search.py +4 -0
  82. cognee/modules/search/methods/search.py +46 -18
  83. cognee/modules/search/types/SearchType.py +1 -1
  84. cognee/modules/settings/get_settings.py +19 -0
  85. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  86. cognee/modules/users/models/DatasetDatabase.py +15 -3
  87. cognee/shared/logging_utils.py +4 -0
  88. cognee/shared/rate_limiting.py +30 -0
  89. cognee/tasks/documents/__init__.py +0 -1
  90. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  91. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  92. cognee/tasks/storage/add_data_points.py +142 -2
  93. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  94. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  95. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  96. cognee/tests/test_cognee_server_start.py +2 -4
  97. cognee/tests/test_conversation_history.py +23 -1
  98. cognee/tests/test_dataset_database_handler.py +137 -0
  99. cognee/tests/test_dataset_delete.py +76 -0
  100. cognee/tests/test_edge_centered_payload.py +170 -0
  101. cognee/tests/test_pipeline_cache.py +164 -0
  102. cognee/tests/test_search_db.py +37 -1
  103. cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
  104. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  105. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  106. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  107. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  108. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  109. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  110. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  111. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  112. cognee/tests/unit/modules/search/test_search.py +100 -0
  113. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  114. {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/METADATA +76 -89
  115. {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/RECORD +119 -97
  116. {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/WHEEL +1 -1
  117. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  118. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  119. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  120. cognee/modules/retrieval/code_retriever.py +0 -232
  121. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  122. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  123. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  124. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  125. cognee/tasks/repo_processor/__init__.py +0 -2
  126. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  127. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  128. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  129. cognee/tests/test_delete_bmw_example.py +0 -60
  130. {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/entry_points.txt +0 -0
  131. {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/LICENSE +0 -0
  132. {cognee-0.5.0.dev0.dist-info → cognee-0.5.1.dist-info}/licenses/NOTICE.md +0 -0
@@ -12,6 +12,7 @@ from contextlib import asynccontextmanager
  from concurrent.futures import ThreadPoolExecutor
  from typing import Dict, Any, List, Union, Optional, Tuple, Type
 
+ from cognee.exceptions import CogneeValidationError
  from cognee.shared.logging_utils import get_logger
  from cognee.infrastructure.utils.run_sync import run_sync
  from cognee.infrastructure.files.storage import get_file_storage
@@ -1186,6 +1187,11 @@ class KuzuAdapter(GraphDBInterface):
  A tuple with two elements: a list of tuples of (node_id, properties) and a list of
  tuples of (source_id, target_id, relationship_name, properties).
  """
+
+ import time
+
+ start_time = time.time()
+
  try:
  nodes_query = """
  MATCH (n:Node)
@@ -1249,6 +1255,11 @@ class KuzuAdapter(GraphDBInterface):
  },
  )
  )
+
+ retrieval_time = time.time() - start_time
+ logger.info(
+ f"Retrieved {len(nodes)} nodes and {len(edges)} edges in {retrieval_time:.2f} seconds"
+ )
  return formatted_nodes, formatted_edges
  except Exception as e:
  logger.error(f"Failed to get graph data: {e}")
@@ -1417,6 +1428,92 @@ class KuzuAdapter(GraphDBInterface):
  formatted_edges.append((source_id, target_id, rel_type, props))
  return formatted_nodes, formatted_edges
 
+ async def get_id_filtered_graph_data(self, target_ids: list[str]):
+ """
+ Retrieve graph data filtered by specific node IDs, including their direct neighbors
+ and only edges where one endpoint matches those IDs.
+
+ Returns:
+ nodes: List[tuple] -> Each tuple is (node_id, properties)
+ edges: List[tuple] -> Each tuple is (source_id, target_id, relationship_name, properties)
+ """
+ import time
+
+ start_time = time.time()
+
+ try:
+ if not target_ids:
+ logger.warning("No target IDs provided for ID-filtered graph retrieval.")
+ return [], []
+
+ if not all(isinstance(x, str) for x in target_ids):
+ raise CogneeValidationError("target_ids must be a list of strings")
+
+ query = """
+ MATCH (n:Node)-[r]->(m:Node)
+ WHERE n.id IN $target_ids OR m.id IN $target_ids
+ RETURN n.id, {
+ name: n.name,
+ type: n.type,
+ properties: n.properties
+ }, m.id, {
+ name: m.name,
+ type: m.type,
+ properties: m.properties
+ }, r.relationship_name, r.properties
+ """
+
+ result = await self.query(query, {"target_ids": target_ids})
+
+ if not result:
+ logger.info("No data returned for the supplied IDs")
+ return [], []
+
+ nodes_dict = {}
+ edges = []
+
+ for n_id, n_props, m_id, m_props, r_type, r_props_raw in result:
+ if n_props.get("properties"):
+ try:
+ additional_props = json.loads(n_props["properties"])
+ n_props.update(additional_props)
+ del n_props["properties"]
+ except json.JSONDecodeError:
+ logger.warning(f"Failed to parse properties JSON for node {n_id}")
+
+ if m_props.get("properties"):
+ try:
+ additional_props = json.loads(m_props["properties"])
+ m_props.update(additional_props)
+ del m_props["properties"]
+ except json.JSONDecodeError:
+ logger.warning(f"Failed to parse properties JSON for node {m_id}")
+
+ nodes_dict[n_id] = (n_id, n_props)
+ nodes_dict[m_id] = (m_id, m_props)
+
+ edge_props = {}
+ if r_props_raw:
+ try:
+ edge_props = json.loads(r_props_raw)
+ except (json.JSONDecodeError, TypeError):
+ logger.warning(f"Failed to parse edge properties for {n_id}->{m_id}")
+
+ source_id = edge_props.get("source_node_id", n_id)
+ target_id = edge_props.get("target_node_id", m_id)
+ edges.append((source_id, target_id, r_type, edge_props))
+
+ retrieval_time = time.time() - start_time
+ logger.info(
+ f"ID-filtered retrieval: {len(nodes_dict)} nodes and {len(edges)} edges in {retrieval_time:.2f}s"
+ )
+
+ return list(nodes_dict.values()), edges
+
+ except Exception as e:
+ logger.error(f"Error during ID-filtered graph data retrieval: {str(e)}")
+ raise
+
  async def get_graph_metrics(self, include_optional=False) -> Dict[str, Any]:
  """
  Get metrics on graph structure and connectivity.
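
Note: a minimal usage sketch of the new get_id_filtered_graph_data API (the adapter setup and node IDs below are illustrative, not taken from this diff):

    async def demo(adapter):
        # Fetch only the subgraph touching these IDs, plus their direct neighbors.
        nodes, edges = await adapter.get_id_filtered_graph_data(["node-id-1", "node-id-2"])
        for node_id, props in nodes:  # nodes are (node_id, properties) tuples
            print(node_id, props.get("name"))
        for source_id, target_id, rel_type, props in edges:
            print(f"{source_id} -[{rel_type}]-> {target_id}")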
@@ -1908,3 +2005,134 @@ class KuzuAdapter(GraphDBInterface):
  time_ids_list = [item[0] for item in time_nodes]
 
  return ", ".join(f"'{uid}'" for uid in time_ids_list)
+
+ async def get_triplets_batch(self, offset: int, limit: int) -> list[dict[str, Any]]:
+ """
+ Retrieve a batch of triplets (start_node, relationship, end_node) from the graph.
+
+ Parameters:
+ -----------
+ - offset (int): Number of triplets to skip before returning results.
+ - limit (int): Maximum number of triplets to return.
+
+ Returns:
+ --------
+ - list[dict[str, Any]]: A list of triplets, where each triplet is a dictionary
+ with keys: 'start_node', 'relationship_properties', 'end_node'.
+
+ Raises:
+ -------
+ - ValueError: If offset or limit are negative.
+ - Exception: Re-raises any exceptions from query execution.
+ """
+ if offset < 0:
+ raise ValueError(f"Offset must be non-negative, got {offset}")
+ if limit < 0:
+ raise ValueError(f"Limit must be non-negative, got {limit}")
+
+ query = """
+ MATCH (start_node:Node)-[relationship:EDGE]->(end_node:Node)
+ RETURN {
+ start_node: {
+ id: start_node.id,
+ name: start_node.name,
+ type: start_node.type,
+ properties: start_node.properties
+ },
+ relationship_properties: {
+ relationship_name: relationship.relationship_name,
+ properties: relationship.properties
+ },
+ end_node: {
+ id: end_node.id,
+ name: end_node.name,
+ type: end_node.type,
+ properties: end_node.properties
+ }
+ } AS triplet
+ SKIP $offset LIMIT $limit
+ """
+
+ try:
+ results = await self.query(query, {"offset": offset, "limit": limit})
+ except Exception as e:
+ logger.error(f"Failed to execute triplet query: {str(e)}")
+ logger.error(f"Query: {query}")
+ logger.error(f"Parameters: offset={offset}, limit={limit}")
+ raise
+
+ triplets = []
+ for idx, row in enumerate(results):
+ try:
+ if not row or len(row) == 0:
+ logger.warning(f"Skipping empty row at index {idx} in triplet batch")
+ continue
+
+ if not isinstance(row[0], dict):
+ logger.warning(
+ f"Skipping invalid row at index {idx}: expected dict, got {type(row[0])}"
+ )
+ continue
+
+ triplet = row[0]
+
+ if "start_node" not in triplet:
+ logger.warning(f"Skipping triplet at index {idx}: missing 'start_node' key")
+ continue
+
+ if not isinstance(triplet["start_node"], dict):
+ logger.warning(f"Skipping triplet at index {idx}: 'start_node' is not a dict")
+ continue
+
+ triplet["start_node"] = self._parse_node_properties(triplet["start_node"].copy())
+
+ if "relationship_properties" not in triplet:
+ logger.warning(
+ f"Skipping triplet at index {idx}: missing 'relationship_properties' key"
+ )
+ continue
+
+ if not isinstance(triplet["relationship_properties"], dict):
+ logger.warning(
+ f"Skipping triplet at index {idx}: 'relationship_properties' is not a dict"
+ )
+ continue
+
+ rel_props = triplet["relationship_properties"].copy()
+ relationship_name = rel_props.get("relationship_name") or ""
+
+ if rel_props.get("properties"):
+ try:
+ parsed_props = json.loads(rel_props["properties"])
+ if isinstance(parsed_props, dict):
+ rel_props.update(parsed_props)
+ del rel_props["properties"]
+ else:
+ logger.warning(
+ f"Parsed relationship properties is not a dict for triplet at index {idx}"
+ )
+ except (json.JSONDecodeError, TypeError) as e:
+ logger.warning(
+ f"Failed to parse relationship properties JSON for triplet at index {idx}: {e}"
+ )
+
+ rel_props["relationship_name"] = relationship_name
+ triplet["relationship_properties"] = rel_props
+
+ if "end_node" not in triplet:
+ logger.warning(f"Skipping triplet at index {idx}: missing 'end_node' key")
+ continue
+
+ if not isinstance(triplet["end_node"], dict):
+ logger.warning(f"Skipping triplet at index {idx}: 'end_node' is not a dict")
+ continue
+
+ triplet["end_node"] = self._parse_node_properties(triplet["end_node"].copy())
+
+ triplets.append(triplet)
+
+ except Exception as e:
+ logger.error(f"Error processing triplet at index {idx}: {e}", exc_info=True)
+ continue
+
+ return triplets
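
Note: get_triplets_batch pages with offset/limit; a minimal iteration sketch assuming a ready adapter (the helper name and batch size are illustrative):

    async def iter_all_triplets(adapter, batch_size: int = 500):
        # Page through triplets until a short batch signals the end of the graph.
        offset = 0
        while True:
            batch = await adapter.get_triplets_batch(offset=offset, limit=batch_size)
            for triplet in batch:
                yield triplet
            if len(batch) < batch_size:
                return
            offset += batch_size

    # Usage: async for triplet in iter_all_triplets(adapter): ...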
@@ -0,0 +1,168 @@
+ import os
+ import asyncio
+ import requests
+ import base64
+ import hashlib
+ from uuid import UUID
+ from typing import Optional
+ from cryptography.fernet import Fernet
+
+ from cognee.infrastructure.databases.graph import get_graph_config
+ from cognee.modules.users.models import User, DatasetDatabase
+ from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
+
+
+ class Neo4jAuraDevDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
+ """
+ Handler for a quick development PoC integrating Cognee's multi-user and permission mode with Neo4j Aura databases.
+ This handler creates a new Neo4j Aura instance for each Cognee dataset created.
+
+ Improvements needed to be production-ready:
+ - Secret management for client credentials: currently secrets are encrypted and stored in the Cognee relational database;
+ a secret manager or a similar system should be used instead.
+
+ Quality of life improvements:
+ - Allow configuration of different Neo4j Aura plans and regions.
+ - Requests should be made async; currently the blocking requests library is used.
+ """
+
+ @classmethod
+ async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
+ """
+ Create a new Neo4j Aura instance for the dataset. Return connection info that will be mapped to the dataset.
+
+ Args:
+ dataset_id: Dataset UUID
+ user: User object who owns the dataset and is making the request
+
+ Returns:
+ dict: Connection details for the created Neo4j instance
+
+ """
+ graph_config = get_graph_config()
+
+ if graph_config.graph_database_provider != "neo4j":
+ raise ValueError(
+ "Neo4jAuraDevDatasetDatabaseHandler can only be used with the Neo4j graph database provider."
+ )
+
+ graph_db_name = f"{dataset_id}"
+
+ # Client credentials and encryption
+ client_id = os.environ.get("NEO4J_CLIENT_ID", None)
+ client_secret = os.environ.get("NEO4J_CLIENT_SECRET", None)
+ tenant_id = os.environ.get("NEO4J_TENANT_ID", None)
+ encryption_env_key = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
+ encryption_key = base64.urlsafe_b64encode(
+ hashlib.sha256(encryption_env_key.encode()).digest()
+ )
+ cipher = Fernet(encryption_key)
+
+ if client_id is None or client_secret is None or tenant_id is None:
+ raise ValueError(
+ "NEO4J_CLIENT_ID, NEO4J_CLIENT_SECRET, and NEO4J_TENANT_ID environment variables must be set to use Neo4j Aura DatasetDatabase handling."
+ )
+
+ # Make the request with HTTP Basic Auth
+ def get_aura_token(client_id: str, client_secret: str) -> dict:
+ url = "https://api.neo4j.io/oauth/token"
+ data = {"grant_type": "client_credentials"} # sent as application/x-www-form-urlencoded
+
+ resp = requests.post(url, data=data, auth=(client_id, client_secret))
+ resp.raise_for_status() # raises if the request failed
+ return resp.json()
+
+ resp = get_aura_token(client_id, client_secret)
+
+ url = "https://api.neo4j.io/v1/instances"
+
+ headers = {
+ "accept": "application/json",
+ "Authorization": f"Bearer {resp['access_token']}",
+ "Content-Type": "application/json",
+ }
+
+ # TODO: Maybe we can allow **kwargs parameter forwarding for cases like these
+ # To allow different configurations between datasets
+ payload = {
+ "version": "5",
+ "region": "europe-west1",
+ "memory": "1GB",
+ "name": graph_db_name[
+ 0:29
+ ], # TODO: Find a better name for the Neo4j instance within the 30 character limit
+ "type": "professional-db",
+ "tenant_id": tenant_id,
+ "cloud_provider": "gcp",
+ }
+
+ response = requests.post(url, headers=headers, json=payload)
+
+ graph_db_name = "neo4j" # Has to be 'neo4j' for Aura
+ graph_db_url = response.json()["data"]["connection_url"]
+ graph_db_key = resp["access_token"]
+ graph_db_username = response.json()["data"]["username"]
+ graph_db_password = response.json()["data"]["password"]
+
+ async def _wait_for_neo4j_instance_provisioning(instance_id: str, headers: dict):
+ # Poll until the instance is running
+ status_url = f"https://api.neo4j.io/v1/instances/{instance_id}"
+ status = ""
+ for attempt in range(30): # Try for up to ~5 minutes
+ status_resp = requests.get(
+ status_url, headers=headers
+ ) # TODO: Use async requests with httpx
+ status = status_resp.json()["data"]["status"]
+ if status.lower() == "running":
+ return
+ await asyncio.sleep(10)
+ raise TimeoutError(
+ f"Neo4j instance '{graph_db_name}' did not become ready within 5 minutes. Status: {status}"
+ )
+
+ instance_id = response.json()["data"]["id"]
+ await _wait_for_neo4j_instance_provisioning(instance_id, headers)
+
+ encrypted_db_password_bytes = cipher.encrypt(graph_db_password.encode())
+ encrypted_db_password_string = encrypted_db_password_bytes.decode()
+
+ return {
+ "graph_database_name": graph_db_name,
+ "graph_database_url": graph_db_url,
+ "graph_database_provider": "neo4j",
+ "graph_database_key": graph_db_key,
+ "graph_dataset_database_handler": "neo4j_aura_dev",
+ "graph_database_connection_info": {
+ "graph_database_username": graph_db_username,
+ "graph_database_password": encrypted_db_password_string,
+ },
+ }
+
+ @classmethod
+ async def resolve_dataset_connection_info(
+ cls, dataset_database: DatasetDatabase
+ ) -> DatasetDatabase:
+ """
+ Resolve and decrypt connection info for the Neo4j dataset database.
+ In this case, decrypt the password stored in the database.
+
+ Args:
+ dataset_database: DatasetDatabase instance containing encrypted connection info.
+ """
+ encryption_env_key = os.environ.get("NEO4J_ENCRYPTION_KEY", "test_key")
+ encryption_key = base64.urlsafe_b64encode(
+ hashlib.sha256(encryption_env_key.encode()).digest()
+ )
+ cipher = Fernet(encryption_key)
+ graph_db_password = cipher.decrypt(
+ dataset_database.graph_database_connection_info["graph_database_password"].encode()
+ ).decode()
+
+ dataset_database.graph_database_connection_info["graph_database_password"] = (
+ graph_db_password
+ )
+ return dataset_database
+
+ @classmethod
+ async def delete_dataset(cls, dataset_database: DatasetDatabase):
+ pass
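
Note: the handler's password handling reduces to deriving a Fernet key from NEO4J_ENCRYPTION_KEY via SHA-256 and round-tripping the Aura password; a self-contained sketch with illustrative values:

    import base64
    import hashlib
    from cryptography.fernet import Fernet

    # Same derivation as the handler: SHA-256 of the env value, urlsafe-base64 encoded.
    key = base64.urlsafe_b64encode(hashlib.sha256("test_key".encode()).digest())
    cipher = Fernet(key)

    token = cipher.encrypt("example-password".encode()).decode()  # stored on the DatasetDatabase row
    assert cipher.decrypt(token.encode()).decode() == "example-password"  # resolve step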
@@ -8,7 +8,7 @@ from neo4j import AsyncSession
  from neo4j import AsyncGraphDatabase
  from neo4j.exceptions import Neo4jError
  from contextlib import asynccontextmanager
- from typing import Optional, Any, List, Dict, Type, Tuple
+ from typing import Optional, Any, List, Dict, Type, Tuple, Coroutine
 
  from cognee.infrastructure.engine import DataPoint
  from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
@@ -964,6 +964,63 @@ class Neo4jAdapter(GraphDBInterface):
  logger.error(f"Error during graph data retrieval: {str(e)}")
  raise
 
+ async def get_id_filtered_graph_data(self, target_ids: list[str]):
+ """
+ Retrieve graph data filtered by specific node IDs, including their direct neighbors
+ and only edges where one endpoint matches those IDs.
+
+ This version uses a single Cypher query for efficiency.
+ """
+ import time
+
+ start_time = time.time()
+
+ try:
+ if not target_ids:
+ logger.warning("No target IDs provided for ID-filtered graph retrieval.")
+ return [], []
+
+ query = """
+ MATCH ()-[r]-()
+ WHERE startNode(r).id IN $target_ids
+ OR endNode(r).id IN $target_ids
+ WITH DISTINCT r, startNode(r) AS a, endNode(r) AS b
+ RETURN
+ properties(a) AS n_properties,
+ properties(b) AS m_properties,
+ type(r) AS type,
+ properties(r) AS properties
+ """
+
+ result = await self.query(query, {"target_ids": target_ids})
+
+ nodes_dict = {}
+ edges = []
+
+ for record in result:
+ n_props = record["n_properties"]
+ m_props = record["m_properties"]
+ r_props = record["properties"]
+ r_type = record["type"]
+
+ nodes_dict[n_props["id"]] = (n_props["id"], n_props)
+ nodes_dict[m_props["id"]] = (m_props["id"], m_props)
+
+ source_id = r_props.get("source_node_id", n_props["id"])
+ target_id = r_props.get("target_node_id", m_props["id"])
+ edges.append((source_id, target_id, r_type, r_props))
+
+ retrieval_time = time.time() - start_time
+ logger.info(
+ f"ID-filtered retrieval: {len(nodes_dict)} nodes and {len(edges)} edges in {retrieval_time:.2f}s"
+ )
+
+ return list(nodes_dict.values()), edges
+
+ except Exception as e:
+ logger.error(f"Error during ID-filtered graph data retrieval: {str(e)}")
+ raise
+
  async def get_nodeset_subgraph(
  self, node_type: Type[Any], node_name: List[str]
  ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
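
Note: in both adapters, edge endpoints fall back to the matched node IDs when the relationship properties carry no explicit source/target; the fallback in isolation (values illustrative):

    r_props = {"weight": 0.7}  # no source_node_id / target_node_id keys present
    n_id, m_id = "node-a", "node-b"
    source_id = r_props.get("source_node_id", n_id)
    target_id = r_props.get("target_node_id", m_id)
    assert (source_id, target_id) == ("node-a", "node-b")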
@@ -1470,3 +1527,25 @@ class Neo4jAdapter(GraphDBInterface):
  time_ids_list = [item["id"] for item in time_nodes if "id" in item]
 
  return ", ".join(f"'{uid}'" for uid in time_ids_list)
+
+ async def get_triplets_batch(self, offset: int, limit: int) -> list[dict[str, Any]]:
+ """
+ Retrieve a batch of triplets (start_node, relationship, end_node) from the graph.
+
+ Parameters:
+ -----------
+ - offset (int): Number of triplets to skip before returning results.
+ - limit (int): Maximum number of triplets to return.
+
+ Returns:
+ --------
+ - list[dict[str, Any]]: A list of triplets.
+ """
+ query = f"""
+ MATCH (start_node:`{BASE_LABEL}`)-[relationship]->(end_node:`{BASE_LABEL}`)
+ RETURN start_node, properties(relationship) AS relationship_properties, end_node
+ SKIP $offset LIMIT $limit
+ """
+ results = await self.query(query, {"offset": offset, "limit": limit})
+
+ return results
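
Note: unlike the Kuzu variant, this get_triplets_batch returns raw query records without per-row validation; a hypothetical consumer unpacks them directly (the record access pattern is an assumption about the adapter's query results):

    async def first_triplet(adapter):
        rows = await adapter.get_triplets_batch(offset=0, limit=1)
        if not rows:
            return None
        row = rows[0]
        # Each record carries the start node, relationship properties, and end node.
        return row["start_node"], row["relationship_properties"], row["end_node"]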
@@ -1 +1,4 @@
  from .get_or_create_dataset_database import get_or_create_dataset_database
+ from .resolve_dataset_database_connection_info import resolve_dataset_database_connection_info
+ from .get_graph_dataset_database_handler import get_graph_dataset_database_handler
+ from .get_vector_dataset_database_handler import get_vector_dataset_database_handler
@@ -0,0 +1,10 @@
+ from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+ def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
+ from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+ supported_dataset_database_handlers,
+ )
+
+ handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
+ return handler
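
Note: the registry lookup assumes each supported_dataset_database_handlers entry exposes a handler implementing DatasetDatabaseHandlerInterface; the module itself is not shown in this diff, but the call sites (handler["handler_instance"].create_dataset(...)) suggest roughly this shape, which is an inference, not the definitive registry:

    from cognee.infrastructure.databases.graph.neo4j_driver.Neo4jAuraDevDatasetDatabaseHandler import (
        Neo4jAuraDevDatasetDatabaseHandler,
    )

    # Assumed entry shape, inferred from call sites in this diff.
    supported_dataset_database_handlers = {
        "neo4j_aura_dev": {
            "handler_instance": Neo4jAuraDevDatasetDatabaseHandler,  # classmethod-based handler
        },
        # further graph/vector handlers keyed by their config names
    }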
@@ -1,11 +1,9 @@
- import os
  from uuid import UUID
- from typing import Union
+ from typing import Union, Optional
 
  from sqlalchemy import select
  from sqlalchemy.exc import IntegrityError
 
- from cognee.base_config import get_base_config
  from cognee.modules.data.methods import create_dataset
  from cognee.infrastructure.databases.relational import get_relational_engine
  from cognee.infrastructure.databases.vector import get_vectordb_config
@@ -15,6 +13,53 @@ from cognee.modules.users.models import DatasetDatabase
  from cognee.modules.users.models import User
 
 
+ async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict:
+ vector_config = get_vectordb_config()
+
+ from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+ supported_dataset_database_handlers,
+ )
+
+ handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler]
+ return await handler["handler_instance"].create_dataset(dataset_id, user)
+
+
+ async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
+ graph_config = get_graph_config()
+
+ from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+ supported_dataset_database_handlers,
+ )
+
+ handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler]
+ return await handler["handler_instance"].create_dataset(dataset_id, user)
+
+
+ async def _existing_dataset_database(
+ dataset_id: UUID,
+ user: User,
+ ) -> Optional[DatasetDatabase]:
+ """
+ Check if a DatasetDatabase row already exists for the given owner + dataset.
+ Return the row if it does, None otherwise.
+
+ Args:
+ dataset_id: Dataset UUID.
+ user: Owner of the dataset.
+
+ Returns:
+ DatasetDatabase or None
+ """
+ db_engine = get_relational_engine()
+
+ async with db_engine.get_async_session() as session:
+ stmt = select(DatasetDatabase).where(
+ DatasetDatabase.owner_id == user.id,
+ DatasetDatabase.dataset_id == dataset_id,
+ )
+ existing: DatasetDatabase = await session.scalar(stmt)
+ return existing
+
+
  async def get_or_create_dataset_database(
  dataset: Union[str, UUID],
  user: User,
@@ -25,6 +70,8 @@ async def get_or_create_dataset_database(
  • If the row already exists, it is fetched and returned.
  • Otherwise a new one is created atomically and returned.
 
+ The DatasetDatabase row contains connection and provider info for the vector and graph databases.
+
  Parameters
  ----------
  user : User
@@ -36,59 +83,26 @@ async def get_or_create_dataset_database(
 
  dataset_id = await get_unique_dataset_id(dataset, user)
 
- vector_config = get_vectordb_config()
- graph_config = get_graph_config()
+ # If the dataset is given as a name, make sure it is created first
+ if isinstance(dataset, str):
+ async with db_engine.get_async_session() as session:
+ await create_dataset(dataset, user, session)
 
- # Note: for hybrid databases both graph and vector DB name have to be the same
- if graph_config.graph_database_provider == "kuzu":
- graph_db_name = f"{dataset_id}.pkl"
- else:
- graph_db_name = f"{dataset_id}"
+ # If the dataset database already exists, return it
+ existing_dataset_database = await _existing_dataset_database(dataset_id, user)
+ if existing_dataset_database:
+ return existing_dataset_database
 
- if vector_config.vector_db_provider == "lancedb":
- vector_db_name = f"{dataset_id}.lance.db"
- else:
- vector_db_name = f"{dataset_id}"
-
- base_config = get_base_config()
- databases_directory_path = os.path.join(
- base_config.system_root_directory, "databases", str(user.id)
- )
-
- # Determine vector database URL
- if vector_config.vector_db_provider == "lancedb":
- vector_db_url = os.path.join(databases_directory_path, vector_config.vector_db_name)
- else:
- vector_db_url = vector_config.vector_database_url
-
- # Determine graph database URL
+ graph_config_dict = await _get_graph_db_info(dataset_id, user)
+ vector_config_dict = await _get_vector_db_info(dataset_id, user)
 
  async with db_engine.get_async_session() as session:
- # Create dataset if it doesn't exist
- if isinstance(dataset, str):
- dataset = await create_dataset(dataset, user, session)
-
- # Try to fetch an existing row first
- stmt = select(DatasetDatabase).where(
- DatasetDatabase.owner_id == user.id,
- DatasetDatabase.dataset_id == dataset_id,
- )
- existing: DatasetDatabase = await session.scalar(stmt)
- if existing:
- return existing
-
  # If there are no existing rows build a new row
  record = DatasetDatabase(
  owner_id=user.id,
  dataset_id=dataset_id,
- vector_database_name=vector_db_name,
- graph_database_name=graph_db_name,
- vector_database_provider=vector_config.vector_db_provider,
- graph_database_provider=graph_config.graph_database_provider,
- vector_database_url=vector_db_url,
- graph_database_url=graph_config.graph_database_url,
- vector_database_key=vector_config.vector_db_key,
- graph_database_key=graph_config.graph_database_key,
+ **graph_config_dict, # Unpack graph db config
+ **vector_config_dict, # Unpack vector db config
  )
 
  try: