cognee 0.3.4.dev4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. cognee/api/client.py +16 -7
  2. cognee/api/health.py +5 -9
  3. cognee/api/v1/add/add.py +3 -1
  4. cognee/api/v1/cognify/cognify.py +44 -7
  5. cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
  6. cognee/api/v1/search/search.py +3 -0
  7. cognee/api/v1/ui/__init__.py +1 -1
  8. cognee/api/v1/ui/ui.py +215 -150
  9. cognee/api/v1/update/__init__.py +1 -0
  10. cognee/api/v1/update/routers/__init__.py +1 -0
  11. cognee/api/v1/update/routers/get_update_router.py +90 -0
  12. cognee/api/v1/update/update.py +100 -0
  13. cognee/base_config.py +5 -2
  14. cognee/cli/_cognee.py +28 -10
  15. cognee/cli/commands/delete_command.py +34 -2
  16. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  17. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
  18. cognee/eval_framework/modal_eval_dashboard.py +9 -1
  19. cognee/infrastructure/databases/graph/config.py +9 -9
  20. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
  21. cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
  22. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
  23. cognee/infrastructure/databases/relational/config.py +4 -4
  24. cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
  25. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
  26. cognee/infrastructure/databases/vector/config.py +7 -7
  27. cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
  28. cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
  32. cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
  33. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
  34. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
  35. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
  36. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
  37. cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
  38. cognee/infrastructure/files/storage/s3_config.py +1 -0
  39. cognee/infrastructure/files/utils/open_data_file.py +7 -14
  40. cognee/infrastructure/llm/LLMGateway.py +19 -117
  41. cognee/infrastructure/llm/config.py +28 -13
  42. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
  43. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
  44. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
  45. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
  46. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
  47. cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
  48. cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
  49. cognee/infrastructure/llm/prompts/test.txt +1 -0
  50. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
  51. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
  52. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
  53. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
  54. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
  55. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
  71. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
  72. cognee/infrastructure/llm/utils.py +4 -4
  73. cognee/infrastructure/loaders/LoaderEngine.py +5 -2
  74. cognee/infrastructure/loaders/external/__init__.py +7 -0
  75. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
  76. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  77. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  78. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  79. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  80. cognee/modules/data/methods/get_deletion_counts.py +92 -0
  81. cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
  82. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  83. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  84. cognee/modules/ingestion/data_types/TextData.py +0 -1
  85. cognee/modules/observability/get_observe.py +14 -0
  86. cognee/modules/observability/observers.py +1 -0
  87. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  88. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  89. cognee/modules/ontology/matching_strategies.py +53 -0
  90. cognee/modules/ontology/models.py +20 -0
  91. cognee/modules/ontology/ontology_config.py +24 -0
  92. cognee/modules/ontology/ontology_env_config.py +45 -0
  93. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
  96. cognee/modules/retrieval/code_retriever.py +2 -1
  97. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
  98. cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
  99. cognee/modules/retrieval/graph_completion_retriever.py +0 -3
  100. cognee/modules/retrieval/insights_retriever.py +1 -1
  101. cognee/modules/retrieval/jaccard_retrival.py +60 -0
  102. cognee/modules/retrieval/lexical_retriever.py +123 -0
  103. cognee/modules/retrieval/natural_language_retriever.py +2 -1
  104. cognee/modules/retrieval/temporal_retriever.py +3 -2
  105. cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
  106. cognee/modules/retrieval/utils/completion.py +4 -7
  107. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  108. cognee/modules/search/methods/no_access_control_search.py +1 -1
  109. cognee/modules/search/methods/search.py +32 -13
  110. cognee/modules/search/types/SearchType.py +1 -0
  111. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  112. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  113. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
  114. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  115. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  116. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  117. cognee/modules/users/permissions/methods/get_role.py +10 -0
  118. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  119. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  120. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  121. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  122. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  123. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  124. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  125. cognee/modules/users/roles/methods/create_role.py +12 -1
  126. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  127. cognee/modules/users/tenants/methods/create_tenant.py +12 -1
  128. cognee/modules/visualization/cognee_network_visualization.py +13 -9
  129. cognee/shared/data_models.py +0 -1
  130. cognee/shared/utils.py +0 -32
  131. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  132. cognee/tasks/codingagents/coding_rule_associations.py +3 -2
  133. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
  134. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
  135. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
  136. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
  137. cognee/tasks/graph/extract_graph_from_code.py +2 -2
  138. cognee/tasks/graph/extract_graph_from_data.py +55 -12
  139. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  140. cognee/tasks/ingestion/migrate_relational_database.py +132 -41
  141. cognee/tasks/ingestion/resolve_data_directories.py +4 -1
  142. cognee/tasks/schema/ingest_database_schema.py +134 -0
  143. cognee/tasks/schema/models.py +40 -0
  144. cognee/tasks/storage/index_data_points.py +1 -1
  145. cognee/tasks/storage/index_graph_edges.py +3 -1
  146. cognee/tasks/summarization/summarize_code.py +2 -2
  147. cognee/tasks/summarization/summarize_text.py +2 -2
  148. cognee/tasks/temporal_graph/enrich_events.py +2 -2
  149. cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
  150. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
  151. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
  152. cognee/tests/test_advanced_pdf_loader.py +141 -0
  153. cognee/tests/test_chromadb.py +40 -0
  154. cognee/tests/test_cognee_server_start.py +6 -1
  155. cognee/tests/test_data/Quantum_computers.txt +9 -0
  156. cognee/tests/test_lancedb.py +211 -0
  157. cognee/tests/test_pgvector.py +40 -0
  158. cognee/tests/test_relational_db_migration.py +76 -0
  159. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
  160. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  161. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
  162. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
  164. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/METADATA +92 -96
  165. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/RECORD +172 -160
  166. cognee/infrastructure/data/utils/extract_keywords.py +0 -48
  167. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
  168. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
  169. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
  170. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
  171. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
  172. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
  173. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
  174. cognee/tasks/graph/infer_data_ontology.py +0 -309
  175. cognee/tests/test_falkordb.py +0 -174
  176. distributed/poetry.lock +0 -12238
  177. distributed/pyproject.toml +0 -186
  178. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
  179. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
  180. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
  181. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/WHEEL +0 -0
  182. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/entry_points.txt +0 -0
  183. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/LICENSE +0 -0
  184. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py (deleted)
@@ -1,1227 +0,0 @@
- import asyncio
-
- # from datetime import datetime
- import json
- from textwrap import dedent
- from uuid import UUID
- from webbrowser import Error
- from typing import List, Dict, Any, Optional, Tuple, Type, Union
-
- from falkordb import FalkorDB
-
- from cognee.infrastructure.databases.exceptions import MissingQueryParameterError
- from cognee.infrastructure.databases.graph.graph_db_interface import (
-     GraphDBInterface,
-     record_graph_changes,
-     NodeData,
-     EdgeData,
-     Node,
- )
- from cognee.infrastructure.databases.vector.embeddings import EmbeddingEngine
- from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
- from cognee.infrastructure.engine import DataPoint
-
-
- class IndexSchema(DataPoint):
-     """
-     Define a schema for indexing that includes text data and associated metadata.
-
-     This class inherits from the DataPoint class. It contains a string attribute 'text' and
-     a dictionary 'metadata' that specifies the index fields for this schema.
-     """
-
-     text: str
-
-     metadata: dict = {"index_fields": ["text"]}
-
-
- class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
-     """
-     Manage and interact with a graph database using vector embeddings.
-
-     Public methods include:
-     - query
-     - embed_data
-     - stringify_properties
-     - create_data_point_query
-     - create_edge_query
-     - create_collection
-     - has_collection
-     - create_data_points
-     - create_vector_index
-     - has_vector_index
-     - index_data_points
-     - add_node
-     - add_nodes
-     - add_edge
-     - add_edges
-     - has_edges
-     - retrieve
-     - extract_node
-     - extract_nodes
-     - get_connections
-     - search
-     - batch_search
-     - get_graph_data
-     - delete_data_points
-     - delete_node
-     - delete_nodes
-     - delete_graph
-     - prune
-     - get_node
-     - get_nodes
-     - get_neighbors
-     - get_graph_metrics
-     - get_document_subgraph
-     - get_degree_one_nodes
-     """
-
-     def __init__(
-         self,
-         database_url: str,
-         database_port: int,
-         embedding_engine=EmbeddingEngine,
-     ):
-         self.driver = FalkorDB(
-             host=database_url,
-             port=database_port,
-         )
-         self.embedding_engine = embedding_engine
-         self.graph_name = "cognee_graph"
-
-     def query(self, query: str, params: dict = {}):
-         """
-         Execute a query against the graph database.
-
-         Handles exceptions during the query execution by logging errors and re-raising the
-         exception.
-
-         The method can be called only if a valid query string and parameters are provided.
-
-         Parameters:
-         -----------
-
-         - query (str): The query string to be executed against the graph database.
-         - params (dict): A dictionary of parameters to be used in the query. (default {})
-
-         Returns:
-         --------
-
-         The result of the query execution, returned by the graph database.
-         """
-         graph = self.driver.select_graph(self.graph_name)
-
-         try:
-             result = graph.query(query, params)
-             return result
-         except Exception as e:
-             print(f"Error executing query: {e}")
-             raise e
-
-     async def embed_data(self, data: list[str]) -> list[list[float]]:
-         """
-         Embed a list of text data into vector representations using the embedding engine.
-
-         Parameters:
-         -----------
-
-         - data (list[str]): A list of strings that should be embedded into vectors.
-
-         Returns:
-         --------
-
-         - list[list[float]]: A list of lists, where each inner list contains float values
-           representing the embedded vectors.
-         """
-         return await self.embedding_engine.embed_text(data)
-
-     async def stringify_properties(self, properties: dict) -> str:
-         """
-         Convert properties dictionary to a string format suitable for database queries.
-
-         Parameters:
-         -----------
-
-         - properties (dict): A dictionary containing properties to be converted to string
-           format.
-
-         Returns:
-         --------
-
-         - str: A string representation of the properties in the appropriate format.
-         """
-
-         def parse_value(value):
-             """
-             Convert a value to its string representation based on type for database queries.
-
-             Parameters:
-             -----------
-
-             - value: The value to parse into a string representation.
-
-             Returns:
-             --------
-
-             Returns the string representation of the value in the appropriate format.
-             """
-             if type(value) is UUID:
-                 return f"'{str(value)}'"
-             if type(value) is int or type(value) is float:
-                 return value
-             if (
-                 type(value) is list
-                 and len(value) > 0
-                 and type(value[0]) is float
-                 and len(value) == self.embedding_engine.get_vector_size()
-             ):
-                 return f"'vecf32({value})'"
-             # if type(value) is datetime:
-             #     return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%f%z")
-             if type(value) is dict:
-                 return f"'{json.dumps(value).replace(chr(39), chr(34))}'"
-             if type(value) is str:
-                 # Escape single quotes and handle special characters
-                 escaped_value = (
-                     str(value)
-                     .replace("'", "\\'")
-                     .replace('"', '\\"')
-                     .replace("\n", "\\n")
-                     .replace("\r", "\\r")
-                     .replace("\t", "\\t")
-                 )
-                 return f"'{escaped_value}'"
-             return f"'{str(value)}'"
-
-         return ",".join([f"{key}:{parse_value(value)}" for key, value in properties.items()])
-
-     async def create_data_point_query(self, data_point: DataPoint, vectorized_values: dict):
-         """
-         Compose a query to create or update a data point in the database.
-
-         Parameters:
-         -----------
-
-         - data_point (DataPoint): An instance of DataPoint containing information about the
-           entity.
-         - vectorized_values (dict): A dictionary of vectorized values related to the data
-           point.
-
-         Returns:
-         --------
-
-         A tuple containing the query string and parameters dictionary.
-         """
-         node_label = type(data_point).__name__
-         property_names = DataPoint.get_embeddable_property_names(data_point)
-
-         properties = {
-             **data_point.model_dump(),
-             **(
-                 {
-                     property_names[index]: (
-                         vectorized_values[index]
-                         if index < len(vectorized_values)
-                         else getattr(data_point, property_name, None)
-                     )
-                     for index, property_name in enumerate(property_names)
-                 }
-             ),
-         }
-
-         # Clean the properties - remove None values and handle special types
-         clean_properties = {}
-         for key, value in properties.items():
-             if value is not None:
-                 if isinstance(value, UUID):
-                     clean_properties[key] = str(value)
-                 elif isinstance(value, dict):
-                     clean_properties[key] = json.dumps(value)
-                 elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], float):
-                     # This is likely a vector - convert to string representation
-                     clean_properties[key] = f"vecf32({value})"
-                 else:
-                     clean_properties[key] = value
-
-         query = dedent(
-             f"""
-             MERGE (node:{node_label} {{id: $node_id}})
-             SET node += $properties, node.updated_at = timestamp()
-             """
-         ).strip()
-
-         params = {"node_id": str(data_point.id), "properties": clean_properties}
-
-         return query, params
-
-     def sanitize_relationship_name(self, relationship_name: str) -> str:
-         """
-         Sanitize relationship name to be valid for Cypher queries.
-
-         Parameters:
-         -----------
-         - relationship_name (str): The original relationship name
-
-         Returns:
-         --------
-         - str: A sanitized relationship name valid for Cypher
-         """
-         # Replace hyphens, spaces, and other special characters with underscores
-         import re
-
-         sanitized = re.sub(r"[^\w]", "_", relationship_name)
-         # Remove consecutive underscores
-         sanitized = re.sub(r"_+", "_", sanitized)
-         # Remove leading/trailing underscores
-         sanitized = sanitized.strip("_")
-         # Ensure it starts with a letter or underscore
-         if sanitized and not sanitized[0].isalpha() and sanitized[0] != "_":
-             sanitized = "_" + sanitized
-         return sanitized or "RELATIONSHIP"
-
-     async def create_edge_query(self, edge: tuple[str, str, str, dict]) -> str:
-         """
-         Generate a query to create or update an edge between two nodes in the graph.
-
-         Parameters:
-         -----------
-
-         - edge (tuple[str, str, str, dict]): A tuple consisting of source and target node
-           IDs, edge type, and edge properties.
-
-         Returns:
-         --------
-
-         - str: A string containing the query to be executed for creating the edge.
-         """
-         # Sanitize the relationship name for Cypher compatibility
-         sanitized_relationship = self.sanitize_relationship_name(edge[2])
-
-         # Add the original relationship name to properties
-         edge_properties = {**edge[3], "relationship_name": edge[2]}
-         properties = await self.stringify_properties(edge_properties)
-         properties = f"{{{properties}}}"
-
-         return dedent(
-             f"""
-             MERGE (source {{id:'{edge[0]}'}})
-             MERGE (target {{id: '{edge[1]}'}})
-             MERGE (source)-[edge:{sanitized_relationship} {properties}]->(target)
-             ON MATCH SET edge.updated_at = timestamp()
-             ON CREATE SET edge.updated_at = timestamp()
-             """
-         ).strip()
-
-     async def create_collection(self, collection_name: str):
-         """
-         Create a collection in the graph database with the specified name.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection to be created.
-         """
-         pass
-
-     async def has_collection(self, collection_name: str) -> bool:
-         """
-         Check if a collection with the specified name exists in the graph database.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection to check for existence.
-
-         Returns:
-         --------
-
-         - bool: Returns true if the collection exists, otherwise false.
-         """
-         collections = self.driver.list_graphs()
-
-         return collection_name in collections
-
-     async def create_data_points(self, data_points: list[DataPoint]):
-         """
-         Add a list of data points to the graph database via batching.
-
-         Can raise exceptions if there are issues during the database operations.
-
-         Parameters:
-         -----------
-
-         - data_points (list[DataPoint]): A list of DataPoint instances to be inserted into
-           the database.
-         """
-         embeddable_values = []
-         vector_map = {}
-
-         for data_point in data_points:
-             property_names = DataPoint.get_embeddable_property_names(data_point)
-             key = str(data_point.id)
-             vector_map[key] = {}
-
-             for property_name in property_names:
-                 property_value = getattr(data_point, property_name, None)
-
-                 if property_value is not None:
-                     vector_map[key][property_name] = len(embeddable_values)
-                     embeddable_values.append(property_value)
-                 else:
-                     vector_map[key][property_name] = None
-
-         vectorized_values = await self.embed_data(embeddable_values)
-
-         for data_point in data_points:
-             vectorized_data = [
-                 vectorized_values[vector_map[str(data_point.id)][property_name]]
-                 if vector_map[str(data_point.id)][property_name] is not None
-                 else None
-                 for property_name in DataPoint.get_embeddable_property_names(data_point)
-             ]
-
-             query, params = await self.create_data_point_query(data_point, vectorized_data)
-             self.query(query, params)
-
-     async def create_vector_index(self, index_name: str, index_property_name: str):
-         """
-         Create a vector index in the specified graph for a given property if it does not already
-         exist.
-
-         Parameters:
-         -----------
-
-         - index_name (str): The name of the vector index to be created.
-         - index_property_name (str): The name of the property on which the vector index will
-           be created.
-         """
-         graph = self.driver.select_graph(self.graph_name)
-
-         if not self.has_vector_index(graph, index_name, index_property_name):
-             graph.create_node_vector_index(
-                 index_name, index_property_name, dim=self.embedding_engine.get_vector_size()
-             )
-
-     def has_vector_index(self, graph, index_name: str, index_property_name: str) -> bool:
-         """
-         Determine if a vector index exists on the specified property of the given graph.
-
-         Parameters:
-         -----------
-
-         - graph: The graph instance to check for the vector index.
-         - index_name (str): The name of the index to check for existence.
-         - index_property_name (str): The property name associated with the index.
-
-         Returns:
-         --------
-
-         - bool: Returns true if the vector index exists, otherwise false.
-         """
-         try:
-             indices = graph.list_indices()
-
-             return any(
-                 [
-                     (index[0] == index_name and index_property_name in index[1])
-                     for index in indices.result_set
-                 ]
-             )
-         except Error as e:
-             print(e)
-             return False
-
-     async def index_data_points(
-         self, index_name: str, index_property_name: str, data_points: list[DataPoint]
-     ):
-         """
-         Index a list of data points in the specified graph database based on properties.
-
-         To be implemented: does not yet have a defined behavior.
-
-         Parameters:
-         -----------
-
-         - index_name (str): The name of the index to be created for the data points.
-         - index_property_name (str): The property name on which to index the data points.
-         - data_points (list[DataPoint]): A list of DataPoint instances to be indexed.
-         """
-         pass
-
-     async def add_node(self, node_id: str, properties: Dict[str, Any]) -> None:
-         """
-         Add a single node with specified properties to the graph.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier for the node being added.
-         - properties (Dict[str, Any]): A dictionary of properties associated with the node.
-         """
-         # Clean the properties - remove None values and handle special types
-         clean_properties = {"id": node_id}
-         for key, value in properties.items():
-             if value is not None:
-                 if isinstance(value, UUID):
-                     clean_properties[key] = str(value)
-                 elif isinstance(value, dict):
-                     clean_properties[key] = json.dumps(value)
-                 elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], float):
-                     # This is likely a vector - convert to string representation
-                     clean_properties[key] = f"vecf32({value})"
-                 else:
-                     clean_properties[key] = value
-
-         query = "MERGE (node {id: $node_id}) SET node += $properties, node.updated_at = timestamp()"
-         params = {"node_id": node_id, "properties": clean_properties}
-
-         self.query(query, params)
-
-     # Helper methods for DataPoint compatibility
-     async def add_data_point_node(self, node: DataPoint):
-         """
-         Add a single data point as a node in the graph.
-
-         Parameters:
-         -----------
-
-         - node (DataPoint): An instance of DataPoint to be added to the graph.
-         """
-         await self.create_data_points([node])
-
-     async def add_data_point_nodes(self, nodes: list[DataPoint]):
-         """
-         Add multiple data points as nodes in the graph.
-
-         Parameters:
-         -----------
-
-         - nodes (list[DataPoint]): A list of DataPoint instances to be added to the graph.
-         """
-         await self.create_data_points(nodes)
-
-     @record_graph_changes
-     async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
-         """
-         Add multiple nodes to the graph in a single operation.
-
-         Parameters:
-         -----------
-
-         - nodes (Union[List[Node], List[DataPoint]]): A list of Node tuples or DataPoint objects to be added to the graph.
-         """
-         for node in nodes:
-             if isinstance(node, tuple) and len(node) == 2:
-                 # Node is in (node_id, properties) format
-                 node_id, properties = node
-                 await self.add_node(node_id, properties)
-             elif hasattr(node, "id") and hasattr(node, "model_dump"):
-                 # Node is a DataPoint object
-                 await self.add_node(str(node.id), node.model_dump())
-             else:
-                 raise ValueError(
-                     f"Invalid node format: {node}. Expected tuple (node_id, properties) or DataPoint object."
-                 )
-
-     async def add_edge(
-         self,
-         source_id: str,
-         target_id: str,
-         relationship_name: str,
-         properties: Optional[Dict[str, Any]] = None,
-     ) -> None:
-         """
-         Create a new edge between two nodes in the graph.
-
-         Parameters:
-         -----------
-
-         - source_id (str): The unique identifier of the source node.
-         - target_id (str): The unique identifier of the target node.
-         - relationship_name (str): The name of the relationship to be established by the
-           edge.
-         - properties (Optional[Dict[str, Any]]): Optional dictionary of properties
-           associated with the edge. (default None)
-         """
-         if properties is None:
-             properties = {}
-
-         edge_tuple = (source_id, target_id, relationship_name, properties)
-         query = await self.create_edge_query(edge_tuple)
-         self.query(query)
-
-     @record_graph_changes
-     async def add_edges(self, edges: List[EdgeData]) -> None:
-         """
-         Add multiple edges to the graph in a single operation.
-
-         Parameters:
-         -----------
-
-         - edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
-         """
-         for edge in edges:
-             if isinstance(edge, tuple) and len(edge) == 4:
-                 # Edge is in (source_id, target_id, relationship_name, properties) format
-                 source_id, target_id, relationship_name, properties = edge
-                 await self.add_edge(source_id, target_id, relationship_name, properties)
-             else:
-                 raise ValueError(
-                     f"Invalid edge format: {edge}. Expected tuple (source_id, target_id, relationship_name, properties)."
-                 )
-
-     async def has_edges(self, edges):
-         """
-         Check if the specified edges exist in the graph based on their attributes.
-
-         Parameters:
-         -----------
-
-         - edges: A list of edges to check for existence in the graph.
-
-         Returns:
-         --------
-
-         Returns a list of edge tuples that exist in the graph.
-         """
-         existing_edges = []
-         for edge in edges:
-             exists = await self.has_edge(str(edge[0]), str(edge[1]), edge[2])
-             if exists:
-                 existing_edges.append(edge)
-         return existing_edges
-
-     async def retrieve(self, data_point_ids: list[UUID]):
-         """
-         Retrieve data points from the graph based on their IDs.
-
-         Parameters:
-         -----------
-
-         - data_point_ids (list[UUID]): A list of UUIDs representing the data points to
-           retrieve.
-
-         Returns:
-         --------
-
-         Returns the result set containing the retrieved nodes or an empty list if not found.
-         """
-         result = self.query(
-             "MATCH (node) WHERE node.id IN $node_ids RETURN node",
-             {
-                 "node_ids": [str(data_point) for data_point in data_point_ids],
-             },
-         )
-         return result.result_set
-
-     async def extract_node(self, data_point_id: UUID):
-         """
-         Extract the properties of a single node identified by its data point ID.
-
-         Parameters:
-         -----------
-
-         - data_point_id (UUID): The UUID of the data point to extract.
-
-         Returns:
-         --------
-
-         Returns the properties of the node if found, otherwise None.
-         """
-         result = await self.retrieve([data_point_id])
-         result = result[0][0] if len(result[0]) > 0 else None
-         return result.properties if result else None
-
-     async def extract_nodes(self, data_point_ids: list[UUID]):
-         """
-         Extract properties of multiple nodes identified by their data point IDs.
-
-         Parameters:
-         -----------
-
-         - data_point_ids (list[UUID]): A list of UUIDs representing the data points to
-           extract.
-
-         Returns:
-         --------
-
-         Returns the properties of the nodes in a list.
-         """
-         return await self.retrieve(data_point_ids)
-
-     async def get_connections(self, node_id: UUID) -> list:
-         """
-         Retrieve connection details (predecessors and successors) for a given node ID.
-
-         Parameters:
-         -----------
-
-         - node_id (UUID): The UUID of the node whose connections are to be retrieved.
-
-         Returns:
-         --------
-
-         - list: Returns a list of tuples representing the connections of the node.
-         """
-         predecessors_query = """
-         MATCH (node)<-[relation]-(neighbour)
-         WHERE node.id = $node_id
-         RETURN neighbour, relation, node
-         """
-         successors_query = """
-         MATCH (node)-[relation]->(neighbour)
-         WHERE node.id = $node_id
-         RETURN node, relation, neighbour
-         """
-
-         predecessors, successors = await asyncio.gather(
-             self.query(predecessors_query, dict(node_id=node_id)),
-             self.query(successors_query, dict(node_id=node_id)),
-         )
-
-         connections = []
-
-         for neighbour in predecessors:
-             neighbour = neighbour["relation"]
-             connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2]))
-
-         for neighbour in successors:
-             neighbour = neighbour["relation"]
-             connections.append((neighbour[0], {"relationship_name": neighbour[1]}, neighbour[2]))
-
-         return connections
-
-     async def search(
-         self,
-         collection_name: str,
-         query_text: str = None,
-         query_vector: list[float] = None,
-         limit: int = 10,
-         with_vector: bool = False,
-     ):
-         """
-         Search for nodes in a collection based on text or vector query, with optional limitation
-         on results.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection in which to search.
-         - query_text (str): The text to search for (if using text-based query). (default
-           None)
-         - query_vector (list[float]): The vector representation of the query if using
-           vector-based search. (default None)
-         - limit (int): Maximum number of results to return from the search. (default 10)
-         - with_vector (bool): Flag indicating whether to return vectors with the search
-           results. (default False)
-
-         Returns:
-         --------
-
-         Returns the search results as a result set from the graph database.
-         """
-         if query_text is None and query_vector is None:
-             raise MissingQueryParameterError()
-
-         if query_text and not query_vector:
-             query_vector = (await self.embed_data([query_text]))[0]
-
-         # For FalkorDB, let's do a simple property-based search instead of vector search for now
-         # since the vector index might not be set up correctly
-         if "." in collection_name:
-             [label, attribute_name] = collection_name.split(".")
-         else:
-             # If no dot, treat the whole thing as a property search
-             label = ""
-             attribute_name = collection_name
-
-         # Simple text-based search if we have query_text
-         if query_text:
-             if label:
-                 query = f"""
-                 MATCH (n:{label})
-                 WHERE toLower(toString(n.{attribute_name})) CONTAINS toLower($query_text)
-                 RETURN n, 1.0 as score
-                 LIMIT $limit
-                 """
-             else:
-                 query = f"""
-                 MATCH (n)
-                 WHERE toLower(toString(n.{attribute_name})) CONTAINS toLower($query_text)
-                 RETURN n, 1.0 as score
-                 LIMIT $limit
-                 """
-
-             params = {"query_text": query_text, "limit": limit}
-             result = self.query(query, params)
-             return result.result_set
-         else:
-             # For vector search, return empty for now since vector indexing needs proper setup
-             return []
-
-     async def batch_search(
-         self,
-         collection_name: str,
-         query_texts: list[str],
-         limit: int = None,
-         with_vectors: bool = False,
-     ):
-         """
-         Perform batch search across multiple queries based on text inputs and return results
-         asynchronously.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection in which to perform the
-           searches.
-         - query_texts (list[str]): A list of text queries to search for.
-         - limit (int): Optional limit for the search results for each query. (default None)
-         - with_vectors (bool): Flag indicating whether to return vectors with the results.
-           (default False)
-
-         Returns:
-         --------
-
-         Returns a list of results for each search query executed in parallel.
-         """
-         query_vectors = await self.embedding_engine.embed_text(query_texts)
-
-         return await asyncio.gather(
-             *[
-                 self.search(
-                     collection_name=collection_name,
-                     query_vector=query_vector,
-                     limit=limit,
-                     with_vector=with_vectors,
-                 )
-                 for query_vector in query_vectors
-             ]
-         )
-
-     async def get_graph_data(self):
-         """
-         Retrieve all nodes and edges from the graph along with their properties.
-
-         Returns:
-         --------
-
-         Returns a tuple containing lists of nodes and edges data retrieved from the graph.
-         """
-         query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties"
-
-         result = self.query(query)
-
-         nodes = [
-             (
-                 record[2]["id"],
-                 record[2],
-             )
-             for record in result.result_set
-         ]
-
-         query = """
-         MATCH (n)-[r]->(m)
-         RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
-         """
-         result = self.query(query)
-         edges = [
-             (
-                 record[3]["source_node_id"],
-                 record[3]["target_node_id"],
-                 record[2],
-                 record[3],
-             )
-             for record in result.result_set
-         ]
-
-         return (nodes, edges)
-
-     async def delete_data_points(self, collection_name: str, data_point_ids: list[UUID]):
-         """
-         Remove specified data points from the graph database based on their IDs.
-
-         Parameters:
-         -----------
-
-         - collection_name (str): The name of the collection from which to delete the data
-           points.
-         - data_point_ids (list[UUID]): A list of UUIDs representing the data points to
-           delete.
-
-         Returns:
-         --------
-
-         Returns the result of the deletion operation from the database.
-         """
-         return self.query(
-             "MATCH (node) WHERE node.id IN $node_ids DETACH DELETE node",
-             {
-                 "node_ids": [str(data_point) for data_point in data_point_ids],
-             },
-         )
-
-     async def delete_node(self, node_id: str) -> None:
-         """
-         Delete a specified node from the graph by its ID.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier for the node to delete.
-         """
-         query = f"MATCH (node {{id: '{node_id}'}}) DETACH DELETE node"
-         self.query(query)
-
-     async def delete_nodes(self, node_ids: List[str]) -> None:
-         """
-         Delete multiple nodes from the graph by their identifiers.
-
-         Parameters:
-         -----------
-
-         - node_ids (List[str]): A list of unique identifiers for the nodes to delete.
-         """
-         for node_id in node_ids:
-             await self.delete_node(node_id)
-
-     async def delete_graph(self):
-         """
-         Delete the entire graph along with all its indices and nodes.
-         """
-         try:
-             graph = self.driver.select_graph(self.graph_name)
-
-             indices = graph.list_indices()
-             for index in indices.result_set:
-                 for field in index[1]:
-                     graph.drop_node_vector_index(index[0], field)
-
-             graph.delete()
-         except Exception as e:
-             print(f"Error deleting graph: {e}")
-
-     async def get_node(self, node_id: str) -> Optional[NodeData]:
-         """
-         Retrieve a single node from the graph using its ID.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier of the node to retrieve.
-         """
-         result = self.query(
-             "MATCH (node) WHERE node.id = $node_id RETURN node",
-             {"node_id": node_id},
-         )
-
-         if result.result_set and len(result.result_set) > 0:
-             # FalkorDB returns node objects as first element in the result list
-             return result.result_set[0][0].properties
-         return None
-
-     async def get_nodes(self, node_ids: List[str]) -> List[NodeData]:
-         """
-         Retrieve multiple nodes from the graph using their IDs.
-
-         Parameters:
-         -----------
-
-         - node_ids (List[str]): A list of unique identifiers for the nodes to retrieve.
-         """
-         result = self.query(
-             "MATCH (node) WHERE node.id IN $node_ids RETURN node",
-             {"node_ids": node_ids},
-         )
-
-         nodes = []
-         if result.result_set:
-             for record in result.result_set:
-                 # FalkorDB returns node objects as first element in each record
-                 nodes.append(record[0].properties)
-         return nodes
-
-     async def get_neighbors(self, node_id: str) -> List[NodeData]:
-         """
-         Get all neighboring nodes connected to the specified node.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier of the node for which to retrieve neighbors.
-         """
-         result = self.query(
-             "MATCH (node)-[]-(neighbor) WHERE node.id = $node_id RETURN DISTINCT neighbor",
-             {"node_id": node_id},
-         )
-
-         neighbors = []
-         if result.result_set:
-             for record in result.result_set:
-                 # FalkorDB returns neighbor objects as first element in each record
-                 neighbors.append(record[0].properties)
-         return neighbors
-
-     async def get_edges(self, node_id: str) -> List[EdgeData]:
-         """
-         Retrieve all edges that are connected to the specified node.
-
-         Parameters:
-         -----------
-
-         - node_id (str): Unique identifier of the node whose edges are to be retrieved.
-         """
-         result = self.query(
-             """
-             MATCH (n)-[r]-(m)
-             WHERE n.id = $node_id
-             RETURN n.id AS source_id, m.id AS target_id, type(r) AS relationship_name, properties(r) AS properties
-             """,
-             {"node_id": node_id},
-         )
-
-         edges = []
-         if result.result_set:
-             for record in result.result_set:
-                 # FalkorDB returns values by index: source_id, target_id, relationship_name, properties
-                 edges.append(
-                     (
-                         record[0],  # source_id
-                         record[1],  # target_id
-                         record[2],  # relationship_name
-                         record[3],  # properties
-                     )
-                 )
-         return edges
-
-     async def has_edge(self, source_id: str, target_id: str, relationship_name: str) -> bool:
-         """
-         Verify if an edge exists between two specified nodes.
-
-         Parameters:
-         -----------
-
-         - source_id (str): Unique identifier of the source node.
-         - target_id (str): Unique identifier of the target node.
-         - relationship_name (str): Name of the relationship to verify.
-         """
-         # Check both the sanitized relationship type and the original name in properties
-         sanitized_relationship = self.sanitize_relationship_name(relationship_name)
-
-         result = self.query(
-             f"""
-             MATCH (source)-[r:{sanitized_relationship}]->(target)
-             WHERE source.id = $source_id AND target.id = $target_id
-             AND (r.relationship_name = $relationship_name OR NOT EXISTS(r.relationship_name))
-             RETURN COUNT(r) > 0 AS edge_exists
-             """,
-             {
-                 "source_id": source_id,
-                 "target_id": target_id,
-                 "relationship_name": relationship_name,
-             },
-         )
-
-         if result.result_set and len(result.result_set) > 0:
-             # FalkorDB returns scalar results as a list, access by index instead of key
-             return result.result_set[0][0]
-         return False
-
-     async def get_graph_metrics(self, include_optional: bool = False) -> Dict[str, Any]:
-         """
-         Fetch metrics and statistics of the graph, possibly including optional details.
-
-         Parameters:
-         -----------
-
-         - include_optional (bool): Flag indicating whether to include optional metrics or
-           not. (default False)
-         """
-         # Get basic node and edge counts
-         node_result = self.query("MATCH (n) RETURN count(n) AS node_count")
-         edge_result = self.query("MATCH ()-[r]->() RETURN count(r) AS edge_count")
-
-         # FalkorDB returns scalar results as a list, access by index instead of key
-         num_nodes = node_result.result_set[0][0] if node_result.result_set else 0
-         num_edges = edge_result.result_set[0][0] if edge_result.result_set else 0
-
-         metrics = {
-             "num_nodes": num_nodes,
-             "num_edges": num_edges,
-             "mean_degree": (2 * num_edges) / num_nodes if num_nodes > 0 else 0,
-             "edge_density": num_edges / (num_nodes * (num_nodes - 1)) if num_nodes > 1 else 0,
-             "num_connected_components": 1,  # Simplified for now
-             "sizes_of_connected_components": [num_nodes] if num_nodes > 0 else [],
-         }
-
-         if include_optional:
-             # Add optional metrics - simplified implementation
-             metrics.update(
-                 {
-                     "num_selfloops": 0,  # Simplified
-                     "diameter": -1,  # Not implemented
-                     "avg_shortest_path_length": -1,  # Not implemented
-                     "avg_clustering": -1,  # Not implemented
-                 }
-             )
-         else:
-             metrics.update(
-                 {
-                     "num_selfloops": -1,
-                     "diameter": -1,
-                     "avg_shortest_path_length": -1,
-                     "avg_clustering": -1,
-                 }
-             )
-
-         return metrics
-
-     async def get_document_subgraph(self, content_hash: str):
-         """
-         Get a subgraph related to a specific document by content hash.
-
-         Parameters:
-         -----------
-
-         - content_hash (str): The content hash of the document to find.
-         """
-         query = """
-         MATCH (d) WHERE d.id CONTAINS $content_hash
-         OPTIONAL MATCH (d)<-[:CHUNK_OF]-(c)
-         OPTIONAL MATCH (c)-[:HAS_ENTITY]->(e)
-         OPTIONAL MATCH (e)-[:IS_INSTANCE_OF]->(et)
-         RETURN d AS document,
-                COLLECT(DISTINCT c) AS chunks,
-                COLLECT(DISTINCT e) AS orphan_entities,
-                COLLECT(DISTINCT c) AS made_from_nodes,
-                COLLECT(DISTINCT et) AS orphan_types
-         """
-
-         result = self.query(query, {"content_hash": f"text_{content_hash}"})
-
-         if not result.result_set or not result.result_set[0]:
-             return None
-
-         # Convert result to dictionary format
-         # FalkorDB returns values by index: document, chunks, orphan_entities, made_from_nodes, orphan_types
-         record = result.result_set[0]
-         return {
-             "document": record[0],
-             "chunks": record[1],
-             "orphan_entities": record[2],
-             "made_from_nodes": record[3],
-             "orphan_types": record[4],
-         }
-
-     async def get_degree_one_nodes(self, node_type: str):
-         """
-         Get all nodes that have only one connection.
-
-         Parameters:
-         -----------
-
-         - node_type (str): The type of nodes to filter by, must be 'Entity' or 'EntityType'.
-         """
-         if not node_type or node_type not in ["Entity", "EntityType"]:
-             raise ValueError("node_type must be either 'Entity' or 'EntityType'")
-
-         result = self.query(
-             f"""
-             MATCH (n:{node_type})
-             WITH n, COUNT {{ MATCH (n)--() }} as degree
-             WHERE degree = 1
-             RETURN n
-             """
-         )
-
-         # FalkorDB returns node objects as first element in each record
-         return [record[0] for record in result.result_set] if result.result_set else []
-
-     async def get_nodeset_subgraph(
-         self, node_type: Type[Any], node_name: List[str]
-     ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
-         """
-         Fetch a subgraph consisting of a specific set of nodes and their relationships.
-
-         Parameters:
-         -----------
-
-         - node_type (Type[Any]): The type of nodes to include in the subgraph.
-         - node_name (List[str]): A list of names of the nodes to include in the subgraph.
-         """
-         label = node_type.__name__
-
-         # Find primary nodes of the specified type and names
-         primary_query = f"""
-         UNWIND $names AS wantedName
-         MATCH (n:{label})
-         WHERE n.name = wantedName
-         RETURN DISTINCT n.id, properties(n) AS properties
-         """
-
-         primary_result = self.query(primary_query, {"names": node_name})
-         if not primary_result.result_set:
-             return [], []
-
-         # FalkorDB returns values by index: id, properties
-         primary_ids = [record[0] for record in primary_result.result_set]
-
-         # Find neighbors of primary nodes
-         neighbor_query = """
-         MATCH (n)-[]-(neighbor)
-         WHERE n.id IN $ids
-         RETURN DISTINCT neighbor.id, properties(neighbor) AS properties
-         """
-
-         neighbor_result = self.query(neighbor_query, {"ids": primary_ids})
-         # FalkorDB returns values by index: id, properties
-         neighbor_ids = (
-             [record[0] for record in neighbor_result.result_set]
-             if neighbor_result.result_set
-             else []
-         )
-
-         all_ids = list(set(primary_ids + neighbor_ids))
-
-         # Get all nodes in the subgraph
-         nodes_query = """
-         MATCH (n)
-         WHERE n.id IN $ids
-         RETURN n.id, properties(n) AS properties
-         """
-
-         nodes_result = self.query(nodes_query, {"ids": all_ids})
-         nodes = []
-         if nodes_result.result_set:
-             for record in nodes_result.result_set:
-                 # FalkorDB returns values by index: id, properties
-                 nodes.append((record[0], record[1]))
-
-         # Get edges between these nodes
-         edges_query = """
-         MATCH (a)-[r]->(b)
-         WHERE a.id IN $ids AND b.id IN $ids
-         RETURN a.id AS source_id, b.id AS target_id, type(r) AS relationship_name, properties(r) AS properties
-         """
-
-         edges_result = self.query(edges_query, {"ids": all_ids})
-         edges = []
-         if edges_result.result_set:
-             for record in edges_result.result_set:
-                 # FalkorDB returns values by index: source_id, target_id, relationship_name, properties
-                 edges.append(
-                     (
-                         record[0],  # source_id
-                         record[1],  # target_id
-                         record[2],  # relationship_name
-                         record[3],  # properties
-                     )
-                 )
-
-         return nodes, edges
-
-     async def prune(self):
-         """
-         Prune the graph by deleting the entire graph structure.
-         """
-         await self.delete_graph()
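
Usage sketch (not part of the diff): the adapter deleted above was constructed directly from a host, a port, and an embedding engine, and exposed async node/edge methods on top of a synchronous query() call. The following minimal sketch, valid only against 0.3.4.dev4, shows how it could have been driven based on the signatures in the removed file; the host and port values and the get_embedding_engine() wiring (from the module listed as item 33 above) are assumptions for illustration, not taken from this diff.

import asyncio

from cognee.infrastructure.databases.hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
from cognee.infrastructure.databases.vector.embeddings.get_embedding_engine import (
    get_embedding_engine,
)


async def main():
    # Hypothetical connection values; the adapter itself hard-codes graph_name="cognee_graph".
    adapter = FalkorDBAdapter(
        database_url="localhost",
        database_port=6379,
        embedding_engine=get_embedding_engine(),
    )

    # add_node(node_id, properties) cleans the properties and MERGEs the node.
    await adapter.add_node("node-1", {"name": "example"})

    # get_node(node_id) returns the node's properties dict, or None if absent.
    print(await adapter.get_node("node-1"))


asyncio.run(main())

In 0.3.6 this import path no longer exists; the FalkorDB adapter and its test (cognee/tests/test_falkordb.py, item 175) were removed from the package.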