cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
- cognee/api/client.py +41 -3
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +11 -1
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
- cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +251 -19
- cognee/modules/retrieval/code_retriever.py +3 -5
- cognee/modules/retrieval/completion_retriever.py +1 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/natural_language_retriever.py +3 -5
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -4
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/search/search.py
CHANGED
@@ -71,6 +71,12 @@ async def search(
 Best for: Advanced users, specific graph traversals, debugging.
 Returns: Raw graph query results.
 
+**FEELING_LUCKY**:
+Intelligently selects and runs the most appropriate search type.
+Best for: General-purpose queries or when you're unsure which search type is best.
+Returns: The results from the automatically selected search type.
+
+
 Args:
 query_text: Your question or search query in natural language.
 Examples:
@@ -119,6 +125,9 @@ async def search(
 **CODE**:
 [List of structured code information with context]
 
+**FEELING_LUCKY**:
+[List of results in the format of the search type that is automatically selected]
+
 
 
 
@@ -130,6 +139,7 @@ async def search(
 - **CHUNKS**: Fastest, pure vector similarity search without LLM
 - **SUMMARIES**: Fast, returns pre-computed summaries
 - **CODE**: Medium speed, specialized for code understanding
+- **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
 - **top_k**: Start with 10, increase for comprehensive analysis (max 100)
 - **datasets**: Specify datasets to improve speed and relevance
 
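For orientation, here is a minimal usage sketch of the new search type. It assumes the top-level `cognee.search` coroutine accepts a `query_type` keyword and that `SearchType` is importable from `cognee.modules.search.types`; neither detail is shown in this hunk, so treat the exact names as assumptions.

```python
import asyncio

import cognee
from cognee.modules.search.types import SearchType  # FEELING_LUCKY is added in this release


async def main():
    # FEELING_LUCKY asks the LLM to pick the most appropriate search type for the
    # query (via the new select_search_type operation and search_type_selector prompt),
    # then runs that search and returns its results.
    results = await cognee.search(
        query_text="What does the ingestion pipeline do with audio files?",
        query_type=SearchType.FEELING_LUCKY,  # assumed keyword name
        top_k=10,
    )
    print(results)


asyncio.run(main())
```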
cognee/base_config.py
CHANGED

cognee/eval_framework/evaluation/direct_llm_eval_adapter.py
CHANGED
@@ -1,10 +1,10 @@
 from typing import Any, Dict, List
 from pydantic import BaseModel
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
-from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
 from cognee.eval_framework.eval_config import EvalConfig
 
+from cognee.infrastructure.llm import LLMGateway
+
 
 class CorrectnessEvaluation(BaseModel):
 """Response model containing evaluation score and explanation."""
@@ -19,17 +19,16 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
 config = EvalConfig()
 self.system_prompt_path = config.direct_llm_system_prompt
 self.eval_prompt_path = config.direct_llm_eval_prompt
-self.llm_client = get_llm_client()
 
 async def evaluate_correctness(
 self, question: str, answer: str, golden_answer: str
 ) -> Dict[str, Any]:
 args = {"question": question, "answer": answer, "golden_answer": golden_answer}
 
-user_prompt = render_prompt(self.eval_prompt_path, args)
-system_prompt = read_query_prompt(self.system_prompt_path)
+user_prompt = LLMGateway.render_prompt(self.eval_prompt_path, args)
+system_prompt = LLMGateway.read_query_prompt(self.system_prompt_path)
 
-evaluation = await
+evaluation = await LLMGateway.acreate_structured_output(
 text_input=user_prompt,
 system_prompt=system_prompt,
 response_model=CorrectnessEvaluation,
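For orientation, a minimal sketch of the gateway calls this adapter now relies on. The `LLMGateway` methods and keyword arguments are taken directly from the hunk above; the prompt file names used here are hypothetical placeholders (the adapter reads its real paths from `EvalConfig`).

```python
from pydantic import BaseModel

from cognee.infrastructure.llm import LLMGateway


class Verdict(BaseModel):
    score: float
    explanation: str


async def judge(question: str, answer: str, golden_answer: str) -> Verdict:
    # Hypothetical prompt template names; only the call pattern mirrors the diff.
    user_prompt = LLMGateway.render_prompt(
        "eval_user_prompt.txt",
        {"question": question, "answer": answer, "golden_answer": golden_answer},
    )
    system_prompt = LLMGateway.read_query_prompt("eval_system_prompt.txt")

    return await LLMGateway.acreate_structured_output(
        text_input=user_prompt,
        system_prompt=system_prompt,
        response_model=Verdict,
    )
```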

cognee/infrastructure/databases/graph/config.py
CHANGED
@@ -36,6 +36,7 @@ class GraphConfig(BaseSettings):
 graph_database_provider: str = Field("kuzu", env="GRAPH_DATABASE_PROVIDER")
 
 graph_database_url: str = ""
+graph_database_name: str = ""
 graph_database_username: str = ""
 graph_database_password: str = ""
 graph_database_port: int = 123
@@ -105,6 +106,7 @@ class GraphConfig(BaseSettings):
 return {
 "graph_database_provider": self.graph_database_provider,
 "graph_database_url": self.graph_database_url,
+"graph_database_name": self.graph_database_name,
 "graph_database_username": self.graph_database_username,
 "graph_database_password": self.graph_database_password,
 "graph_database_port": self.graph_database_port,
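A small sketch of supplying the new field. The environment variable name is assumed from the field name under the usual pydantic-settings mapping; the diff does not show an explicit `env=` for `graph_database_name`.

```python
import os

# Assumed env var name (field name upper-cased); no explicit mapping is shown in the diff.
os.environ["GRAPH_DATABASE_NAME"] = "cognee"

from cognee.infrastructure.databases.graph.config import GraphConfig

config = GraphConfig()
print(config.graph_database_name)  # "cognee" if the default mapping applies, otherwise ""
```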

cognee/infrastructure/databases/graph/get_graph_engine.py
CHANGED
@@ -33,6 +33,7 @@ def create_graph_engine(
 graph_database_provider,
 graph_file_path,
 graph_database_url="",
+graph_database_name="",
 graph_database_username="",
 graph_database_password="",
 graph_database_port="",
@@ -48,13 +49,13 @@ def create_graph_engine(
 -----------
 
 - graph_database_provider: The type of graph database provider to use (e.g., neo4j,
-falkordb, kuzu
-- graph_database_url: The URL for the graph database instance. Required for neo4j
-
+falkordb, kuzu).
+- graph_database_url: The URL for the graph database instance. Required for neo4j
+and falkordb providers.
 - graph_database_username: The username for authentication with the graph database.
-Required for neo4j
+Required for neo4j provider.
 - graph_database_password: The password for authentication with the graph database.
-Required for neo4j
+Required for neo4j provider.
 - graph_database_port: The port number for the graph database connection. Required
 for the falkordb provider.
 - graph_file_path: The filesystem path to the graph file. Required for the kuzu
@@ -86,6 +87,7 @@ def create_graph_engine(
 graph_database_url=graph_database_url,
 graph_database_username=graph_database_username or None,
 graph_database_password=graph_database_password or None,
+graph_database_name=graph_database_name or None,
 )
 
 elif graph_database_provider == "falkordb":
@@ -122,17 +124,61 @@ def create_graph_engine(
 username=graph_database_username,
 password=graph_database_password,
 )
+elif graph_database_provider == "neptune":
+try:
+from langchain_aws import NeptuneAnalyticsGraph
+except ImportError:
+raise ImportError(
+"langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
+)
 
-elif graph_database_provider == "memgraph":
 if not graph_database_url:
-raise EnvironmentError("Missing
+raise EnvironmentError("Missing Neptune endpoint.")
 
-from .
+from .neptune_driver.adapter import NeptuneGraphDB, NEPTUNE_ENDPOINT_URL
 
-
-
-
-
+if not graph_database_url.startswith(NEPTUNE_ENDPOINT_URL):
+raise ValueError(
+f"Neptune endpoint must have the format {NEPTUNE_ENDPOINT_URL}<GRAPH_ID>"
+)
+
+graph_identifier = graph_database_url.replace(NEPTUNE_ENDPOINT_URL, "")
+
+return NeptuneGraphDB(
+graph_id=graph_identifier,
+)
+
+elif graph_database_provider == "neptune_analytics":
+"""
+Creates a graph DB from config
+We want to use a hybrid (graph & vector) DB and we should update this
+to make a single instance of the hybrid configuration (with embedder)
+instead of creating the hybrid object twice.
+"""
+try:
+from langchain_aws import NeptuneAnalyticsGraph
+except ImportError:
+raise ImportError(
+"langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
+)
+
+if not graph_database_url:
+raise EnvironmentError("Missing Neptune endpoint.")
+
+from ..hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
+NeptuneAnalyticsAdapter,
+NEPTUNE_ANALYTICS_ENDPOINT_URL,
+)
+
+if not graph_database_url.startswith(NEPTUNE_ANALYTICS_ENDPOINT_URL):
+raise ValueError(
+f"Neptune endpoint must have the format '{NEPTUNE_ANALYTICS_ENDPOINT_URL}<GRAPH_ID>'"
+)
+
+graph_identifier = graph_database_url.replace(NEPTUNE_ANALYTICS_ENDPOINT_URL, "")
+
+return NeptuneAnalyticsAdapter(
+graph_id=graph_identifier,
 )
 
 from .networkx.adapter import NetworkXAdapter
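A minimal sketch of selecting the new Neptune Analytics branch. It assumes `langchain_aws` is installed (per the ImportError guard above) and that `create_graph_engine` can be called directly with the keyword parameters shown in its signature; the graph identifier below is hypothetical, and the literal value of `NEPTUNE_ANALYTICS_ENDPOINT_URL` is not shown in this diff.

```python
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
from cognee.infrastructure.databases.hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
    NEPTUNE_ANALYTICS_ENDPOINT_URL,
)

# The URL must start with the endpoint prefix, followed by the graph identifier,
# which create_graph_engine strips off and passes as graph_id.
graph_engine = create_graph_engine(
    graph_database_provider="neptune_analytics",
    graph_file_path="",  # unused by this provider
    graph_database_url=f"{NEPTUNE_ANALYTICS_ENDPOINT_URL}g-0123456789",  # hypothetical graph id
)
```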

cognee/infrastructure/databases/graph/graph_db_interface.py
CHANGED
@@ -2,7 +2,7 @@ import inspect
 from functools import wraps
 from abc import abstractmethod, ABC
 from datetime import datetime, timezone
-from typing import Optional, Dict, Any, List, Tuple, Type
+from typing import Optional, Dict, Any, List, Tuple, Type, Union
 from uuid import NAMESPACE_OID, UUID, uuid5
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.engine import DataPoint
@@ -173,28 +173,31 @@ class GraphDBInterface(ABC):
 raise NotImplementedError
 
 @abstractmethod
-async def add_node(
+async def add_node(
+self, node: Union[DataPoint, str], properties: Optional[Dict[str, Any]] = None
+) -> None:
 """
 Add a single node with specified properties to the graph.
 
 Parameters:
 -----------
 
--
-- properties (Dict[str, Any]): A dictionary of properties associated with the node.
+- node (Union[DataPoint, str]): Either a DataPoint object or a string identifier for the node being added.
+- properties (Optional[Dict[str, Any]]): A dictionary of properties associated with the node.
+Required when node is a string, ignored when node is a DataPoint.
 """
 raise NotImplementedError
 
 @abstractmethod
 @record_graph_changes
-async def add_nodes(self, nodes: List[Node]) -> None:
+async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
 """
 Add multiple nodes to the graph in a single operation.
 
 Parameters:
 -----------
 
-- nodes (List[Node]): A list of Node objects to be added to the graph.
+- nodes (Union[List[Node], List[DataPoint]]): A list of Node objects or DataPoint objects to be added to the graph.
 """
 raise NotImplementedError
 
@@ -271,14 +274,16 @@ class GraphDBInterface(ABC):
 
 @abstractmethod
 @record_graph_changes
-async def add_edges(
+async def add_edges(
+self, edges: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]
+) -> None:
 """
 Add multiple edges to the graph in a single operation.
 
 Parameters:
 -----------
 
-- edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
+- edges (Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]): A list of EdgeData objects or tuples representing edges to be added.
 """
 raise NotImplementedError
 
@@ -377,7 +382,7 @@ class GraphDBInterface(ABC):
 
 @abstractmethod
 async def get_connections(
-self, node_id: str
+self, node_id: Union[str, UUID]
 ) -> List[Tuple[NodeData, Dict[str, Any], NodeData]]:
 """
 Get all nodes connected to a specified node and their relationship details.
@@ -385,6 +390,6 @@ class GraphDBInterface(ABC):
 Parameters:
 -----------
 
-- node_id (str): Unique identifier of the node for which to retrieve connections.
+- node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
 """
 raise NotImplementedError
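A short sketch of what the widened signatures allow on a concrete adapter. `graph_db` stands for any adapter implementing `GraphDBInterface`, and the node identifiers are hypothetical.

```python
from cognee.infrastructure.engine import DataPoint


async def add_examples(graph_db, point: DataPoint) -> None:
    # DataPoint form: node identity and properties come from the DataPoint itself.
    await graph_db.add_node(point)

    # String-id form: properties must be passed explicitly.
    await graph_db.add_node("node-123", properties={"name": "Example node"})

    # Edges may now also be plain tuples:
    # (source_id, target_id, relationship_name, optional properties dict).
    await graph_db.add_edges([("node-123", "node-456", "related_to", {"weight": 0.5})])
```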

cognee/infrastructure/databases/graph/kuzu/adapter.py
CHANGED
@@ -42,6 +42,7 @@ class KuzuAdapter(GraphDBInterface):
 self.connection: Optional[Connection] = None
 self.executor = ThreadPoolExecutor()
 self._initialize_connection()
+self.KUZU_ASYNC_LOCK = asyncio.Lock()
 
 def _initialize_connection(self) -> None:
 """Initialize the Kuzu database connection and schema."""
@@ -136,6 +137,10 @@ class KuzuAdapter(GraphDBInterface):
 from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
 
 s3_file_storage = S3FileStorage("")
+
+async with self.KUZU_ASYNC_LOCK:
+self.connection.execute("CHECKPOINT;")
+
 s3_file_storage.s3.put(self.temp_graph_file, self.db_path, recursive=True)
 
 async def pull_from_s3(self) -> None:
@@ -145,7 +150,7 @@ class KuzuAdapter(GraphDBInterface):
 try:
 s3_file_storage.s3.get(self.db_path, self.temp_graph_file, recursive=True)
 except FileNotFoundError:
-
+logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
 
 async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
 """
@@ -1524,7 +1529,7 @@ class KuzuAdapter(GraphDBInterface):
 logger.error(f"Error during database clearing: {e}")
 raise
 
-async def get_document_subgraph(self,
+async def get_document_subgraph(self, data_id: str):
 """
 Get all nodes that should be deleted when removing a document.
 
@@ -1535,7 +1540,7 @@ class KuzuAdapter(GraphDBInterface):
 Parameters:
 -----------
 
--
+- data_id (str): The identifier for the document to query against.
 
 Returns:
 --------
@@ -1545,7 +1550,7 @@ class KuzuAdapter(GraphDBInterface):
 """
 query = """
 MATCH (doc:Node)
-WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument') AND doc.
+WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument' OR doc.type = 'AudioDocument' OR doc.type = 'ImageDocument' OR doc.type = 'UnstructuredDocument') AND doc.id = $data_id
 
 OPTIONAL MATCH (doc)<-[e1:EDGE]-(chunk:Node)
 WHERE e1.relationship_name = 'is_part_of' AND chunk.type = 'DocumentChunk'
@@ -1556,7 +1561,7 @@ class KuzuAdapter(GraphDBInterface):
 MATCH (entity)<-[e3:EDGE]-(otherChunk:Node)-[e4:EDGE]->(otherDoc:Node)
 WHERE e3.relationship_name = 'contains'
 AND e4.relationship_name = 'is_part_of'
-AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
+AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
 AND otherDoc.id <> doc.id
 }
 
@@ -1572,7 +1577,7 @@ class KuzuAdapter(GraphDBInterface):
 AND e9.relationship_name = 'is_part_of'
 AND otherEntity.type = 'Entity'
 AND otherChunk.type = 'DocumentChunk'
-AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
+AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
 AND otherDoc.id <> doc.id
 }
 
@@ -1583,7 +1588,7 @@ class KuzuAdapter(GraphDBInterface):
 COLLECT(DISTINCT made_node) as made_from_nodes,
 COLLECT(DISTINCT type) as orphan_types
 """
-result = await self.query(query, {"
+result = await self.query(query, {"data_id": f"{data_id}"})
 if not result or not result[0]:
 return None
 
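The checkpoint-before-upload pattern above, restated standalone as a sketch. It assumes the `kuzu` Python package and a local database directory; the adapter's S3 wiring and instance attributes are omitted.

```python
import asyncio

import kuzu


async def checkpoint_then_copy(db_path: str) -> None:
    """Fold Kuzu's write-ahead log into the database files before copying them elsewhere."""
    lock = asyncio.Lock()  # stands in for KuzuAdapter.KUZU_ASYNC_LOCK
    connection = kuzu.Connection(kuzu.Database(db_path))

    async with lock:
        # Same statement the adapter runs before s3.put(...): it serializes pending
        # writes so the copied directory is self-consistent.
        connection.execute("CHECKPOINT;")

    # ...upload or copy db_path here...


asyncio.run(checkpoint_then_copy("./kuzu_graph_db"))
```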

cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py
CHANGED
@@ -74,7 +74,7 @@ def read_kuzu_storage_version(kuzu_db_path: str) -> int:
 if kuzu_version_mapping.get(version_code):
 return kuzu_version_mapping[version_code]
 else:
-ValueError("Could not map version_code to proper Kuzu version.")
+raise ValueError("Could not map version_code to proper Kuzu version.")
 
 
 def ensure_env(version: str, export_dir) -> str:

cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
CHANGED
@@ -50,6 +50,7 @@ class Neo4jAdapter(GraphDBInterface):
 graph_database_url: str,
 graph_database_username: Optional[str] = None,
 graph_database_password: Optional[str] = None,
+graph_database_name: Optional[str] = None,
 driver: Optional[Any] = None,
 ):
 # Only use auth if both username and password are provided
@@ -59,7 +60,7 @@ class Neo4jAdapter(GraphDBInterface):
 elif graph_database_username or graph_database_password:
 logger = get_logger(__name__)
 logger.warning("Neo4j credentials incomplete – falling back to anonymous connection.")
-
+self.graph_database_name = graph_database_name
 self.driver = driver or AsyncGraphDatabase.driver(
 graph_database_url,
 auth=auth,
@@ -80,7 +81,7 @@ class Neo4jAdapter(GraphDBInterface):
 """
 Get a session for database operations.
 """
-async with self.driver.session() as session:
+async with self.driver.session(database=self.graph_database_name) as session:
 yield session
 
 @deadlock_retry()
@@ -410,6 +411,38 @@ class Neo4jAdapter(GraphDBInterface):
 
 return await self.query(query, params)
 
+def _flatten_edge_properties(self, properties: Dict[str, Any]) -> Dict[str, Any]:
+"""
+Flatten edge properties to handle nested dictionaries like weights.
+
+Neo4j doesn't support nested dictionaries as property values, so we need to
+flatten the 'weights' dictionary into individual properties with prefixes.
+
+Args:
+properties: Dictionary of edge properties that may contain nested dicts
+
+Returns:
+Flattened properties dictionary suitable for Neo4j storage
+"""
+flattened = {}
+
+for key, value in properties.items():
+if key == "weights" and isinstance(value, dict):
+# Flatten weights dictionary into individual properties
+for weight_name, weight_value in value.items():
+flattened[f"weight_{weight_name}"] = weight_value
+elif isinstance(value, dict):
+# For other nested dictionaries, serialize as JSON string
+flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
+elif isinstance(value, list):
+# For lists, serialize as JSON string
+flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
+else:
+# Keep primitive types as-is
+flattened[key] = value
+
+return flattened
+
 @record_graph_changes
 @override_distributed(queued_add_edges)
 async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None:
@@ -448,11 +481,13 @@ class Neo4jAdapter(GraphDBInterface):
 "from_node": str(edge[0]),
 "to_node": str(edge[1]),
 "relationship_name": edge[2],
-"properties":
-
-
-
-
+"properties": self._flatten_edge_properties(
+{
+**(edge[3] if edge[3] else {}),
+"source_node_id": str(edge[0]),
+"target_node_id": str(edge[1]),
+}
+),
 }
 for edge in edges
 ]
@@ -1217,7 +1252,7 @@ class Neo4jAdapter(GraphDBInterface):
 
 return mandatory_metrics | optional_metrics
 
-async def get_document_subgraph(self,
+async def get_document_subgraph(self, data_id: str):
 """
 Retrieve a subgraph related to a document identified by its content hash, including
 related entities and chunks.
@@ -1235,21 +1270,21 @@ class Neo4jAdapter(GraphDBInterface):
 """
 query = """
 MATCH (doc)
-WHERE (doc:TextDocument OR doc:PdfDocument)
-AND doc.
+WHERE (doc:TextDocument OR doc:PdfDocument OR doc:UnstructuredDocument OR doc:AudioDocument or doc:ImageDocument)
+AND doc.id = $data_id
 
 OPTIONAL MATCH (doc)<-[:is_part_of]-(chunk:DocumentChunk)
 OPTIONAL MATCH (chunk)-[:contains]->(entity:Entity)
 WHERE NOT EXISTS {
 MATCH (entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
-WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
+WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
 AND otherDoc.id <> doc.id
 }
 OPTIONAL MATCH (chunk)<-[:made_from]-(made_node:TextSummary)
 OPTIONAL MATCH (entity)-[:is_a]->(type:EntityType)
 WHERE NOT EXISTS {
 MATCH (type)<-[:is_a]-(otherEntity:Entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
-WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
+WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
 AND otherDoc.id <> doc.id
 }
 
@@ -1260,7 +1295,7 @@ class Neo4jAdapter(GraphDBInterface):
 collect(DISTINCT made_node) as made_from_nodes,
 collect(DISTINCT type) as orphan_types
 """
-result = await self.query(query, {"
+result = await self.query(query, {"data_id": data_id})
 return result[0] if result else None
 
 async def get_degree_one_nodes(self, node_type: str):
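To make the flattening concrete, a standalone restatement of the helper's behaviour. Plain `json.dumps` is used here in place of the adapter's custom `JSONEncoder`.

```python
import json
from typing import Any, Dict


def flatten_edge_properties(properties: Dict[str, Any]) -> Dict[str, Any]:
    # Mirrors Neo4jAdapter._flatten_edge_properties from the hunk above.
    flattened: Dict[str, Any] = {}
    for key, value in properties.items():
        if key == "weights" and isinstance(value, dict):
            # Nested weights become individual weight_<name> properties.
            for weight_name, weight_value in value.items():
                flattened[f"weight_{weight_name}"] = weight_value
        elif isinstance(value, (dict, list)):
            # Other nested structures are stored as JSON strings.
            flattened[f"{key}_json"] = json.dumps(value)  # the adapter passes cls=JSONEncoder
        else:
            flattened[key] = value
    return flattened


print(flatten_edge_properties({
    "relationship_name": "contains",
    "weights": {"similarity": 0.87, "recency": 0.4},
    "metadata": {"source": "chunk-1"},
}))
# {'relationship_name': 'contains', 'weight_similarity': 0.87, 'weight_recency': 0.4,
#  'metadata_json': '{"source": "chunk-1"}'}
```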

cognee/infrastructure/databases/graph/neptune_driver/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""Neptune Analytics Driver Module
+
+This module provides the Neptune Analytics adapter and utilities for interacting
+with Amazon Neptune Analytics graph databases.
+"""
+
+from .adapter import NeptuneGraphDB
+from . import neptune_utils
+from . import exceptions
+
+__all__ = [
+"NeptuneGraphDB",
+"neptune_utils",
+"exceptions",
+]
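A short import sketch showing how the new package-level exports are meant to be consumed; the paths come from this diff, and no constructor arguments are shown here.

```python
# Package-level names exposed by the new __init__.py above.
from cognee.infrastructure.databases.graph.neptune_driver import (
    NeptuneGraphDB,
    exceptions,
    neptune_utils,
)

# The endpoint-prefix constant used by create_graph_engine lives in the adapter module.
from cognee.infrastructure.databases.graph.neptune_driver.adapter import NEPTUNE_ENDPOINT_URL
```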