cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/api/v1/prune/prune.py +2 -2
- cognee/api/v1/search/search.py +1 -1
- cognee/api/v1/sync/sync.py +16 -5
- cognee/base_config.py +19 -1
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
- cognee/infrastructure/databases/relational/ModelBase.py +2 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
- cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
- cognee/infrastructure/files/storage/StorageManager.py +18 -0
- cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/cloud/operations/check_api_key.py +4 -1
- cognee/modules/data/deletion/prune_system.py +5 -1
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/notebooks/methods/create_notebook.py +34 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/models/Notebook.py +206 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
- cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -3
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +31 -9
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/modules/users/methods/create_user.py +4 -24
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +10 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +10 -0
- cognee/root_dir.py +5 -0
- cognee/shared/cache.py +346 -0
- cognee/shared/utils.py +12 -0
- cognee/tasks/graph/extract_graph_from_data.py +53 -10
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_cognee_server_start.py +4 -4
- cognee/tests/test_temporal_graph.py +6 -34
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
- cognee-0.3.4.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/ontology/matching_strategies.py

@@ -0,0 +1,53 @@
+import difflib
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+
+class MatchingStrategy(ABC):
+    """Abstract base class for ontology entity matching strategies."""
+
+    @abstractmethod
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the best match for a given name from a list of candidates.
+
+        Args:
+            name: The name to match
+            candidates: List of candidate names to match against
+
+        Returns:
+            The best matching candidate name, or None if no match found
+        """
+        pass
+
+
+class FuzzyMatchingStrategy(MatchingStrategy):
+    """Fuzzy matching strategy using difflib for approximate string matching."""
+
+    def __init__(self, cutoff: float = 0.8):
+        """Initialize fuzzy matching strategy.
+
+        Args:
+            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
+        """
+        self.cutoff = cutoff
+
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the closest fuzzy match for a given name.
+
+        Args:
+            name: The normalized name to match
+            candidates: List of normalized candidate names
+
+        Returns:
+            The best matching candidate name, or None if no match meets the cutoff
+        """
+        if not candidates:
+            return None
+
+        # Check for exact match first
+        if name in candidates:
+            return name
+
+        # Find fuzzy match
+        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
+        return best_match[0] if best_match else None
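For orientation, a minimal usage sketch of the new strategy (not part of the diff; the candidate lists and cutoff are illustrative):

    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    strategy = FuzzyMatchingStrategy(cutoff=0.8)
    strategy.find_match("person", ["person", "place"])   # exact hit -> "person"
    strategy.find_match("persn", ["person", "place"])    # fuzzy hit -> "person"
    strategy.find_match("vehicle", ["person", "place"])  # below cutoff -> None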
cognee/modules/ontology/models.py

@@ -0,0 +1,20 @@
+from typing import Any
+
+
+class AttachedOntologyNode:
+    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+
+    def __init__(self, uri: Any, category: str):
+        self.uri = uri
+        self.name = self._extract_name(uri)
+        self.category = category
+
+    @staticmethod
+    def _extract_name(uri: Any) -> str:
+        uri_str = str(uri)
+        if "#" in uri_str:
+            return uri_str.split("#")[-1]
+        return uri_str.rstrip("/").split("/")[-1]
+
+    def __repr__(self):
+        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
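The name extraction prefers the URI fragment and falls back to the last path segment; a quick sketch (example URIs invented):

    node = AttachedOntologyNode("http://example.org/onto#Person", "classes")
    node.name  # -> "Person" (part after "#")

    node = AttachedOntologyNode("http://example.org/onto/Person/", "classes")
    node.name  # -> "Person" (trailing "/" stripped, last path segment kept)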
cognee/modules/ontology/ontology_config.py

@@ -0,0 +1,24 @@
+from typing import TypedDict, Optional
+
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+
+class OntologyConfig(TypedDict, total=False):
+    """Configuration containing ontology resolver.
+
+    Attributes:
+        ontology_resolver: The ontology resolver instance to use
+    """
+
+    ontology_resolver: Optional[BaseOntologyResolver]
+
+
+class Config(TypedDict, total=False):
+    """Top-level configuration dictionary.
+
+    Attributes:
+        ontology_config: Configuration containing ontology resolver
+    """
+
+    ontology_config: Optional[OntologyConfig]
cognee/modules/ontology/ontology_env_config.py

@@ -0,0 +1,45 @@
+"""This module contains the configuration for ontology handling."""
+
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class OntologyEnvConfig(BaseSettings):
+    """
+    Represents the configuration for ontology handling, including parameters for
+    ontology file storage and resolution/matching strategies.
+
+    Public methods:
+    - to_dict
+
+    Instance variables:
+    - ontology_resolver
+    - matching_strategy
+    - ontology_file_path
+    - model_config
+    """
+
+    ontology_resolver: str = "rdflib"
+    matching_strategy: str = "fuzzy"
+    ontology_file_path: str = ""
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
+
+    def to_dict(self) -> dict:
+        """
+        Return the configuration as a dictionary.
+        """
+        return {
+            "ontology_resolver": self.ontology_resolver,
+            "matching_strategy": self.matching_strategy,
+            "ontology_file_path": self.ontology_file_path,
+        }
+
+
+@lru_cache
+def get_ontology_env_config():
+    """
+    Retrieve the ontology configuration. This function utilizes caching to return a
+    singleton instance of the OntologyEnvConfig class for efficiency.
+    """
+    return OntologyEnvConfig()
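Since OntologyEnvConfig is a pydantic-settings BaseSettings, the fields can be supplied through the environment or a .env file. A sketch (variable names assume pydantic-settings' default case-insensitive field-name mapping; the values are illustrative):

    # .env
    ONTOLOGY_RESOLVER=rdflib
    MATCHING_STRATEGY=fuzzy
    ONTOLOGY_FILE_PATH=/data/ontologies/domain.owl

    from cognee.modules.ontology.ontology_env_config import get_ontology_env_config

    config = get_ontology_env_config()  # lru_cache makes this a cached singleton
    print(config.to_dict())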
cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py}

@@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
     FindClosestMatchError,
     GetSubgraphError,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
 
 logger = get_logger("OntologyAdapter")
 
 
-class AttachedOntologyNode:
-    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+class RDFLibOntologyResolver(BaseOntologyResolver):
+    """RDFLib-based ontology resolver implementation.
 
-    def __init__(self, uri: Any, category: str):
-        self.uri = uri
-        self.name = self._extract_name(uri)
-        self.category = category
+    This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
+    It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
+    """
 
-    @staticmethod
-    def _extract_name(uri: Any) -> str:
-        uri_str = str(uri)
-        if "#" in uri_str:
-            return uri_str.split("#")[-1]
-        return uri_str.rstrip("/").split("/")[-1]
-
-    def __repr__(self):
-        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
-
-
-class OntologyResolver:
-    def __init__(self, ontology_file: Optional[str] = None):
+    def __init__(
+        self,
+        ontology_file: Optional[str] = None,
+        matching_strategy: Optional[MatchingStrategy] = None,
+    ) -> None:
+        super().__init__(matching_strategy)
         self.ontology_file = ontology_file
         try:
             if ontology_file and os.path.exists(ontology_file):
@@ -60,7 +55,7 @@ class OntologyResolver:
         name = uri_str.rstrip("/").split("/")[-1]
         return name.lower().replace(" ", "_").strip()
 
-    def build_lookup(self):
+    def build_lookup(self) -> None:
         try:
             classes: Dict[str, URIRef] = {}
             individuals: Dict[str, URIRef] = {}
@@ -97,7 +92,7 @@ class OntologyResolver:
             logger.error("Failed to build lookup dictionary: %s", str(e))
             raise RuntimeError("Lookup build failed") from e
 
-    def refresh_lookup(self):
+    def refresh_lookup(self) -> None:
         self.build_lookup()
         logger.info("Ontology lookup refreshed.")
 
@@ -105,13 +100,8 @@ class OntologyResolver:
         try:
             normalized_name = name.lower().replace(" ", "_").strip()
             possible_matches = list(self.lookup.get(category, {}).keys())
-            if normalized_name in possible_matches:
-                return normalized_name
 
-            best_match = difflib.get_close_matches(
-                normalized_name, possible_matches, n=1, cutoff=0.8
-            )
-            return best_match[0] if best_match else None
+            return self.matching_strategy.find_match(normalized_name, possible_matches)
         except Exception as e:
             logger.error("Error in find_closest_match: %s", str(e))
             raise FindClosestMatchError() from e
@@ -125,7 +115,9 @@ class OntologyResolver:
 
     def get_subgraph(
         self, node_name: str, node_type: str = "individuals", directed: bool = True
-    ) -> Tuple[
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
         nodes_set = set()
         edges: List[Tuple[str, str, str]] = []
         visited = set()
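Putting the refactor together, a hypothetical wiring of the renamed resolver with a custom strategy (the import path is inferred from the renamed file; the cutoff, file name, and node name are illustrative):

    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

    resolver = RDFLibOntologyResolver(
        ontology_file="ontology.owl",
        matching_strategy=FuzzyMatchingStrategy(cutoff=0.9),  # stricter than the 0.8 default
    )
    nodes, edges, start_node = resolver.get_subgraph("john_doe", node_type="individuals")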
cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py

@@ -11,6 +11,19 @@ from cognee.modules.data.methods import (
 
 
 async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
+    """
+    Handles dataset creation and authorization for Cognee.
+    Verifies that the provided user has the necessary permission for the given Dataset.
+    If the Dataset does not exist, creates it and grants permission to the creating user.
+
+    Args:
+        dataset_id: Id of the dataset.
+        dataset_name: Name of the dataset.
+        user: Cognee User the request is processed for; if None, the default user is used.
+
+    Returns:
+        Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
+    """
     if not user:
         user = await get_default_user()
 
cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py

@@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
         datasets: Dataset names or Dataset UUID (in case Datasets already exist)
 
     Returns:
-
+        Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
     """
     # If no user is provided use default user
     if user is None:
cognee/modules/pipelines/models/PipelineRunInfo.py

@@ -1,6 +1,7 @@
-from typing import Any, Optional
+from typing import Any, Optional, List, Union
 from uuid import UUID
 from pydantic import BaseModel
+from cognee.modules.data.models.Data import Data
 
 
 class PipelineRunInfo(BaseModel):
@@ -8,11 +9,15 @@ class PipelineRunInfo(BaseModel):
     pipeline_run_id: UUID
     dataset_id: UUID
     dataset_name: str
-    payload: Optional[Any] = None
+    # Data must be mentioned in typing to allow custom encoders for Data to be activated
+    payload: Optional[Union[Any, List[Data]]] = None
     data_ingestion_info: Optional[list] = None
 
     model_config = {
         "arbitrary_types_allowed": True,
+        "from_attributes": True,
+        # Add custom encoding handler for Data ORM model
+        "json_encoders": {Data: lambda d: d.to_json()},
     }
 
 
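The comment in the hunk is the point of the change: mentioning Data in the payload annotation, together with the json_encoders entry, routes Data ORM objects through their own to_json method when run info is serialized. A rough sketch (the run_id, dataset_id, and data_item variables are placeholders, and any other required model fields are elided):

    info = PipelineRunInfo(
        pipeline_run_id=run_id,
        dataset_id=dataset_id,
        dataset_name="my_dataset",
        payload=[data_item],  # Data ORM instances
    )
    info.model_dump_json()  # each Data item is encoded via Data.to_json()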
cognee/modules/retrieval/graph_completion_context_extension_retriever.py

@@ -48,7 +48,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         query: str,
         context: Optional[List[Edge]] = None,
         context_extension_rounds=4,
-    ) -> str:
+    ) -> List[str]:
         """
         Extends the context for a given query by retrieving related triplets and generating new
         completions based on them.
cognee/modules/retrieval/temporal_retriever.py

@@ -113,7 +113,7 @@ class TemporalRetriever(GraphCompletionRetriever):
             logger.info(
                 "No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
             )
-            triplets = await self.
+            triplets = await self.get_triplets(query)
             return await self.resolve_edges_to_text(triplets)
 
         if ids:
@@ -122,7 +122,7 @@ class TemporalRetriever(GraphCompletionRetriever):
             logger.info(
                 "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
             )
-            triplets = await self.
+            triplets = await self.get_triplets(query)
             return await self.resolve_edges_to_text(triplets)
 
         vector_engine = get_vector_engine()
@@ -136,7 +136,7 @@ class TemporalRetriever(GraphCompletionRetriever):
 
         return self.descriptions_to_string(top_k_events)
 
-    async def get_completion(self, query: str, context: Optional[str] = None) -> str:
+    async def get_completion(self, query: str, context: Optional[str] = None) -> List[str]:
         """Generates a response using the query and optional context."""
         if not context:
             context = await self.get_context(query=query)
cognee/modules/search/methods/get_search_type_tools.py

@@ -1,3 +1,4 @@
+import os
 from typing import Callable, List, Optional, Type
 
 from cognee.modules.engine.models.node_set import NodeSet
@@ -160,6 +161,12 @@ async def get_search_type_tools(
     if query_type is SearchType.FEELING_LUCKY:
         query_type = await select_search_type(query_text)
 
+    if (
+        query_type in [SearchType.CYPHER, SearchType.NATURAL_LANGUAGE]
+        and os.getenv("ALLOW_CYPHER_QUERY", "true").lower() == "false"
+    ):
+        raise UnsupportedSearchTypeError("Cypher query search types are disabled.")
+
     search_type_tools = search_tasks.get(query_type)
 
     if not search_type_tools:
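The new guard lets operators turn off graph-query search types via the environment; a minimal sketch:

    import os

    os.environ["ALLOW_CYPHER_QUERY"] = "false"
    # Searches with SearchType.CYPHER or SearchType.NATURAL_LANGUAGE now raise
    # UnsupportedSearchTypeError instead of reaching the graph database.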
cognee/modules/search/methods/search.py

@@ -136,12 +136,19 @@ async def search(
     if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
         return_value = []
         for search_result in search_results:
-
+            prepared_search_results = await prepare_search_result(search_result)
+
+            result = prepared_search_results["result"]
+            graphs = prepared_search_results["graphs"]
+            context = prepared_search_results["context"]
+            datasets = prepared_search_results["datasets"]
+
             return_value.append(
                 {
-                    "search_result": result,
+                    "search_result": [result] if result else None,
                     "dataset_id": datasets[0].id,
                     "dataset_name": datasets[0].name,
+                    "graphs": graphs,
                 }
             )
         return return_value
@@ -155,14 +162,6 @@ async def search(
         return return_value[0]
     else:
         return return_value
-    # return [
-    #     SearchResult(
-    #         search_result=result,
-    #         dataset_id=datasets[min(index, len(datasets) - 1)].id if datasets else None,
-    #         dataset_name=datasets[min(index, len(datasets) - 1)].name if datasets else None,
-    #     )
-    #     for index, (result, _, datasets) in enumerate(search_results)
-    # ]
 
 
 async def authorized_search(
@@ -208,11 +207,11 @@ async def authorized_search(
     context = {}
     datasets: List[Dataset] = []
 
-    for _, search_context,
-        for dataset in
+    for _, search_context, search_datasets in search_responses:
+        for dataset in search_datasets:
             context[str(dataset.id)] = search_context
 
-        datasets.extend(
+        datasets.extend(search_datasets)
 
     specific_search_tools = await get_search_type_tools(
         query_type=query_type,
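With ENABLE_BACKEND_ACCESS_CONTROL on, each entry in the returned list now carries the prepared graph alongside the list-wrapped result; roughly this shape (values illustrative):

    {
        "search_result": ["<completion text>"],
        "dataset_id": UUID("..."),
        "dataset_name": "my_dataset",
        "graphs": {"my_dataset": {"nodes": [...], "edges": [...]}},
    }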
cognee/modules/search/utils/prepare_search_result.py

@@ -1,40 +1,62 @@
 from typing import List, cast
+from uuid import uuid5, NAMESPACE_OID
 
 from cognee.modules.graph.utils import resolve_edges_to_text
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+from cognee.modules.search.types.SearchResult import SearchResultDataset
 from cognee.modules.search.utils.transform_context_to_graph import transform_context_to_graph
+from cognee.modules.search.utils.transform_insights_to_graph import transform_insights_to_graph
 
 
 async def prepare_search_result(search_result):
-
+    results, context, datasets = search_result
 
     graphs = None
     result_graph = None
     context_texts = {}
 
-    if isinstance(
+    if isinstance(datasets, list) and len(datasets) == 0:
+        datasets = [
+            SearchResultDataset(
+                id=uuid5(NAMESPACE_OID, "*"),
+                name="all available datasets",
+            )
+        ]
+
+    if (
+        isinstance(context, List)
+        and len(context) > 0
+        and isinstance(context[0], tuple)
+        and context[0][1].get("relationship_name")
+    ):
+        context_graph = transform_insights_to_graph(context)
+        graphs = {
+            ", ".join([dataset.name for dataset in datasets]): context_graph,
+        }
+        results = None
+    elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], Edge):
         context_graph = transform_context_to_graph(context)
 
         graphs = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): context_graph,
         }
         context_texts = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): await resolve_edges_to_text(context),
         }
     elif isinstance(context, str):
         context_texts = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): context,
         }
     elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], str):
         context_texts = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): "\n".join(cast(List[str], context)),
         }
 
-    if isinstance(
-        result_graph = transform_context_to_graph(
+    if isinstance(results, List) and len(results) > 0 and isinstance(results[0], Edge):
+        result_graph = transform_context_to_graph(results)
 
     return {
-        "result": result_graph or
+        "result": result_graph or results[0] if results and len(results) == 1 else results,
         "graphs": graphs,
         "context": context_texts,
         "datasets": datasets,
cognee/modules/search/utils/transform_context_to_graph.py

@@ -14,7 +14,7 @@ def transform_context_to_graph(context: List[Edge]):
             if "name" in triplet.node1.attributes
             else triplet.node1.id,
             "type": triplet.node1.attributes["type"],
-            "attributes": triplet.
+            "attributes": triplet.node1.attributes,
         }
         nodes[triplet.node2.id] = {
             "id": triplet.node2.id,
cognee/modules/search/utils/transform_insights_to_graph.py

@@ -0,0 +1,28 @@
+from typing import Dict, List, Tuple
+
+
+def transform_insights_to_graph(context: List[Tuple[Dict, Dict, Dict]]):
+    nodes = {}
+    edges = {}
+
+    for triplet in context:
+        nodes[triplet[0]["id"]] = {
+            "id": triplet[0]["id"],
+            "label": triplet[0]["name"] if "name" in triplet[0] else triplet[0]["id"],
+            "type": triplet[0]["type"],
+        }
+        nodes[triplet[2]["id"]] = {
+            "id": triplet[2]["id"],
+            "label": triplet[2]["name"] if "name" in triplet[2] else triplet[2]["id"],
+            "type": triplet[2]["type"],
+        }
+        edges[f"{triplet[0]['id']}_{triplet[1]['relationship_name']}_{triplet[2]['id']}"] = {
+            "source": triplet[0]["id"],
+            "target": triplet[2]["id"],
+            "label": triplet[1]["relationship_name"],
+        }
+
+    return {
+        "nodes": list(nodes.values()),
+        "edges": list(edges.values()),
+    }
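To make the expected triplet shape concrete, a small illustrative call (all values invented):

    triplet = (
        {"id": "n1", "name": "Alice", "type": "Person"},
        {"relationship_name": "works_at"},
        {"id": "n2", "name": "Acme", "type": "Company"},
    )
    transform_insights_to_graph([triplet])
    # -> {"nodes": [{"id": "n1", "label": "Alice", "type": "Person"},
    #               {"id": "n2", "label": "Acme", "type": "Company"}],
    #     "edges": [{"source": "n1", "target": "n2", "label": "works_at"}]}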
cognee/modules/users/methods/create_user.py

@@ -1,9 +1,10 @@
-from uuid import uuid4
+from uuid import UUID, uuid4
 from fastapi_users.exceptions import UserAlreadyExists
+from sqlalchemy.ext.asyncio import AsyncSession
 
 from cognee.infrastructure.databases.relational import get_relational_engine
-from cognee.modules.notebooks.
-from cognee.modules.notebooks.
+from cognee.modules.notebooks.models.Notebook import Notebook
+from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook
 from cognee.modules.users.exceptions import TenantNotFoundError
 from cognee.modules.users.get_user_manager import get_user_manager_context
 from cognee.modules.users.get_user_db import get_user_db_context
@@ -60,27 +61,6 @@ async def create_user(
         if auto_login:
             await session.refresh(user)
 
-        await create_notebook(
-            user_id=user.id,
-            notebook_name="Welcome to cognee 🧠",
-            cells=[
-                NotebookCell(
-                    id=uuid4(),
-                    name="Welcome",
-                    content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
-                    type="markdown",
-                ),
-                NotebookCell(
-                    id=uuid4(),
-                    name="Example",
-                    content="",
-                    type="code",
-                ),
-            ],
-            deletable=False,
-            session=session,
-        )
-
         return user
     except UserAlreadyExists as error:
         print(f"User {email} already exists")
cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py

@@ -9,6 +9,18 @@ from uuid import UUID
 async def authorized_give_permission_on_datasets(
     principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
 ):
+    """
+    Give a user permission on certain datasets.
+    The request owner must have the necessary permission to share the datasets.
+    Args:
+        principal_id: Id of the user the datasets are shared with
+        dataset_ids: Ids of the datasets to share
+        permission_name: Name of the permission to give
+        owner_id: Id of the request owner
+
+    Returns:
+        None
+    """
     # If only a single dataset UUID is provided transform it to a list
     if not isinstance(dataset_ids, list):
         dataset_ids = [dataset_ids]
cognee/modules/users/permissions/methods/check_permission_on_dataset.py

@@ -10,6 +10,17 @@ logger = get_logger()
 
 
 async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
+    """
+    Check if a user has a specific permission on a dataset.
+    Args:
+        user: User whose permission is checked
+        permission_type: Type of permission to check
+        dataset_id: Id of the dataset
+
+    Returns:
+        None
+
+    """
     if user is None:
         user = await get_default_user()
 
cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py

@@ -1,3 +1,5 @@
+from types import SimpleNamespace
+
 from cognee.shared.logging_utils import get_logger
 
 from ...models.User import User
@@ -9,6 +11,16 @@ logger = get_logger()
 
 
 async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
+    """
+    Return a list of datasets the user has permission for.
+    If the user is part of a tenant, also return datasets their roles have permission for.
+    Args:
+        user
+        permission_type
+
+    Returns:
+        list[Dataset]: List of datasets the user has permission for
+    """
     datasets = list()
     # Get all datasets User has explicit access to
     datasets.extend(await get_principal_datasets(user, permission_type))
@@ -17,9 +29,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
     # Get all datasets all tenants have access to
     tenant = await get_tenant(user.tenant_id)
     datasets.extend(await get_principal_datasets(tenant, permission_type))
+
     # Get all datasets Users roles have access to
-
-
+    if isinstance(user, SimpleNamespace):
+        # If simple namespace use roles defined in user
+        roles = user.roles
+    else:
+        roles = await user.awaitable_attrs.roles
+    for role in roles:
         datasets.extend(await get_principal_datasets(role, permission_type))
 
     # Deduplicate datasets with same ID