cognee 0.3.4.dev3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/notebooks/methods/create_notebook.py +3 -1
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/methods/create_user.py +0 -2
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/RECORD +176 -162
- distributed/pyproject.toml +0 -1
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev3.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -89,7 +89,6 @@ async def get_memory_fragment(
|
|
|
89
89
|
|
|
90
90
|
async def brute_force_triplet_search(
|
|
91
91
|
query: str,
|
|
92
|
-
user: User,
|
|
93
92
|
top_k: int = 5,
|
|
94
93
|
collections: Optional[List[str]] = None,
|
|
95
94
|
properties_to_project: Optional[List[str]] = None,
|
|
@@ -102,7 +101,6 @@ async def brute_force_triplet_search(
|
|
|
102
101
|
|
|
103
102
|
Args:
|
|
104
103
|
query (str): The search query.
|
|
105
|
-
user (User): The user performing the search.
|
|
106
104
|
top_k (int): The number of top results to retrieve.
|
|
107
105
|
collections (Optional[List[str]]): List of collections to query.
|
|
108
106
|
properties_to_project (Optional[List[str]]): List of properties to project.
|
|
@@ -139,12 +137,10 @@ async def brute_force_triplet_search(
|
|
|
139
137
|
|
|
140
138
|
query_vector = (await vector_engine.embedding_engine.embed_text([query]))[0]
|
|
141
139
|
|
|
142
|
-
send_telemetry("cognee.brute_force_triplet_search EXECUTION STARTED", user.id)
|
|
143
|
-
|
|
144
140
|
async def search_in_collection(collection_name: str):
|
|
145
141
|
try:
|
|
146
142
|
return await vector_engine.search(
|
|
147
|
-
collection_name=collection_name, query_vector=query_vector, limit=
|
|
143
|
+
collection_name=collection_name, query_vector=query_vector, limit=None
|
|
148
144
|
)
|
|
149
145
|
except CollectionNotFoundError:
|
|
150
146
|
return []
|
|
@@ -176,20 +172,14 @@ async def brute_force_triplet_search(
|
|
|
176
172
|
|
|
177
173
|
results = await memory_fragment.calculate_top_triplet_importances(k=top_k)
|
|
178
174
|
|
|
179
|
-
send_telemetry("cognee.brute_force_triplet_search EXECUTION COMPLETED", user.id)
|
|
180
|
-
|
|
181
175
|
return results
|
|
182
176
|
|
|
183
177
|
except CollectionNotFoundError:
|
|
184
178
|
return []
|
|
185
179
|
except Exception as error:
|
|
186
180
|
logger.error(
|
|
187
|
-
"Error during brute force search for
|
|
188
|
-
user.id,
|
|
181
|
+
"Error during brute force search for query: %s. Error: %s",
|
|
189
182
|
query,
|
|
190
183
|
error,
|
|
191
184
|
)
|
|
192
|
-
send_telemetry(
|
|
193
|
-
"cognee.brute_force_triplet_search EXECUTION FAILED", user.id, {"error": str(error)}
|
|
194
|
-
)
|
|
195
185
|
raise error
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from typing import Optional
|
|
2
2
|
from cognee.infrastructure.llm.LLMGateway import LLMGateway
|
|
3
|
+
from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
|
|
3
4
|
|
|
4
5
|
|
|
5
6
|
async def generate_completion(
|
|
@@ -11,10 +12,8 @@ async def generate_completion(
|
|
|
11
12
|
) -> str:
|
|
12
13
|
"""Generates a completion using LLM with given context and prompts."""
|
|
13
14
|
args = {"question": query, "context": context}
|
|
14
|
-
user_prompt =
|
|
15
|
-
system_prompt = (
|
|
16
|
-
system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
|
|
17
|
-
)
|
|
15
|
+
user_prompt = render_prompt(user_prompt_path, args)
|
|
16
|
+
system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
|
|
18
17
|
|
|
19
18
|
return await LLMGateway.acreate_structured_output(
|
|
20
19
|
text_input=user_prompt,
|
|
@@ -29,9 +28,7 @@ async def summarize_text(
|
|
|
29
28
|
system_prompt: str = None,
|
|
30
29
|
) -> str:
|
|
31
30
|
"""Summarizes text using LLM with the specified prompt."""
|
|
32
|
-
system_prompt = (
|
|
33
|
-
system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
|
|
34
|
-
)
|
|
31
|
+
system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
|
|
35
32
|
|
|
36
33
|
return await LLMGateway.acreate_structured_output(
|
|
37
34
|
text_input=text,
|
|
@@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
|
|
|
15
15
|
from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
|
|
16
16
|
from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
|
|
17
17
|
from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
|
|
18
|
+
from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
|
|
18
19
|
from cognee.modules.retrieval.graph_summary_completion_retriever import (
|
|
19
20
|
GraphSummaryCompletionRetriever,
|
|
20
21
|
)
|
|
@@ -152,6 +153,12 @@ async def get_search_type_tools(
|
|
|
152
153
|
TemporalRetriever(top_k=top_k).get_completion,
|
|
153
154
|
TemporalRetriever(top_k=top_k).get_context,
|
|
154
155
|
],
|
|
156
|
+
SearchType.CHUNKS_LEXICAL: (
|
|
157
|
+
lambda _r=JaccardChunksRetriever(top_k=top_k): [
|
|
158
|
+
_r.get_completion,
|
|
159
|
+
_r.get_context,
|
|
160
|
+
]
|
|
161
|
+
)(),
|
|
155
162
|
SearchType.CODING_RULES: [
|
|
156
163
|
CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
|
|
157
164
|
],
|
|
@@ -35,7 +35,7 @@ async def no_access_control_search(
|
|
|
35
35
|
[get_completion, get_context] = search_tools
|
|
36
36
|
|
|
37
37
|
if only_context:
|
|
38
|
-
return await get_context(query_text)
|
|
38
|
+
return None, await get_context(query_text), []
|
|
39
39
|
|
|
40
40
|
context = await get_context(query_text)
|
|
41
41
|
result = await get_completion(query_text, context)
|
|
@@ -19,7 +19,9 @@ from cognee.modules.search.types import (
|
|
|
19
19
|
from cognee.modules.search.operations import log_query, log_result
|
|
20
20
|
from cognee.modules.users.models import User
|
|
21
21
|
from cognee.modules.data.models import Dataset
|
|
22
|
-
from cognee.modules.
|
|
22
|
+
from cognee.modules.data.methods.get_authorized_existing_datasets import (
|
|
23
|
+
get_authorized_existing_datasets,
|
|
24
|
+
)
|
|
23
25
|
|
|
24
26
|
from .get_search_type_tools import get_search_type_tools
|
|
25
27
|
from .no_access_control_search import no_access_control_search
|
|
@@ -143,20 +145,35 @@ async def search(
|
|
|
143
145
|
context = prepared_search_results["context"]
|
|
144
146
|
datasets = prepared_search_results["datasets"]
|
|
145
147
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
148
|
+
if only_context:
|
|
149
|
+
return_value.append(
|
|
150
|
+
{
|
|
151
|
+
"search_result": [context] if context else None,
|
|
152
|
+
"dataset_id": datasets[0].id,
|
|
153
|
+
"dataset_name": datasets[0].name,
|
|
154
|
+
"graphs": graphs,
|
|
155
|
+
}
|
|
156
|
+
)
|
|
157
|
+
else:
|
|
158
|
+
return_value.append(
|
|
159
|
+
{
|
|
160
|
+
"search_result": [result] if result else None,
|
|
161
|
+
"dataset_id": datasets[0].id,
|
|
162
|
+
"dataset_name": datasets[0].name,
|
|
163
|
+
"graphs": graphs,
|
|
164
|
+
}
|
|
165
|
+
)
|
|
154
166
|
return return_value
|
|
155
167
|
else:
|
|
156
168
|
return_value = []
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
169
|
+
if only_context:
|
|
170
|
+
for search_result in search_results:
|
|
171
|
+
prepared_search_results = await prepare_search_result(search_result)
|
|
172
|
+
return_value.append(prepared_search_results["context"])
|
|
173
|
+
else:
|
|
174
|
+
for search_result in search_results:
|
|
175
|
+
result, context, datasets = search_result
|
|
176
|
+
return_value.append(result)
|
|
160
177
|
# For maintaining backwards compatibility
|
|
161
178
|
if len(return_value) == 1 and isinstance(return_value[0], list):
|
|
162
179
|
return return_value[0]
|
|
@@ -187,7 +204,9 @@ async def authorized_search(
|
|
|
187
204
|
Not to be used outside of active access control mode.
|
|
188
205
|
"""
|
|
189
206
|
# Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
|
|
190
|
-
search_datasets = await
|
|
207
|
+
search_datasets = await get_authorized_existing_datasets(
|
|
208
|
+
datasets=dataset_ids, permission_type="read", user=user
|
|
209
|
+
)
|
|
191
210
|
|
|
192
211
|
if use_combined_context:
|
|
193
212
|
search_responses = await search_in_datasets_context(
|
|
@@ -9,6 +9,18 @@ from uuid import UUID
|
|
|
9
9
|
async def authorized_give_permission_on_datasets(
|
|
10
10
|
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
|
|
11
11
|
):
|
|
12
|
+
"""
|
|
13
|
+
Give permission to certain datasets to a user.
|
|
14
|
+
The request owner must have the necessary permission to share the datasets.
|
|
15
|
+
Args:
|
|
16
|
+
principal_id: Id of user to whom datasets are shared
|
|
17
|
+
dataset_ids: Ids of datasets to share
|
|
18
|
+
permission_name: Name of permission to give
|
|
19
|
+
owner_id: Id of the request owner
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
None
|
|
23
|
+
"""
|
|
12
24
|
# If only a single dataset UUID is provided transform it to a list
|
|
13
25
|
if not isinstance(dataset_ids, list):
|
|
14
26
|
dataset_ids = [dataset_ids]
|
|
@@ -10,6 +10,17 @@ logger = get_logger()
|
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
|
|
13
|
+
"""
|
|
14
|
+
Check if a user has a specific permission on a dataset.
|
|
15
|
+
Args:
|
|
16
|
+
user: User whose permission is checked
|
|
17
|
+
permission_type: Type of permission to check
|
|
18
|
+
dataset_id: Id of the dataset
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
None
|
|
22
|
+
|
|
23
|
+
"""
|
|
13
24
|
if user is None:
|
|
14
25
|
user = await get_default_user()
|
|
15
26
|
|
|
@@ -11,6 +11,16 @@ logger = get_logger()
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
|
|
14
|
+
"""
|
|
15
|
+
Return a list of datasets the user has permission for.
|
|
16
|
+
If the user is part of a tenant, return datasets his roles have permission for.
|
|
17
|
+
Args:
|
|
18
|
+
user
|
|
19
|
+
permission_type
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
list[Dataset]: List of datasets user has permission for
|
|
23
|
+
"""
|
|
14
24
|
datasets = list()
|
|
15
25
|
# Get all datasets User has explicit access to
|
|
16
26
|
datasets.extend(await get_principal_datasets(user, permission_type))
|
|
@@ -8,6 +8,16 @@ from ...models import ACL, Permission
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
|
|
11
|
+
"""
|
|
12
|
+
Return a list of documents ids for which the user has read permission.
|
|
13
|
+
If datasets are specified, return only documents from those datasets.
|
|
14
|
+
Args:
|
|
15
|
+
user_id: Id of the user
|
|
16
|
+
datasets: List of datasets
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
list[str]: List of documents for which the user has read permission
|
|
20
|
+
"""
|
|
11
21
|
db_engine = get_relational_engine()
|
|
12
22
|
|
|
13
23
|
async with db_engine.get_async_session() as session:
|
|
@@ -6,6 +6,15 @@ from ...models.Principal import Principal
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
async def get_principal(principal_id: UUID):
|
|
9
|
+
"""
|
|
10
|
+
Return information about a user based on their id
|
|
11
|
+
Args:
|
|
12
|
+
principal_id: Id of the user
|
|
13
|
+
|
|
14
|
+
Returns:
|
|
15
|
+
principal: Information about the user (principal)
|
|
16
|
+
|
|
17
|
+
"""
|
|
9
18
|
db_engine = get_relational_engine()
|
|
10
19
|
|
|
11
20
|
async with db_engine.get_async_session() as session:
|
|
@@ -9,6 +9,17 @@ from ...models.ACL import ACL
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
|
|
12
|
+
"""
|
|
13
|
+
Return a list of datasets for which the user (principal) has a certain permission.
|
|
14
|
+
Args:
|
|
15
|
+
principal: Information about the user
|
|
16
|
+
permission_type: Type of permission
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
list[Dataset]: List of datasets for which the user (principal)
|
|
20
|
+
has the permission (permission_type).
|
|
21
|
+
|
|
22
|
+
"""
|
|
12
23
|
db_engine = get_relational_engine()
|
|
13
24
|
|
|
14
25
|
async with db_engine.get_async_session() as session:
|
|
@@ -9,6 +9,16 @@ from ...models.Role import Role
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
async def get_role(tenant_id: UUID, role_name: str):
|
|
12
|
+
"""
|
|
13
|
+
Return the role with the name role_name of the given tenant.
|
|
14
|
+
Args:
|
|
15
|
+
tenant_id: Id of the given tenant
|
|
16
|
+
role_name: Name of the role
|
|
17
|
+
|
|
18
|
+
Returns
|
|
19
|
+
The role for the given tenant.
|
|
20
|
+
|
|
21
|
+
"""
|
|
12
22
|
db_engine = get_relational_engine()
|
|
13
23
|
|
|
14
24
|
async with db_engine.get_async_session() as session:
|
|
@@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
|
|
|
15
15
|
Return a list of datasets user has given permission for. If a list of datasets is provided,
|
|
16
16
|
verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
|
|
17
17
|
Args:
|
|
18
|
-
user_id:
|
|
19
|
-
permission_type:
|
|
20
|
-
dataset_ids:
|
|
18
|
+
user_id: Id of the user.
|
|
19
|
+
permission_type: Type of the permission.
|
|
20
|
+
dataset_ids: Ids of the provided datasets
|
|
21
21
|
|
|
22
22
|
Returns:
|
|
23
23
|
list[Dataset]: List of datasets user has permission for
|
|
@@ -8,6 +8,15 @@ from ...models.Tenant import Tenant
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
async def get_tenant(tenant_id: UUID):
|
|
11
|
+
"""
|
|
12
|
+
Return information about the tenant based on the given id.
|
|
13
|
+
Args:
|
|
14
|
+
tenant_id: Id of the given tenant
|
|
15
|
+
|
|
16
|
+
Returns
|
|
17
|
+
Information about the given tenant.
|
|
18
|
+
|
|
19
|
+
"""
|
|
11
20
|
db_engine = get_relational_engine()
|
|
12
21
|
|
|
13
22
|
async with db_engine.get_async_session() as session:
|
|
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
|
|
19
|
+
"""
|
|
20
|
+
Give the permission with given name to the role with the given id as a default permission.
|
|
21
|
+
Args:
|
|
22
|
+
role_id: Id of the role
|
|
23
|
+
permission_name: Name of the permission
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
19
28
|
db_engine = get_relational_engine()
|
|
20
29
|
|
|
21
30
|
async with db_engine.get_async_session() as session:
|
|
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
|
|
19
|
+
"""
|
|
20
|
+
Give the permission with given name to the tenant with the given id as a default permission.
|
|
21
|
+
Args:
|
|
22
|
+
tenant_id: Id of the tenant
|
|
23
|
+
permission_name: Name of the permission
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
19
28
|
db_engine = get_relational_engine()
|
|
20
29
|
async with db_engine.get_async_session() as session:
|
|
21
30
|
tenant = (
|
|
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
|
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
|
|
19
|
+
"""
|
|
20
|
+
Give the permission with given name to the user with the given id as a default permission.
|
|
21
|
+
Args:
|
|
22
|
+
user_id: Id of the tenant
|
|
23
|
+
permission_name: Name of the permission
|
|
24
|
+
|
|
25
|
+
Returns:
|
|
26
|
+
None
|
|
27
|
+
"""
|
|
19
28
|
db_engine = get_relational_engine()
|
|
20
29
|
async with db_engine.get_async_session() as session:
|
|
21
30
|
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
|
|
@@ -24,6 +24,16 @@ async def give_permission_on_dataset(
|
|
|
24
24
|
dataset_id: UUID,
|
|
25
25
|
permission_name: str,
|
|
26
26
|
):
|
|
27
|
+
"""
|
|
28
|
+
Give a specific permission on a dataset to a user.
|
|
29
|
+
Args:
|
|
30
|
+
principal: User who is being given the permission on the dataset
|
|
31
|
+
dataset_id: Id of the dataset
|
|
32
|
+
permission_name: Name of permission to give
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
None
|
|
36
|
+
"""
|
|
27
37
|
db_engine = get_relational_engine()
|
|
28
38
|
|
|
29
39
|
async with db_engine.get_async_session() as session:
|
|
@@ -21,6 +21,17 @@ from cognee.modules.users.models import (
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
|
|
24
|
+
"""
|
|
25
|
+
Add a user with the given id to the role with the given id.
|
|
26
|
+
Args:
|
|
27
|
+
user_id: Id of the user.
|
|
28
|
+
role_id: Id of the role.
|
|
29
|
+
owner_id: Id of the request owner.
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
None
|
|
33
|
+
|
|
34
|
+
"""
|
|
24
35
|
db_engine = get_relational_engine()
|
|
25
36
|
async with db_engine.get_async_session() as session:
|
|
26
37
|
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
|
|
@@ -15,7 +15,17 @@ from cognee.modules.users.models import (
|
|
|
15
15
|
async def create_role(
|
|
16
16
|
role_name: str,
|
|
17
17
|
owner_id: UUID,
|
|
18
|
-
):
|
|
18
|
+
) -> UUID:
|
|
19
|
+
"""
|
|
20
|
+
Create a new role with the given name, if the request owner with the given id
|
|
21
|
+
has the necessary permission.
|
|
22
|
+
Args:
|
|
23
|
+
role_name: Name of the new role.
|
|
24
|
+
owner_id: Id of the request owner.
|
|
25
|
+
|
|
26
|
+
Returns:
|
|
27
|
+
None
|
|
28
|
+
"""
|
|
19
29
|
db_engine = get_relational_engine()
|
|
20
30
|
async with db_engine.get_async_session() as session:
|
|
21
31
|
user = await get_user(owner_id)
|
|
@@ -35,3 +45,4 @@ async def create_role(
|
|
|
35
45
|
|
|
36
46
|
await session.commit()
|
|
37
47
|
await session.refresh(role)
|
|
48
|
+
return role.id
|
|
@@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
|
|
16
|
+
"""
|
|
17
|
+
Add a user with the given id to the tenant with the given id.
|
|
18
|
+
This can only be successful if the request owner with the given id is the tenant owner.
|
|
19
|
+
Args:
|
|
20
|
+
user_id: Id of the user.
|
|
21
|
+
tenant_id: Id of the tenant.
|
|
22
|
+
owner_id: Id of the request owner.
|
|
23
|
+
|
|
24
|
+
Returns:
|
|
25
|
+
None
|
|
26
|
+
|
|
27
|
+
"""
|
|
16
28
|
db_engine = get_relational_engine()
|
|
17
29
|
async with db_engine.get_async_session() as session:
|
|
18
30
|
user = await get_user(user_id)
|
|
@@ -7,7 +7,17 @@ from cognee.modules.users.models import Tenant
|
|
|
7
7
|
from cognee.modules.users.methods import get_user
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
async def create_tenant(tenant_name: str, user_id: UUID):
|
|
10
|
+
async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
|
|
11
|
+
"""
|
|
12
|
+
Create a new tenant with the given name, for the user with the given id.
|
|
13
|
+
This user is the owner of the tenant.
|
|
14
|
+
Args:
|
|
15
|
+
tenant_name: Name of the new tenant.
|
|
16
|
+
user_id: Id of the user.
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
UUID: Id of the newly created tenant.
|
|
20
|
+
"""
|
|
11
21
|
db_engine = get_relational_engine()
|
|
12
22
|
async with db_engine.get_async_session() as session:
|
|
13
23
|
try:
|
|
@@ -24,5 +34,6 @@ async def create_tenant(tenant_name: str, user_id: UUID):
|
|
|
24
34
|
user.tenant_id = tenant.id
|
|
25
35
|
await session.merge(user)
|
|
26
36
|
await session.commit()
|
|
37
|
+
return tenant.id
|
|
27
38
|
except IntegrityError:
|
|
28
39
|
raise EntityAlreadyExistsError(message="Tenant already exists.")
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
|
-
import networkx
|
|
4
3
|
|
|
5
4
|
from cognee.shared.logging_utils import get_logger
|
|
6
5
|
from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
|
|
@@ -9,6 +8,8 @@ logger = get_logger()
|
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
async def cognee_network_visualization(graph_data, destination_file_path: str = None):
|
|
11
|
+
import networkx
|
|
12
|
+
|
|
12
13
|
nodes_data, edges_data = graph_data
|
|
13
14
|
|
|
14
15
|
G = networkx.DiGraph()
|
|
@@ -22,6 +23,9 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
22
23
|
"TableRow": "#f47710",
|
|
23
24
|
"TableType": "#6510f4",
|
|
24
25
|
"ColumnValue": "#13613a",
|
|
26
|
+
"SchemaTable": "#f47710",
|
|
27
|
+
"DatabaseSchema": "#6510f4",
|
|
28
|
+
"SchemaRelationship": "#13613a",
|
|
25
29
|
"default": "#D3D3D3",
|
|
26
30
|
}
|
|
27
31
|
|
|
@@ -104,7 +108,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
104
108
|
.nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
|
|
105
109
|
.node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
|
106
110
|
.edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
|
|
107
|
-
|
|
111
|
+
|
|
108
112
|
.tooltip {
|
|
109
113
|
position: absolute;
|
|
110
114
|
text-align: left;
|
|
@@ -166,7 +170,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
166
170
|
// Create tooltip content for edge
|
|
167
171
|
var content = "<strong>Edge Information</strong><br/>";
|
|
168
172
|
content += "Relationship: " + d.relation + "<br/>";
|
|
169
|
-
|
|
173
|
+
|
|
170
174
|
// Show all weights
|
|
171
175
|
if (d.all_weights && Object.keys(d.all_weights).length > 0) {
|
|
172
176
|
content += "<strong>Weights:</strong><br/>";
|
|
@@ -176,23 +180,23 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
|
|
|
176
180
|
} else if (d.weight !== null && d.weight !== undefined) {
|
|
177
181
|
content += "Weight: " + d.weight + "<br/>";
|
|
178
182
|
}
|
|
179
|
-
|
|
183
|
+
|
|
180
184
|
if (d.relationship_type) {
|
|
181
185
|
content += "Type: " + d.relationship_type + "<br/>";
|
|
182
186
|
}
|
|
183
|
-
|
|
187
|
+
|
|
184
188
|
// Add other edge properties
|
|
185
189
|
if (d.edge_info) {
|
|
186
190
|
Object.keys(d.edge_info).forEach(function(key) {
|
|
187
|
-
if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
|
|
188
|
-
key !== 'source_node_id' && key !== 'target_node_id' &&
|
|
189
|
-
key !== 'relationship_name' && key !== 'updated_at' &&
|
|
191
|
+
if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
|
|
192
|
+
key !== 'source_node_id' && key !== 'target_node_id' &&
|
|
193
|
+
key !== 'relationship_name' && key !== 'updated_at' &&
|
|
190
194
|
!key.startsWith('weight_')) {
|
|
191
195
|
content += key + ": " + d.edge_info[key] + "<br/>";
|
|
192
196
|
}
|
|
193
197
|
});
|
|
194
198
|
}
|
|
195
|
-
|
|
199
|
+
|
|
196
200
|
tooltip.html(content)
|
|
197
201
|
.style("left", (d3.event.pageX + 10) + "px")
|
|
198
202
|
.style("top", (d3.event.pageY - 10) + "px")
|
cognee/shared/data_models.py
CHANGED
cognee/shared/utils.py
CHANGED
|
@@ -4,7 +4,6 @@ import os
|
|
|
4
4
|
import ssl
|
|
5
5
|
import requests
|
|
6
6
|
from datetime import datetime, timezone
|
|
7
|
-
import matplotlib.pyplot as plt
|
|
8
7
|
import http.server
|
|
9
8
|
import socketserver
|
|
10
9
|
from threading import Thread
|
|
@@ -30,37 +29,6 @@ def create_secure_ssl_context() -> ssl.SSLContext:
|
|
|
30
29
|
return ssl.create_default_context()
|
|
31
30
|
|
|
32
31
|
|
|
33
|
-
def get_entities(tagged_tokens):
|
|
34
|
-
import nltk
|
|
35
|
-
|
|
36
|
-
nltk.download("maxent_ne_chunker", quiet=True)
|
|
37
|
-
|
|
38
|
-
from nltk.chunk import ne_chunk
|
|
39
|
-
|
|
40
|
-
return ne_chunk(tagged_tokens)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def extract_pos_tags(sentence):
|
|
44
|
-
"""Extract Part-of-Speech (POS) tags for words in a sentence."""
|
|
45
|
-
import nltk
|
|
46
|
-
|
|
47
|
-
# Ensure that the necessary NLTK resources are downloaded
|
|
48
|
-
nltk.download("words", quiet=True)
|
|
49
|
-
nltk.download("punkt", quiet=True)
|
|
50
|
-
nltk.download("averaged_perceptron_tagger", quiet=True)
|
|
51
|
-
|
|
52
|
-
from nltk.tag import pos_tag
|
|
53
|
-
from nltk.tokenize import word_tokenize
|
|
54
|
-
|
|
55
|
-
# Tokenize the sentence into words
|
|
56
|
-
tokens = word_tokenize(sentence)
|
|
57
|
-
|
|
58
|
-
# Tag each word with its corresponding POS tag
|
|
59
|
-
pos_tags = pos_tag(tokens)
|
|
60
|
-
|
|
61
|
-
return pos_tags
|
|
62
|
-
|
|
63
|
-
|
|
64
32
|
def get_anonymous_id():
|
|
65
33
|
"""Creates or reads a anonymous user id"""
|
|
66
34
|
tracking_id = os.getenv("TRACKING_ID", None)
|
|
@@ -7,7 +7,7 @@ from pydantic import BaseModel
|
|
|
7
7
|
from cognee.infrastructure.databases.graph import get_graph_engine
|
|
8
8
|
from cognee.infrastructure.databases.vector import get_vector_engine
|
|
9
9
|
from cognee.infrastructure.engine.models import DataPoint
|
|
10
|
-
from cognee.infrastructure.llm.
|
|
10
|
+
from cognee.infrastructure.llm.extraction import extract_categories
|
|
11
11
|
from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
|
|
12
12
|
|
|
13
13
|
|
|
@@ -40,7 +40,7 @@ async def chunk_naive_llm_classifier(
|
|
|
40
40
|
return data_chunks
|
|
41
41
|
|
|
42
42
|
chunk_classifications = await asyncio.gather(
|
|
43
|
-
*[
|
|
43
|
+
*[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
|
|
44
44
|
)
|
|
45
45
|
|
|
46
46
|
classification_data_points = []
|