cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
- cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
- cognee/api/v1/prune/prune.py +2 -2
- cognee/api/v1/search/search.py +1 -1
- cognee/api/v1/sync/sync.py +16 -5
- cognee/base_config.py +19 -1
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
- cognee/infrastructure/databases/relational/ModelBase.py +2 -1
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
- cognee/infrastructure/databases/vector/config.py +1 -1
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
- cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
- cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
- cognee/infrastructure/files/storage/StorageManager.py +18 -0
- cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
- cognee/infrastructure/utils/run_async.py +9 -4
- cognee/infrastructure/utils/run_sync.py +4 -3
- cognee/modules/cloud/operations/check_api_key.py +4 -1
- cognee/modules/data/deletion/prune_system.py +5 -1
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/notebooks/methods/create_notebook.py +34 -0
- cognee/modules/notebooks/methods/get_notebook.py +2 -2
- cognee/modules/notebooks/methods/get_notebooks.py +27 -1
- cognee/modules/notebooks/methods/update_notebook.py +0 -1
- cognee/modules/notebooks/models/Notebook.py +206 -1
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
- cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -3
- cognee/modules/retrieval/user_qa_feedback.py +1 -1
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/search.py +12 -13
- cognee/modules/search/utils/prepare_search_result.py +31 -9
- cognee/modules/search/utils/transform_context_to_graph.py +1 -1
- cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
- cognee/modules/users/methods/create_user.py +4 -24
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +10 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +10 -0
- cognee/root_dir.py +5 -0
- cognee/shared/cache.py +346 -0
- cognee/shared/utils.py +12 -0
- cognee/tasks/graph/extract_graph_from_data.py +53 -10
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
- cognee/tasks/temporal_graph/models.py +11 -6
- cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
- cognee/tests/test_cognee_server_start.py +4 -4
- cognee/tests/test_temporal_graph.py +6 -34
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
- cognee-0.3.4.dist-info/entry_points.txt +2 -0
- cognee/api/v1/save/save.py +0 -335
- cognee/tests/test_save_export_path.py +0 -116
- cognee-0.3.2.dist-info/entry_points.txt +0 -2
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
cognee/modules/ontology/matching_strategies.py

@@ -0,0 +1,53 @@
+import difflib
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+
+class MatchingStrategy(ABC):
+    """Abstract base class for ontology entity matching strategies."""
+
+    @abstractmethod
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the best match for a given name from a list of candidates.
+
+        Args:
+            name: The name to match
+            candidates: List of candidate names to match against
+
+        Returns:
+            The best matching candidate name, or None if no match found
+        """
+        pass
+
+
+class FuzzyMatchingStrategy(MatchingStrategy):
+    """Fuzzy matching strategy using difflib for approximate string matching."""
+
+    def __init__(self, cutoff: float = 0.8):
+        """Initialize fuzzy matching strategy.
+
+        Args:
+            cutoff: Minimum similarity score (0.0 to 1.0) for a match to be considered valid
+        """
+        self.cutoff = cutoff
+
+    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
+        """Find the closest fuzzy match for a given name.
+
+        Args:
+            name: The normalized name to match
+            candidates: List of normalized candidate names
+
+        Returns:
+            The best matching candidate name, or None if no match meets the cutoff
+        """
+        if not candidates:
+            return None
+
+        # Check for exact match first
+        if name in candidates:
+            return name
+
+        # Find fuzzy match
+        best_match = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
+        return best_match[0] if best_match else None
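For orientation, a minimal usage sketch of the new strategy (not part of the diff; the candidate lists and cutoff are illustrative):

    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy

    strategy = FuzzyMatchingStrategy(cutoff=0.8)
    strategy.find_match("person", ["person", "place"])   # exact hit -> "person"
    strategy.find_match("persn", ["person", "place"])    # fuzzy hit -> "person"
    strategy.find_match("vehicle", ["person", "place"])  # below cutoff -> None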
cognee/modules/ontology/models.py

@@ -0,0 +1,20 @@
+from typing import Any
+
+
+class AttachedOntologyNode:
+    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+
+    def __init__(self, uri: Any, category: str):
+        self.uri = uri
+        self.name = self._extract_name(uri)
+        self.category = category
+
+    @staticmethod
+    def _extract_name(uri: Any) -> str:
+        uri_str = str(uri)
+        if "#" in uri_str:
+            return uri_str.split("#")[-1]
+        return uri_str.rstrip("/").split("/")[-1]
+
+    def __repr__(self):
+        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
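The name extraction prefers the URI fragment and falls back to the last path segment; a quick sketch (example URIs invented):

    node = AttachedOntologyNode("http://example.org/onto#Person", "classes")
    node.name  # -> "Person" (part after "#")

    node = AttachedOntologyNode("http://example.org/onto/Person/", "classes")
    node.name  # -> "Person" (trailing "/" stripped, last path segment kept)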
cognee/modules/ontology/ontology_config.py

@@ -0,0 +1,24 @@
+from typing import TypedDict, Optional
+
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.matching_strategies import MatchingStrategy
+
+
+class OntologyConfig(TypedDict, total=False):
+    """Configuration containing ontology resolver.
+
+    Attributes:
+        ontology_resolver: The ontology resolver instance to use
+    """
+
+    ontology_resolver: Optional[BaseOntologyResolver]
+
+
+class Config(TypedDict, total=False):
+    """Top-level configuration dictionary.
+
+    Attributes:
+        ontology_config: Configuration containing ontology resolver
+    """
+
+    ontology_config: Optional[OntologyConfig]
cognee/modules/ontology/ontology_env_config.py

@@ -0,0 +1,45 @@
+"""This module contains the configuration for ontology handling."""
+
+from functools import lru_cache
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class OntologyEnvConfig(BaseSettings):
+    """
+    Represents the configuration for ontology handling, including parameters for
+    ontology file storage and resolution/matching strategies.
+
+    Public methods:
+    - to_dict
+
+    Instance variables:
+    - ontology_resolver
+    - matching_strategy
+    - ontology_file_path
+    - model_config
+    """
+
+    ontology_resolver: str = "rdflib"
+    matching_strategy: str = "fuzzy"
+    ontology_file_path: str = ""
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
+
+    def to_dict(self) -> dict:
+        """
+        Return the configuration as a dictionary.
+        """
+        return {
+            "ontology_resolver": self.ontology_resolver,
+            "matching_strategy": self.matching_strategy,
+            "ontology_file_path": self.ontology_file_path,
+        }
+
+
+@lru_cache
+def get_ontology_env_config():
+    """
+    Retrieve the ontology configuration. This function utilizes caching to return a
+    singleton instance of the OntologyEnvConfig class for efficiency.
+    """
+    return OntologyEnvConfig()
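Since OntologyEnvConfig is a pydantic-settings BaseSettings, the fields can be supplied through the environment or a .env file. A sketch (variable names assume pydantic-settings' default case-insensitive field-name mapping; the values are illustrative):

    # .env
    ONTOLOGY_RESOLVER=rdflib
    MATCHING_STRATEGY=fuzzy
    ONTOLOGY_FILE_PATH=/data/ontologies/domain.owl

    from cognee.modules.ontology.ontology_env_config import get_ontology_env_config

    config = get_ontology_env_config()  # lru_cache makes this a cached singleton
    print(config.to_dict())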
cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py}

@@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
     FindClosestMatchError,
     GetSubgraphError,
 )
+from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
+from cognee.modules.ontology.models import AttachedOntologyNode
+from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
 
 logger = get_logger("OntologyAdapter")
 
 
-class AttachedOntologyNode:
-    """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
+class RDFLibOntologyResolver(BaseOntologyResolver):
+    """RDFLib-based ontology resolver implementation.
 
-    def __init__(self, uri: Any, category: str):
-        self.uri = uri
-        self.name = self._extract_name(uri)
-        self.category = category
+    This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
+    It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
+    """
 
-    @staticmethod
-    def _extract_name(uri: Any) -> str:
-        uri_str = str(uri)
-        if "#" in uri_str:
-            return uri_str.split("#")[-1]
-        return uri_str.rstrip("/").split("/")[-1]
-
-    def __repr__(self):
-        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
-
-
-class OntologyResolver:
-    def __init__(self, ontology_file: Optional[str] = None):
+    def __init__(
+        self,
+        ontology_file: Optional[str] = None,
+        matching_strategy: Optional[MatchingStrategy] = None,
+    ) -> None:
+        super().__init__(matching_strategy)
         self.ontology_file = ontology_file
         try:
             if ontology_file and os.path.exists(ontology_file):
@@ -60,7 +55,7 @@ class OntologyResolver:
         name = uri_str.rstrip("/").split("/")[-1]
         return name.lower().replace(" ", "_").strip()
 
-    def build_lookup(self):
+    def build_lookup(self) -> None:
         try:
             classes: Dict[str, URIRef] = {}
             individuals: Dict[str, URIRef] = {}
@@ -97,7 +92,7 @@ class OntologyResolver:
             logger.error("Failed to build lookup dictionary: %s", str(e))
             raise RuntimeError("Lookup build failed") from e
 
-    def refresh_lookup(self):
+    def refresh_lookup(self) -> None:
         self.build_lookup()
         logger.info("Ontology lookup refreshed.")
 
@@ -105,13 +100,8 @@ class OntologyResolver:
         try:
             normalized_name = name.lower().replace(" ", "_").strip()
             possible_matches = list(self.lookup.get(category, {}).keys())
-            if normalized_name in possible_matches:
-                return normalized_name
 
-            best_match = difflib.get_close_matches(
-                normalized_name, possible_matches, n=1, cutoff=0.8
-            )
-            return best_match[0] if best_match else None
+            return self.matching_strategy.find_match(normalized_name, possible_matches)
         except Exception as e:
             logger.error("Error in find_closest_match: %s", str(e))
             raise FindClosestMatchError() from e
@@ -125,7 +115,9 @@ class OntologyResolver:
 
     def get_subgraph(
         self, node_name: str, node_type: str = "individuals", directed: bool = True
-    ) -> Tuple[
+    ) -> Tuple[
+        List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
+    ]:
         nodes_set = set()
         edges: List[Tuple[str, str, str]] = []
         visited = set()
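Putting the refactor together, a hypothetical wiring of the renamed resolver with a custom strategy (the import path is inferred from the renamed file; the cutoff, file name, and node name are illustrative):

    from cognee.modules.ontology.matching_strategies import FuzzyMatchingStrategy
    from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

    resolver = RDFLibOntologyResolver(
        ontology_file="ontology.owl",
        matching_strategy=FuzzyMatchingStrategy(cutoff=0.9),  # stricter than the 0.8 default
    )
    nodes, edges, start_node = resolver.get_subgraph("john_doe", node_type="individuals")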
cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py

@@ -11,6 +11,19 @@ from cognee.modules.data.methods import (
 
 
 async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
+    """
+    Handles dataset creation and authorization for Cognee.
+    Verifies that the provided user has the necessary permission for the given Dataset.
+    If the Dataset does not exist, creates it and grants permission to the creating user.
+
+    Args:
+        dataset_id: Id of the dataset.
+        dataset_name: Name of the dataset.
+        user: Cognee User the request is processed for; if None, the default user is used.
+
+    Returns:
+        Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
+    """
     if not user:
         user = await get_default_user()
 
cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py

@@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
         datasets: Dataset names or Dataset UUID (in case Datasets already exist)
 
     Returns:
-
+        Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
     """
     # If no user is provided use default user
     if user is None:
cognee/modules/pipelines/models/PipelineRunInfo.py

@@ -1,6 +1,7 @@
-from typing import Any, Optional
+from typing import Any, Optional, List, Union
 from uuid import UUID
 from pydantic import BaseModel
+from cognee.modules.data.models.Data import Data
 
 
 class PipelineRunInfo(BaseModel):
@@ -8,11 +9,15 @@ class PipelineRunInfo(BaseModel):
     pipeline_run_id: UUID
     dataset_id: UUID
     dataset_name: str
-    payload: Optional[Any] = None
+    # Data must be mentioned in typing to allow custom encoders for Data to be activated
+    payload: Optional[Union[Any, List[Data]]] = None
     data_ingestion_info: Optional[list] = None
 
     model_config = {
         "arbitrary_types_allowed": True,
+        "from_attributes": True,
+        # Add custom encoding handler for Data ORM model
+        "json_encoders": {Data: lambda d: d.to_json()},
     }
 
 
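The comment in the hunk is the point of the change: mentioning Data in the payload annotation, together with the json_encoders entry, routes Data ORM objects through their own to_json method when run info is serialized. A rough sketch (the run_id, dataset_id, and data_item variables are placeholders, and any other required model fields are elided):

    info = PipelineRunInfo(
        pipeline_run_id=run_id,
        dataset_id=dataset_id,
        dataset_name="my_dataset",
        payload=[data_item],  # Data ORM instances
    )
    info.model_dump_json()  # each Data item is encoded via Data.to_json()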
cognee/modules/retrieval/graph_completion_context_extension_retriever.py

@@ -48,7 +48,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
         query: str,
         context: Optional[List[Edge]] = None,
         context_extension_rounds=4,
-    ) -> str:
+    ) -> List[str]:
         """
         Extends the context for a given query by retrieving related triplets and generating new
         completions based on them.
cognee/modules/retrieval/temporal_retriever.py

@@ -113,7 +113,7 @@ class TemporalRetriever(GraphCompletionRetriever):
             logger.info(
                 "No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
             )
-            triplets = await self.
+            triplets = await self.get_triplets(query)
             return await self.resolve_edges_to_text(triplets)
 
         if ids:
@@ -122,7 +122,7 @@ class TemporalRetriever(GraphCompletionRetriever):
             logger.info(
                 "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
             )
-            triplets = await self.
+            triplets = await self.get_triplets(query)
             return await self.resolve_edges_to_text(triplets)
 
         vector_engine = get_vector_engine()
@@ -136,7 +136,7 @@ class TemporalRetriever(GraphCompletionRetriever):
 
         return self.descriptions_to_string(top_k_events)
 
-    async def get_completion(self, query: str, context: Optional[str] = None) -> str:
+    async def get_completion(self, query: str, context: Optional[str] = None) -> List[str]:
         """Generates a response using the query and optional context."""
         if not context:
             context = await self.get_context(query=query)
cognee/modules/search/methods/get_search_type_tools.py

@@ -1,3 +1,4 @@
+import os
 from typing import Callable, List, Optional, Type
 
 from cognee.modules.engine.models.node_set import NodeSet
@@ -160,6 +161,12 @@ async def get_search_type_tools(
     if query_type is SearchType.FEELING_LUCKY:
         query_type = await select_search_type(query_text)
 
+    if (
+        query_type in [SearchType.CYPHER, SearchType.NATURAL_LANGUAGE]
+        and os.getenv("ALLOW_CYPHER_QUERY", "true").lower() == "false"
+    ):
+        raise UnsupportedSearchTypeError("Cypher query search types are disabled.")
+
     search_type_tools = search_tasks.get(query_type)
 
     if not search_type_tools:
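The new guard lets operators turn off graph-query search types via the environment; a minimal sketch:

    import os

    os.environ["ALLOW_CYPHER_QUERY"] = "false"
    # Searches with SearchType.CYPHER or SearchType.NATURAL_LANGUAGE now raise
    # UnsupportedSearchTypeError instead of reaching the graph database.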
cognee/modules/search/methods/search.py

@@ -136,12 +136,19 @@ async def search(
     if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
         return_value = []
         for search_result in search_results:
-
+            prepared_search_results = await prepare_search_result(search_result)
+
+            result = prepared_search_results["result"]
+            graphs = prepared_search_results["graphs"]
+            context = prepared_search_results["context"]
+            datasets = prepared_search_results["datasets"]
+
             return_value.append(
                 {
-                    "search_result": result,
+                    "search_result": [result] if result else None,
                     "dataset_id": datasets[0].id,
                     "dataset_name": datasets[0].name,
+                    "graphs": graphs,
                 }
             )
         return return_value
@@ -155,14 +162,6 @@ async def search(
         return return_value[0]
     else:
         return return_value
-    # return [
-    #     SearchResult(
-    #         search_result=result,
-    #         dataset_id=datasets[min(index, len(datasets) - 1)].id if datasets else None,
-    #         dataset_name=datasets[min(index, len(datasets) - 1)].name if datasets else None,
-    #     )
-    #     for index, (result, _, datasets) in enumerate(search_results)
-    # ]
 
 
 async def authorized_search(
@@ -208,11 +207,11 @@ async def authorized_search(
     context = {}
     datasets: List[Dataset] = []
 
-    for _, search_context,
-        for dataset in
+    for _, search_context, search_datasets in search_responses:
+        for dataset in search_datasets:
             context[str(dataset.id)] = search_context
 
-        datasets.extend(
+        datasets.extend(search_datasets)
 
     specific_search_tools = await get_search_type_tools(
         query_type=query_type,
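With ENABLE_BACKEND_ACCESS_CONTROL on, each entry in the returned list now carries the prepared graph alongside the list-wrapped result; roughly this shape (values illustrative):

    {
        "search_result": ["<completion text>"],
        "dataset_id": UUID("..."),
        "dataset_name": "my_dataset",
        "graphs": {"my_dataset": {"nodes": [...], "edges": [...]}},
    }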
cognee/modules/search/utils/prepare_search_result.py

@@ -1,40 +1,62 @@
 from typing import List, cast
+from uuid import uuid5, NAMESPACE_OID
 
 from cognee.modules.graph.utils import resolve_edges_to_text
 from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
+from cognee.modules.search.types.SearchResult import SearchResultDataset
 from cognee.modules.search.utils.transform_context_to_graph import transform_context_to_graph
+from cognee.modules.search.utils.transform_insights_to_graph import transform_insights_to_graph
 
 
 async def prepare_search_result(search_result):
-
+    results, context, datasets = search_result
 
     graphs = None
     result_graph = None
     context_texts = {}
 
-    if isinstance(
+    if isinstance(datasets, list) and len(datasets) == 0:
+        datasets = [
+            SearchResultDataset(
+                id=uuid5(NAMESPACE_OID, "*"),
+                name="all available datasets",
+            )
+        ]
+
+    if (
+        isinstance(context, List)
+        and len(context) > 0
+        and isinstance(context[0], tuple)
+        and context[0][1].get("relationship_name")
+    ):
+        context_graph = transform_insights_to_graph(context)
+        graphs = {
+            ", ".join([dataset.name for dataset in datasets]): context_graph,
+        }
+        results = None
+    elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], Edge):
         context_graph = transform_context_to_graph(context)
 
         graphs = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): context_graph,
         }
         context_texts = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): await resolve_edges_to_text(context),
         }
     elif isinstance(context, str):
         context_texts = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): context,
         }
     elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], str):
         context_texts = {
-            "
+            ", ".join([dataset.name for dataset in datasets]): "\n".join(cast(List[str], context)),
         }
 
-    if isinstance(
-        result_graph = transform_context_to_graph(
+    if isinstance(results, List) and len(results) > 0 and isinstance(results[0], Edge):
+        result_graph = transform_context_to_graph(results)
 
     return {
-        "result": result_graph or
+        "result": result_graph or results[0] if results and len(results) == 1 else results,
         "graphs": graphs,
         "context": context_texts,
         "datasets": datasets,
cognee/modules/search/utils/transform_context_to_graph.py

@@ -14,7 +14,7 @@ def transform_context_to_graph(context: List[Edge]):
             if "name" in triplet.node1.attributes
             else triplet.node1.id,
             "type": triplet.node1.attributes["type"],
-            "attributes": triplet.
+            "attributes": triplet.node1.attributes,
         }
         nodes[triplet.node2.id] = {
             "id": triplet.node2.id,
cognee/modules/search/utils/transform_insights_to_graph.py

@@ -0,0 +1,28 @@
+from typing import Dict, List, Tuple
+
+
+def transform_insights_to_graph(context: List[Tuple[Dict, Dict, Dict]]):
+    nodes = {}
+    edges = {}
+
+    for triplet in context:
+        nodes[triplet[0]["id"]] = {
+            "id": triplet[0]["id"],
+            "label": triplet[0]["name"] if "name" in triplet[0] else triplet[0]["id"],
+            "type": triplet[0]["type"],
+        }
+        nodes[triplet[2]["id"]] = {
+            "id": triplet[2]["id"],
+            "label": triplet[2]["name"] if "name" in triplet[2] else triplet[2]["id"],
+            "type": triplet[2]["type"],
+        }
+        edges[f"{triplet[0]['id']}_{triplet[1]['relationship_name']}_{triplet[2]['id']}"] = {
+            "source": triplet[0]["id"],
+            "target": triplet[2]["id"],
+            "label": triplet[1]["relationship_name"],
+        }
+
+    return {
+        "nodes": list(nodes.values()),
+        "edges": list(edges.values()),
+    }
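To make the expected triplet shape concrete, a small illustrative call (all values invented):

    triplet = (
        {"id": "n1", "name": "Alice", "type": "Person"},
        {"relationship_name": "works_at"},
        {"id": "n2", "name": "Acme", "type": "Company"},
    )
    transform_insights_to_graph([triplet])
    # -> {"nodes": [{"id": "n1", "label": "Alice", "type": "Person"},
    #               {"id": "n2", "label": "Acme", "type": "Company"}],
    #     "edges": [{"source": "n1", "target": "n2", "label": "works_at"}]}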
cognee/modules/users/methods/create_user.py

@@ -1,9 +1,10 @@
-from uuid import uuid4
+from uuid import UUID, uuid4
 from fastapi_users.exceptions import UserAlreadyExists
+from sqlalchemy.ext.asyncio import AsyncSession
 
 from cognee.infrastructure.databases.relational import get_relational_engine
-from cognee.modules.notebooks.
-from cognee.modules.notebooks.
+from cognee.modules.notebooks.models.Notebook import Notebook
+from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook
 from cognee.modules.users.exceptions import TenantNotFoundError
 from cognee.modules.users.get_user_manager import get_user_manager_context
 from cognee.modules.users.get_user_db import get_user_db_context
@@ -60,27 +61,6 @@ async def create_user(
         if auto_login:
             await session.refresh(user)
 
-        await create_notebook(
-            user_id=user.id,
-            notebook_name="Welcome to cognee 🧠",
-            cells=[
-                NotebookCell(
-                    id=uuid4(),
-                    name="Welcome",
-                    content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
-                    type="markdown",
-                ),
-                NotebookCell(
-                    id=uuid4(),
-                    name="Example",
-                    content="",
-                    type="code",
-                ),
-            ],
-            deletable=False,
-            session=session,
-        )
-
         return user
     except UserAlreadyExists as error:
         print(f"User {email} already exists")
cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py

@@ -9,6 +9,18 @@ from uuid import UUID
 async def authorized_give_permission_on_datasets(
     principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
 ):
+    """
+    Give a user permission on certain datasets.
+    The request owner must have the necessary permission to share the datasets.
+    Args:
+        principal_id: Id of the user the datasets are shared with
+        dataset_ids: Ids of the datasets to share
+        permission_name: Name of the permission to give
+        owner_id: Id of the request owner
+
+    Returns:
+        None
+    """
     # If only a single dataset UUID is provided transform it to a list
     if not isinstance(dataset_ids, list):
         dataset_ids = [dataset_ids]
cognee/modules/users/permissions/methods/check_permission_on_dataset.py

@@ -10,6 +10,17 @@ logger = get_logger()
 
 
 async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
+    """
+    Check if a user has a specific permission on a dataset.
+    Args:
+        user: User whose permission is checked
+        permission_type: Type of permission to check
+        dataset_id: Id of the dataset
+
+    Returns:
+        None
+
+    """
     if user is None:
         user = await get_default_user()
 
cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py

@@ -1,3 +1,5 @@
+from types import SimpleNamespace
+
 from cognee.shared.logging_utils import get_logger
 
 from ...models.User import User
@@ -9,6 +11,16 @@ logger = get_logger()
 
 
 async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
+    """
+    Return a list of datasets the user has permission for.
+    If the user is part of a tenant, also return datasets their roles have permission for.
+    Args:
+        user
+        permission_type
+
+    Returns:
+        list[Dataset]: List of datasets the user has permission for
+    """
     datasets = list()
     # Get all datasets User has explicit access to
     datasets.extend(await get_principal_datasets(user, permission_type))
@@ -17,9 +29,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
     # Get all datasets all tenants have access to
     tenant = await get_tenant(user.tenant_id)
     datasets.extend(await get_principal_datasets(tenant, permission_type))
+
     # Get all datasets Users roles have access to
-
-
+    if isinstance(user, SimpleNamespace):
+        # If simple namespace use roles defined in user
+        roles = user.roles
+    else:
+        roles = await user.awaitable_attrs.roles
+    for role in roles:
         datasets.extend(await get_principal_datasets(role, permission_type))
 
     # Deduplicate datasets with same ID