cognee 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. cognee/api/v1/cloud/routers/get_checks_router.py +1 -1
  2. cognee/api/v1/cognify/cognify.py +44 -7
  3. cognee/api/v1/cognify/routers/get_cognify_router.py +2 -1
  4. cognee/api/v1/notebooks/routers/get_notebooks_router.py +2 -1
  5. cognee/api/v1/prune/prune.py +2 -2
  6. cognee/api/v1/search/search.py +1 -1
  7. cognee/api/v1/sync/sync.py +16 -5
  8. cognee/base_config.py +19 -1
  9. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  10. cognee/infrastructure/databases/graph/kuzu/remote_kuzu_adapter.py +4 -1
  11. cognee/infrastructure/databases/relational/ModelBase.py +2 -1
  12. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -2
  13. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -6
  14. cognee/infrastructure/databases/vector/config.py +1 -1
  15. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +6 -5
  16. cognee/infrastructure/files/storage/LocalFileStorage.py +50 -0
  17. cognee/infrastructure/files/storage/S3FileStorage.py +56 -9
  18. cognee/infrastructure/files/storage/StorageManager.py +18 -0
  19. cognee/infrastructure/files/utils/get_file_metadata.py +6 -1
  20. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +4 -2
  21. cognee/infrastructure/utils/run_async.py +9 -4
  22. cognee/infrastructure/utils/run_sync.py +4 -3
  23. cognee/modules/cloud/operations/check_api_key.py +4 -1
  24. cognee/modules/data/deletion/prune_system.py +5 -1
  25. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  26. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  27. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  28. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  29. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  30. cognee/modules/notebooks/methods/create_notebook.py +34 -0
  31. cognee/modules/notebooks/methods/get_notebook.py +2 -2
  32. cognee/modules/notebooks/methods/get_notebooks.py +27 -1
  33. cognee/modules/notebooks/methods/update_notebook.py +0 -1
  34. cognee/modules/notebooks/models/Notebook.py +206 -1
  35. cognee/modules/notebooks/operations/run_in_local_sandbox.py +8 -5
  36. cognee/modules/observability/get_observe.py +14 -0
  37. cognee/modules/observability/observers.py +1 -0
  38. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  39. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  40. cognee/modules/ontology/matching_strategies.py +53 -0
  41. cognee/modules/ontology/models.py +20 -0
  42. cognee/modules/ontology/ontology_config.py +24 -0
  43. cognee/modules/ontology/ontology_env_config.py +45 -0
  44. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  45. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +13 -0
  46. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +1 -1
  47. cognee/modules/pipelines/models/PipelineRunInfo.py +7 -2
  48. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +1 -1
  49. cognee/modules/retrieval/graph_completion_cot_retriever.py +1 -1
  50. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  51. cognee/modules/retrieval/temporal_retriever.py +3 -3
  52. cognee/modules/retrieval/user_qa_feedback.py +1 -1
  53. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  54. cognee/modules/search/methods/search.py +12 -13
  55. cognee/modules/search/utils/prepare_search_result.py +31 -9
  56. cognee/modules/search/utils/transform_context_to_graph.py +1 -1
  57. cognee/modules/search/utils/transform_insights_to_graph.py +28 -0
  58. cognee/modules/users/methods/create_user.py +4 -24
  59. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  60. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  61. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +19 -2
  62. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  63. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  64. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  65. cognee/modules/users/permissions/methods/get_role.py +10 -0
  66. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  67. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  68. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  69. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  70. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  71. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  72. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  73. cognee/modules/users/roles/methods/create_role.py +10 -0
  74. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  75. cognee/modules/users/tenants/methods/create_tenant.py +10 -0
  76. cognee/root_dir.py +5 -0
  77. cognee/shared/cache.py +346 -0
  78. cognee/shared/utils.py +12 -0
  79. cognee/tasks/graph/extract_graph_from_data.py +53 -10
  80. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  81. cognee/tasks/ingestion/save_data_item_to_storage.py +1 -0
  82. cognee/tasks/temporal_graph/models.py +11 -6
  83. cognee/tests/cli_tests/cli_unit_tests/test_cli_main.py +5 -5
  84. cognee/tests/test_cognee_server_start.py +4 -4
  85. cognee/tests/test_temporal_graph.py +6 -34
  86. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  87. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +399 -0
  88. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/METADATA +11 -8
  89. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/RECORD +93 -86
  90. cognee-0.3.4.dist-info/entry_points.txt +2 -0
  91. cognee/api/v1/save/save.py +0 -335
  92. cognee/tests/test_save_export_path.py +0 -116
  93. cognee-0.3.2.dist-info/entry_points.txt +0 -2
  94. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/WHEEL +0 -0
  95. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/LICENSE +0 -0
  96. {cognee-0.3.2.dist-info → cognee-0.3.4.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,53 @@
import difflib
from abc import ABC, abstractmethod
from typing import List, Optional


class MatchingStrategy(ABC):
    """Interface for strategies that match an ontology entity name against candidates."""

    @abstractmethod
    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Return the candidate that best matches ``name``.

        Args:
            name: The name to match.
            candidates: Candidate names to match against.

        Returns:
            The best matching candidate name, or ``None`` when nothing matches.
        """


class FuzzyMatchingStrategy(MatchingStrategy):
    """Approximate string matching backed by :mod:`difflib`."""

    def __init__(self, cutoff: float = 0.8):
        """Create the strategy.

        Args:
            cutoff: Minimum similarity ratio (0.0 to 1.0) a candidate must reach
                to be considered a valid match.
        """
        self.cutoff = cutoff

    def find_match(self, name: str, candidates: List[str]) -> Optional[str]:
        """Return the closest fuzzy match for ``name`` from ``candidates``.

        Args:
            name: The normalized name to match.
            candidates: List of normalized candidate names.

        Returns:
            The best candidate at or above the cutoff, or ``None`` if none qualifies.
        """
        if not candidates:
            return None

        # An exact hit short-circuits the more expensive fuzzy search.
        if name in candidates:
            return name

        matches = difflib.get_close_matches(name, candidates, n=1, cutoff=self.cutoff)
        if matches:
            return matches[0]
        return None
@@ -0,0 +1,20 @@
from typing import Any


class AttachedOntologyNode:
    """Lightweight node wrapper that normalizes any ontology URI into the common cognee interface."""

    def __init__(self, uri: Any, category: str):
        self.uri = uri
        self.name = self._extract_name(uri)
        self.category = category

    @staticmethod
    def _extract_name(uri: Any) -> str:
        # Prefer the fragment after '#'; otherwise fall back to the last path segment.
        text = str(uri)
        if "#" in text:
            return text.rsplit("#", 1)[-1]
        return text.rstrip("/").rsplit("/", 1)[-1]

    def __repr__(self):
        return f"AttachedOntologyNode(name={self.name}, category={self.category})"
@@ -0,0 +1,24 @@
from typing import TypedDict, Optional

from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
from cognee.modules.ontology.matching_strategies import MatchingStrategy


class OntologyConfig(TypedDict, total=False):
    """Ontology-related settings passed through pipeline configuration.

    Attributes:
        ontology_resolver: Resolver instance used to attach ontology data.
    """

    ontology_resolver: Optional[BaseOntologyResolver]


class Config(TypedDict, total=False):
    """Top-level configuration dictionary.

    Attributes:
        ontology_config: Nested ontology settings (see ``OntologyConfig``).
    """

    ontology_config: Optional[OntologyConfig]
@@ -0,0 +1,45 @@
"""This module contains the configuration for ontology handling."""

from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict


class OntologyEnvConfig(BaseSettings):
    """
    Represents the configuration for ontology handling, including parameters for
    ontology file storage and resolution/matching strategies.

    Public methods:
    - to_dict

    Instance variables:
    - ontology_resolver
    - matching_strategy
    - ontology_file_path
    - model_config
    """

    # Resolver implementation selector ("rdflib" is the default implementation).
    ontology_resolver: str = "rdflib"
    # Matching-strategy selector used when resolving entity names ("fuzzy" by default).
    matching_strategy: str = "fuzzy"
    # Path to the ontology file; empty string means no ontology is attached.
    ontology_file_path: str = ""

    model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)

    def to_dict(self) -> dict:
        """
        Return the configuration as a dictionary.
        """
        return {
            "ontology_resolver": self.ontology_resolver,
            "matching_strategy": self.matching_strategy,
            "ontology_file_path": self.ontology_file_path,
        }


@lru_cache
def get_ontology_env_config():
    """
    Retrieve the ontology configuration. This function utilizes caching to return a
    singleton instance of the OntologyEnvConfig class for efficiency.
    """
    return OntologyEnvConfig()
@@ -10,31 +10,26 @@ from cognee.modules.ontology.exceptions import (
10
10
  FindClosestMatchError,
11
11
  GetSubgraphError,
12
12
  )
13
+ from cognee.modules.ontology.base_ontology_resolver import BaseOntologyResolver
14
+ from cognee.modules.ontology.models import AttachedOntologyNode
15
+ from cognee.modules.ontology.matching_strategies import MatchingStrategy, FuzzyMatchingStrategy
13
16
 
14
17
  logger = get_logger("OntologyAdapter")
15
18
 
16
19
 
17
- class AttachedOntologyNode:
18
- """Lightweight wrapper to be able to parse any ontology solution and generalize cognee interface."""
20
+ class RDFLibOntologyResolver(BaseOntologyResolver):
21
+ """RDFLib-based ontology resolver implementation.
19
22
 
20
- def __init__(self, uri: URIRef, category: str):
21
- self.uri = uri
22
- self.name = self._extract_name(uri)
23
- self.category = category
23
+ This implementation uses RDFLib to parse and work with RDF/OWL ontology files.
24
+ It provides fuzzy matching and subgraph extraction capabilities for ontology entities.
25
+ """
24
26
 
25
- @staticmethod
26
- def _extract_name(uri: URIRef) -> str:
27
- uri_str = str(uri)
28
- if "#" in uri_str:
29
- return uri_str.split("#")[-1]
30
- return uri_str.rstrip("/").split("/")[-1]
31
-
32
- def __repr__(self):
33
- return f"AttachedOntologyNode(name={self.name}, category={self.category})"
34
-
35
-
36
- class OntologyResolver:
37
- def __init__(self, ontology_file: Optional[str] = None):
27
+ def __init__(
28
+ self,
29
+ ontology_file: Optional[str] = None,
30
+ matching_strategy: Optional[MatchingStrategy] = None,
31
+ ) -> None:
32
+ super().__init__(matching_strategy)
38
33
  self.ontology_file = ontology_file
39
34
  try:
40
35
  if ontology_file and os.path.exists(ontology_file):
@@ -60,7 +55,7 @@ class OntologyResolver:
60
55
  name = uri_str.rstrip("/").split("/")[-1]
61
56
  return name.lower().replace(" ", "_").strip()
62
57
 
63
- def build_lookup(self):
58
+ def build_lookup(self) -> None:
64
59
  try:
65
60
  classes: Dict[str, URIRef] = {}
66
61
  individuals: Dict[str, URIRef] = {}
@@ -97,7 +92,7 @@ class OntologyResolver:
97
92
  logger.error("Failed to build lookup dictionary: %s", str(e))
98
93
  raise RuntimeError("Lookup build failed") from e
99
94
 
100
- def refresh_lookup(self):
95
+ def refresh_lookup(self) -> None:
101
96
  self.build_lookup()
102
97
  logger.info("Ontology lookup refreshed.")
103
98
 
@@ -105,13 +100,8 @@ class OntologyResolver:
105
100
  try:
106
101
  normalized_name = name.lower().replace(" ", "_").strip()
107
102
  possible_matches = list(self.lookup.get(category, {}).keys())
108
- if normalized_name in possible_matches:
109
- return normalized_name
110
103
 
111
- best_match = difflib.get_close_matches(
112
- normalized_name, possible_matches, n=1, cutoff=0.8
113
- )
114
- return best_match[0] if best_match else None
104
+ return self.matching_strategy.find_match(normalized_name, possible_matches)
115
105
  except Exception as e:
116
106
  logger.error("Error in find_closest_match: %s", str(e))
117
107
  raise FindClosestMatchError() from e
@@ -125,7 +115,9 @@ class OntologyResolver:
125
115
 
126
116
  def get_subgraph(
127
117
  self, node_name: str, node_type: str = "individuals", directed: bool = True
128
- ) -> Tuple[List[Any], List[Tuple[str, str, str]], Optional[Any]]:
118
+ ) -> Tuple[
119
+ List[AttachedOntologyNode], List[Tuple[str, str, str]], Optional[AttachedOntologyNode]
120
+ ]:
129
121
  nodes_set = set()
130
122
  edges: List[Tuple[str, str, str]] = []
131
123
  visited = set()
@@ -11,6 +11,19 @@ from cognee.modules.data.methods import (
11
11
 
12
12
 
13
13
  async def resolve_authorized_user_dataset(dataset_id: UUID, dataset_name: str, user: User):
14
+ """
15
+ Function handles creation and dataset authorization if dataset already exist for Cognee.
16
+ Verifies that provided user has necessary permission for provided Dataset.
17
+ If Dataset does not exist creates the Dataset and gives permission for the user creating the dataset.
18
+
19
+ Args:
20
+ dataset_id: Id of the dataset.
21
+ dataset_name: Name of the dataset.
22
+ user: Cognee User request is being processed for, if None default user will be used.
23
+
24
+ Returns:
25
+ Tuple[User, Dataset]: A tuple containing the user and the authorized dataset.
26
+ """
14
27
  if not user:
15
28
  user = await get_default_user()
16
29
 
@@ -25,7 +25,7 @@ async def resolve_authorized_user_datasets(
25
25
  datasets: Dataset names or Dataset UUID (in case Datasets already exist)
26
26
 
27
27
  Returns:
28
-
28
+ Tuple[User, List[Dataset]]: A tuple containing the user and the list of authorized datasets.
29
29
  """
30
30
  # If no user is provided use default user
31
31
  if user is None:
@@ -1,6 +1,7 @@
1
- from typing import Any, Optional
1
+ from typing import Any, Optional, List, Union
2
2
  from uuid import UUID
3
3
  from pydantic import BaseModel
4
+ from cognee.modules.data.models.Data import Data
4
5
 
5
6
 
6
7
  class PipelineRunInfo(BaseModel):
@@ -8,11 +9,15 @@ class PipelineRunInfo(BaseModel):
8
9
  pipeline_run_id: UUID
9
10
  dataset_id: UUID
10
11
  dataset_name: str
11
- payload: Optional[Any] = None
12
+ # Data must be mentioned in typing to allow custom encoders for Data to be activated
13
+ payload: Optional[Union[Any, List[Data]]] = None
12
14
  data_ingestion_info: Optional[list] = None
13
15
 
14
16
  model_config = {
15
17
  "arbitrary_types_allowed": True,
18
+ "from_attributes": True,
19
+ # Add custom encoding handler for Data ORM model
20
+ "json_encoders": {Data: lambda d: d.to_json()},
16
21
  }
17
22
 
18
23
 
@@ -48,7 +48,7 @@ class GraphCompletionContextExtensionRetriever(GraphCompletionRetriever):
48
48
  query: str,
49
49
  context: Optional[List[Edge]] = None,
50
50
  context_extension_rounds=4,
51
- ) -> str:
51
+ ) -> List[str]:
52
52
  """
53
53
  Extends the context for a given query by retrieving related triplets and generating new
54
54
  completions based on them.
@@ -58,7 +58,7 @@ class GraphCompletionCotRetriever(GraphCompletionRetriever):
58
58
  query: str,
59
59
  context: Optional[List[Edge]] = None,
60
60
  max_iter=4,
61
- ) -> str:
61
+ ) -> List[str]:
62
62
  """
63
63
  Generate completion responses based on a user query and contextual information.
64
64
 
@@ -135,7 +135,7 @@ class GraphCompletionRetriever(BaseGraphRetriever):
135
135
  self,
136
136
  query: str,
137
137
  context: Optional[List[Edge]] = None,
138
- ) -> Any:
138
+ ) -> List[str]:
139
139
  """
140
140
  Generates a completion using graph connections context based on a query.
141
141
 
@@ -113,7 +113,7 @@ class TemporalRetriever(GraphCompletionRetriever):
113
113
  logger.info(
114
114
  "No timestamps identified based on the query, performing retrieval using triplet search on events and entities."
115
115
  )
116
- triplets = await self.get_context(query)
116
+ triplets = await self.get_triplets(query)
117
117
  return await self.resolve_edges_to_text(triplets)
118
118
 
119
119
  if ids:
@@ -122,7 +122,7 @@ class TemporalRetriever(GraphCompletionRetriever):
122
122
  logger.info(
123
123
  "No events identified based on timestamp filtering, performing retrieval using triplet search on events and entities."
124
124
  )
125
- triplets = await self.get_context(query)
125
+ triplets = await self.get_triplets(query)
126
126
  return await self.resolve_edges_to_text(triplets)
127
127
 
128
128
  vector_engine = get_vector_engine()
@@ -136,7 +136,7 @@ class TemporalRetriever(GraphCompletionRetriever):
136
136
 
137
137
  return self.descriptions_to_string(top_k_events)
138
138
 
139
- async def get_completion(self, query: str, context: Optional[str] = None) -> str:
139
+ async def get_completion(self, query: str, context: Optional[str] = None) -> List[str]:
140
140
  """Generates a response using the query and optional context."""
141
141
  if not context:
142
142
  context = await self.get_context(query=query)
@@ -1,4 +1,4 @@
1
- from typing import Any, Optional, List
1
+ from typing import Optional, List
2
2
 
3
3
  from uuid import NAMESPACE_OID, uuid5, UUID
4
4
  from cognee.infrastructure.databases.graph import get_graph_engine
@@ -1,3 +1,4 @@
1
+ import os
1
2
  from typing import Callable, List, Optional, Type
2
3
 
3
4
  from cognee.modules.engine.models.node_set import NodeSet
@@ -160,6 +161,12 @@ async def get_search_type_tools(
160
161
  if query_type is SearchType.FEELING_LUCKY:
161
162
  query_type = await select_search_type(query_text)
162
163
 
164
+ if (
165
+ query_type in [SearchType.CYPHER, SearchType.NATURAL_LANGUAGE]
166
+ and os.getenv("ALLOW_CYPHER_QUERY", "true").lower() == "false"
167
+ ):
168
+ raise UnsupportedSearchTypeError("Cypher query search types are disabled.")
169
+
163
170
  search_type_tools = search_tasks.get(query_type)
164
171
 
165
172
  if not search_type_tools:
@@ -136,12 +136,19 @@ async def search(
136
136
  if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
137
137
  return_value = []
138
138
  for search_result in search_results:
139
- result, context, datasets = search_result
139
+ prepared_search_results = await prepare_search_result(search_result)
140
+
141
+ result = prepared_search_results["result"]
142
+ graphs = prepared_search_results["graphs"]
143
+ context = prepared_search_results["context"]
144
+ datasets = prepared_search_results["datasets"]
145
+
140
146
  return_value.append(
141
147
  {
142
- "search_result": result,
148
+ "search_result": [result] if result else None,
143
149
  "dataset_id": datasets[0].id,
144
150
  "dataset_name": datasets[0].name,
151
+ "graphs": graphs,
145
152
  }
146
153
  )
147
154
  return return_value
@@ -155,14 +162,6 @@ async def search(
155
162
  return return_value[0]
156
163
  else:
157
164
  return return_value
158
- # return [
159
- # SearchResult(
160
- # search_result=result,
161
- # dataset_id=datasets[min(index, len(datasets) - 1)].id if datasets else None,
162
- # dataset_name=datasets[min(index, len(datasets) - 1)].name if datasets else None,
163
- # )
164
- # for index, (result, _, datasets) in enumerate(search_results)
165
- # ]
166
165
 
167
166
 
168
167
  async def authorized_search(
@@ -208,11 +207,11 @@ async def authorized_search(
208
207
  context = {}
209
208
  datasets: List[Dataset] = []
210
209
 
211
- for _, search_context, datasets in search_responses:
212
- for dataset in datasets:
210
+ for _, search_context, search_datasets in search_responses:
211
+ for dataset in search_datasets:
213
212
  context[str(dataset.id)] = search_context
214
213
 
215
- datasets.extend(datasets)
214
+ datasets.extend(search_datasets)
216
215
 
217
216
  specific_search_tools = await get_search_type_tools(
218
217
  query_type=query_type,
@@ -1,40 +1,62 @@
1
1
  from typing import List, cast
2
+ from uuid import uuid5, NAMESPACE_OID
2
3
 
3
4
  from cognee.modules.graph.utils import resolve_edges_to_text
4
5
  from cognee.modules.graph.cognee_graph.CogneeGraphElements import Edge
6
+ from cognee.modules.search.types.SearchResult import SearchResultDataset
5
7
  from cognee.modules.search.utils.transform_context_to_graph import transform_context_to_graph
8
+ from cognee.modules.search.utils.transform_insights_to_graph import transform_insights_to_graph
6
9
 
7
10
 
8
11
  async def prepare_search_result(search_result):
9
- result, context, datasets = search_result
12
+ results, context, datasets = search_result
10
13
 
11
14
  graphs = None
12
15
  result_graph = None
13
16
  context_texts = {}
14
17
 
15
- if isinstance(context, List) and len(context) > 0 and isinstance(context[0], Edge):
18
+ if isinstance(datasets, list) and len(datasets) == 0:
19
+ datasets = [
20
+ SearchResultDataset(
21
+ id=uuid5(NAMESPACE_OID, "*"),
22
+ name="all available datasets",
23
+ )
24
+ ]
25
+
26
+ if (
27
+ isinstance(context, List)
28
+ and len(context) > 0
29
+ and isinstance(context[0], tuple)
30
+ and context[0][1].get("relationship_name")
31
+ ):
32
+ context_graph = transform_insights_to_graph(context)
33
+ graphs = {
34
+ ", ".join([dataset.name for dataset in datasets]): context_graph,
35
+ }
36
+ results = None
37
+ elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], Edge):
16
38
  context_graph = transform_context_to_graph(context)
17
39
 
18
40
  graphs = {
19
- "*": context_graph,
41
+ ", ".join([dataset.name for dataset in datasets]): context_graph,
20
42
  }
21
43
  context_texts = {
22
- "*": await resolve_edges_to_text(context),
44
+ ", ".join([dataset.name for dataset in datasets]): await resolve_edges_to_text(context),
23
45
  }
24
46
  elif isinstance(context, str):
25
47
  context_texts = {
26
- "*": context,
48
+ ", ".join([dataset.name for dataset in datasets]): context,
27
49
  }
28
50
  elif isinstance(context, List) and len(context) > 0 and isinstance(context[0], str):
29
51
  context_texts = {
30
- "*": "\n".join(cast(List[str], context)),
52
+ ", ".join([dataset.name for dataset in datasets]): "\n".join(cast(List[str], context)),
31
53
  }
32
54
 
33
- if isinstance(result, List) and len(result) > 0 and isinstance(result[0], Edge):
34
- result_graph = transform_context_to_graph(result)
55
+ if isinstance(results, List) and len(results) > 0 and isinstance(results[0], Edge):
56
+ result_graph = transform_context_to_graph(results)
35
57
 
36
58
  return {
37
- "result": result_graph or result,
59
+ "result": result_graph or results[0] if results and len(results) == 1 else results,
38
60
  "graphs": graphs,
39
61
  "context": context_texts,
40
62
  "datasets": datasets,
@@ -14,7 +14,7 @@ def transform_context_to_graph(context: List[Edge]):
14
14
  if "name" in triplet.node1.attributes
15
15
  else triplet.node1.id,
16
16
  "type": triplet.node1.attributes["type"],
17
- "attributes": triplet.node2.attributes,
17
+ "attributes": triplet.node1.attributes,
18
18
  }
19
19
  nodes[triplet.node2.id] = {
20
20
  "id": triplet.node2.id,
@@ -0,0 +1,28 @@
from typing import Dict, List, Tuple


def transform_insights_to_graph(context: List[Tuple[Dict, Dict, Dict]]):
    """Convert insight triplets (source, relationship, target) into a graph dict.

    Nodes are deduplicated by id and edges by the (source, relationship, target)
    combination; later triplets overwrite earlier entries with the same key.
    """

    def as_node(raw: Dict) -> Dict:
        # Fall back to the id when no human-readable name is present.
        return {
            "id": raw["id"],
            "label": raw["name"] if "name" in raw else raw["id"],
            "type": raw["type"],
        }

    nodes: Dict = {}
    edges: Dict = {}

    for source, relation, target in context:
        nodes[source["id"]] = as_node(source)
        nodes[target["id"]] = as_node(target)
        edge_key = f"{source['id']}_{relation['relationship_name']}_{target['id']}"
        edges[edge_key] = {
            "source": source["id"],
            "target": target["id"],
            "label": relation["relationship_name"],
        }

    return {
        "nodes": list(nodes.values()),
        "edges": list(edges.values()),
    }
@@ -1,9 +1,10 @@
1
- from uuid import uuid4
1
+ from uuid import UUID, uuid4
2
2
  from fastapi_users.exceptions import UserAlreadyExists
3
+ from sqlalchemy.ext.asyncio import AsyncSession
3
4
 
4
5
  from cognee.infrastructure.databases.relational import get_relational_engine
5
- from cognee.modules.notebooks.methods import create_notebook
6
- from cognee.modules.notebooks.models.Notebook import NotebookCell
6
+ from cognee.modules.notebooks.models.Notebook import Notebook
7
+ from cognee.modules.notebooks.methods.create_notebook import _create_tutorial_notebook
7
8
  from cognee.modules.users.exceptions import TenantNotFoundError
8
9
  from cognee.modules.users.get_user_manager import get_user_manager_context
9
10
  from cognee.modules.users.get_user_db import get_user_db_context
@@ -60,27 +61,6 @@ async def create_user(
60
61
  if auto_login:
61
62
  await session.refresh(user)
62
63
 
63
- await create_notebook(
64
- user_id=user.id,
65
- notebook_name="Welcome to cognee 🧠",
66
- cells=[
67
- NotebookCell(
68
- id=uuid4(),
69
- name="Welcome",
70
- content="Cognee is your toolkit for turning text into a structured knowledge graph, optionally enhanced by ontologies, and then querying it with advanced retrieval techniques. This notebook will guide you through a simple example.",
71
- type="markdown",
72
- ),
73
- NotebookCell(
74
- id=uuid4(),
75
- name="Example",
76
- content="",
77
- type="code",
78
- ),
79
- ],
80
- deletable=False,
81
- session=session,
82
- )
83
-
84
64
  return user
85
65
  except UserAlreadyExists as error:
86
66
  print(f"User {email} already exists")
@@ -9,6 +9,18 @@ from uuid import UUID
9
9
  async def authorized_give_permission_on_datasets(
10
10
  principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
11
11
  ):
12
+ """
13
+ Give permission to certain datasets to a user.
14
+ The request owner must have the necessary permission to share the datasets.
15
+ Args:
16
+ principal_id: Id of user to whom datasets are shared
17
+ dataset_ids: Ids of datasets to share
18
+ permission_name: Name of permission to give
19
+ owner_id: Id of the request owner
20
+
21
+ Returns:
22
+ None
23
+ """
12
24
  # If only a single dataset UUID is provided transform it to a list
13
25
  if not isinstance(dataset_ids, list):
14
26
  dataset_ids = [dataset_ids]
@@ -10,6 +10,17 @@ logger = get_logger()
10
10
 
11
11
 
12
12
  async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
13
+ """
14
+ Check if a user has a specific permission on a dataset.
15
+ Args:
16
+ user: User whose permission is checked
17
+ permission_type: Type of permission to check
18
+ dataset_id: Id of the dataset
19
+
20
+ Returns:
21
+ None
22
+
23
+ """
13
24
  if user is None:
14
25
  user = await get_default_user()
15
26
 
@@ -1,3 +1,5 @@
1
+ from types import SimpleNamespace
2
+
1
3
  from cognee.shared.logging_utils import get_logger
2
4
 
3
5
  from ...models.User import User
@@ -9,6 +11,16 @@ logger = get_logger()
9
11
 
10
12
 
11
13
  async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
14
+ """
15
+ Return a list of datasets the user has permission for.
16
+ If the user is part of a tenant, return datasets his roles have permission for.
17
+ Args:
18
+ user
19
+ permission_type
20
+
21
+ Returns:
22
+ list[Dataset]: List of datasets user has permission for
23
+ """
12
24
  datasets = list()
13
25
  # Get all datasets User has explicit access to
14
26
  datasets.extend(await get_principal_datasets(user, permission_type))
@@ -17,9 +29,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
17
29
  # Get all datasets all tenants have access to
18
30
  tenant = await get_tenant(user.tenant_id)
19
31
  datasets.extend(await get_principal_datasets(tenant, permission_type))
32
+
20
33
  # Get all datasets Users roles have access to
21
- for role_name in user.roles:
22
- role = await get_role(user.tenant_id, role_name)
34
+ if isinstance(user, SimpleNamespace):
35
+ # If simple namespace use roles defined in user
36
+ roles = user.roles
37
+ else:
38
+ roles = await user.awaitable_attrs.roles
39
+ for role in roles:
23
40
  datasets.extend(await get_principal_datasets(role, permission_type))
24
41
 
25
42
  # Deduplicate datasets with same ID