cognee 0.3.4.dev4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. cognee/api/client.py +16 -7
  2. cognee/api/health.py +5 -9
  3. cognee/api/v1/add/add.py +3 -1
  4. cognee/api/v1/cognify/cognify.py +44 -7
  5. cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
  6. cognee/api/v1/search/search.py +3 -0
  7. cognee/api/v1/ui/__init__.py +1 -1
  8. cognee/api/v1/ui/ui.py +215 -150
  9. cognee/api/v1/update/__init__.py +1 -0
  10. cognee/api/v1/update/routers/__init__.py +1 -0
  11. cognee/api/v1/update/routers/get_update_router.py +90 -0
  12. cognee/api/v1/update/update.py +100 -0
  13. cognee/base_config.py +5 -2
  14. cognee/cli/_cognee.py +28 -10
  15. cognee/cli/commands/delete_command.py +34 -2
  16. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  17. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
  18. cognee/eval_framework/modal_eval_dashboard.py +9 -1
  19. cognee/infrastructure/databases/graph/config.py +9 -9
  20. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
  21. cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
  22. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
  23. cognee/infrastructure/databases/relational/config.py +4 -4
  24. cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
  25. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
  26. cognee/infrastructure/databases/vector/config.py +7 -7
  27. cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
  28. cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
  32. cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
  33. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
  34. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
  35. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
  36. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
  37. cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
  38. cognee/infrastructure/files/storage/s3_config.py +1 -0
  39. cognee/infrastructure/files/utils/open_data_file.py +7 -14
  40. cognee/infrastructure/llm/LLMGateway.py +19 -117
  41. cognee/infrastructure/llm/config.py +28 -13
  42. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
  43. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
  44. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
  45. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
  46. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
  47. cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
  48. cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
  49. cognee/infrastructure/llm/prompts/test.txt +1 -0
  50. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
  51. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
  52. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
  53. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
  54. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
  55. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
  71. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
  72. cognee/infrastructure/llm/utils.py +4 -4
  73. cognee/infrastructure/loaders/LoaderEngine.py +5 -2
  74. cognee/infrastructure/loaders/external/__init__.py +7 -0
  75. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
  76. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  77. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  78. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  79. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  80. cognee/modules/data/methods/get_deletion_counts.py +92 -0
  81. cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
  82. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  83. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  84. cognee/modules/ingestion/data_types/TextData.py +0 -1
  85. cognee/modules/observability/get_observe.py +14 -0
  86. cognee/modules/observability/observers.py +1 -0
  87. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  88. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  89. cognee/modules/ontology/matching_strategies.py +53 -0
  90. cognee/modules/ontology/models.py +20 -0
  91. cognee/modules/ontology/ontology_config.py +24 -0
  92. cognee/modules/ontology/ontology_env_config.py +45 -0
  93. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
  96. cognee/modules/retrieval/code_retriever.py +2 -1
  97. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
  98. cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
  99. cognee/modules/retrieval/graph_completion_retriever.py +0 -3
  100. cognee/modules/retrieval/insights_retriever.py +1 -1
  101. cognee/modules/retrieval/jaccard_retrival.py +60 -0
  102. cognee/modules/retrieval/lexical_retriever.py +123 -0
  103. cognee/modules/retrieval/natural_language_retriever.py +2 -1
  104. cognee/modules/retrieval/temporal_retriever.py +3 -2
  105. cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
  106. cognee/modules/retrieval/utils/completion.py +4 -7
  107. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  108. cognee/modules/search/methods/no_access_control_search.py +1 -1
  109. cognee/modules/search/methods/search.py +32 -13
  110. cognee/modules/search/types/SearchType.py +1 -0
  111. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  112. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  113. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
  114. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  115. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  116. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  117. cognee/modules/users/permissions/methods/get_role.py +10 -0
  118. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  119. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  120. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  121. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  122. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  123. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  124. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  125. cognee/modules/users/roles/methods/create_role.py +12 -1
  126. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  127. cognee/modules/users/tenants/methods/create_tenant.py +12 -1
  128. cognee/modules/visualization/cognee_network_visualization.py +13 -9
  129. cognee/shared/data_models.py +0 -1
  130. cognee/shared/utils.py +0 -32
  131. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  132. cognee/tasks/codingagents/coding_rule_associations.py +3 -2
  133. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
  134. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
  135. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
  136. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
  137. cognee/tasks/graph/extract_graph_from_code.py +2 -2
  138. cognee/tasks/graph/extract_graph_from_data.py +55 -12
  139. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  140. cognee/tasks/ingestion/migrate_relational_database.py +132 -41
  141. cognee/tasks/ingestion/resolve_data_directories.py +4 -1
  142. cognee/tasks/schema/ingest_database_schema.py +134 -0
  143. cognee/tasks/schema/models.py +40 -0
  144. cognee/tasks/storage/index_data_points.py +1 -1
  145. cognee/tasks/storage/index_graph_edges.py +3 -1
  146. cognee/tasks/summarization/summarize_code.py +2 -2
  147. cognee/tasks/summarization/summarize_text.py +2 -2
  148. cognee/tasks/temporal_graph/enrich_events.py +2 -2
  149. cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
  150. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
  151. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
  152. cognee/tests/test_advanced_pdf_loader.py +141 -0
  153. cognee/tests/test_chromadb.py +40 -0
  154. cognee/tests/test_cognee_server_start.py +6 -1
  155. cognee/tests/test_data/Quantum_computers.txt +9 -0
  156. cognee/tests/test_lancedb.py +211 -0
  157. cognee/tests/test_pgvector.py +40 -0
  158. cognee/tests/test_relational_db_migration.py +76 -0
  159. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
  160. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  161. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
  162. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
  164. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/METADATA +92 -96
  165. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/RECORD +172 -160
  166. cognee/infrastructure/data/utils/extract_keywords.py +0 -48
  167. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
  168. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
  169. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
  170. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
  171. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
  172. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
  173. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
  174. cognee/tasks/graph/infer_data_ontology.py +0 -309
  175. cognee/tests/test_falkordb.py +0 -174
  176. distributed/poetry.lock +0 -12238
  177. distributed/pyproject.toml +0 -186
  178. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
  179. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
  180. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
  181. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/WHEEL +0 -0
  182. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/entry_points.txt +0 -0
  183. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/LICENSE +0 -0
  184. {cognee-0.3.4.dev4.dist-info → cognee-0.3.6.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,5 +1,6 @@
1
1
  from typing import Optional
2
2
  from cognee.infrastructure.llm.LLMGateway import LLMGateway
3
+ from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
3
4
 
4
5
 
5
6
  async def generate_completion(
@@ -11,10 +12,8 @@ async def generate_completion(
11
12
  ) -> str:
12
13
  """Generates a completion using LLM with given context and prompts."""
13
14
  args = {"question": query, "context": context}
14
- user_prompt = LLMGateway.render_prompt(user_prompt_path, args)
15
- system_prompt = (
16
- system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
17
- )
15
+ user_prompt = render_prompt(user_prompt_path, args)
16
+ system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
18
17
 
19
18
  return await LLMGateway.acreate_structured_output(
20
19
  text_input=user_prompt,
@@ -29,9 +28,7 @@ async def summarize_text(
29
28
  system_prompt: str = None,
30
29
  ) -> str:
31
30
  """Summarizes text using LLM with the specified prompt."""
32
- system_prompt = (
33
- system_prompt if system_prompt else LLMGateway.read_query_prompt(system_prompt_path)
34
- )
31
+ system_prompt = system_prompt if system_prompt else read_query_prompt(system_prompt_path)
35
32
 
36
33
  return await LLMGateway.acreate_structured_output(
37
34
  text_input=text,
@@ -15,6 +15,7 @@ from cognee.modules.retrieval.completion_retriever import CompletionRetriever
15
15
  from cognee.modules.retrieval.graph_completion_retriever import GraphCompletionRetriever
16
16
  from cognee.modules.retrieval.temporal_retriever import TemporalRetriever
17
17
  from cognee.modules.retrieval.coding_rules_retriever import CodingRulesRetriever
18
+ from cognee.modules.retrieval.jaccard_retrival import JaccardChunksRetriever
18
19
  from cognee.modules.retrieval.graph_summary_completion_retriever import (
19
20
  GraphSummaryCompletionRetriever,
20
21
  )
@@ -152,6 +153,12 @@ async def get_search_type_tools(
152
153
  TemporalRetriever(top_k=top_k).get_completion,
153
154
  TemporalRetriever(top_k=top_k).get_context,
154
155
  ],
156
+ SearchType.CHUNKS_LEXICAL: (
157
+ lambda _r=JaccardChunksRetriever(top_k=top_k): [
158
+ _r.get_completion,
159
+ _r.get_context,
160
+ ]
161
+ )(),
155
162
  SearchType.CODING_RULES: [
156
163
  CodingRulesRetriever(rules_nodeset_name=node_name).get_existing_rules,
157
164
  ],
@@ -35,7 +35,7 @@ async def no_access_control_search(
35
35
  [get_completion, get_context] = search_tools
36
36
 
37
37
  if only_context:
38
- return await get_context(query_text)
38
+ return None, await get_context(query_text), []
39
39
 
40
40
  context = await get_context(query_text)
41
41
  result = await get_completion(query_text, context)
@@ -19,7 +19,9 @@ from cognee.modules.search.types import (
19
19
  from cognee.modules.search.operations import log_query, log_result
20
20
  from cognee.modules.users.models import User
21
21
  from cognee.modules.data.models import Dataset
22
- from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
22
+ from cognee.modules.data.methods.get_authorized_existing_datasets import (
23
+ get_authorized_existing_datasets,
24
+ )
23
25
 
24
26
  from .get_search_type_tools import get_search_type_tools
25
27
  from .no_access_control_search import no_access_control_search
@@ -143,20 +145,35 @@ async def search(
143
145
  context = prepared_search_results["context"]
144
146
  datasets = prepared_search_results["datasets"]
145
147
 
146
- return_value.append(
147
- {
148
- "search_result": [result] if result else None,
149
- "dataset_id": datasets[0].id,
150
- "dataset_name": datasets[0].name,
151
- "graphs": graphs,
152
- }
153
- )
148
+ if only_context:
149
+ return_value.append(
150
+ {
151
+ "search_result": [context] if context else None,
152
+ "dataset_id": datasets[0].id,
153
+ "dataset_name": datasets[0].name,
154
+ "graphs": graphs,
155
+ }
156
+ )
157
+ else:
158
+ return_value.append(
159
+ {
160
+ "search_result": [result] if result else None,
161
+ "dataset_id": datasets[0].id,
162
+ "dataset_name": datasets[0].name,
163
+ "graphs": graphs,
164
+ }
165
+ )
154
166
  return return_value
155
167
  else:
156
168
  return_value = []
157
- for search_result in search_results:
158
- result, context, datasets = search_result
159
- return_value.append(result)
169
+ if only_context:
170
+ for search_result in search_results:
171
+ prepared_search_results = await prepare_search_result(search_result)
172
+ return_value.append(prepared_search_results["context"])
173
+ else:
174
+ for search_result in search_results:
175
+ result, context, datasets = search_result
176
+ return_value.append(result)
160
177
  # For maintaining backwards compatibility
161
178
  if len(return_value) == 1 and isinstance(return_value[0], list):
162
179
  return return_value[0]
@@ -187,7 +204,9 @@ async def authorized_search(
187
204
  Not to be used outside of active access control mode.
188
205
  """
189
206
  # Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
190
- search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
207
+ search_datasets = await get_authorized_existing_datasets(
208
+ datasets=dataset_ids, permission_type="read", user=user
209
+ )
191
210
 
192
211
  if use_combined_context:
193
212
  search_responses = await search_in_datasets_context(
@@ -17,3 +17,4 @@ class SearchType(Enum):
17
17
  FEEDBACK = "FEEDBACK"
18
18
  TEMPORAL = "TEMPORAL"
19
19
  CODING_RULES = "CODING_RULES"
20
+ CHUNKS_LEXICAL = "CHUNKS_LEXICAL"
@@ -9,6 +9,18 @@ from uuid import UUID
9
9
  async def authorized_give_permission_on_datasets(
10
10
  principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
11
11
  ):
12
+ """
13
+ Give permission to certain datasets to a user.
14
+ The request owner must have the necessary permission to share the datasets.
15
+ Args:
16
+ principal_id: Id of user to whom datasets are shared
17
+ dataset_ids: Ids of datasets to share
18
+ permission_name: Name of permission to give
19
+ owner_id: Id of the request owner
20
+
21
+ Returns:
22
+ None
23
+ """
12
24
  # If only a single dataset UUID is provided transform it to a list
13
25
  if not isinstance(dataset_ids, list):
14
26
  dataset_ids = [dataset_ids]
@@ -10,6 +10,17 @@ logger = get_logger()
10
10
 
11
11
 
12
12
  async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
13
+ """
14
+ Check if a user has a specific permission on a dataset.
15
+ Args:
16
+ user: User whose permission is checked
17
+ permission_type: Type of permission to check
18
+ dataset_id: Id of the dataset
19
+
20
+ Returns:
21
+ None
22
+
23
+ """
13
24
  if user is None:
14
25
  user = await get_default_user()
15
26
 
@@ -11,6 +11,16 @@ logger = get_logger()
11
11
 
12
12
 
13
13
  async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
14
+ """
15
+ Return a list of datasets the user has permission for.
16
+ If the user is part of a tenant, return datasets his roles have permission for.
17
+ Args:
18
+ user: User whose accessible datasets are returned
19
+ permission_type: Type of permission to look up
20
+
21
+ Returns:
22
+ list[Dataset]: List of datasets user has permission for
23
+ """
14
24
  datasets = list()
15
25
  # Get all datasets User has explicit access to
16
26
  datasets.extend(await get_principal_datasets(user, permission_type))
@@ -8,6 +8,16 @@ from ...models import ACL, Permission
8
8
 
9
9
 
10
10
  async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -> list[str]:
11
+ """
12
+ Return a list of documents ids for which the user has read permission.
13
+ If datasets are specified, return only documents from those datasets.
14
+ Args:
15
+ user_id: Id of the user
16
+ datasets: List of datasets
17
+
18
+ Returns:
19
+ list[str]: List of documents for which the user has read permission
20
+ """
11
21
  db_engine = get_relational_engine()
12
22
 
13
23
  async with db_engine.get_async_session() as session:
@@ -6,6 +6,15 @@ from ...models.Principal import Principal
6
6
 
7
7
 
8
8
  async def get_principal(principal_id: UUID):
9
+ """
10
+ Return information about a user based on their id
11
+ Args:
12
+ principal_id: Id of the user
13
+
14
+ Returns:
15
+ principal: Information about the user (principal)
16
+
17
+ """
9
18
  db_engine = get_relational_engine()
10
19
 
11
20
  async with db_engine.get_async_session() as session:
@@ -9,6 +9,17 @@ from ...models.ACL import ACL
9
9
 
10
10
 
11
11
  async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
12
+ """
13
+ Return a list of datasets for which the user (principal) has a certain permission.
14
+ Args:
15
+ principal: Information about the user
16
+ permission_type: Type of permission
17
+
18
+ Returns:
19
+ list[Dataset]: List of datasets for which the user (principal)
20
+ has the permission (permission_type).
21
+
22
+ """
12
23
  db_engine = get_relational_engine()
13
24
 
14
25
  async with db_engine.get_async_session() as session:
@@ -9,6 +9,16 @@ from ...models.Role import Role
9
9
 
10
10
 
11
11
  async def get_role(tenant_id: UUID, role_name: str):
12
+ """
13
+ Return the role with the name role_name of the given tenant.
14
+ Args:
15
+ tenant_id: Id of the given tenant
16
+ role_name: Name of the role
17
+
18
+ Returns:
19
+ The role for the given tenant.
20
+
21
+ """
12
22
  db_engine = get_relational_engine()
13
23
 
14
24
  async with db_engine.get_async_session() as session:
@@ -15,9 +15,9 @@ async def get_specific_user_permission_datasets(
15
15
  Return a list of datasets user has given permission for. If a list of datasets is provided,
16
16
  verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
17
17
  Args:
18
- user_id:
19
- permission_type:
20
- dataset_ids:
18
+ user_id: Id of the user.
19
+ permission_type: Type of the permission.
20
+ dataset_ids: Ids of the provided datasets
21
21
 
22
22
  Returns:
23
23
  list[Dataset]: List of datasets user has permission for
@@ -8,6 +8,15 @@ from ...models.Tenant import Tenant
8
8
 
9
9
 
10
10
  async def get_tenant(tenant_id: UUID):
11
+ """
12
+ Return information about the tenant based on the given id.
13
+ Args:
14
+ tenant_id: Id of the given tenant
15
+
16
+ Returns:
17
+ Information about the given tenant.
18
+
19
+ """
11
20
  db_engine = get_relational_engine()
12
21
 
13
22
  async with db_engine.get_async_session() as session:
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
16
16
 
17
17
 
18
18
  async def give_default_permission_to_role(role_id: UUID, permission_name: str):
19
+ """
20
+ Give the permission with given name to the role with the given id as a default permission.
21
+ Args:
22
+ role_id: Id of the role
23
+ permission_name: Name of the permission
24
+
25
+ Returns:
26
+ None
27
+ """
19
28
  db_engine = get_relational_engine()
20
29
 
21
30
  async with db_engine.get_async_session() as session:
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
16
16
 
17
17
 
18
18
  async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
19
+ """
20
+ Give the permission with given name to the tenant with the given id as a default permission.
21
+ Args:
22
+ tenant_id: Id of the tenant
23
+ permission_name: Name of the permission
24
+
25
+ Returns:
26
+ None
27
+ """
19
28
  db_engine = get_relational_engine()
20
29
  async with db_engine.get_async_session() as session:
21
30
  tenant = (
@@ -16,6 +16,15 @@ from cognee.modules.users.models import (
16
16
 
17
17
 
18
18
  async def give_default_permission_to_user(user_id: UUID, permission_name: str):
19
+ """
20
+ Give the permission with given name to the user with the given id as a default permission.
21
+ Args:
22
+ user_id: Id of the user
23
+ permission_name: Name of the permission
24
+
25
+ Returns:
26
+ None
27
+ """
19
28
  db_engine = get_relational_engine()
20
29
  async with db_engine.get_async_session() as session:
21
30
  user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
@@ -24,6 +24,16 @@ async def give_permission_on_dataset(
24
24
  dataset_id: UUID,
25
25
  permission_name: str,
26
26
  ):
27
+ """
28
+ Give a specific permission on a dataset to a user.
29
+ Args:
30
+ principal: User who is being given the permission on the dataset
31
+ dataset_id: Id of the dataset
32
+ permission_name: Name of permission to give
33
+
34
+ Returns:
35
+ None
36
+ """
27
37
  db_engine = get_relational_engine()
28
38
 
29
39
  async with db_engine.get_async_session() as session:
@@ -21,6 +21,17 @@ from cognee.modules.users.models import (
21
21
 
22
22
 
23
23
  async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
24
+ """
25
+ Add a user with the given id to the role with the given id.
26
+ Args:
27
+ user_id: Id of the user.
28
+ role_id: Id of the role.
29
+ owner_id: Id of the request owner.
30
+
31
+ Returns:
32
+ None
33
+
34
+ """
24
35
  db_engine = get_relational_engine()
25
36
  async with db_engine.get_async_session() as session:
26
37
  user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
@@ -15,7 +15,17 @@ from cognee.modules.users.models import (
15
15
  async def create_role(
16
16
  role_name: str,
17
17
  owner_id: UUID,
18
- ):
18
+ ) -> UUID:
19
+ """
20
+ Create a new role with the given name, if the request owner with the given id
21
+ has the necessary permission.
22
+ Args:
23
+ role_name: Name of the new role.
24
+ owner_id: Id of the request owner.
25
+
26
+ Returns:
27
+ UUID: Id of the newly created role.
28
+ """
19
29
  db_engine = get_relational_engine()
20
30
  async with db_engine.get_async_session() as session:
21
31
  user = await get_user(owner_id)
@@ -35,3 +45,4 @@ async def create_role(
35
45
 
36
46
  await session.commit()
37
47
  await session.refresh(role)
48
+ return role.id
@@ -13,6 +13,18 @@ from cognee.modules.users.exceptions import (
13
13
 
14
14
 
15
15
  async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
16
+ """
17
+ Add a user with the given id to the tenant with the given id.
18
+ This can only be successful if the request owner with the given id is the tenant owner.
19
+ Args:
20
+ user_id: Id of the user.
21
+ tenant_id: Id of the tenant.
22
+ owner_id: Id of the request owner.
23
+
24
+ Returns:
25
+ None
26
+
27
+ """
16
28
  db_engine = get_relational_engine()
17
29
  async with db_engine.get_async_session() as session:
18
30
  user = await get_user(user_id)
@@ -7,7 +7,17 @@ from cognee.modules.users.models import Tenant
7
7
  from cognee.modules.users.methods import get_user
8
8
 
9
9
 
10
- async def create_tenant(tenant_name: str, user_id: UUID):
10
+ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
11
+ """
12
+ Create a new tenant with the given name, for the user with the given id.
13
+ This user is the owner of the tenant.
14
+ Args:
15
+ tenant_name: Name of the new tenant.
16
+ user_id: Id of the user.
17
+
18
+ Returns:
19
+ UUID: Id of the newly created tenant.
20
+ """
11
21
  db_engine = get_relational_engine()
12
22
  async with db_engine.get_async_session() as session:
13
23
  try:
@@ -24,5 +34,6 @@ async def create_tenant(tenant_name: str, user_id: UUID):
24
34
  user.tenant_id = tenant.id
25
35
  await session.merge(user)
26
36
  await session.commit()
37
+ return tenant.id
27
38
  except IntegrityError:
28
39
  raise EntityAlreadyExistsError(message="Tenant already exists.")
@@ -1,6 +1,5 @@
1
1
  import os
2
2
  import json
3
- import networkx
4
3
 
5
4
  from cognee.shared.logging_utils import get_logger
6
5
  from cognee.infrastructure.files.storage.LocalFileStorage import LocalFileStorage
@@ -9,6 +8,8 @@ logger = get_logger()
9
8
 
10
9
 
11
10
  async def cognee_network_visualization(graph_data, destination_file_path: str = None):
11
+ import networkx
12
+
12
13
  nodes_data, edges_data = graph_data
13
14
 
14
15
  G = networkx.DiGraph()
@@ -22,6 +23,9 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
22
23
  "TableRow": "#f47710",
23
24
  "TableType": "#6510f4",
24
25
  "ColumnValue": "#13613a",
26
+ "SchemaTable": "#f47710",
27
+ "DatabaseSchema": "#6510f4",
28
+ "SchemaRelationship": "#13613a",
25
29
  "default": "#D3D3D3",
26
30
  }
27
31
 
@@ -104,7 +108,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
104
108
  .nodes circle { stroke: white; stroke-width: 0.5px; filter: drop-shadow(0 0 5px rgba(255,255,255,0.3)); }
105
109
  .node-label { font-size: 5px; font-weight: bold; fill: white; text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
106
110
  .edge-label { font-size: 3px; fill: rgba(255, 255, 255, 0.7); text-anchor: middle; dominant-baseline: middle; font-family: 'Inter', sans-serif; pointer-events: none; }
107
-
111
+
108
112
  .tooltip {
109
113
  position: absolute;
110
114
  text-align: left;
@@ -166,7 +170,7 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
166
170
  // Create tooltip content for edge
167
171
  var content = "<strong>Edge Information</strong><br/>";
168
172
  content += "Relationship: " + d.relation + "<br/>";
169
-
173
+
170
174
  // Show all weights
171
175
  if (d.all_weights && Object.keys(d.all_weights).length > 0) {
172
176
  content += "<strong>Weights:</strong><br/>";
@@ -176,23 +180,23 @@ async def cognee_network_visualization(graph_data, destination_file_path: str =
176
180
  } else if (d.weight !== null && d.weight !== undefined) {
177
181
  content += "Weight: " + d.weight + "<br/>";
178
182
  }
179
-
183
+
180
184
  if (d.relationship_type) {
181
185
  content += "Type: " + d.relationship_type + "<br/>";
182
186
  }
183
-
187
+
184
188
  // Add other edge properties
185
189
  if (d.edge_info) {
186
190
  Object.keys(d.edge_info).forEach(function(key) {
187
- if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
188
- key !== 'source_node_id' && key !== 'target_node_id' &&
189
- key !== 'relationship_name' && key !== 'updated_at' &&
191
+ if (key !== 'weight' && key !== 'weights' && key !== 'relationship_type' &&
192
+ key !== 'source_node_id' && key !== 'target_node_id' &&
193
+ key !== 'relationship_name' && key !== 'updated_at' &&
190
194
  !key.startsWith('weight_')) {
191
195
  content += key + ": " + d.edge_info[key] + "<br/>";
192
196
  }
193
197
  });
194
198
  }
195
-
199
+
196
200
  tooltip.html(content)
197
201
  .style("left", (d3.event.pageX + 10) + "px")
198
202
  .style("top", (d3.event.pageY - 10) + "px")
@@ -288,7 +288,6 @@ class SummarizedCode(BaseModel):
288
288
  class GraphDBType(Enum):
289
289
  NETWORKX = auto()
290
290
  NEO4J = auto()
291
- FALKORDB = auto()
292
291
  KUZU = auto()
293
292
 
294
293
 
cognee/shared/utils.py CHANGED
@@ -4,7 +4,6 @@ import os
4
4
  import ssl
5
5
  import requests
6
6
  from datetime import datetime, timezone
7
- import matplotlib.pyplot as plt
8
7
  import http.server
9
8
  import socketserver
10
9
  from threading import Thread
@@ -30,37 +29,6 @@ def create_secure_ssl_context() -> ssl.SSLContext:
30
29
  return ssl.create_default_context()
31
30
 
32
31
 
33
- def get_entities(tagged_tokens):
34
- import nltk
35
-
36
- nltk.download("maxent_ne_chunker", quiet=True)
37
-
38
- from nltk.chunk import ne_chunk
39
-
40
- return ne_chunk(tagged_tokens)
41
-
42
-
43
- def extract_pos_tags(sentence):
44
- """Extract Part-of-Speech (POS) tags for words in a sentence."""
45
- import nltk
46
-
47
- # Ensure that the necessary NLTK resources are downloaded
48
- nltk.download("words", quiet=True)
49
- nltk.download("punkt", quiet=True)
50
- nltk.download("averaged_perceptron_tagger", quiet=True)
51
-
52
- from nltk.tag import pos_tag
53
- from nltk.tokenize import word_tokenize
54
-
55
- # Tokenize the sentence into words
56
- tokens = word_tokenize(sentence)
57
-
58
- # Tag each word with its corresponding POS tag
59
- pos_tags = pos_tag(tokens)
60
-
61
- return pos_tags
62
-
63
-
64
32
  def get_anonymous_id():
65
33
  """Creates or reads a anonymous user id"""
66
34
  tracking_id = os.getenv("TRACKING_ID", None)
@@ -7,7 +7,7 @@ from pydantic import BaseModel
7
7
  from cognee.infrastructure.databases.graph import get_graph_engine
8
8
  from cognee.infrastructure.databases.vector import get_vector_engine
9
9
  from cognee.infrastructure.engine.models import DataPoint
10
- from cognee.infrastructure.llm.LLMGateway import LLMGateway
10
+ from cognee.infrastructure.llm.extraction import extract_categories
11
11
  from cognee.modules.chunking.models.DocumentChunk import DocumentChunk
12
12
 
13
13
 
@@ -40,7 +40,7 @@ async def chunk_naive_llm_classifier(
40
40
  return data_chunks
41
41
 
42
42
  chunk_classifications = await asyncio.gather(
43
- *[LLMGateway.extract_categories(chunk.text, classification_model) for chunk in data_chunks],
43
+ *[extract_categories(chunk.text, classification_model) for chunk in data_chunks],
44
44
  )
45
45
 
46
46
  classification_data_points = []
@@ -4,6 +4,7 @@ from cognee.infrastructure.databases.graph import get_graph_engine
4
4
  from cognee.infrastructure.databases.vector import get_vector_engine
5
5
 
6
6
  from cognee.low_level import DataPoint
7
+ from cognee.infrastructure.llm.prompts import render_prompt
7
8
  from cognee.infrastructure.llm import LLMGateway
8
9
  from cognee.shared.logging_utils import get_logger
9
10
  from cognee.modules.engine.models import NodeSet
@@ -104,8 +105,8 @@ async def add_rule_associations(
104
105
 
105
106
  user_context = {"chat": data, "rules": existing_rules}
106
107
 
107
- user_prompt = LLMGateway.render_prompt(user_prompt_location, context=user_context)
108
- system_prompt = LLMGateway.render_prompt(system_prompt_location, context={})
108
+ user_prompt = render_prompt(user_prompt_location, context=user_context)
109
+ system_prompt = render_prompt(system_prompt_location, context={})
109
110
 
110
111
  rule_list = await LLMGateway.acreate_structured_output(
111
112
  text_input=user_prompt, system_prompt=system_prompt, response_model=RuleSet
@@ -3,6 +3,7 @@ from typing import List
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
+ from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
6
7
  from cognee.infrastructure.entities.BaseEntityExtractor import BaseEntityExtractor
7
8
  from cognee.modules.engine.models import Entity
8
9
  from cognee.modules.engine.models.EntityType import EntityType
@@ -50,8 +51,8 @@ class LLMEntityExtractor(BaseEntityExtractor):
50
51
  try:
51
52
  logger.info(f"Extracting entities from text: {text[:100]}...")
52
53
 
53
- user_prompt = LLMGateway.render_prompt(self.user_prompt_template, {"text": text})
54
- system_prompt = LLMGateway.read_query_prompt(self.system_prompt_template)
54
+ user_prompt = render_prompt(self.user_prompt_template, {"text": text})
55
+ system_prompt = read_query_prompt(self.system_prompt_template)
55
56
 
56
57
  response = await LLMGateway.acreate_structured_output(
57
58
  text_input=user_prompt,