cognee 0.3.4.dev4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183)
  1. cognee/api/client.py +16 -7
  2. cognee/api/health.py +5 -9
  3. cognee/api/v1/add/add.py +3 -1
  4. cognee/api/v1/cognify/cognify.py +44 -7
  5. cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
  6. cognee/api/v1/search/search.py +3 -0
  7. cognee/api/v1/ui/__init__.py +1 -1
  8. cognee/api/v1/ui/ui.py +215 -150
  9. cognee/api/v1/update/__init__.py +1 -0
  10. cognee/api/v1/update/routers/__init__.py +1 -0
  11. cognee/api/v1/update/routers/get_update_router.py +90 -0
  12. cognee/api/v1/update/update.py +100 -0
  13. cognee/base_config.py +5 -2
  14. cognee/cli/_cognee.py +28 -10
  15. cognee/cli/commands/delete_command.py +34 -2
  16. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
  17. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
  18. cognee/eval_framework/modal_eval_dashboard.py +9 -1
  19. cognee/infrastructure/databases/graph/config.py +9 -9
  20. cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
  21. cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
  22. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
  23. cognee/infrastructure/databases/relational/config.py +4 -4
  24. cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
  25. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
  26. cognee/infrastructure/databases/vector/config.py +7 -7
  27. cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
  28. cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
  32. cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
  33. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
  34. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
  35. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
  36. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
  37. cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
  38. cognee/infrastructure/files/storage/s3_config.py +1 -0
  39. cognee/infrastructure/files/utils/open_data_file.py +7 -14
  40. cognee/infrastructure/llm/LLMGateway.py +19 -117
  41. cognee/infrastructure/llm/config.py +28 -13
  42. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
  43. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
  44. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
  45. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
  46. cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
  47. cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
  48. cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
  49. cognee/infrastructure/llm/prompts/test.txt +1 -0
  50. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
  51. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
  52. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
  53. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
  54. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
  55. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
  71. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
  72. cognee/infrastructure/llm/utils.py +4 -4
  73. cognee/infrastructure/loaders/LoaderEngine.py +5 -2
  74. cognee/infrastructure/loaders/external/__init__.py +7 -0
  75. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
  76. cognee/infrastructure/loaders/supported_loaders.py +7 -0
  77. cognee/modules/data/methods/create_authorized_dataset.py +9 -0
  78. cognee/modules/data/methods/get_authorized_dataset.py +1 -1
  79. cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
  80. cognee/modules/data/methods/get_deletion_counts.py +92 -0
  81. cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
  82. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
  83. cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
  84. cognee/modules/ingestion/data_types/TextData.py +0 -1
  85. cognee/modules/observability/get_observe.py +14 -0
  86. cognee/modules/observability/observers.py +1 -0
  87. cognee/modules/ontology/base_ontology_resolver.py +42 -0
  88. cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
  89. cognee/modules/ontology/matching_strategies.py +53 -0
  90. cognee/modules/ontology/models.py +20 -0
  91. cognee/modules/ontology/ontology_config.py +24 -0
  92. cognee/modules/ontology/ontology_env_config.py +45 -0
  93. cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
  94. cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
  95. cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
  96. cognee/modules/retrieval/code_retriever.py +2 -1
  97. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
  98. cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
  99. cognee/modules/retrieval/graph_completion_retriever.py +0 -3
  100. cognee/modules/retrieval/insights_retriever.py +1 -1
  101. cognee/modules/retrieval/jaccard_retrival.py +60 -0
  102. cognee/modules/retrieval/lexical_retriever.py +123 -0
  103. cognee/modules/retrieval/natural_language_retriever.py +2 -1
  104. cognee/modules/retrieval/temporal_retriever.py +3 -2
  105. cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
  106. cognee/modules/retrieval/utils/completion.py +4 -7
  107. cognee/modules/search/methods/get_search_type_tools.py +7 -0
  108. cognee/modules/search/methods/no_access_control_search.py +1 -1
  109. cognee/modules/search/methods/search.py +32 -13
  110. cognee/modules/search/types/SearchType.py +1 -0
  111. cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
  112. cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
  113. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
  114. cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
  115. cognee/modules/users/permissions/methods/get_principal.py +9 -0
  116. cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
  117. cognee/modules/users/permissions/methods/get_role.py +10 -0
  118. cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
  119. cognee/modules/users/permissions/methods/get_tenant.py +9 -0
  120. cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
  121. cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
  122. cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
  123. cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
  124. cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
  125. cognee/modules/users/roles/methods/create_role.py +12 -1
  126. cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
  127. cognee/modules/users/tenants/methods/create_tenant.py +12 -1
  128. cognee/modules/visualization/cognee_network_visualization.py +13 -9
  129. cognee/shared/data_models.py +0 -1
  130. cognee/shared/utils.py +0 -32
  131. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  132. cognee/tasks/codingagents/coding_rule_associations.py +3 -2
  133. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
  134. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
  135. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
  136. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
  137. cognee/tasks/graph/extract_graph_from_code.py +2 -2
  138. cognee/tasks/graph/extract_graph_from_data.py +55 -12
  139. cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
  140. cognee/tasks/ingestion/migrate_relational_database.py +132 -41
  141. cognee/tasks/ingestion/resolve_data_directories.py +4 -1
  142. cognee/tasks/schema/ingest_database_schema.py +134 -0
  143. cognee/tasks/schema/models.py +40 -0
  144. cognee/tasks/storage/index_data_points.py +1 -1
  145. cognee/tasks/storage/index_graph_edges.py +3 -1
  146. cognee/tasks/summarization/summarize_code.py +2 -2
  147. cognee/tasks/summarization/summarize_text.py +2 -2
  148. cognee/tasks/temporal_graph/enrich_events.py +2 -2
  149. cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
  150. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
  151. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
  152. cognee/tests/test_advanced_pdf_loader.py +141 -0
  153. cognee/tests/test_chromadb.py +40 -0
  154. cognee/tests/test_cognee_server_start.py +6 -1
  155. cognee/tests/test_data/Quantum_computers.txt +9 -0
  156. cognee/tests/test_lancedb.py +211 -0
  157. cognee/tests/test_pgvector.py +40 -0
  158. cognee/tests/test_relational_db_migration.py +76 -0
  159. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
  160. cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
  161. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
  162. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
  163. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
  164. {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
  165. {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/RECORD +173 -159
  166. distributed/pyproject.toml +0 -1
  167. cognee/infrastructure/data/utils/extract_keywords.py +0 -48
  168. cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
  169. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
  170. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
  171. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
  172. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
  173. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
  174. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
  175. cognee/tasks/graph/infer_data_ontology.py +0 -309
  176. cognee/tests/test_falkordb.py +0 -174
  177. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
  178. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
  179. /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
  180. {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
  181. {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
  182. {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
  183. {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/update/update.py ADDED
@@ -0,0 +1,100 @@
+ from uuid import UUID
+ from typing import Union, BinaryIO, List, Optional
+
+ from cognee.modules.users.models import User
+ from cognee.api.v1.delete import delete
+ from cognee.api.v1.add import add
+ from cognee.api.v1.cognify import cognify
+
+
+ async def update(
+     data_id: UUID,
+     data: Union[BinaryIO, list[BinaryIO], str, list[str]],
+     user: Optional[User] = None,
+     node_set: Optional[List[str]] = None,
+     dataset_id: Optional[UUID] = None,
+     vector_db_config: Optional[dict] = None,
+     graph_db_config: Optional[dict] = None,
+     preferred_loaders: Optional[List[str]] = None,
+     incremental_loading: bool = True,
+ ):
+     """
+     Update existing data in Cognee.
+
+     Supported Input Types:
+     - **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
+     - **File paths**: Local file paths as strings in these formats:
+         * Absolute paths: "/path/to/document.pdf"
+         * File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
+         * S3 paths: "s3://bucket-name/path/to/file.pdf"
+     - **Binary file objects**: File handles/streams (BinaryIO)
+     - **Lists**: Multiple files or text strings in a single call
+
+     Supported File Formats:
+     - Text files (.txt, .md, .csv)
+     - PDFs (.pdf)
+     - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
+     - Audio files (.mp3, .wav) - transcribed to text
+     - Code files (.py, .js, .ts, etc.) - parsed for structure and content
+     - Office documents (.docx, .pptx)
+
+     Workflow:
+     1. **Data Resolution**: Resolves file paths and validates accessibility
+     2. **Content Extraction**: Extracts text content from various file formats
+     3. **Dataset Storage**: Stores processed content in the specified dataset
+     4. **Metadata Tracking**: Records file metadata, timestamps, and user permissions
+     5. **Permission Assignment**: Grants user read/write/delete/share permissions on dataset
+
+     Args:
+         data_id: UUID of the existing data to update.
+         data: The latest version of the data. Can be:
+             - Single text string: "Your text content here"
+             - Absolute file path: "/path/to/document.pdf"
+             - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
+             - S3 path: "s3://my-bucket/documents/file.pdf"
+             - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
+             - Binary file object: open("file.txt", "rb")
+         user: User object for authentication and permissions. Uses default user if None.
+             Default user: "default_user@example.com" (created automatically on first use).
+             Users can only access datasets they have permissions for.
+         node_set: Optional list of node identifiers for graph organization and access control.
+             Used for grouping related data points in the knowledge graph.
+         dataset_id: Optional UUID of the dataset to store the data in. Defaults to the
+             "main_dataset" dataset when omitted. Create separate datasets to organize
+             different knowledge domains.
+         vector_db_config: Optional configuration for vector database (for custom setups).
+         graph_db_config: Optional configuration for graph database (for custom setups).
+
+     Returns:
+         PipelineRunInfo: Information about the ingestion pipeline execution including:
+             - Pipeline run ID for tracking
+             - Dataset ID where data was stored
+             - Processing status and any errors
+             - Execution timestamps and metadata
+     """
+     await delete(
+         data_id=data_id,
+         dataset_id=dataset_id,
+         user=user,
+     )
+
+     await add(
+         data=data,
+         dataset_id=dataset_id,
+         user=user,
+         node_set=node_set,
+         vector_db_config=vector_db_config,
+         graph_db_config=graph_db_config,
+         preferred_loaders=preferred_loaders,
+         incremental_loading=incremental_loading,
+     )
+
+     cognify_run = await cognify(
+         datasets=[dataset_id],
+         user=user,
+         vector_db_config=vector_db_config,
+         graph_db_config=graph_db_config,
+         incremental_loading=incremental_loading,
+     )
+
+     return cognify_run
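A minimal usage sketch of the new delete-add-cognify flow, assuming `update` is re-exported by the new `cognee/api/v1/update/__init__.py`; the UUIDs are placeholders and `data_id` must reference previously ingested data:

```python
import asyncio
from uuid import UUID

from cognee.api.v1.update import update

async def main():
    # Placeholder UUIDs: data_id identifies existing data, dataset_id its dataset.
    run_info = await update(
        data_id=UUID("00000000-0000-0000-0000-000000000001"),
        data="Revised text content for the existing document.",
        dataset_id=UUID("00000000-0000-0000-0000-000000000002"),
    )
    print(run_info)

asyncio.run(main())
```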
cognee/base_config.py CHANGED
@@ -11,7 +11,7 @@ class BaseConfig(BaseSettings):
      data_root_directory: str = get_absolute_path(".data_storage")
      system_root_directory: str = get_absolute_path(".cognee_system")
      cache_root_directory: str = get_absolute_path(".cognee_cache")
-     monitoring_tool: object = Observer.LANGFUSE
+     monitoring_tool: object = Observer.NONE

      @pydantic.model_validator(mode="after")
      def validate_paths(self):
@@ -30,7 +30,10 @@ class BaseConfig(BaseSettings):
          # Require absolute paths for root directories
          self.data_root_directory = ensure_absolute_path(self.data_root_directory)
          self.system_root_directory = ensure_absolute_path(self.system_root_directory)
-         self.cache_root_directory = ensure_absolute_path(self.cache_root_directory)
+         # Set monitoring tool based on available keys
+         if self.langfuse_public_key and self.langfuse_secret_key:
+             self.monitoring_tool = Observer.LANGFUSE
+
          return self

      langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
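The net effect of this `base_config.py` change: monitoring defaults to `Observer.NONE` and flips to Langfuse only when both keys are configured. A standalone sketch of the rule; `LANGFUSE_SECRET_KEY` is assumed as the counterpart to the `LANGFUSE_PUBLIC_KEY` variable shown in the diff:

```python
import os

def select_monitoring_tool() -> str:
    # Mirrors the validator above: enable Langfuse only when both keys exist.
    public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
    secret_key = os.getenv("LANGFUSE_SECRET_KEY")  # assumed counterpart variable
    return "LANGFUSE" if public_key and secret_key else "NONE"
```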
cognee/cli/_cognee.py CHANGED
@@ -183,10 +183,20 @@ def main() -> int:

      for pid in spawned_pids:
          try:
-             pgid = os.getpgid(pid)
-             os.killpg(pgid, signal.SIGTERM)
-             fmt.success(f"✓ Process group {pgid} (PID {pid}) terminated.")
-         except (OSError, ProcessLookupError) as e:
+             if hasattr(os, "killpg"):
+                 # Unix-like systems: Use process groups
+                 pgid = os.getpgid(pid)
+                 os.killpg(pgid, signal.SIGTERM)
+                 fmt.success(f"✓ Process group {pgid} (PID {pid}) terminated.")
+             else:
+                 # Windows: Use taskkill to terminate process and its children
+                 subprocess.run(
+                     ["taskkill", "/F", "/T", "/PID", str(pid)],
+                     capture_output=True,
+                     check=False,
+                 )
+                 fmt.success(f"✓ Process {pid} and its children terminated.")
+         except (OSError, ProcessLookupError, subprocess.SubprocessError) as e:
              fmt.warning(f"Could not terminate process {pid}: {e}")

      sys.exit(0)
@@ -204,19 +214,27 @@ def main() -> int:
          nonlocal spawned_pids
          spawned_pids.append(pid)

+     frontend_port = 3000
+     start_backend, backend_port = True, 8000
+     start_mcp, mcp_port = True, 8001
      server_process = start_ui(
-         host="localhost",
-         port=3000,
+         pid_callback=pid_callback,
+         port=frontend_port,
          open_browser=True,
-         start_backend=True,
          auto_download=True,
-         pid_callback=pid_callback,
+         start_backend=start_backend,
+         backend_port=backend_port,
+         start_mcp=start_mcp,
+         mcp_port=mcp_port,
      )

      if server_process:
          fmt.success("UI server started successfully!")
-         fmt.echo("The interface is available at: http://localhost:3000")
-         fmt.echo("The API backend is available at: http://localhost:8000")
+         fmt.echo(f"The interface is available at: http://localhost:{frontend_port}")
+         if start_backend:
+             fmt.echo(f"The API backend is available at: http://localhost:{backend_port}")
+         if start_mcp:
+             fmt.echo(f"The MCP server is available at: http://localhost:{mcp_port}")
          fmt.note("Press Ctrl+C to stop the server...")

          try:
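The cross-platform shutdown logic above generalizes to a small helper; a sketch under the same assumptions (SIGTERM to the process group on Unix, `taskkill /F /T` on Windows):

```python
import os
import signal
import subprocess

def terminate_process_tree(pid: int) -> None:
    """Terminate a process and its children on Unix or Windows."""
    if hasattr(os, "killpg"):
        # Unix: signal the whole process group at once
        os.killpg(os.getpgid(pid), signal.SIGTERM)
    else:
        # Windows: /T walks the child tree, /F forces termination
        subprocess.run(
            ["taskkill", "/F", "/T", "/PID", str(pid)],
            capture_output=True,
            check=False,
        )
```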
cognee/cli/commands/delete_command.py CHANGED
@@ -6,6 +6,7 @@ from cognee.cli.reference import SupportsCliCommand
  from cognee.cli import DEFAULT_DOCS_URL
  import cognee.cli.echo as fmt
  from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
+ from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts


  class DeleteCommand(SupportsCliCommand):
@@ -41,7 +42,34 @@ Be careful with deletion operations as they are irreversible.
              fmt.error("Please specify what to delete: --dataset-name, --user-id, or --all")
              return

-         # Build confirmation message
+         # If --force is used, skip the preview and go straight to deletion
+         if not args.force:
+             # --- START PREVIEW LOGIC ---
+             fmt.echo("Gathering data for preview...")
+             try:
+                 preview_data = asyncio.run(
+                     get_deletion_counts(
+                         dataset_name=args.dataset_name,
+                         user_id=args.user_id,
+                         all_data=args.all,
+                     )
+                 )
+             except CliCommandException as e:
+                 fmt.error(f"Error occurred when fetching preview data: {str(e)}")
+                 return
+
+             if not preview_data:
+                 fmt.success("No data found to delete.")
+                 return
+
+             fmt.echo("You are about to delete:")
+             fmt.echo(
+                 f"Datasets: {preview_data.datasets}\nEntries: {preview_data.entries}\nUsers: {preview_data.users}"
+             )
+             fmt.echo("-" * 20)
+             # --- END PREVIEW LOGIC ---
+
+         # Build operation message for success/failure logging
          if args.all:
              confirm_msg = "Delete ALL data from cognee?"
              operation = "all data"
@@ -51,8 +79,9 @@ Be careful with deletion operations as they are irreversible.
          elif args.user_id:
              confirm_msg = f"Delete all data for user '{args.user_id}'?"
              operation = f"data for user '{args.user_id}'"
+         else:
+             operation = "data"

-         # Confirm deletion unless forced
          if not args.force:
              fmt.warning("This operation is irreversible!")
              if not fmt.confirm(confirm_msg):
@@ -64,6 +93,8 @@ Be careful with deletion operations as they are irreversible.
          # Run the async delete function
          async def run_delete():
              try:
+                 # NOTE: The underlying cognee.delete() function is currently not working as expected.
+                 # This is a separate bug that this preview feature helps to expose.
                  if args.all:
                      await cognee.delete(dataset_name=None, user_id=args.user_id)
                  else:
@@ -72,6 +103,7 @@ Be careful with deletion operations as they are irreversible.
                  raise CliCommandInnerException(f"Failed to delete: {str(e)}")

          asyncio.run(run_delete())
+         # This success message may be inaccurate due to the underlying bug, but we leave it for now.
          fmt.success(f"Successfully deleted {operation}")

      except Exception as e:
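The preview helper can also be driven directly; a sketch reusing the keyword arguments and result fields visible in the diff (`datasets`, `entries`, `users`), with a hypothetical dataset name:

```python
import asyncio

from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts

async def preview():
    counts = await get_deletion_counts(
        dataset_name="main_dataset",  # hypothetical example value
        user_id=None,
        all_data=False,
    )
    if counts:
        print(f"Datasets: {counts.datasets}, Entries: {counts.entries}, Users: {counts.users}")

asyncio.run(preview())
```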
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py CHANGED
@@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
  from cognee.tasks.graph import extract_graph_from_data
  from cognee.tasks.storage import add_data_points
  from cognee.shared.data_models import KnowledgeGraph
- from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+ from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver


  async def get_default_tasks_by_indices(
@@ -33,7 +33,7 @@ async def get_no_summary_tasks(
      # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
      base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)

-     ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
+     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)

      graph_task = Task(
          extract_graph_from_data,
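For downstream code, the rename (file 93 in the list above: `OntologyResolver.py` → `RDFLibOntologyResolver.py`) is a one-line import change; a sketch with a hypothetical ontology path:

```python
# 0.3.4.dev4:
# from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
# resolver = OntologyResolver(ontology_file="my_ontology.owl")

# 0.3.5:
from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver

resolver = RDFLibOntologyResolver(ontology_file="my_ontology.owl")  # hypothetical path
```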
cognee/eval_framework/evaluation/direct_llm_eval_adapter.py CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
  from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
  from cognee.eval_framework.eval_config import EvalConfig

+ from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
  from cognee.infrastructure.llm import LLMGateway


@@ -25,8 +26,8 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
      ) -> Dict[str, Any]:
          args = {"question": question, "answer": answer, "golden_answer": golden_answer}

-         user_prompt = LLMGateway.render_prompt(self.eval_prompt_path, args)
-         system_prompt = LLMGateway.read_query_prompt(self.system_prompt_path)
+         user_prompt = render_prompt(self.eval_prompt_path, args)
+         system_prompt = read_query_prompt(self.system_prompt_path)

          evaluation = await LLMGateway.acreate_structured_output(
              text_input=user_prompt,
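Prompt rendering moved off `LLMGateway` onto the standalone helpers imported above; callers now use them directly. A sketch with hypothetical prompt file names (the adapter itself passes `self.eval_prompt_path` and `self.system_prompt_path`):

```python
from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt

# Hypothetical prompt paths for illustration only.
user_prompt = render_prompt(
    "eval_prompt.txt",
    {"question": "q", "answer": "a", "golden_answer": "g"},
)
system_prompt = read_query_prompt("system_prompt.txt")
```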
cognee/eval_framework/modal_eval_dashboard.py CHANGED
@@ -1,6 +1,6 @@
  import os
  import json
- import pandas as pd
+
  import subprocess
  import modal
  import streamlit as st
@@ -78,6 +78,14 @@ def main():
          }
      )

+     try:
+         import pandas as pd
+     except ImportError:
+         st.error(
+             "Pandas is required for the evaluation dashboard. Install with 'pip install cognee\"[evals]\"' to use this feature."
+         )
+         return
+
      df = pd.DataFrame(records)
      if df.empty:
          st.warning("No JSON files found in the volume.")
cognee/infrastructure/databases/graph/config.py CHANGED
@@ -50,26 +50,26 @@ class GraphConfig(BaseSettings):
      # Model validator updates graph_filename and path dynamically after class creation based on current database provider
      # If no specific graph_filename or path are provided
      @pydantic.model_validator(mode="after")
-     def fill_derived(cls, values):
-         provider = values.graph_database_provider.lower()
+     def fill_derived(self):
+         provider = self.graph_database_provider.lower()
          base_config = get_base_config()

          # Set default filename if no filename is provided
-         if not values.graph_filename:
-             values.graph_filename = f"cognee_graph_{provider}"
+         if not self.graph_filename:
+             self.graph_filename = f"cognee_graph_{provider}"

          # Handle graph file path
-         if values.graph_file_path:
+         if self.graph_file_path:
              # Check if absolute path is provided
-             values.graph_file_path = ensure_absolute_path(
-                 os.path.join(values.graph_file_path, values.graph_filename)
+             self.graph_file_path = ensure_absolute_path(
+                 os.path.join(self.graph_file_path, self.graph_filename)
              )
          else:
              # Default path
              databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
-             values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename)
+             self.graph_file_path = os.path.join(databases_directory_path, self.graph_filename)

-         return values
+         return self

      def to_dict(self) -> dict:
          """
cognee/infrastructure/databases/graph/get_graph_engine.py CHANGED
@@ -44,16 +44,14 @@ def create_graph_engine(
      Parameters:
      -----------

-     - graph_database_provider: The type of graph database provider to use (e.g., neo4j,
-       falkordb, kuzu).
-     - graph_database_url: The URL for the graph database instance. Required for neo4j
-       and falkordb providers.
+     - graph_database_provider: The type of graph database provider to use (e.g., neo4j, falkor, kuzu).
+     - graph_database_url: The URL for the graph database instance. Required for neo4j and falkordb providers.
      - graph_database_username: The username for authentication with the graph database.
        Required for neo4j provider.
      - graph_database_password: The password for authentication with the graph database.
        Required for neo4j provider.
      - graph_database_port: The port number for the graph database connection. Required
-       for the falkordb provider.
+       for the falkordb provider
      - graph_file_path: The filesystem path to the graph file. Required for the kuzu
        provider.

@@ -86,21 +84,6 @@ def create_graph_engine(
              graph_database_name=graph_database_name or None,
          )

-     elif graph_database_provider == "falkordb":
-         if not (graph_database_url and graph_database_port):
-             raise EnvironmentError("Missing required FalkorDB credentials.")
-
-         from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
-         from cognee.infrastructure.databases.hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
-
-         embedding_engine = get_embedding_engine()
-
-         return FalkorDBAdapter(
-             database_url=graph_database_url,
-             database_port=graph_database_port,
-             embedding_engine=embedding_engine,
-         )
-
      elif graph_database_provider == "kuzu":
          if not graph_file_path:
              raise EnvironmentError("Missing required Kuzu database path.")
@@ -179,5 +162,5 @@ def create_graph_engine(

      raise EnvironmentError(
          f"Unsupported graph database provider: {graph_database_provider}. "
-         f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'falkordb', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
+         f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
      )
cognee/infrastructure/databases/graph/kuzu/adapter.py CHANGED
@@ -48,6 +48,29 @@ class KuzuAdapter(GraphDBInterface):

      def _initialize_connection(self) -> None:
          """Initialize the Kuzu database connection and schema."""
+
+         def _install_json_extension():
+             """
+             Function handles installing of the json extension for the current Kuzu version.
+             This has to be done with an empty graph db before connecting to an existing database otherwise
+             missing json extension errors will be raised.
+             """
+             try:
+                 with tempfile.NamedTemporaryFile(mode="w", delete=True) as temp_file:
+                     temp_graph_file = temp_file.name
+                     tmp_db = Database(
+                         temp_graph_file,
+                         buffer_pool_size=2048 * 1024 * 1024,  # 2048MB buffer pool
+                         max_db_size=4096 * 1024 * 1024,
+                     )
+                     tmp_db.init_database()
+                     connection = Connection(tmp_db)
+                     connection.execute("INSTALL JSON;")
+             except Exception as e:
+                 logger.info(f"JSON extension already installed or not needed: {e}")
+
+         _install_json_extension()
+
          try:
              if "s3://" in self.db_path:
                  with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
@@ -109,11 +132,6 @@ class KuzuAdapter(GraphDBInterface):
              self.db.init_database()
              self.connection = Connection(self.db)

-             try:
-                 self.connection.execute("INSTALL JSON;")
-             except Exception as e:
-                 logger.info(f"JSON extension already installed or not needed: {e}")
-
              try:
                  self.connection.execute("LOAD EXTENSION JSON;")
                  logger.info("Loaded JSON extension")
@@ -1277,7 +1295,6 @@ class KuzuAdapter(GraphDBInterface):
          A tuple containing a list of filtered node properties and a list of filtered edge
          properties.
          """
-
          where_clauses = []
          params = {}

@@ -1288,16 +1305,50 @@ class KuzuAdapter(GraphDBInterface):
              params[param_name] = values

          where_clause = " AND ".join(where_clauses)
-         nodes_query = f"MATCH (n:Node) WHERE {where_clause} RETURN properties(n)"
+         nodes_query = (
+             f"MATCH (n:Node) WHERE {where_clause} RETURN n.id, {{properties: n.properties}}"
+         )
          edges_query = f"""
              MATCH (n1:Node)-[r:EDGE]->(n2:Node)
              WHERE {where_clause.replace("n.", "n1.")} AND {where_clause.replace("n.", "n2.")}
-             RETURN properties(r)
+             RETURN n1.id, n2.id, r.relationship_name, r.properties
          """
          nodes, edges = await asyncio.gather(
              self.query(nodes_query, params), self.query(edges_query, params)
          )
-         return ([n[0] for n in nodes], [e[0] for e in edges])
+         formatted_nodes = []
+         for n in nodes:
+             if n[0]:
+                 node_id = str(n[0])
+                 props = n[1]
+                 if props.get("properties"):
+                     try:
+                         additional_props = json.loads(props["properties"])
+                         props.update(additional_props)
+                         del props["properties"]
+                     except json.JSONDecodeError:
+                         logger.warning(f"Failed to parse properties JSON for node {node_id}")
+                 formatted_nodes.append((node_id, props))
+         if not formatted_nodes:
+             logger.warning("No nodes found in the database")
+             return [], []
+
+         formatted_edges = []
+         for e in edges:
+             if e and len(e) >= 3:
+                 source_id = str(e[0])
+                 target_id = str(e[1])
+                 rel_type = str(e[2])
+                 props = {}
+                 if len(e) > 3 and e[3]:
+                     try:
+                         props = json.loads(e[3])
+                     except (json.JSONDecodeError, TypeError):
+                         logger.warning(
+                             f"Failed to parse edge properties for {source_id}->{target_id}"
+                         )
+                 formatted_edges.append((source_id, target_id, rel_type, props))
+         return formatted_nodes, formatted_edges

      async def get_graph_metrics(self, include_optional=False) -> Dict[str, Any]:
          """
cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py CHANGED
@@ -234,7 +234,7 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
          collection_name: str,
          query_text: Optional[str] = None,
          query_vector: Optional[List[float]] = None,
-         limit: int = None,
+         limit: Optional[int] = None,
          with_vector: bool = False,
      ):
          """
@@ -265,10 +265,10 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
                  "Use this option only when vector data is required."
              )

-         # In the case of excessive limit, or zero / negative value, limit will be set to 10.
+         # In the case of excessive limit, or None / zero / negative value, limit will be set to 10.
          if not limit or limit <= self._TOPK_LOWER_BOUND or limit > self._TOPK_UPPER_BOUND:
              logger.warning(
-                 "Provided limit (%s) is invalid (zero, negative, or exceeds maximum). "
+                 "Provided limit (%s) is invalid (None, zero, negative, or exceeds maximum). "
                  "Defaulting to limit=10.",
                  limit,
              )
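The limit handling reduces to a small clamp; a sketch in which the bound values are assumptions standing in for the adapter's private `_TOPK_LOWER_BOUND` / `_TOPK_UPPER_BOUND` constants:

```python
from typing import Optional

def normalize_limit(
    limit: Optional[int], lower: int = 0, upper: int = 10_000, default: int = 10
) -> int:
    # None, zero, negative, or out-of-range values all fall back to the default.
    if not limit or limit <= lower or limit > upper:
        return default
    return limit
```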
cognee/infrastructure/databases/relational/config.py CHANGED
@@ -23,14 +23,14 @@ class RelationalConfig(BaseSettings):
      model_config = SettingsConfigDict(env_file=".env", extra="allow")

      @pydantic.model_validator(mode="after")
-     def fill_derived(cls, values):
+     def fill_derived(self):
          # Set file path based on graph database provider if no file path is provided
-         if not values.db_path:
+         if not self.db_path:
              base_config = get_base_config()
              databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
-             values.db_path = databases_directory_path
+             self.db_path = databases_directory_path

-         return values
+         return self

      def to_dict(self) -> dict:
          """
cognee/infrastructure/databases/relational/create_relational_engine.py CHANGED
@@ -39,8 +39,16 @@ def create_relational_engine(
      connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"

      if db_provider == "postgres":
-         connection_string = (
-             f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
-         )
+         try:
+             # Test if asyncpg is available
+             import asyncpg
+
+             connection_string = (
+                 f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
+             )
+         except ImportError:
+             raise ImportError(
+                 "PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PostgreSQL functionality."
+             )

      return SQLAlchemyAdapter(connection_string)
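The same guard pattern works for any optional dependency; a minimal sketch of the asyncpg probe used above:

```python
def require_asyncpg() -> None:
    # Probe the optional driver up front so failures carry an install hint
    # instead of surfacing later as an opaque SQLAlchemy error.
    try:
        import asyncpg  # noqa: F401  -- presence check only
    except ImportError as error:
        raise ImportError(
            "PostgreSQL dependencies are not installed. "
            "Install with: pip install 'cognee[postgres]'"
        ) from error
```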
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py CHANGED
@@ -352,7 +352,7 @@ class ChromaDBAdapter(VectorDBInterface):
          collection_name: str,
          query_text: str = None,
          query_vector: List[float] = None,
-         limit: int = 15,
+         limit: Optional[int] = 15,
          with_vector: bool = False,
          normalized: bool = True,
      ):
@@ -386,9 +386,13 @@ class ChromaDBAdapter(VectorDBInterface):
          try:
              collection = await self.get_collection(collection_name)

-             if limit == 0:
+             if limit is None:
                  limit = await collection.count()

+             # If limit is still 0, no need to do the search, just return empty results
+             if limit <= 0:
+                 return []
+
              results = await collection.query(
                  query_embeddings=[query_vector],
                  include=["metadatas", "distances", "embeddings"]
@@ -428,7 +432,7 @@ class ChromaDBAdapter(VectorDBInterface):
                  for row in vector_list
              ]
          except Exception as e:
-             logger.error(f"Error in search: {str(e)}")
+             logger.warning(f"Error in search: {str(e)}")
              return []

      async def batch_search(
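The new ChromaDB limit semantics in the hunks above: `None` expands to the full collection count, while a non-positive limit skips the query entirely. A sketch of the rule, assuming a collection object exposing the async `count()` used above:

```python
from typing import Optional

async def resolve_search_limit(collection, limit: Optional[int]) -> Optional[int]:
    # limit=None -> search the whole collection; limit<=0 -> caller returns []
    if limit is None:
        limit = await collection.count()
    return limit if limit > 0 else None
```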
cognee/infrastructure/databases/vector/config.py CHANGED
@@ -30,21 +30,21 @@ class VectorConfig(BaseSettings):
      model_config = SettingsConfigDict(env_file=".env", extra="allow")

      @pydantic.model_validator(mode="after")
-     def validate_paths(cls, values):
+     def validate_paths(self):
          base_config = get_base_config()

          # If vector_db_url is provided and is not a path skip checking if path is absolute (as it can also be a url)
-         if values.vector_db_url and Path(values.vector_db_url).exists():
+         if self.vector_db_url and Path(self.vector_db_url).exists():
              # Relative path to absolute
-             values.vector_db_url = ensure_absolute_path(
-                 values.vector_db_url,
+             self.vector_db_url = ensure_absolute_path(
+                 self.vector_db_url,
              )
-         elif not values.vector_db_url:
+         elif not self.vector_db_url:
              # Default path
              databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
-             values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
+             self.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")

-         return values
+         return self

      def to_dict(self) -> dict:
          """