cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. cognee/api/client.py +5 -1
  2. cognee/api/v1/add/add.py +1 -2
  3. cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
  4. cognee/api/v1/cognify/cognify.py +16 -24
  5. cognee/api/v1/cognify/routers/__init__.py +1 -0
  6. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/ontologies.py +37 -12
  10. cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
  11. cognee/api/v1/search/search.py +0 -4
  12. cognee/api/v1/ui/ui.py +68 -38
  13. cognee/context_global_variables.py +16 -61
  14. cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
  15. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  16. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
  17. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  18. cognee/eval_framework/eval_config.py +2 -2
  19. cognee/eval_framework/modal_run_eval.py +28 -16
  20. cognee/infrastructure/databases/graph/config.py +0 -3
  21. cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
  22. cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
  23. cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
  24. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
  25. cognee/infrastructure/databases/utils/__init__.py +0 -3
  26. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
  27. cognee/infrastructure/databases/vector/config.py +0 -2
  28. cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
  32. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
  33. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
  34. cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
  35. cognee/infrastructure/files/storage/s3_config.py +0 -2
  36. cognee/infrastructure/llm/LLMGateway.py +2 -5
  37. cognee/infrastructure/llm/config.py +0 -35
  38. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  39. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
  40. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
  47. cognee/modules/cognify/config.py +0 -2
  48. cognee/modules/data/deletion/prune_system.py +2 -52
  49. cognee/modules/data/methods/delete_dataset.py +0 -26
  50. cognee/modules/engine/models/__init__.py +0 -1
  51. cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
  52. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
  53. cognee/modules/memify/memify.py +7 -1
  54. cognee/modules/pipelines/operations/pipeline.py +2 -18
  55. cognee/modules/retrieval/__init__.py +1 -1
  56. cognee/modules/retrieval/code_retriever.py +232 -0
  57. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
  58. cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
  59. cognee/modules/retrieval/graph_completion_retriever.py +0 -10
  60. cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
  61. cognee/modules/retrieval/temporal_retriever.py +0 -4
  62. cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
  63. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
  64. cognee/modules/search/methods/get_search_type_tools.py +8 -54
  65. cognee/modules/search/methods/no_access_control_search.py +0 -4
  66. cognee/modules/search/methods/search.py +0 -21
  67. cognee/modules/search/types/SearchType.py +1 -1
  68. cognee/modules/settings/get_settings.py +0 -19
  69. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  70. cognee/modules/users/models/DatasetDatabase.py +3 -15
  71. cognee/shared/logging_utils.py +0 -4
  72. cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
  73. cognee/tasks/code/get_local_dependencies_checker.py +20 -0
  74. cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
  75. cognee/tasks/documents/__init__.py +1 -0
  76. cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
  77. cognee/tasks/graph/extract_graph_from_data.py +10 -9
  78. cognee/tasks/repo_processor/__init__.py +2 -0
  79. cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
  80. cognee/tasks/repo_processor/get_non_code_files.py +158 -0
  81. cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
  82. cognee/tasks/storage/add_data_points.py +2 -142
  83. cognee/tests/test_cognee_server_start.py +4 -2
  84. cognee/tests/test_conversation_history.py +1 -23
  85. cognee/tests/test_delete_bmw_example.py +60 -0
  86. cognee/tests/test_search_db.py +1 -37
  87. cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
  88. cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
  89. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
  90. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  91. cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
  92. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
  93. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
  94. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  95. cognee/api/v1/ui/node_setup.py +0 -360
  96. cognee/api/v1/ui/npm_utils.py +0 -50
  97. cognee/eval_framework/Dockerfile +0 -29
  98. cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
  99. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
  100. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
  101. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
  102. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
  103. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
  104. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
  105. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
  106. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
  107. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
  108. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
  109. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
  110. cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
  111. cognee/modules/engine/models/Triplet.py +0 -9
  112. cognee/modules/retrieval/register_retriever.py +0 -10
  113. cognee/modules/retrieval/registered_community_retrievers.py +0 -1
  114. cognee/modules/retrieval/triplet_retriever.py +0 -182
  115. cognee/shared/rate_limiting.py +0 -30
  116. cognee/tasks/memify/get_triplet_datapoints.py +0 -289
  117. cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
  118. cognee/tests/integration/tasks/test_add_data_points.py +0 -139
  119. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
  120. cognee/tests/test_dataset_database_handler.py +0 -137
  121. cognee/tests/test_dataset_delete.py +0 -76
  122. cognee/tests/test_edge_centered_payload.py +0 -170
  123. cognee/tests/test_pipeline_cache.py +0 -164
  124. cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
  125. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
  126. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
  127. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
  128. cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
  129. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.28.0
2
+ Generator: hatchling 1.27.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,360 +0,0 @@
1
- import os
2
- import platform
3
- import subprocess
4
- import tempfile
5
- from pathlib import Path
6
-
7
- import requests
8
-
9
- from cognee.shared.logging_utils import get_logger
10
-
11
- logger = get_logger()
12
-
13
-
14
- def get_nvm_dir() -> Path:
15
- """
16
- Get the nvm directory path following standard nvm installation logic.
17
- Uses XDG_CONFIG_HOME if set, otherwise falls back to ~/.nvm.
18
- """
19
- xdg_config_home = os.environ.get("XDG_CONFIG_HOME")
20
- if xdg_config_home:
21
- return Path(xdg_config_home) / "nvm"
22
- return Path.home() / ".nvm"
23
-
24
-
25
- def get_nvm_sh_path() -> Path:
26
- """
27
- Get the path to nvm.sh following standard nvm installation logic.
28
- """
29
- return get_nvm_dir() / "nvm.sh"
30
-
31
-
32
- def check_nvm_installed() -> bool:
33
- """
34
- Check if nvm (Node Version Manager) is installed.
35
- """
36
- try:
37
- # Check if nvm is available in the shell
38
- # nvm is typically sourced in shell config files, so we need to check via shell
39
- if platform.system() == "Windows":
40
- # On Windows, nvm-windows uses a different approach
41
- result = subprocess.run(
42
- ["nvm", "version"],
43
- capture_output=True,
44
- text=True,
45
- timeout=10,
46
- shell=True,
47
- )
48
- else:
49
- # On Unix-like systems, nvm is a shell function, so we need to source it
50
- # First check if nvm.sh exists
51
- nvm_path = get_nvm_sh_path()
52
- if not nvm_path.exists():
53
- logger.debug(f"nvm.sh not found at {nvm_path}")
54
- return False
55
-
56
- # Try to source nvm and check version, capturing errors
57
- result = subprocess.run(
58
- ["bash", "-c", f"source {nvm_path} && nvm --version"],
59
- capture_output=True,
60
- text=True,
61
- timeout=10,
62
- )
63
-
64
- if result.returncode != 0:
65
- # Log the error to help diagnose configuration issues
66
- if result.stderr:
67
- logger.debug(f"nvm check failed: {result.stderr.strip()}")
68
- return False
69
-
70
- return result.returncode == 0
71
- except Exception as e:
72
- logger.debug(f"Exception checking nvm: {str(e)}")
73
- return False
74
-
75
-
76
- def install_nvm() -> bool:
77
- """
78
- Install nvm (Node Version Manager) on Unix-like systems.
79
- """
80
- if platform.system() == "Windows":
81
- logger.error("nvm installation on Windows requires nvm-windows.")
82
- logger.error(
83
- "Please install nvm-windows manually from: https://github.com/coreybutler/nvm-windows"
84
- )
85
- return False
86
-
87
- logger.info("Installing nvm (Node Version Manager)...")
88
-
89
- try:
90
- # Download and install nvm
91
- nvm_install_script = "https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh"
92
- logger.info(f"Downloading nvm installer from {nvm_install_script}...")
93
-
94
- response = requests.get(nvm_install_script, timeout=60)
95
- response.raise_for_status()
96
-
97
- # Create a temporary script file
98
- with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=False) as f:
99
- f.write(response.text)
100
- install_script_path = f.name
101
-
102
- try:
103
- # Make the script executable and run it
104
- os.chmod(install_script_path, 0o755)
105
- result = subprocess.run(
106
- ["bash", install_script_path],
107
- capture_output=True,
108
- text=True,
109
- timeout=120,
110
- )
111
-
112
- if result.returncode == 0:
113
- logger.info("✓ nvm installed successfully")
114
- # Source nvm in current shell session
115
- nvm_dir = get_nvm_dir()
116
- if nvm_dir.exists():
117
- return True
118
- else:
119
- logger.warning(
120
- f"nvm installation completed but nvm directory not found at {nvm_dir}"
121
- )
122
- return False
123
- else:
124
- logger.error(f"nvm installation failed: {result.stderr}")
125
- return False
126
- finally:
127
- # Clean up temporary script
128
- try:
129
- os.unlink(install_script_path)
130
- except Exception:
131
- pass
132
-
133
- except requests.exceptions.RequestException as e:
134
- logger.error(f"Failed to download nvm installer: {str(e)}")
135
- return False
136
- except Exception as e:
137
- logger.error(f"Failed to install nvm: {str(e)}")
138
- return False
139
-
140
-
141
- def install_node_with_nvm() -> bool:
142
- """
143
- Install the latest Node.js version using nvm.
144
- Returns True if installation succeeds, False otherwise.
145
- """
146
- if platform.system() == "Windows":
147
- logger.error("Node.js installation via nvm on Windows requires nvm-windows.")
148
- logger.error("Please install Node.js manually from: https://nodejs.org/")
149
- return False
150
-
151
- logger.info("Installing latest Node.js version using nvm...")
152
-
153
- try:
154
- # Source nvm and install latest Node.js
155
- nvm_path = get_nvm_sh_path()
156
- if not nvm_path.exists():
157
- logger.error(f"nvm.sh not found at {nvm_path}. nvm may not be properly installed.")
158
- return False
159
-
160
- nvm_source_cmd = f"source {nvm_path}"
161
- install_cmd = f"{nvm_source_cmd} && nvm install node"
162
-
163
- result = subprocess.run(
164
- ["bash", "-c", install_cmd],
165
- capture_output=True,
166
- text=True,
167
- timeout=300, # 5 minutes timeout for Node.js installation
168
- )
169
-
170
- if result.returncode == 0:
171
- logger.info("✓ Node.js installed successfully via nvm")
172
-
173
- # Set as default version
174
- use_cmd = f"{nvm_source_cmd} && nvm alias default node"
175
- subprocess.run(
176
- ["bash", "-c", use_cmd],
177
- capture_output=True,
178
- text=True,
179
- timeout=30,
180
- )
181
-
182
- # Add nvm to PATH for current session
183
- # This ensures node/npm are available in subsequent commands
184
- nvm_dir = get_nvm_dir()
185
- if nvm_dir.exists():
186
- # Update PATH for current process
187
- nvm_bin = nvm_dir / "versions" / "node"
188
- # Find the latest installed version
189
- if nvm_bin.exists():
190
- versions = sorted(nvm_bin.iterdir(), reverse=True)
191
- if versions:
192
- latest_node_bin = versions[0] / "bin"
193
- if latest_node_bin.exists():
194
- current_path = os.environ.get("PATH", "")
195
- os.environ["PATH"] = f"{latest_node_bin}:{current_path}"
196
-
197
- return True
198
- else:
199
- logger.error(f"Failed to install Node.js: {result.stderr}")
200
- return False
201
-
202
- except subprocess.TimeoutExpired:
203
- logger.error("Timeout installing Node.js (this can take several minutes)")
204
- return False
205
- except Exception as e:
206
- logger.error(f"Error installing Node.js: {str(e)}")
207
- return False
208
-
209
-
210
- def check_node_npm() -> tuple[bool, str]: # (is_available, error_message)
211
- """
212
- Check if Node.js and npm are available.
213
- If not available, attempts to install nvm and Node.js automatically.
214
- """
215
-
216
- try:
217
- # Check Node.js - try direct command first, then with nvm if needed
218
- result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)
219
- if result.returncode != 0:
220
- # If direct command fails, try with nvm sourced (in case nvm is installed but not in PATH)
221
- nvm_path = get_nvm_sh_path()
222
- if nvm_path.exists():
223
- result = subprocess.run(
224
- ["bash", "-c", f"source {nvm_path} && node --version"],
225
- capture_output=True,
226
- text=True,
227
- timeout=10,
228
- )
229
- if result.returncode != 0 and result.stderr:
230
- logger.debug(f"Failed to source nvm or run node: {result.stderr.strip()}")
231
- if result.returncode != 0:
232
- # Node.js is not installed, try to install it
233
- logger.info("Node.js is not installed. Attempting to install automatically...")
234
-
235
- # Check if nvm is installed
236
- if not check_nvm_installed():
237
- logger.info("nvm is not installed. Installing nvm first...")
238
- if not install_nvm():
239
- return (
240
- False,
241
- "Failed to install nvm. Please install Node.js manually from https://nodejs.org/",
242
- )
243
-
244
- # Install Node.js using nvm
245
- if not install_node_with_nvm():
246
- return (
247
- False,
248
- "Failed to install Node.js. Please install Node.js manually from https://nodejs.org/",
249
- )
250
-
251
- # Verify installation after automatic setup
252
- # Try with nvm sourced first
253
- nvm_path = get_nvm_sh_path()
254
- if nvm_path.exists():
255
- result = subprocess.run(
256
- ["bash", "-c", f"source {nvm_path} && node --version"],
257
- capture_output=True,
258
- text=True,
259
- timeout=10,
260
- )
261
- if result.returncode != 0 and result.stderr:
262
- logger.debug(
263
- f"Failed to verify node after installation: {result.stderr.strip()}"
264
- )
265
- else:
266
- result = subprocess.run(
267
- ["node", "--version"], capture_output=True, text=True, timeout=10
268
- )
269
- if result.returncode != 0:
270
- nvm_path = get_nvm_sh_path()
271
- return (
272
- False,
273
- f"Node.js installation completed but node command is not available. Please restart your terminal or source {nvm_path}",
274
- )
275
-
276
- node_version = result.stdout.strip()
277
- logger.debug(f"Found Node.js version: {node_version}")
278
-
279
- # Check npm - handle Windows PowerShell scripts
280
- if platform.system() == "Windows":
281
- # On Windows, npm might be a PowerShell script, so we need to use shell=True
282
- result = subprocess.run(
283
- ["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
284
- )
285
- else:
286
- # On Unix-like systems, if we just installed via nvm, we may need to source nvm
287
- # Try direct command first
288
- result = subprocess.run(
289
- ["npm", "--version"], capture_output=True, text=True, timeout=10
290
- )
291
- if result.returncode != 0:
292
- # Try with nvm sourced
293
- nvm_path = get_nvm_sh_path()
294
- if nvm_path.exists():
295
- result = subprocess.run(
296
- ["bash", "-c", f"source {nvm_path} && npm --version"],
297
- capture_output=True,
298
- text=True,
299
- timeout=10,
300
- )
301
- if result.returncode != 0 and result.stderr:
302
- logger.debug(f"Failed to source nvm or run npm: {result.stderr.strip()}")
303
-
304
- if result.returncode != 0:
305
- return False, "npm is not installed or not in PATH"
306
-
307
- npm_version = result.stdout.strip()
308
- logger.debug(f"Found npm version: {npm_version}")
309
-
310
- return True, f"Node.js {node_version}, npm {npm_version}"
311
-
312
- except subprocess.TimeoutExpired:
313
- return False, "Timeout checking Node.js/npm installation"
314
- except FileNotFoundError:
315
- # Node.js is not installed, try to install it
316
- logger.info("Node.js is not found. Attempting to install automatically...")
317
-
318
- # Check if nvm is installed
319
- if not check_nvm_installed():
320
- logger.info("nvm is not installed. Installing nvm first...")
321
- if not install_nvm():
322
- return (
323
- False,
324
- "Failed to install nvm. Please install Node.js manually from https://nodejs.org/",
325
- )
326
-
327
- # Install Node.js using nvm
328
- if not install_node_with_nvm():
329
- return (
330
- False,
331
- "Failed to install Node.js. Please install Node.js manually from https://nodejs.org/",
332
- )
333
-
334
- # Retry checking Node.js after installation
335
- try:
336
- result = subprocess.run(
337
- ["node", "--version"], capture_output=True, text=True, timeout=10
338
- )
339
- if result.returncode == 0:
340
- node_version = result.stdout.strip()
341
- # Check npm
342
- nvm_path = get_nvm_sh_path()
343
- if nvm_path.exists():
344
- result = subprocess.run(
345
- ["bash", "-c", f"source {nvm_path} && npm --version"],
346
- capture_output=True,
347
- text=True,
348
- timeout=10,
349
- )
350
- if result.returncode == 0:
351
- npm_version = result.stdout.strip()
352
- return True, f"Node.js {node_version}, npm {npm_version}"
353
- elif result.stderr:
354
- logger.debug(f"Failed to source nvm or run npm: {result.stderr.strip()}")
355
- except Exception as e:
356
- logger.debug(f"Exception retrying node/npm check: {str(e)}")
357
-
358
- return False, "Node.js/npm not found. Please install Node.js from https://nodejs.org/"
359
- except Exception as e:
360
- return False, f"Error checking Node.js/npm: {str(e)}"
@@ -1,50 +0,0 @@
1
- import platform
2
- import subprocess
3
- from pathlib import Path
4
- from typing import List
5
-
6
- from cognee.shared.logging_utils import get_logger
7
- from .node_setup import get_nvm_sh_path
8
-
9
- logger = get_logger()
10
-
11
-
12
- def run_npm_command(cmd: List[str], cwd: Path, timeout: int = 300) -> subprocess.CompletedProcess:
13
- """
14
- Run an npm command, ensuring nvm is sourced if needed (Unix-like systems only).
15
- Returns the CompletedProcess result.
16
- """
17
- if platform.system() == "Windows":
18
- # On Windows, use shell=True for npm commands
19
- return subprocess.run(
20
- cmd,
21
- cwd=cwd,
22
- capture_output=True,
23
- text=True,
24
- timeout=timeout,
25
- shell=True,
26
- )
27
- else:
28
- # On Unix-like systems, try direct command first
29
- result = subprocess.run(
30
- cmd,
31
- cwd=cwd,
32
- capture_output=True,
33
- text=True,
34
- timeout=timeout,
35
- )
36
- # If it fails and nvm might be installed, try with nvm sourced
37
- if result.returncode != 0:
38
- nvm_path = get_nvm_sh_path()
39
- if nvm_path.exists():
40
- nvm_cmd = f"source {nvm_path} && {' '.join(cmd)}"
41
- result = subprocess.run(
42
- ["bash", "-c", nvm_cmd],
43
- cwd=cwd,
44
- capture_output=True,
45
- text=True,
46
- timeout=timeout,
47
- )
48
- if result.returncode != 0 and result.stderr:
49
- logger.debug(f"npm command failed with nvm: {result.stderr.strip()}")
50
- return result
@@ -1,29 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- # Set environment variables
4
- ENV PIP_NO_CACHE_DIR=true
5
- ENV PATH="${PATH}:/root/.poetry/bin"
6
- ENV PYTHONPATH=/app
7
- ENV SKIP_MIGRATIONS=true
8
-
9
- # System dependencies
10
- RUN apt-get update && apt-get install -y \
11
- gcc \
12
- libpq-dev \
13
- git \
14
- curl \
15
- build-essential \
16
- && rm -rf /var/lib/apt/lists/*
17
-
18
- WORKDIR /app
19
-
20
- COPY pyproject.toml poetry.lock README.md /app/
21
-
22
- RUN pip install poetry
23
-
24
- RUN poetry config virtualenvs.create false
25
-
26
- RUN poetry install --extras distributed --extras evals --extras deepeval --no-root
27
-
28
- COPY cognee/ /app/cognee
29
- COPY distributed/ /app/distributed
@@ -1,3 +0,0 @@
1
- from .dataset_database_handler_interface import DatasetDatabaseHandlerInterface
2
- from .supported_dataset_database_handlers import supported_dataset_database_handlers
3
- from .use_dataset_database_handler import use_dataset_database_handler
@@ -1,80 +0,0 @@
1
- from typing import Optional
2
- from uuid import UUID
3
- from abc import ABC, abstractmethod
4
-
5
- from cognee.modules.users.models.User import User
6
- from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
7
-
8
-
9
- class DatasetDatabaseHandlerInterface(ABC):
10
- @classmethod
11
- @abstractmethod
12
- async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
13
- """
14
- Return a dictionary with database connection/resolution info for a graph or vector database for the given dataset.
15
- Function can auto handle deploying of the actual database if needed, but is not necessary.
16
- Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future.
17
- Needed for Cognee multi-tenant/multi-user and backend access control support.
18
-
19
- Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database.
20
- From which internal mapping of dataset -> database connection info will be done.
21
-
22
- The returned dictionary is stored verbatim in the relational database and is later passed to
23
- resolve_dataset_connection_info() at connection time. For safe credential handling, prefer
24
- returning only references to secrets or role identifiers, not plaintext credentials.
25
-
26
- Each dataset needs to map to a unique graph or vector database when backend access control is enabled to facilitate a separation of concern for data.
27
-
28
- Args:
29
- dataset_id: UUID of the dataset if needed by the database creation logic
30
- user: User object if needed by the database creation logic
31
- Returns:
32
- dict: Connection info for the created graph or vector database instance.
33
- """
34
- pass
35
-
36
- @classmethod
37
- async def resolve_dataset_connection_info(
38
- cls, dataset_database: DatasetDatabase
39
- ) -> DatasetDatabase:
40
- """
41
- Resolve runtime connection details for a dataset’s backing graph/vector database.
42
- Function is intended to be overwritten to implement custom logic for resolving connection info.
43
-
44
- This method is invoked right before the application opens a connection for a given dataset.
45
- It receives the DatasetDatabase row that was persisted when create_dataset() ran and must
46
- return a modified instance of DatasetDatabase with concrete connection parameters that the client/driver can use.
47
- Do not update these new DatasetDatabase values in the relational database to avoid storing secure credentials.
48
-
49
- In case of separate graph and vector database handlers, each handler should implement its own logic for resolving
50
- connection info and only change parameters related to its appropriate database, the resolution function will then
51
- be called one after another with the updated DatasetDatabase value from the previous function as the input.
52
-
53
- Typical behavior:
54
- - If the DatasetDatabase row already contains raw connection fields (e.g., host/port/db/user/password
55
- or api_url/api_key), return them as-is.
56
- - If the row stores only references (e.g., secret IDs, vault paths, cloud resource ARNs/IDs, IAM
57
- roles, SSO tokens), resolve those references by calling the appropriate secret manager or provider
58
- API to obtain short-lived credentials and assemble the final connection DatasetDatabase object.
59
- - Do not persist any resolved or decrypted secrets back to the relational database. Return them only
60
- to the caller.
61
-
62
- Args:
63
- dataset_database: DatasetDatabase row from the relational database
64
- Returns:
65
- DatasetDatabase: Updated instance with resolved connection info
66
- """
67
- return dataset_database
68
-
69
- @classmethod
70
- @abstractmethod
71
- async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
72
- """
73
- Delete the graph or vector database for the given dataset.
74
- Function should auto handle deleting of the actual database or send a request to the proper service to delete/mark the database as not needed for the given dataset.
75
- Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control.
76
-
77
- Args:
78
- dataset_database: DatasetDatabase row containing connection/resolution info for the graph or vector database to delete.
79
- """
80
- pass
@@ -1,18 +0,0 @@
1
- from cognee.infrastructure.databases.graph.neo4j_driver.Neo4jAuraDevDatasetDatabaseHandler import (
2
- Neo4jAuraDevDatasetDatabaseHandler,
3
- )
4
- from cognee.infrastructure.databases.vector.lancedb.LanceDBDatasetDatabaseHandler import (
5
- LanceDBDatasetDatabaseHandler,
6
- )
7
- from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler import (
8
- KuzuDatasetDatabaseHandler,
9
- )
10
-
11
- supported_dataset_database_handlers = {
12
- "neo4j_aura_dev": {
13
- "handler_instance": Neo4jAuraDevDatasetDatabaseHandler,
14
- "handler_provider": "neo4j",
15
- },
16
- "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
17
- "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
18
- }
@@ -1,10 +0,0 @@
1
- from .supported_dataset_database_handlers import supported_dataset_database_handlers
2
-
3
-
4
- def use_dataset_database_handler(
5
- dataset_database_handler_name, dataset_database_handler, dataset_database_provider
6
- ):
7
- supported_dataset_database_handlers[dataset_database_handler_name] = {
8
- "handler_instance": dataset_database_handler,
9
- "handler_provider": dataset_database_provider,
10
- }
@@ -1,81 +0,0 @@
1
- import os
2
- from uuid import UUID
3
- from typing import Optional
4
-
5
- from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
6
- from cognee.base_config import get_base_config
7
- from cognee.modules.users.models import User
8
- from cognee.modules.users.models import DatasetDatabase
9
- from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
10
-
11
-
12
- class KuzuDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
13
- """
14
- Handler for interacting with Kuzu Dataset databases.
15
- """
16
-
17
- @classmethod
18
- async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
19
- """
20
- Create a new Kuzu instance for the dataset. Return connection info that will be mapped to the dataset.
21
-
22
- Args:
23
- dataset_id: Dataset UUID
24
- user: User object who owns the dataset and is making the request
25
-
26
- Returns:
27
- dict: Connection details for the created Kuzu instance
28
-
29
- """
30
- from cognee.infrastructure.databases.graph.config import get_graph_config
31
-
32
- graph_config = get_graph_config()
33
-
34
- if graph_config.graph_database_provider != "kuzu":
35
- raise ValueError(
36
- "KuzuDatasetDatabaseHandler can only be used with Kuzu graph database provider."
37
- )
38
-
39
- graph_db_name = f"{dataset_id}.pkl"
40
- graph_db_url = graph_config.graph_database_url
41
- graph_db_key = graph_config.graph_database_key
42
- graph_db_username = graph_config.graph_database_username
43
- graph_db_password = graph_config.graph_database_password
44
-
45
- return {
46
- "graph_database_name": graph_db_name,
47
- "graph_database_url": graph_db_url,
48
- "graph_database_provider": graph_config.graph_database_provider,
49
- "graph_database_key": graph_db_key,
50
- "graph_dataset_database_handler": "kuzu",
51
- "graph_database_connection_info": {
52
- "graph_database_username": graph_db_username,
53
- "graph_database_password": graph_db_password,
54
- },
55
- }
56
-
57
- @classmethod
58
- async def delete_dataset(cls, dataset_database: DatasetDatabase):
59
- base_config = get_base_config()
60
- databases_directory_path = os.path.join(
61
- base_config.system_root_directory, "databases", str(dataset_database.owner_id)
62
- )
63
- graph_file_path = os.path.join(
64
- databases_directory_path, dataset_database.graph_database_name
65
- )
66
- graph_engine = create_graph_engine(
67
- graph_database_provider=dataset_database.graph_database_provider,
68
- graph_database_url=dataset_database.graph_database_url,
69
- graph_database_name=dataset_database.graph_database_name,
70
- graph_database_key=dataset_database.graph_database_key,
71
- graph_file_path=graph_file_path,
72
- graph_database_username=dataset_database.graph_database_connection_info.get(
73
- "graph_database_username", ""
74
- ),
75
- graph_database_password=dataset_database.graph_database_connection_info.get(
76
- "graph_database_password", ""
77
- ),
78
- graph_dataset_database_handler="",
79
- graph_database_port="",
80
- )
81
- await graph_engine.delete_graph()