cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/ui/node_setup.py
DELETED
|
@@ -1,360 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import platform
|
|
3
|
-
import subprocess
|
|
4
|
-
import tempfile
|
|
5
|
-
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import requests
|
|
8
|
-
|
|
9
|
-
from cognee.shared.logging_utils import get_logger
|
|
10
|
-
|
|
11
|
-
logger = get_logger()
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def get_nvm_dir() -> Path:
|
|
15
|
-
"""
|
|
16
|
-
Get the nvm directory path following standard nvm installation logic.
|
|
17
|
-
Uses XDG_CONFIG_HOME if set, otherwise falls back to ~/.nvm.
|
|
18
|
-
"""
|
|
19
|
-
xdg_config_home = os.environ.get("XDG_CONFIG_HOME")
|
|
20
|
-
if xdg_config_home:
|
|
21
|
-
return Path(xdg_config_home) / "nvm"
|
|
22
|
-
return Path.home() / ".nvm"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
def get_nvm_sh_path() -> Path:
|
|
26
|
-
"""
|
|
27
|
-
Get the path to nvm.sh following standard nvm installation logic.
|
|
28
|
-
"""
|
|
29
|
-
return get_nvm_dir() / "nvm.sh"
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def check_nvm_installed() -> bool:
|
|
33
|
-
"""
|
|
34
|
-
Check if nvm (Node Version Manager) is installed.
|
|
35
|
-
"""
|
|
36
|
-
try:
|
|
37
|
-
# Check if nvm is available in the shell
|
|
38
|
-
# nvm is typically sourced in shell config files, so we need to check via shell
|
|
39
|
-
if platform.system() == "Windows":
|
|
40
|
-
# On Windows, nvm-windows uses a different approach
|
|
41
|
-
result = subprocess.run(
|
|
42
|
-
["nvm", "version"],
|
|
43
|
-
capture_output=True,
|
|
44
|
-
text=True,
|
|
45
|
-
timeout=10,
|
|
46
|
-
shell=True,
|
|
47
|
-
)
|
|
48
|
-
else:
|
|
49
|
-
# On Unix-like systems, nvm is a shell function, so we need to source it
|
|
50
|
-
# First check if nvm.sh exists
|
|
51
|
-
nvm_path = get_nvm_sh_path()
|
|
52
|
-
if not nvm_path.exists():
|
|
53
|
-
logger.debug(f"nvm.sh not found at {nvm_path}")
|
|
54
|
-
return False
|
|
55
|
-
|
|
56
|
-
# Try to source nvm and check version, capturing errors
|
|
57
|
-
result = subprocess.run(
|
|
58
|
-
["bash", "-c", f"source {nvm_path} && nvm --version"],
|
|
59
|
-
capture_output=True,
|
|
60
|
-
text=True,
|
|
61
|
-
timeout=10,
|
|
62
|
-
)
|
|
63
|
-
|
|
64
|
-
if result.returncode != 0:
|
|
65
|
-
# Log the error to help diagnose configuration issues
|
|
66
|
-
if result.stderr:
|
|
67
|
-
logger.debug(f"nvm check failed: {result.stderr.strip()}")
|
|
68
|
-
return False
|
|
69
|
-
|
|
70
|
-
return result.returncode == 0
|
|
71
|
-
except Exception as e:
|
|
72
|
-
logger.debug(f"Exception checking nvm: {str(e)}")
|
|
73
|
-
return False
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def install_nvm() -> bool:
|
|
77
|
-
"""
|
|
78
|
-
Install nvm (Node Version Manager) on Unix-like systems.
|
|
79
|
-
"""
|
|
80
|
-
if platform.system() == "Windows":
|
|
81
|
-
logger.error("nvm installation on Windows requires nvm-windows.")
|
|
82
|
-
logger.error(
|
|
83
|
-
"Please install nvm-windows manually from: https://github.com/coreybutler/nvm-windows"
|
|
84
|
-
)
|
|
85
|
-
return False
|
|
86
|
-
|
|
87
|
-
logger.info("Installing nvm (Node Version Manager)...")
|
|
88
|
-
|
|
89
|
-
try:
|
|
90
|
-
# Download and install nvm
|
|
91
|
-
nvm_install_script = "https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh"
|
|
92
|
-
logger.info(f"Downloading nvm installer from {nvm_install_script}...")
|
|
93
|
-
|
|
94
|
-
response = requests.get(nvm_install_script, timeout=60)
|
|
95
|
-
response.raise_for_status()
|
|
96
|
-
|
|
97
|
-
# Create a temporary script file
|
|
98
|
-
with tempfile.NamedTemporaryFile(mode="w", suffix=".sh", delete=False) as f:
|
|
99
|
-
f.write(response.text)
|
|
100
|
-
install_script_path = f.name
|
|
101
|
-
|
|
102
|
-
try:
|
|
103
|
-
# Make the script executable and run it
|
|
104
|
-
os.chmod(install_script_path, 0o755)
|
|
105
|
-
result = subprocess.run(
|
|
106
|
-
["bash", install_script_path],
|
|
107
|
-
capture_output=True,
|
|
108
|
-
text=True,
|
|
109
|
-
timeout=120,
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
if result.returncode == 0:
|
|
113
|
-
logger.info("✓ nvm installed successfully")
|
|
114
|
-
# Source nvm in current shell session
|
|
115
|
-
nvm_dir = get_nvm_dir()
|
|
116
|
-
if nvm_dir.exists():
|
|
117
|
-
return True
|
|
118
|
-
else:
|
|
119
|
-
logger.warning(
|
|
120
|
-
f"nvm installation completed but nvm directory not found at {nvm_dir}"
|
|
121
|
-
)
|
|
122
|
-
return False
|
|
123
|
-
else:
|
|
124
|
-
logger.error(f"nvm installation failed: {result.stderr}")
|
|
125
|
-
return False
|
|
126
|
-
finally:
|
|
127
|
-
# Clean up temporary script
|
|
128
|
-
try:
|
|
129
|
-
os.unlink(install_script_path)
|
|
130
|
-
except Exception:
|
|
131
|
-
pass
|
|
132
|
-
|
|
133
|
-
except requests.exceptions.RequestException as e:
|
|
134
|
-
logger.error(f"Failed to download nvm installer: {str(e)}")
|
|
135
|
-
return False
|
|
136
|
-
except Exception as e:
|
|
137
|
-
logger.error(f"Failed to install nvm: {str(e)}")
|
|
138
|
-
return False
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def install_node_with_nvm() -> bool:
|
|
142
|
-
"""
|
|
143
|
-
Install the latest Node.js version using nvm.
|
|
144
|
-
Returns True if installation succeeds, False otherwise.
|
|
145
|
-
"""
|
|
146
|
-
if platform.system() == "Windows":
|
|
147
|
-
logger.error("Node.js installation via nvm on Windows requires nvm-windows.")
|
|
148
|
-
logger.error("Please install Node.js manually from: https://nodejs.org/")
|
|
149
|
-
return False
|
|
150
|
-
|
|
151
|
-
logger.info("Installing latest Node.js version using nvm...")
|
|
152
|
-
|
|
153
|
-
try:
|
|
154
|
-
# Source nvm and install latest Node.js
|
|
155
|
-
nvm_path = get_nvm_sh_path()
|
|
156
|
-
if not nvm_path.exists():
|
|
157
|
-
logger.error(f"nvm.sh not found at {nvm_path}. nvm may not be properly installed.")
|
|
158
|
-
return False
|
|
159
|
-
|
|
160
|
-
nvm_source_cmd = f"source {nvm_path}"
|
|
161
|
-
install_cmd = f"{nvm_source_cmd} && nvm install node"
|
|
162
|
-
|
|
163
|
-
result = subprocess.run(
|
|
164
|
-
["bash", "-c", install_cmd],
|
|
165
|
-
capture_output=True,
|
|
166
|
-
text=True,
|
|
167
|
-
timeout=300, # 5 minutes timeout for Node.js installation
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
if result.returncode == 0:
|
|
171
|
-
logger.info("✓ Node.js installed successfully via nvm")
|
|
172
|
-
|
|
173
|
-
# Set as default version
|
|
174
|
-
use_cmd = f"{nvm_source_cmd} && nvm alias default node"
|
|
175
|
-
subprocess.run(
|
|
176
|
-
["bash", "-c", use_cmd],
|
|
177
|
-
capture_output=True,
|
|
178
|
-
text=True,
|
|
179
|
-
timeout=30,
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
# Add nvm to PATH for current session
|
|
183
|
-
# This ensures node/npm are available in subsequent commands
|
|
184
|
-
nvm_dir = get_nvm_dir()
|
|
185
|
-
if nvm_dir.exists():
|
|
186
|
-
# Update PATH for current process
|
|
187
|
-
nvm_bin = nvm_dir / "versions" / "node"
|
|
188
|
-
# Find the latest installed version
|
|
189
|
-
if nvm_bin.exists():
|
|
190
|
-
versions = sorted(nvm_bin.iterdir(), reverse=True)
|
|
191
|
-
if versions:
|
|
192
|
-
latest_node_bin = versions[0] / "bin"
|
|
193
|
-
if latest_node_bin.exists():
|
|
194
|
-
current_path = os.environ.get("PATH", "")
|
|
195
|
-
os.environ["PATH"] = f"{latest_node_bin}:{current_path}"
|
|
196
|
-
|
|
197
|
-
return True
|
|
198
|
-
else:
|
|
199
|
-
logger.error(f"Failed to install Node.js: {result.stderr}")
|
|
200
|
-
return False
|
|
201
|
-
|
|
202
|
-
except subprocess.TimeoutExpired:
|
|
203
|
-
logger.error("Timeout installing Node.js (this can take several minutes)")
|
|
204
|
-
return False
|
|
205
|
-
except Exception as e:
|
|
206
|
-
logger.error(f"Error installing Node.js: {str(e)}")
|
|
207
|
-
return False
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
def check_node_npm() -> tuple[bool, str]: # (is_available, error_message)
|
|
211
|
-
"""
|
|
212
|
-
Check if Node.js and npm are available.
|
|
213
|
-
If not available, attempts to install nvm and Node.js automatically.
|
|
214
|
-
"""
|
|
215
|
-
|
|
216
|
-
try:
|
|
217
|
-
# Check Node.js - try direct command first, then with nvm if needed
|
|
218
|
-
result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)
|
|
219
|
-
if result.returncode != 0:
|
|
220
|
-
# If direct command fails, try with nvm sourced (in case nvm is installed but not in PATH)
|
|
221
|
-
nvm_path = get_nvm_sh_path()
|
|
222
|
-
if nvm_path.exists():
|
|
223
|
-
result = subprocess.run(
|
|
224
|
-
["bash", "-c", f"source {nvm_path} && node --version"],
|
|
225
|
-
capture_output=True,
|
|
226
|
-
text=True,
|
|
227
|
-
timeout=10,
|
|
228
|
-
)
|
|
229
|
-
if result.returncode != 0 and result.stderr:
|
|
230
|
-
logger.debug(f"Failed to source nvm or run node: {result.stderr.strip()}")
|
|
231
|
-
if result.returncode != 0:
|
|
232
|
-
# Node.js is not installed, try to install it
|
|
233
|
-
logger.info("Node.js is not installed. Attempting to install automatically...")
|
|
234
|
-
|
|
235
|
-
# Check if nvm is installed
|
|
236
|
-
if not check_nvm_installed():
|
|
237
|
-
logger.info("nvm is not installed. Installing nvm first...")
|
|
238
|
-
if not install_nvm():
|
|
239
|
-
return (
|
|
240
|
-
False,
|
|
241
|
-
"Failed to install nvm. Please install Node.js manually from https://nodejs.org/",
|
|
242
|
-
)
|
|
243
|
-
|
|
244
|
-
# Install Node.js using nvm
|
|
245
|
-
if not install_node_with_nvm():
|
|
246
|
-
return (
|
|
247
|
-
False,
|
|
248
|
-
"Failed to install Node.js. Please install Node.js manually from https://nodejs.org/",
|
|
249
|
-
)
|
|
250
|
-
|
|
251
|
-
# Verify installation after automatic setup
|
|
252
|
-
# Try with nvm sourced first
|
|
253
|
-
nvm_path = get_nvm_sh_path()
|
|
254
|
-
if nvm_path.exists():
|
|
255
|
-
result = subprocess.run(
|
|
256
|
-
["bash", "-c", f"source {nvm_path} && node --version"],
|
|
257
|
-
capture_output=True,
|
|
258
|
-
text=True,
|
|
259
|
-
timeout=10,
|
|
260
|
-
)
|
|
261
|
-
if result.returncode != 0 and result.stderr:
|
|
262
|
-
logger.debug(
|
|
263
|
-
f"Failed to verify node after installation: {result.stderr.strip()}"
|
|
264
|
-
)
|
|
265
|
-
else:
|
|
266
|
-
result = subprocess.run(
|
|
267
|
-
["node", "--version"], capture_output=True, text=True, timeout=10
|
|
268
|
-
)
|
|
269
|
-
if result.returncode != 0:
|
|
270
|
-
nvm_path = get_nvm_sh_path()
|
|
271
|
-
return (
|
|
272
|
-
False,
|
|
273
|
-
f"Node.js installation completed but node command is not available. Please restart your terminal or source {nvm_path}",
|
|
274
|
-
)
|
|
275
|
-
|
|
276
|
-
node_version = result.stdout.strip()
|
|
277
|
-
logger.debug(f"Found Node.js version: {node_version}")
|
|
278
|
-
|
|
279
|
-
# Check npm - handle Windows PowerShell scripts
|
|
280
|
-
if platform.system() == "Windows":
|
|
281
|
-
# On Windows, npm might be a PowerShell script, so we need to use shell=True
|
|
282
|
-
result = subprocess.run(
|
|
283
|
-
["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
|
|
284
|
-
)
|
|
285
|
-
else:
|
|
286
|
-
# On Unix-like systems, if we just installed via nvm, we may need to source nvm
|
|
287
|
-
# Try direct command first
|
|
288
|
-
result = subprocess.run(
|
|
289
|
-
["npm", "--version"], capture_output=True, text=True, timeout=10
|
|
290
|
-
)
|
|
291
|
-
if result.returncode != 0:
|
|
292
|
-
# Try with nvm sourced
|
|
293
|
-
nvm_path = get_nvm_sh_path()
|
|
294
|
-
if nvm_path.exists():
|
|
295
|
-
result = subprocess.run(
|
|
296
|
-
["bash", "-c", f"source {nvm_path} && npm --version"],
|
|
297
|
-
capture_output=True,
|
|
298
|
-
text=True,
|
|
299
|
-
timeout=10,
|
|
300
|
-
)
|
|
301
|
-
if result.returncode != 0 and result.stderr:
|
|
302
|
-
logger.debug(f"Failed to source nvm or run npm: {result.stderr.strip()}")
|
|
303
|
-
|
|
304
|
-
if result.returncode != 0:
|
|
305
|
-
return False, "npm is not installed or not in PATH"
|
|
306
|
-
|
|
307
|
-
npm_version = result.stdout.strip()
|
|
308
|
-
logger.debug(f"Found npm version: {npm_version}")
|
|
309
|
-
|
|
310
|
-
return True, f"Node.js {node_version}, npm {npm_version}"
|
|
311
|
-
|
|
312
|
-
except subprocess.TimeoutExpired:
|
|
313
|
-
return False, "Timeout checking Node.js/npm installation"
|
|
314
|
-
except FileNotFoundError:
|
|
315
|
-
# Node.js is not installed, try to install it
|
|
316
|
-
logger.info("Node.js is not found. Attempting to install automatically...")
|
|
317
|
-
|
|
318
|
-
# Check if nvm is installed
|
|
319
|
-
if not check_nvm_installed():
|
|
320
|
-
logger.info("nvm is not installed. Installing nvm first...")
|
|
321
|
-
if not install_nvm():
|
|
322
|
-
return (
|
|
323
|
-
False,
|
|
324
|
-
"Failed to install nvm. Please install Node.js manually from https://nodejs.org/",
|
|
325
|
-
)
|
|
326
|
-
|
|
327
|
-
# Install Node.js using nvm
|
|
328
|
-
if not install_node_with_nvm():
|
|
329
|
-
return (
|
|
330
|
-
False,
|
|
331
|
-
"Failed to install Node.js. Please install Node.js manually from https://nodejs.org/",
|
|
332
|
-
)
|
|
333
|
-
|
|
334
|
-
# Retry checking Node.js after installation
|
|
335
|
-
try:
|
|
336
|
-
result = subprocess.run(
|
|
337
|
-
["node", "--version"], capture_output=True, text=True, timeout=10
|
|
338
|
-
)
|
|
339
|
-
if result.returncode == 0:
|
|
340
|
-
node_version = result.stdout.strip()
|
|
341
|
-
# Check npm
|
|
342
|
-
nvm_path = get_nvm_sh_path()
|
|
343
|
-
if nvm_path.exists():
|
|
344
|
-
result = subprocess.run(
|
|
345
|
-
["bash", "-c", f"source {nvm_path} && npm --version"],
|
|
346
|
-
capture_output=True,
|
|
347
|
-
text=True,
|
|
348
|
-
timeout=10,
|
|
349
|
-
)
|
|
350
|
-
if result.returncode == 0:
|
|
351
|
-
npm_version = result.stdout.strip()
|
|
352
|
-
return True, f"Node.js {node_version}, npm {npm_version}"
|
|
353
|
-
elif result.stderr:
|
|
354
|
-
logger.debug(f"Failed to source nvm or run npm: {result.stderr.strip()}")
|
|
355
|
-
except Exception as e:
|
|
356
|
-
logger.debug(f"Exception retrying node/npm check: {str(e)}")
|
|
357
|
-
|
|
358
|
-
return False, "Node.js/npm not found. Please install Node.js from https://nodejs.org/"
|
|
359
|
-
except Exception as e:
|
|
360
|
-
return False, f"Error checking Node.js/npm: {str(e)}"
|
cognee/api/v1/ui/npm_utils.py
DELETED
|
@@ -1,50 +0,0 @@
|
|
|
1
|
-
import platform
|
|
2
|
-
import subprocess
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import List
|
|
5
|
-
|
|
6
|
-
from cognee.shared.logging_utils import get_logger
|
|
7
|
-
from .node_setup import get_nvm_sh_path
|
|
8
|
-
|
|
9
|
-
logger = get_logger()
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def run_npm_command(cmd: List[str], cwd: Path, timeout: int = 300) -> subprocess.CompletedProcess:
|
|
13
|
-
"""
|
|
14
|
-
Run an npm command, ensuring nvm is sourced if needed (Unix-like systems only).
|
|
15
|
-
Returns the CompletedProcess result.
|
|
16
|
-
"""
|
|
17
|
-
if platform.system() == "Windows":
|
|
18
|
-
# On Windows, use shell=True for npm commands
|
|
19
|
-
return subprocess.run(
|
|
20
|
-
cmd,
|
|
21
|
-
cwd=cwd,
|
|
22
|
-
capture_output=True,
|
|
23
|
-
text=True,
|
|
24
|
-
timeout=timeout,
|
|
25
|
-
shell=True,
|
|
26
|
-
)
|
|
27
|
-
else:
|
|
28
|
-
# On Unix-like systems, try direct command first
|
|
29
|
-
result = subprocess.run(
|
|
30
|
-
cmd,
|
|
31
|
-
cwd=cwd,
|
|
32
|
-
capture_output=True,
|
|
33
|
-
text=True,
|
|
34
|
-
timeout=timeout,
|
|
35
|
-
)
|
|
36
|
-
# If it fails and nvm might be installed, try with nvm sourced
|
|
37
|
-
if result.returncode != 0:
|
|
38
|
-
nvm_path = get_nvm_sh_path()
|
|
39
|
-
if nvm_path.exists():
|
|
40
|
-
nvm_cmd = f"source {nvm_path} && {' '.join(cmd)}"
|
|
41
|
-
result = subprocess.run(
|
|
42
|
-
["bash", "-c", nvm_cmd],
|
|
43
|
-
cwd=cwd,
|
|
44
|
-
capture_output=True,
|
|
45
|
-
text=True,
|
|
46
|
-
timeout=timeout,
|
|
47
|
-
)
|
|
48
|
-
if result.returncode != 0 and result.stderr:
|
|
49
|
-
logger.debug(f"npm command failed with nvm: {result.stderr.strip()}")
|
|
50
|
-
return result
|
cognee/eval_framework/Dockerfile
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
FROM python:3.11-slim
|
|
2
|
-
|
|
3
|
-
# Set environment variables
|
|
4
|
-
ENV PIP_NO_CACHE_DIR=true
|
|
5
|
-
ENV PATH="${PATH}:/root/.poetry/bin"
|
|
6
|
-
ENV PYTHONPATH=/app
|
|
7
|
-
ENV SKIP_MIGRATIONS=true
|
|
8
|
-
|
|
9
|
-
# System dependencies
|
|
10
|
-
RUN apt-get update && apt-get install -y \
|
|
11
|
-
gcc \
|
|
12
|
-
libpq-dev \
|
|
13
|
-
git \
|
|
14
|
-
curl \
|
|
15
|
-
build-essential \
|
|
16
|
-
&& rm -rf /var/lib/apt/lists/*
|
|
17
|
-
|
|
18
|
-
WORKDIR /app
|
|
19
|
-
|
|
20
|
-
COPY pyproject.toml poetry.lock README.md /app/
|
|
21
|
-
|
|
22
|
-
RUN pip install poetry
|
|
23
|
-
|
|
24
|
-
RUN poetry config virtualenvs.create false
|
|
25
|
-
|
|
26
|
-
RUN poetry install --extras distributed --extras evals --extras deepeval --no-root
|
|
27
|
-
|
|
28
|
-
COPY cognee/ /app/cognee
|
|
29
|
-
COPY distributed/ /app/distributed
|
cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
from uuid import UUID
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
|
|
5
|
-
from cognee.modules.users.models.User import User
|
|
6
|
-
from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class DatasetDatabaseHandlerInterface(ABC):
|
|
10
|
-
@classmethod
|
|
11
|
-
@abstractmethod
|
|
12
|
-
async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
|
|
13
|
-
"""
|
|
14
|
-
Return a dictionary with database connection/resolution info for a graph or vector database for the given dataset.
|
|
15
|
-
Function can auto handle deploying of the actual database if needed, but is not necessary.
|
|
16
|
-
Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future.
|
|
17
|
-
Needed for Cognee multi-tenant/multi-user and backend access control support.
|
|
18
|
-
|
|
19
|
-
Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database.
|
|
20
|
-
From which internal mapping of dataset -> database connection info will be done.
|
|
21
|
-
|
|
22
|
-
The returned dictionary is stored verbatim in the relational database and is later passed to
|
|
23
|
-
resolve_dataset_connection_info() at connection time. For safe credential handling, prefer
|
|
24
|
-
returning only references to secrets or role identifiers, not plaintext credentials.
|
|
25
|
-
|
|
26
|
-
Each dataset needs to map to a unique graph or vector database when backend access control is enabled to facilitate a separation of concern for data.
|
|
27
|
-
|
|
28
|
-
Args:
|
|
29
|
-
dataset_id: UUID of the dataset if needed by the database creation logic
|
|
30
|
-
user: User object if needed by the database creation logic
|
|
31
|
-
Returns:
|
|
32
|
-
dict: Connection info for the created graph or vector database instance.
|
|
33
|
-
"""
|
|
34
|
-
pass
|
|
35
|
-
|
|
36
|
-
@classmethod
|
|
37
|
-
async def resolve_dataset_connection_info(
|
|
38
|
-
cls, dataset_database: DatasetDatabase
|
|
39
|
-
) -> DatasetDatabase:
|
|
40
|
-
"""
|
|
41
|
-
Resolve runtime connection details for a dataset’s backing graph/vector database.
|
|
42
|
-
Function is intended to be overwritten to implement custom logic for resolving connection info.
|
|
43
|
-
|
|
44
|
-
This method is invoked right before the application opens a connection for a given dataset.
|
|
45
|
-
It receives the DatasetDatabase row that was persisted when create_dataset() ran and must
|
|
46
|
-
return a modified instance of DatasetDatabase with concrete connection parameters that the client/driver can use.
|
|
47
|
-
Do not update these new DatasetDatabase values in the relational database to avoid storing secure credentials.
|
|
48
|
-
|
|
49
|
-
In case of separate graph and vector database handlers, each handler should implement its own logic for resolving
|
|
50
|
-
connection info and only change parameters related to its appropriate database, the resolution function will then
|
|
51
|
-
be called one after another with the updated DatasetDatabase value from the previous function as the input.
|
|
52
|
-
|
|
53
|
-
Typical behavior:
|
|
54
|
-
- If the DatasetDatabase row already contains raw connection fields (e.g., host/port/db/user/password
|
|
55
|
-
or api_url/api_key), return them as-is.
|
|
56
|
-
- If the row stores only references (e.g., secret IDs, vault paths, cloud resource ARNs/IDs, IAM
|
|
57
|
-
roles, SSO tokens), resolve those references by calling the appropriate secret manager or provider
|
|
58
|
-
API to obtain short-lived credentials and assemble the final connection DatasetDatabase object.
|
|
59
|
-
- Do not persist any resolved or decrypted secrets back to the relational database. Return them only
|
|
60
|
-
to the caller.
|
|
61
|
-
|
|
62
|
-
Args:
|
|
63
|
-
dataset_database: DatasetDatabase row from the relational database
|
|
64
|
-
Returns:
|
|
65
|
-
DatasetDatabase: Updated instance with resolved connection info
|
|
66
|
-
"""
|
|
67
|
-
return dataset_database
|
|
68
|
-
|
|
69
|
-
@classmethod
|
|
70
|
-
@abstractmethod
|
|
71
|
-
async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
|
|
72
|
-
"""
|
|
73
|
-
Delete the graph or vector database for the given dataset.
|
|
74
|
-
Function should auto handle deleting of the actual database or send a request to the proper service to delete/mark the database as not needed for the given dataset.
|
|
75
|
-
Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
dataset_database: DatasetDatabase row containing connection/resolution info for the graph or vector database to delete.
|
|
79
|
-
"""
|
|
80
|
-
pass
|
cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
from cognee.infrastructure.databases.graph.neo4j_driver.Neo4jAuraDevDatasetDatabaseHandler import (
|
|
2
|
-
Neo4jAuraDevDatasetDatabaseHandler,
|
|
3
|
-
)
|
|
4
|
-
from cognee.infrastructure.databases.vector.lancedb.LanceDBDatasetDatabaseHandler import (
|
|
5
|
-
LanceDBDatasetDatabaseHandler,
|
|
6
|
-
)
|
|
7
|
-
from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler import (
|
|
8
|
-
KuzuDatasetDatabaseHandler,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
supported_dataset_database_handlers = {
|
|
12
|
-
"neo4j_aura_dev": {
|
|
13
|
-
"handler_instance": Neo4jAuraDevDatasetDatabaseHandler,
|
|
14
|
-
"handler_provider": "neo4j",
|
|
15
|
-
},
|
|
16
|
-
"lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
|
|
17
|
-
"kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
|
|
18
|
-
}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
from .supported_dataset_database_handlers import supported_dataset_database_handlers
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def use_dataset_database_handler(
    dataset_database_handler_name, dataset_database_handler, dataset_database_provider
):
    """
    Register a dataset database handler in the shared handler registry.

    The entry is stored in ``supported_dataset_database_handlers`` under the given
    name; an entry already registered under that name is replaced.

    Args:
        dataset_database_handler_name: Key under which the handler is registered.
        dataset_database_handler: Handler object stored as "handler_instance".
        dataset_database_provider: Provider name stored as "handler_provider".
    """
    registry_entry = {
        "handler_instance": dataset_database_handler,
        "handler_provider": dataset_database_provider,
    }
    supported_dataset_database_handlers[dataset_database_handler_name] = registry_entry
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from uuid import UUID
|
|
3
|
-
from typing import Optional
|
|
4
|
-
|
|
5
|
-
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
|
|
6
|
-
from cognee.base_config import get_base_config
|
|
7
|
-
from cognee.modules.users.models import User
|
|
8
|
-
from cognee.modules.users.models import DatasetDatabase
|
|
9
|
-
from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class KuzuDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    """
    Dataset database handler for datasets stored in Kuzu graph databases.
    """

    @classmethod
    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
        """
        Assemble the Kuzu connection info that will be mapped to the dataset.

        The values are taken from the global graph config; only the database name is
        derived from the dataset id.

        Args:
            dataset_id: Dataset UUID
            user: User object who owns the dataset and is making the request

        Returns:
            dict: Connection details for the dataset's Kuzu instance

        Raises:
            ValueError: If the configured graph database provider is not "kuzu".
        """
        # Imported here rather than at module level, as in the original implementation.
        from cognee.infrastructure.databases.graph.config import get_graph_config

        config = get_graph_config()
        provider = config.graph_database_provider

        if provider != "kuzu":
            raise ValueError(
                "KuzuDatasetDatabaseHandler can only be used with Kuzu graph database provider."
            )

        credentials = {
            "graph_database_username": config.graph_database_username,
            "graph_database_password": config.graph_database_password,
        }

        return {
            "graph_database_name": f"{dataset_id}.pkl",
            "graph_database_url": config.graph_database_url,
            "graph_database_provider": provider,
            "graph_database_key": config.graph_database_key,
            "graph_dataset_database_handler": "kuzu",
            "graph_database_connection_info": credentials,
        }

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase):
        """
        Delete the graph that belongs to the given dataset database row.

        A graph engine is created from the connection details stored on the row and
        its ``delete_graph`` coroutine is awaited.

        Args:
            dataset_database: DatasetDatabase row describing the graph database to delete.
        """
        # Graph files are located under <system root>/databases/<owner id>/<database name>.
        owner_directory = os.path.join(
            get_base_config().system_root_directory, "databases", str(dataset_database.owner_id)
        )
        connection_info = dataset_database.graph_database_connection_info

        engine = create_graph_engine(
            graph_database_provider=dataset_database.graph_database_provider,
            graph_database_url=dataset_database.graph_database_url,
            graph_database_name=dataset_database.graph_database_name,
            graph_database_key=dataset_database.graph_database_key,
            graph_file_path=os.path.join(owner_directory, dataset_database.graph_database_name),
            graph_database_username=connection_info.get("graph_database_username", ""),
            graph_database_password=connection_info.get("graph_database_password", ""),
            graph_dataset_database_handler="",
            graph_database_port="",
        )
        await engine.delete_graph()
|