cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +5 -1
- cognee/api/v1/add/add.py +1 -2
- cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
- cognee/api/v1/cognify/cognify.py +16 -24
- cognee/api/v1/cognify/routers/__init__.py +1 -0
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/ontologies.py +37 -12
- cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
- cognee/api/v1/search/search.py +0 -4
- cognee/api/v1/ui/ui.py +68 -38
- cognee/context_global_variables.py +16 -61
- cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +28 -16
- cognee/infrastructure/databases/graph/config.py +0 -3
- cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
- cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
- cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
- cognee/infrastructure/databases/utils/__init__.py +0 -3
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
- cognee/infrastructure/databases/vector/config.py +0 -2
- cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
- cognee/infrastructure/files/storage/s3_config.py +0 -2
- cognee/infrastructure/llm/LLMGateway.py +2 -5
- cognee/infrastructure/llm/config.py +0 -35
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
- cognee/modules/cognify/config.py +0 -2
- cognee/modules/data/deletion/prune_system.py +2 -52
- cognee/modules/data/methods/delete_dataset.py +0 -26
- cognee/modules/engine/models/__init__.py +0 -1
- cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
- cognee/modules/memify/memify.py +7 -1
- cognee/modules/pipelines/operations/pipeline.py +2 -18
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/code_retriever.py +232 -0
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
- cognee/modules/retrieval/graph_completion_retriever.py +0 -10
- cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
- cognee/modules/retrieval/temporal_retriever.py +0 -4
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
- cognee/modules/search/methods/get_search_type_tools.py +8 -54
- cognee/modules/search/methods/no_access_control_search.py +0 -4
- cognee/modules/search/methods/search.py +0 -21
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +0 -19
- cognee/modules/users/methods/get_authenticated_user.py +2 -2
- cognee/modules/users/models/DatasetDatabase.py +3 -15
- cognee/shared/logging_utils.py +0 -4
- cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
- cognee/tasks/code/get_local_dependencies_checker.py +20 -0
- cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
- cognee/tasks/documents/__init__.py +1 -0
- cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
- cognee/tasks/graph/extract_graph_from_data.py +10 -9
- cognee/tasks/repo_processor/__init__.py +2 -0
- cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
- cognee/tasks/repo_processor/get_non_code_files.py +158 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
- cognee/tasks/storage/add_data_points.py +2 -142
- cognee/tests/test_cognee_server_start.py +4 -2
- cognee/tests/test_conversation_history.py +1 -23
- cognee/tests/test_delete_bmw_example.py +60 -0
- cognee/tests/test_search_db.py +1 -37
- cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
- cognee/api/v1/ui/node_setup.py +0 -360
- cognee/api/v1/ui/npm_utils.py +0 -50
- cognee/eval_framework/Dockerfile +0 -29
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
- cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
- cognee/modules/engine/models/Triplet.py +0 -9
- cognee/modules/retrieval/register_retriever.py +0 -10
- cognee/modules/retrieval/registered_community_retrievers.py +0 -1
- cognee/modules/retrieval/triplet_retriever.py +0 -182
- cognee/shared/rate_limiting.py +0 -30
- cognee/tasks/memify/get_triplet_datapoints.py +0 -289
- cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
- cognee/tests/integration/tasks/test_add_data_points.py +0 -139
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
- cognee/tests/test_dataset_database_handler.py +0 -137
- cognee/tests/test_dataset_delete.py +0 -76
- cognee/tests/test_edge_centered_payload.py +0 -170
- cognee/tests/test_pipeline_cache.py +0 -164
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
- cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/ui/ui.py
CHANGED
@@ -15,8 +15,6 @@ import shutil
 
 from cognee.shared.logging_utils import get_logger
 from cognee.version import get_cognee_version
-from .node_setup import check_node_npm, get_nvm_dir, get_nvm_sh_path
-from .npm_utils import run_npm_command
 
 logger = get_logger()
 
@@ -287,6 +285,48 @@ def find_frontend_path() -> Optional[Path]:
     return None
 
 
+def check_node_npm() -> tuple[bool, str]:
+    """
+    Check if Node.js and npm are available.
+    Returns (is_available, error_message)
+    """
+
+    try:
+        # Check Node.js
+        result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)
+        if result.returncode != 0:
+            return False, "Node.js is not installed or not in PATH"
+
+        node_version = result.stdout.strip()
+        logger.debug(f"Found Node.js version: {node_version}")
+
+        # Check npm - handle Windows PowerShell scripts
+        if platform.system() == "Windows":
+            # On Windows, npm might be a PowerShell script, so we need to use shell=True
+            result = subprocess.run(
+                ["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
+            )
+        else:
+            result = subprocess.run(
+                ["npm", "--version"], capture_output=True, text=True, timeout=10
+            )
+
+        if result.returncode != 0:
+            return False, "npm is not installed or not in PATH"
+
+        npm_version = result.stdout.strip()
+        logger.debug(f"Found npm version: {npm_version}")
+
+        return True, f"Node.js {node_version}, npm {npm_version}"
+
+    except subprocess.TimeoutExpired:
+        return False, "Timeout checking Node.js/npm installation"
+    except FileNotFoundError:
+        return False, "Node.js/npm not found. Please install Node.js from https://nodejs.org/"
+    except Exception as e:
+        return False, f"Error checking Node.js/npm: {str(e)}"
+
+
 def install_frontend_dependencies(frontend_path: Path) -> bool:
     """
     Install frontend dependencies if node_modules doesn't exist.
@@ -301,7 +341,24 @@ def install_frontend_dependencies(frontend_path: Path) -> bool:
     logger.info("Installing frontend dependencies (this may take a few minutes)...")
 
     try:
-
+        # Use shell=True on Windows for npm commands
+        if platform.system() == "Windows":
+            result = subprocess.run(
+                ["npm", "install"],
+                cwd=frontend_path,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minutes timeout
+                shell=True,
+            )
+        else:
+            result = subprocess.run(
+                ["npm", "install"],
+                cwd=frontend_path,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minutes timeout
+            )
 
         if result.returncode == 0:
             logger.info("Frontend dependencies installed successfully")
@@ -585,21 +642,6 @@ def start_ui(
     env["HOST"] = "localhost"
     env["PORT"] = str(port)
 
-    # If nvm is installed, ensure it's available in the environment
-    nvm_path = get_nvm_sh_path()
-    if platform.system() != "Windows" and nvm_path.exists():
-        # Add nvm to PATH for the subprocess
-        nvm_dir = get_nvm_dir()
-        # Find the latest Node.js version installed via nvm
-        nvm_versions = nvm_dir / "versions" / "node"
-        if nvm_versions.exists():
-            versions = sorted(nvm_versions.iterdir(), reverse=True)
-            if versions:
-                latest_node_bin = versions[0] / "bin"
-                if latest_node_bin.exists():
-                    current_path = env.get("PATH", "")
-                    env["PATH"] = f"{latest_node_bin}:{current_path}"
-
     # Start the development server
     logger.info(f"Starting frontend server at http://localhost:{port}")
     logger.info("This may take a moment to compile and start...")
@@ -617,26 +659,14 @@ def start_ui(
                 shell=True,
             )
         else:
-
-
-
-
-
-
-
-
-                stderr=subprocess.PIPE,
-                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
-            )
-        else:
-            process = subprocess.Popen(
-                ["npm", "run", "dev"],
-                cwd=frontend_path,
-                env=env,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
-            )
+            process = subprocess.Popen(
+                ["npm", "run", "dev"],
+                cwd=frontend_path,
+                env=env,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
+            )
 
         # Start threads to stream frontend output with prefix
         _stream_process_output(process, "stdout", "[FRONTEND]", "\033[33m")  # Yellow
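
Note: the helper inlined above is self-contained and easy to smoke-test. A minimal sketch, assuming check_node_npm is importable from cognee.api.v1.ui.ui as this diff suggests:

# Sketch only: the import path is inferred from this diff, not confirmed.
from cognee.api.v1.ui.ui import check_node_npm

available, message = check_node_npm()
if not available:
    raise RuntimeError(f"Cannot start the UI: {message}")
print(message)  # e.g. "Node.js v20.11.0, npm 10.2.4"
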
cognee/context_global_variables.py
CHANGED
@@ -4,10 +4,9 @@ from typing import Union
 from uuid import UUID
 
 from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.vector.config import
-from cognee.infrastructure.databases.graph.config import
+from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
+from cognee.infrastructure.databases.graph.config import get_graph_context_config
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
-from cognee.infrastructure.databases.utils import resolve_dataset_database_connection_info
 from cognee.infrastructure.files.storage.config import file_storage_config
 from cognee.modules.users.methods import get_user
 
@@ -17,59 +16,22 @@ vector_db_config = ContextVar("vector_db_config", default=None)
 graph_db_config = ContextVar("graph_db_config", default=None)
 session_user = ContextVar("session_user", default=None)
 
+VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
+GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]
+
 
 async def set_session_user_context_variable(user):
     session_user.set(user)
 
 
 def multi_user_support_possible():
-    graph_db_config =
-    vector_db_config =
-
-
-
-    from cognee.infrastructure.databases.dataset_database_handler import (
-        supported_dataset_database_handlers,
+    graph_db_config = get_graph_context_config()
+    vector_db_config = get_vectordb_context_config()
+    return (
+        graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
+        and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
     )
 
-    if graph_handler not in supported_dataset_database_handlers:
-        raise EnvironmentError(
-            "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected graph dataset to database handler: {graph_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if vector_handler not in supported_dataset_database_handlers:
-        raise EnvironmentError(
-            "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected vector dataset to database handler: {vector_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if (
-        supported_dataset_database_handlers[graph_handler]["handler_provider"]
-        != graph_db_config.graph_database_provider
-    ):
-        raise EnvironmentError(
-            "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected graph database provider: {graph_db_config.graph_database_provider}\n"
-            f"Selected graph dataset to database handler: {graph_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if (
-        supported_dataset_database_handlers[vector_handler]["handler_provider"]
-        != vector_db_config.vector_db_provider
-    ):
-        raise EnvironmentError(
-            "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected vector database provider: {vector_db_config.vector_db_provider}\n"
-            f"Selected vector dataset to database handler: {vector_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    return True
-
 
 def backend_access_control_enabled():
     backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
@@ -79,7 +41,12 @@ def backend_access_control_enabled():
         return multi_user_support_possible()
     elif backend_access_control.lower() == "true":
         # If enabled, ensure that the current graph and vector DBs can support it
-
+        multi_user_support = multi_user_support_possible()
+        if not multi_user_support:
+            raise EnvironmentError(
+                "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
+            )
+        return True
     return False
 
 
@@ -109,8 +76,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
 
     # To ensure permissions are enforced properly all datasets will have their own databases
     dataset_database = await get_or_create_dataset_database(dataset, user)
-    # Ensure that all connection info is resolved properly
-    dataset_database = await resolve_dataset_database_connection_info(dataset_database)
 
     base_config = get_base_config()
     data_root_directory = os.path.join(
@@ -121,8 +86,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
     )
 
     # Set vector and graph database configuration based on dataset database information
-    # TODO: Add better handling of vector and graph config accross Cognee.
-    # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
     vector_config = {
        "vector_db_provider": dataset_database.vector_database_provider,
        "vector_db_url": dataset_database.vector_database_url,
@@ -138,14 +101,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
         "graph_file_path": os.path.join(
             databases_directory_path, dataset_database.graph_database_name
         ),
-        "graph_database_username": dataset_database.graph_database_connection_info.get(
-            "graph_database_username", ""
-        ),
-        "graph_database_password": dataset_database.graph_database_connection_info.get(
-            "graph_database_password", ""
-        ),
-        "graph_dataset_database_handler": "",
-        "graph_database_port": "",
     }
 
     storage_config = {
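
Note: after this change the multi-user gate reduces to two membership checks. A standalone sketch of that logic (the packaged function takes no parameters and reads the context configs instead):

VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]


def multi_user_support_possible(graph_provider: str, vector_provider: str) -> bool:
    # Both engines must support per-dataset isolation for access control to work.
    return (
        graph_provider in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
        and vector_provider in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
    )


assert multi_user_support_possible("kuzu", "lancedb") is True
assert multi_user_support_possible("neo4j", "lancedb") is False
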
cognee/eval_framework/answer_generation/answer_generation_executor.py
CHANGED
@@ -35,16 +35,6 @@ class AnswerGeneratorExecutor:
         retrieval_context = await retriever.get_context(query_text)
         search_results = await retriever.get_completion(query_text, retrieval_context)
 
-        ############
-        #:TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
-        if isinstance(retrieval_context, list):
-            retrieval_context = await retriever.convert_retrieved_objects_to_context(
-                triplets=retrieval_context
-            )
-
-        if isinstance(search_results, str):
-            search_results = [search_results]
-        #############
         answer = {
             "question": query_text,
             "answer": search_results[0],
cognee/eval_framework/answer_generation/run_question_answering_module.py
CHANGED
@@ -35,7 +35,7 @@ async def create_and_insert_answers_table(questions_payload):
 
 
 async def run_question_answering(
-    params: dict, system_prompt="
+    params: dict, system_prompt="answer_simple_question.txt", top_k: Optional[int] = None
 ) -> List[dict]:
     if params.get("answering_questions"):
         logger.info("Question answering started...")
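
Note: the restored signature makes top_k optional alongside a pinned default prompt file. A hedged usage sketch (must run inside an async context; params fields beyond answering_questions are omitted here):

params = {"answering_questions": True}
# top_k=None preserves the previous behavior; an int caps retrieved results.
answers = await run_question_answering(params, top_k=5)
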
cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
CHANGED
@@ -8,6 +8,7 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -30,6 +31,7 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
CHANGED
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
 
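
Note: the index bump is purely positional. The default task list gained check_permissions_on_dataset at position 1, so getters that previously selected [0, 1] must now select [0, 1, 2] to keep both classify and extract_chunks. A toy illustration with task names only:

old_default = ["classify_documents", "extract_chunks_from_documents"]
new_default = [
    "classify_documents",
    "check_permissions_on_dataset",
    "extract_chunks_from_documents",
]

# On the new list, [0, 1] would silently drop chunk extraction.
assert [new_default[i] for i in [0, 1, 2]] == new_default
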
cognee/eval_framework/eval_config.py
CHANGED
@@ -14,7 +14,7 @@ class EvalConfig(BaseSettings):
 
     # Question answering params
     answering_questions: bool = True
-    qa_engine: str = "
+    qa_engine: str = "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
 
     # Evaluation params
     evaluating_answers: bool = True
@@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
         "EM",
         "f1",
     ]  # Use only 'correctness' for DirectLLM
-    deepeval_model: str = "gpt-
+    deepeval_model: str = "gpt-5-mini"
 
     # Metrics params
     calculate_metrics: bool = True
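
Note: EvalConfig subclasses pydantic's BaseSettings, so the restored defaults can still be overridden per run. A sketch assuming standard BaseSettings behavior (constructor kwargs or matching environment variables take precedence over defaults):

from cognee.eval_framework.eval_config import EvalConfig

config = EvalConfig(qa_engine="cognee_graph_completion", deepeval_model="gpt-5-mini")
print(config.qa_engine)  # "cognee_graph_completion"
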
cognee/eval_framework/modal_run_eval.py
CHANGED
@@ -2,6 +2,7 @@ import modal
 import os
 import asyncio
 import datetime
+import hashlib
 import json
 from cognee.shared.logging_utils import get_logger
 from cognee.eval_framework.eval_config import EvalConfig
@@ -9,9 +10,6 @@ from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_b
 from cognee.eval_framework.answer_generation.run_question_answering_module import (
     run_question_answering,
 )
-import pathlib
-from os import path
-from modal import Image
 from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
 from cognee.eval_framework.metrics_dashboard import create_dashboard
 
@@ -40,19 +38,22 @@ def read_and_combine_metrics(eval_params: dict) -> dict:
 
 app = modal.App("modal-run-eval")
 
-image =
-
-
-
+image = (
+    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
+    .copy_local_file("pyproject.toml", "pyproject.toml")
+    .copy_local_file("poetry.lock", "poetry.lock")
+    .env(
+        {
+            "ENV": os.getenv("ENV"),
+            "LLM_API_KEY": os.getenv("LLM_API_KEY"),
+            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
+        }
+    )
+    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
+)
 
 
-@app.function(
-    image=image,
-    max_containers=10,
-    timeout=86400,
-    volumes={"/data": vol},
-    secrets=[modal.Secret.from_name("eval_secrets")],
-)
+@app.function(image=image, concurrency_limit=10, timeout=86400, volumes={"/data": vol})
 async def modal_run_eval(eval_params=None):
     """Runs evaluation pipeline and returns combined metrics results."""
     if eval_params is None:
@@ -104,7 +105,18 @@ async def main():
     configs = [
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=
+            number_of_samples_in_corpus=10,
+            benchmark="HotPotQA",
+            qa_engine="cognee_graph_completion",
+            building_corpus_from_scratch=True,
+            answering_questions=True,
+            evaluating_answers=True,
+            calculate_metrics=True,
+            dashboard=True,
+        ),
+        EvalConfig(
+            task_getter_type="Default",
+            number_of_samples_in_corpus=10,
             benchmark="TwoWikiMultiHop",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
@@ -115,7 +127,7 @@ async def main():
         ),
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=
+            number_of_samples_in_corpus=10,
             benchmark="Musique",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
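
Note: with the image and decorator restored, main() can fan the configs out across containers. A sketch assuming Modal's async call API (.remote.aio) and pydantic v2's model_dump(), neither of which appears in this diff; asyncio is already imported by the module:

async def run_all(configs):
    # Each config runs in its own Modal container, up to the concurrency limit.
    return await asyncio.gather(
        *[modal_run_eval.remote.aio(eval_params=config.model_dump()) for config in configs]
    )
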
cognee/infrastructure/databases/graph/config.py
CHANGED
@@ -47,7 +47,6 @@ class GraphConfig(BaseSettings):
     graph_filename: str = ""
     graph_model: object = KnowledgeGraph
     graph_topology: object = KnowledgeGraph
-    graph_dataset_database_handler: str = "kuzu"
     model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
 
     # Model validator updates graph_filename and path dynamically after class creation based on current database provider
@@ -98,7 +97,6 @@ class GraphConfig(BaseSettings):
             "graph_model": self.graph_model,
             "graph_topology": self.graph_topology,
             "model_config": self.model_config,
-            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
     def to_hashable_dict(self) -> dict:
@@ -123,7 +121,6 @@ class GraphConfig(BaseSettings):
             "graph_database_port": self.graph_database_port,
             "graph_database_key": self.graph_database_key,
             "graph_file_path": self.graph_file_path,
-            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
 
cognee/infrastructure/databases/graph/graph_db_interface.py
CHANGED
@@ -398,18 +398,3 @@ class GraphDBInterface(ABC):
         - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
         """
         raise NotImplementedError
-
-    @abstractmethod
-    async def get_filtered_graph_data(
-        self, attribute_filters: List[Dict[str, List[Union[str, int]]]]
-    ) -> Tuple[List[Node], List[EdgeData]]:
-        """
-        Retrieve nodes and edges filtered by the provided attribute criteria.
-
-        Parameters:
-        -----------
-
-        - attribute_filters: A list of dictionaries where keys are attribute names and values
-          are lists of attribute values to filter by.
-        """
-        raise NotImplementedError