cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +9 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +158 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +86 -9
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +5 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +37 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
- cognee/modules/retrieval/graph_completion_retriever.py +14 -1
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +13 -2
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +26 -3
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +3 -2
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +24 -3
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +6 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -3
- cognee/tests/test_conversation_history.py +68 -5
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +44 -2
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/context_global_variables.py
@@ -4,7 +4,10 @@ from typing import Union
 from uuid import UUID

 from cognee.base_config import get_base_config
+from cognee.infrastructure.databases.vector.config import get_vectordb_config
+from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
+from cognee.infrastructure.databases.utils import resolve_dataset_database_connection_info
 from cognee.infrastructure.files.storage.config import file_storage_config
 from cognee.modules.users.methods import get_user

@@ -19,6 +22,67 @@ async def set_session_user_context_variable(user):
     session_user.set(user)


+def multi_user_support_possible():
+    graph_db_config = get_graph_config()
+    vector_db_config = get_vectordb_config()
+
+    graph_handler = graph_db_config.graph_dataset_database_handler
+    vector_handler = vector_db_config.vector_dataset_database_handler
+    from cognee.infrastructure.databases.dataset_database_handler import (
+        supported_dataset_database_handlers,
+    )
+
+    if graph_handler not in supported_dataset_database_handlers:
+        raise EnvironmentError(
+            "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
+            f"Selected graph dataset to database handler: {graph_handler}\n"
+            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
+        )
+
+    if vector_handler not in supported_dataset_database_handlers:
+        raise EnvironmentError(
+            "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
+            f"Selected vector dataset to database handler: {vector_handler}\n"
+            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
+        )
+
+    if (
+        supported_dataset_database_handlers[graph_handler]["handler_provider"]
+        != graph_db_config.graph_database_provider
+    ):
+        raise EnvironmentError(
+            "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
+            f"Selected graph database provider: {graph_db_config.graph_database_provider}\n"
+            f"Selected graph dataset to database handler: {graph_handler}\n"
+            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
+        )
+
+    if (
+        supported_dataset_database_handlers[vector_handler]["handler_provider"]
+        != vector_db_config.vector_db_provider
+    ):
+        raise EnvironmentError(
+            "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
+            f"Selected vector database provider: {vector_db_config.vector_db_provider}\n"
+            f"Selected vector dataset to database handler: {vector_handler}\n"
+            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
+        )
+
+    return True
+
+
+def backend_access_control_enabled():
+    backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
+    if backend_access_control is None:
+        # If backend access control is not defined in environment variables,
+        # enable it by default if graph and vector DBs can support it, otherwise disable it
+        return multi_user_support_possible()
+    elif backend_access_control.lower() == "true":
+        # If enabled, ensure that the current graph and vector DBs can support it
+        return multi_user_support_possible()
+    return False
+
+
 async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
     """
     If backend access control is enabled this function will ensure all datasets have their own databases,
@@ -38,16 +102,17 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_

     """

-
-
-    if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+    if not backend_access_control_enabled():
         return

     user = await get_user(user_id)

     # To ensure permissions are enforced properly all datasets will have their own databases
     dataset_database = await get_or_create_dataset_database(dataset, user)
+    # Ensure that all connection info is resolved properly
+    dataset_database = await resolve_dataset_database_connection_info(dataset_database)

+    base_config = get_base_config()
     data_root_directory = os.path.join(
         base_config.data_root_directory, str(user.tenant_id or user.id)
     )
@@ -56,19 +121,31 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
     )

     # Set vector and graph database configuration based on dataset database information
+    # TODO: Add better handling of vector and graph config accross Cognee.
+    # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
     vector_config = {
-        "
-
-
-        "
-        "vector_db_provider": "lancedb",
+        "vector_db_provider": dataset_database.vector_database_provider,
+        "vector_db_url": dataset_database.vector_database_url,
+        "vector_db_key": dataset_database.vector_database_key,
+        "vector_db_name": dataset_database.vector_database_name,
     }

     graph_config = {
-        "graph_database_provider":
+        "graph_database_provider": dataset_database.graph_database_provider,
+        "graph_database_url": dataset_database.graph_database_url,
+        "graph_database_name": dataset_database.graph_database_name,
+        "graph_database_key": dataset_database.graph_database_key,
         "graph_file_path": os.path.join(
             databases_directory_path, dataset_database.graph_database_name
         ),
+        "graph_database_username": dataset_database.graph_database_connection_info.get(
+            "graph_database_username", ""
+        ),
+        "graph_database_password": dataset_database.graph_database_connection_info.get(
+            "graph_database_password", ""
+        ),
+        "graph_dataset_database_handler": "",
+        "graph_database_port": "",
     }

     storage_config = {
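Taken together, the context_global_variables.py hunks change how multi-user mode is switched on: ENABLE_BACKEND_ACCESS_CONTROL is no longer read inline as a plain boolean; resolution is delegated to backend_access_control_enabled(). A minimal sketch of the resulting decision order, using only the functions added above (illustrative only, not part of the package):

import os

# Sketch of the decision implemented by backend_access_control_enabled().
flag = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL")

if flag is None:
    # Unset: enabled by default, but only when the configured graph and vector
    # handlers are in the supported registry and match their providers.
    enabled = multi_user_support_possible()
elif flag.lower() == "true":
    # Explicitly on: still validated; a handler/provider mismatch raises
    # EnvironmentError instead of silently continuing.
    enabled = multi_user_support_possible()
else:
    # "false" or any other value: access control stays off, no validation runs.
    enabled = False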
cognee/eval_framework/Dockerfile
ADDED
@@ -0,0 +1,29 @@
+FROM python:3.11-slim
+
+# Set environment variables
+ENV PIP_NO_CACHE_DIR=true
+ENV PATH="${PATH}:/root/.poetry/bin"
+ENV PYTHONPATH=/app
+ENV SKIP_MIGRATIONS=true
+
+# System dependencies
+RUN apt-get update && apt-get install -y \
+    gcc \
+    libpq-dev \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY pyproject.toml poetry.lock README.md /app/
+
+RUN pip install poetry
+
+RUN poetry config virtualenvs.create false
+
+RUN poetry install --extras distributed --extras evals --extras deepeval --no-root
+
+COPY cognee/ /app/cognee
+COPY distributed/ /app/distributed
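This image is consumed by the Modal eval runner: in the modal_run_eval.py hunks below, the inline image definition is replaced with Image.from_dockerfile pointed at this file, so local Docker builds and Modal runs share a single environment definition.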
cognee/eval_framework/answer_generation/answer_generation_executor.py
@@ -35,6 +35,16 @@ class AnswerGeneratorExecutor:
         retrieval_context = await retriever.get_context(query_text)
         search_results = await retriever.get_completion(query_text, retrieval_context)

+        ############
+        # :TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
+        if isinstance(retrieval_context, list):
+            retrieval_context = await retriever.convert_retrieved_objects_to_context(
+                triplets=retrieval_context
+            )
+
+        if isinstance(search_results, str):
+            search_results = [search_results]
+        #############
         answer = {
             "question": query_text,
             "answer": search_results[0],
cognee/eval_framework/answer_generation/run_question_answering_module.py
@@ -35,7 +35,7 @@ async def create_and_insert_answers_table(questions_payload):


 async def run_question_answering(
-    params: dict, system_prompt="
+    params: dict, system_prompt="answer_simple_question_benchmark.txt", top_k: Optional[int] = None
 ) -> List[dict]:
     if params.get("answering_questions"):
         logger.info("Question answering started...")
cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py
@@ -8,7 +8,6 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=
-    base_tasks = await get_default_tasks_by_indices([0, 1
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)

     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)

@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=
-    base_tasks = await get_default_tasks_by_indices([0, 1
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)

     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})

cognee/eval_framework/eval_config.py
@@ -14,7 +14,7 @@ class EvalConfig(BaseSettings):

     # Question answering params
     answering_questions: bool = True
-    qa_engine: str = "
+    qa_engine: str = "cognee_graph_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'

     # Evaluation params
     evaluating_answers: bool = True
@@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
         "EM",
         "f1",
     ]  # Use only 'correctness' for DirectLLM
-    deepeval_model: str = "gpt-
+    deepeval_model: str = "gpt-4o-mini"

     # Metrics params
     calculate_metrics: bool = True
cognee/eval_framework/modal_run_eval.py
@@ -2,7 +2,6 @@ import modal
 import os
 import asyncio
 import datetime
-import hashlib
 import json
 from cognee.shared.logging_utils import get_logger
 from cognee.eval_framework.eval_config import EvalConfig
@@ -10,6 +9,9 @@ from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_b
 from cognee.eval_framework.answer_generation.run_question_answering_module import (
     run_question_answering,
 )
+import pathlib
+from os import path
+from modal import Image
 from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
 from cognee.eval_framework.metrics_dashboard import create_dashboard

@@ -38,22 +40,19 @@ def read_and_combine_metrics(eval_params: dict) -> dict:

 app = modal.App("modal-run-eval")

-image = (
-
-
-
-    .env(
-        {
-            "ENV": os.getenv("ENV"),
-            "LLM_API_KEY": os.getenv("LLM_API_KEY"),
-            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
-        }
-    )
-    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
-)
+image = Image.from_dockerfile(
+    path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(),
+    force_build=False,
+).add_local_python_source("cognee")


-@app.function(
+@app.function(
+    image=image,
+    max_containers=10,
+    timeout=86400,
+    volumes={"/data": vol},
+    secrets=[modal.Secret.from_name("eval_secrets")],
+)
 async def modal_run_eval(eval_params=None):
     """Runs evaluation pipeline and returns combined metrics results."""
     if eval_params is None:
@@ -105,18 +104,7 @@ async def main():
     configs = [
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=
-            benchmark="HotPotQA",
-            qa_engine="cognee_graph_completion",
-            building_corpus_from_scratch=True,
-            answering_questions=True,
-            evaluating_answers=True,
-            calculate_metrics=True,
-            dashboard=True,
-        ),
-        EvalConfig(
-            task_getter_type="Default",
-            number_of_samples_in_corpus=10,
+            number_of_samples_in_corpus=25,
             benchmark="TwoWikiMultiHop",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
@@ -127,7 +115,7 @@ async def main():
         ),
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=
+            number_of_samples_in_corpus=25,
             benchmark="Musique",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
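Two design changes stand out in this file: the Modal image is now built from the eval_framework Dockerfile rather than an inline .env()/.pip_install() chain, and credentials move out of the image definition into a named Modal secret (modal.Secret.from_name("eval_secrets")), so API keys are no longer baked into the image at build time. The default benchmark sweep also drops the HotPotQA config and standardizes on 25 corpus samples for the remaining benchmarks.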
cognee/infrastructure/databases/cache/config.py
@@ -1,6 +1,6 @@
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from functools import lru_cache
-from typing import Optional
+from typing import Optional, Literal


 class CacheConfig(BaseSettings):
@@ -15,6 +15,7 @@ class CacheConfig(BaseSettings):
     - agentic_lock_timeout: Maximum time (in seconds) to wait for the lock release.
     """

+    cache_backend: Literal["redis", "fs"] = "fs"
     caching: bool = False
     shared_kuzu_lock: bool = False
     cache_host: str = "localhost"
@@ -28,6 +29,7 @@ class CacheConfig(BaseSettings):

     def to_dict(self) -> dict:
         return {
+            "cache_backend": self.cache_backend,
             "caching": self.caching,
             "shared_kuzu_lock": self.shared_kuzu_lock,
             "cache_host": self.cache_host,
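With cache_backend in place, Redis is no longer assumed whenever caching is on; the filesystem backend is the default. A hedged configuration sketch follows: pydantic-settings conventionally populates a cache_backend field from a CACHE_BACKEND environment variable, though any env_prefix set in CacheConfig's SettingsConfigDict is not visible in this hunk:

import os

os.environ["CACHE_BACKEND"] = "fs"  # the new default; "redis" keeps prior behavior
os.environ["CACHING"] = "true"      # caching must be enabled for any backend

from cognee.infrastructure.databases.cache.config import get_cache_config

config = get_cache_config()
assert config.cache_backend in ("redis", "fs")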
cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py
ADDED
@@ -0,0 +1,151 @@
+import asyncio
+import json
+import os
+from datetime import datetime
+import time
+import threading
+import diskcache as dc
+
+from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+from cognee.infrastructure.databases.exceptions.exceptions import (
+    CacheConnectionError,
+    SharedKuzuLockRequiresRedisError,
+)
+from cognee.infrastructure.files.storage.get_storage_config import get_storage_config
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger("FSCacheAdapter")
+
+
+class FSCacheAdapter(CacheDBInterface):
+    def __init__(self):
+        default_key = "sessions_db"
+
+        storage_config = get_storage_config()
+        data_root_directory = storage_config["data_root_directory"]
+        cache_directory = os.path.join(data_root_directory, ".cognee_fs_cache", default_key)
+        os.makedirs(cache_directory, exist_ok=True)
+        self.cache = dc.Cache(directory=cache_directory)
+        self.cache.expire()
+
+        logger.debug(f"FSCacheAdapter initialized with cache directory: {cache_directory}")
+
+    def acquire_lock(self):
+        """Lock acquisition is not available for filesystem cache backend."""
+        message = "Shared Kuzu lock requires Redis cache backend."
+        logger.error(message)
+        raise SharedKuzuLockRequiresRedisError()
+
+    def release_lock(self):
+        """Lock release is not available for filesystem cache backend."""
+        message = "Shared Kuzu lock requires Redis cache backend."
+        logger.error(message)
+        raise SharedKuzuLockRequiresRedisError()
+
+    async def add_qa(
+        self,
+        user_id: str,
+        session_id: str,
+        question: str,
+        context: str,
+        answer: str,
+        ttl: int | None = 86400,
+    ):
+        try:
+            session_key = f"agent_sessions:{user_id}:{session_id}"
+
+            qa_entry = {
+                "time": datetime.utcnow().isoformat(),
+                "question": question,
+                "context": context,
+                "answer": answer,
+            }
+
+            existing_value = self.cache.get(session_key)
+            if existing_value is not None:
+                value: list = json.loads(existing_value)
+                value.append(qa_entry)
+            else:
+                value = [qa_entry]
+
+            self.cache.set(session_key, json.dumps(value), expire=ttl)
+        except Exception as e:
+            error_msg = f"Unexpected error while adding Q&A to diskcache: {str(e)}"
+            logger.error(error_msg)
+            raise CacheConnectionError(error_msg) from e
+
+    async def get_latest_qa(self, user_id: str, session_id: str, last_n: int = 5):
+        session_key = f"agent_sessions:{user_id}:{session_id}"
+        value = self.cache.get(session_key)
+        if value is None:
+            return None
+        entries = json.loads(value)
+        return entries[-last_n:] if len(entries) > last_n else entries
+
+    async def get_all_qas(self, user_id: str, session_id: str):
+        session_key = f"agent_sessions:{user_id}:{session_id}"
+        value = self.cache.get(session_key)
+        if value is None:
+            return None
+        return json.loads(value)
+
+    async def close(self):
+        if self.cache is not None:
+            self.cache.expire()
+            self.cache.close()
+
+
+async def main():
+    adapter = FSCacheAdapter()
+    session_id = "demo_session"
+    user_id = "demo_user_id"
+
+    print("\nAdding sample Q/A pairs...")
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "What is Redis?",
+        "Basic DB context",
+        "Redis is an in-memory data store.",
+    )
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "Who created Redis?",
+        "Historical context",
+        "Salvatore Sanfilippo (antirez).",
+    )
+
+    print("\nLatest QA:")
+    latest = await adapter.get_latest_qa(user_id, session_id)
+    print(json.dumps(latest, indent=2))
+
+    print("\nLast 2 QAs:")
+    last_two = await adapter.get_latest_qa(user_id, session_id, last_n=2)
+    print(json.dumps(last_two, indent=2))
+
+    session_id = "session_expire_demo"
+
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "What is Redis?",
+        "Database context",
+        "Redis is an in-memory data store.",
+    )
+
+    await adapter.add_qa(
+        user_id,
+        session_id,
+        "Who created Redis?",
+        "History context",
+        "Salvatore Sanfilippo (antirez).",
+    )
+
+    print(await adapter.get_all_qas(user_id, session_id))
+
+    await adapter.close()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
cognee/infrastructure/databases/cache/get_cache_engine.py
@@ -1,9 +1,11 @@
 """Factory to get the appropriate cache coordination engine (e.g., Redis)."""

 from functools import lru_cache
+import os
 from typing import Optional
 from cognee.infrastructure.databases.cache.config import get_cache_config
 from cognee.infrastructure.databases.cache.cache_db_interface import CacheDBInterface
+from cognee.infrastructure.databases.cache.fscache.FsCacheAdapter import FSCacheAdapter

 config = get_cache_config()

@@ -33,20 +35,28 @@ def create_cache_engine(

     Returns:
     --------
-    - CacheDBInterface: An instance of the appropriate cache adapter.
+    - CacheDBInterface: An instance of the appropriate cache adapter.
     """
     if config.caching:
         from cognee.infrastructure.databases.cache.redis.RedisAdapter import RedisAdapter

-
-
-
-
-
-
-
-
-
+        if config.cache_backend == "redis":
+            return RedisAdapter(
+                host=cache_host,
+                port=cache_port,
+                username=cache_username,
+                password=cache_password,
+                lock_name=lock_key,
+                timeout=agentic_lock_expire,
+                blocking_timeout=agentic_lock_timeout,
+            )
+        elif config.cache_backend == "fs":
+            return FSCacheAdapter()
+        else:
+            raise ValueError(
+                f"Unsupported cache backend: '{config.cache_backend}'. "
+                f"Supported backends are: 'redis', 'fs'"
+            )
     else:
         return None

cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py
ADDED
@@ -0,0 +1,80 @@
+from typing import Optional
+from uuid import UUID
+from abc import ABC, abstractmethod
+
+from cognee.modules.users.models.User import User
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+class DatasetDatabaseHandlerInterface(ABC):
+    @classmethod
+    @abstractmethod
+    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
+        """
+        Return a dictionary with database connection/resolution info for a graph or vector database for the given dataset.
+        Function can auto handle deploying of the actual database if needed, but is not necessary.
+        Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future.
+        Needed for Cognee multi-tenant/multi-user and backend access control support.
+
+        Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database.
+        From which internal mapping of dataset -> database connection info will be done.
+
+        The returned dictionary is stored verbatim in the relational database and is later passed to
+        resolve_dataset_connection_info() at connection time. For safe credential handling, prefer
+        returning only references to secrets or role identifiers, not plaintext credentials.
+
+        Each dataset needs to map to a unique graph or vector database when backend access control is enabled to facilitate a separation of concern for data.
+
+        Args:
+            dataset_id: UUID of the dataset if needed by the database creation logic
+            user: User object if needed by the database creation logic
+        Returns:
+            dict: Connection info for the created graph or vector database instance.
+        """
+        pass
+
+    @classmethod
+    async def resolve_dataset_connection_info(
+        cls, dataset_database: DatasetDatabase
+    ) -> DatasetDatabase:
+        """
+        Resolve runtime connection details for a dataset’s backing graph/vector database.
+        Function is intended to be overwritten to implement custom logic for resolving connection info.
+
+        This method is invoked right before the application opens a connection for a given dataset.
+        It receives the DatasetDatabase row that was persisted when create_dataset() ran and must
+        return a modified instance of DatasetDatabase with concrete connection parameters that the client/driver can use.
+        Do not update these new DatasetDatabase values in the relational database to avoid storing secure credentials.
+
+        In case of separate graph and vector database handlers, each handler should implement its own logic for resolving
+        connection info and only change parameters related to its appropriate database, the resolution function will then
+        be called one after another with the updated DatasetDatabase value from the previous function as the input.
+
+        Typical behavior:
+        - If the DatasetDatabase row already contains raw connection fields (e.g., host/port/db/user/password
+          or api_url/api_key), return them as-is.
+        - If the row stores only references (e.g., secret IDs, vault paths, cloud resource ARNs/IDs, IAM
+          roles, SSO tokens), resolve those references by calling the appropriate secret manager or provider
+          API to obtain short-lived credentials and assemble the final connection DatasetDatabase object.
+        - Do not persist any resolved or decrypted secrets back to the relational database. Return them only
+          to the caller.
+
+        Args:
+            dataset_database: DatasetDatabase row from the relational database
+        Returns:
+            DatasetDatabase: Updated instance with resolved connection info
+        """
+        return dataset_database
+
+    @classmethod
+    @abstractmethod
+    async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
+        """
+        Delete the graph or vector database for the given dataset.
+        Function should auto handle deleting of the actual database or send a request to the proper service to delete/mark the database as not needed for the given dataset.
+        Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control.
+
+        Args:
+            dataset_database: DatasetDatabase row containing connection/resolution info for the graph or vector database to delete.
+        """
+        pass
cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py
ADDED
@@ -0,0 +1,18 @@
+from cognee.infrastructure.databases.graph.neo4j_driver.Neo4jAuraDevDatasetDatabaseHandler import (
+    Neo4jAuraDevDatasetDatabaseHandler,
+)
+from cognee.infrastructure.databases.vector.lancedb.LanceDBDatasetDatabaseHandler import (
+    LanceDBDatasetDatabaseHandler,
+)
+from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler import (
+    KuzuDatasetDatabaseHandler,
+)
+
+supported_dataset_database_handlers = {
+    "neo4j_aura_dev": {
+        "handler_instance": Neo4jAuraDevDatasetDatabaseHandler,
+        "handler_provider": "neo4j",
+    },
+    "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
+    "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
+}
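These registry keys are the handler names that graph_dataset_database_handler / vector_dataset_database_handler must reference in the graph and vector configs, and multi_user_support_possible() in context_global_variables.py additionally requires each entry's handler_provider to equal the configured database provider. Since the registry is a plain dict, a custom handler could plausibly be wired in as follows (assuming no dedicated registration API exists; ManagedNeo4jHandler is the hypothetical sketch above):

from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
    supported_dataset_database_handlers,
)

supported_dataset_database_handlers["managed_neo4j"] = {
    "handler_instance": ManagedNeo4jHandler,  # hypothetical handler from the sketch above
    "handler_provider": "neo4j",  # must equal the configured graph database provider
}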