cognee 0.5.0.dev0__py3-none-any.whl → 0.5.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. cognee/api/client.py +1 -5
  2. cognee/api/v1/add/add.py +2 -1
  3. cognee/api/v1/cognify/cognify.py +24 -16
  4. cognee/api/v1/cognify/routers/__init__.py +0 -1
  5. cognee/api/v1/cognify/routers/get_cognify_router.py +3 -1
  6. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  7. cognee/api/v1/ontologies/ontologies.py +12 -37
  8. cognee/api/v1/ontologies/routers/get_ontology_router.py +27 -25
  9. cognee/api/v1/search/search.py +4 -0
  10. cognee/api/v1/ui/node_setup.py +360 -0
  11. cognee/api/v1/ui/npm_utils.py +50 -0
  12. cognee/api/v1/ui/ui.py +38 -68
  13. cognee/context_global_variables.py +61 -16
  14. cognee/eval_framework/Dockerfile +29 -0
  15. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  16. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  17. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  18. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  19. cognee/eval_framework/eval_config.py +2 -2
  20. cognee/eval_framework/modal_run_eval.py +16 -28
  21. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  22. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  23. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  24. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  25. cognee/infrastructure/databases/graph/config.py +3 -0
  26. cognee/infrastructure/databases/graph/get_graph_engine.py +1 -0
  27. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  28. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  29. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  30. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  31. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  32. cognee/infrastructure/databases/utils/__init__.py +3 -0
  33. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  34. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +62 -48
  35. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  36. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  37. cognee/infrastructure/databases/vector/config.py +2 -0
  38. cognee/infrastructure/databases/vector/create_vector_engine.py +1 -0
  39. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  40. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  41. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
  42. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  43. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  44. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  45. cognee/infrastructure/files/storage/s3_config.py +2 -0
  46. cognee/infrastructure/llm/LLMGateway.py +5 -2
  47. cognee/infrastructure/llm/config.py +35 -0
  48. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  49. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  50. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -16
  51. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  52. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  53. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +40 -37
  54. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +39 -36
  55. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +19 -1
  56. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +11 -9
  57. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +23 -21
  58. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +42 -34
  59. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  60. cognee/modules/cognify/config.py +2 -0
  61. cognee/modules/data/deletion/prune_system.py +52 -2
  62. cognee/modules/data/methods/delete_dataset.py +26 -0
  63. cognee/modules/engine/models/Triplet.py +9 -0
  64. cognee/modules/engine/models/__init__.py +1 -0
  65. cognee/modules/graph/cognee_graph/CogneeGraph.py +85 -37
  66. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  67. cognee/modules/memify/memify.py +1 -7
  68. cognee/modules/pipelines/operations/pipeline.py +18 -2
  69. cognee/modules/retrieval/__init__.py +1 -1
  70. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +4 -0
  71. cognee/modules/retrieval/graph_completion_cot_retriever.py +4 -0
  72. cognee/modules/retrieval/graph_completion_retriever.py +10 -0
  73. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  74. cognee/modules/retrieval/register_retriever.py +10 -0
  75. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  76. cognee/modules/retrieval/temporal_retriever.py +4 -0
  77. cognee/modules/retrieval/triplet_retriever.py +182 -0
  78. cognee/modules/retrieval/utils/brute_force_triplet_search.py +42 -10
  79. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +8 -1
  80. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  81. cognee/modules/search/methods/no_access_control_search.py +4 -0
  82. cognee/modules/search/methods/search.py +21 -0
  83. cognee/modules/search/types/SearchType.py +1 -1
  84. cognee/modules/settings/get_settings.py +19 -0
  85. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  86. cognee/modules/users/models/DatasetDatabase.py +15 -3
  87. cognee/shared/logging_utils.py +4 -0
  88. cognee/shared/rate_limiting.py +30 -0
  89. cognee/tasks/documents/__init__.py +0 -1
  90. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  91. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  92. cognee/tasks/storage/add_data_points.py +142 -2
  93. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  94. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  95. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  96. cognee/tests/test_cognee_server_start.py +2 -4
  97. cognee/tests/test_conversation_history.py +23 -1
  98. cognee/tests/test_dataset_database_handler.py +137 -0
  99. cognee/tests/test_dataset_delete.py +76 -0
  100. cognee/tests/test_edge_centered_payload.py +170 -0
  101. cognee/tests/test_pipeline_cache.py +164 -0
  102. cognee/tests/test_search_db.py +37 -1
  103. cognee/tests/unit/api/test_ontology_endpoint.py +77 -89
  104. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  105. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  106. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  107. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  108. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  109. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  110. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  111. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  112. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  113. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/METADATA +76 -89
  114. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/RECORD +118 -97
  115. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/WHEEL +1 -1
  116. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  117. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  118. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  119. cognee/modules/retrieval/code_retriever.py +0 -232
  120. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  121. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  122. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  123. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  124. cognee/tasks/repo_processor/__init__.py +0 -2
  125. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  126. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  127. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  128. cognee/tests/test_delete_bmw_example.py +0 -60
  129. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dev0.dist-info → cognee-0.5.0.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,29 @@
+FROM python:3.11-slim
+
+# Set environment variables
+ENV PIP_NO_CACHE_DIR=true
+ENV PATH="${PATH}:/root/.poetry/bin"
+ENV PYTHONPATH=/app
+ENV SKIP_MIGRATIONS=true
+
+# System dependencies
+RUN apt-get update && apt-get install -y \
+    gcc \
+    libpq-dev \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+COPY pyproject.toml poetry.lock README.md /app/
+
+RUN pip install poetry
+
+RUN poetry config virtualenvs.create false
+
+RUN poetry install --extras distributed --extras evals --extras deepeval --no-root
+
+COPY cognee/ /app/cognee
+COPY distributed/ /app/distributed
@@ -35,6 +35,16 @@ class AnswerGeneratorExecutor:
         retrieval_context = await retriever.get_context(query_text)
         search_results = await retriever.get_completion(query_text, retrieval_context)
 
+        ############
+        # TODO: Quick fix until retriever results are structured properly; needed for now due to the changed combined retriever structure, but let's not leave it like this.
+        if isinstance(retrieval_context, list):
+            retrieval_context = await retriever.convert_retrieved_objects_to_context(
+                triplets=retrieval_context
+            )
+
+        if isinstance(search_results, str):
+            search_results = [search_results]
+        #############
         answer = {
             "question": query_text,
             "answer": search_results[0],
@@ -35,7 +35,7 @@ async def create_and_insert_answers_table(questions_payload):
 
 
 async def run_question_answering(
-    params: dict, system_prompt="answer_simple_question.txt", top_k: Optional[int] = None
+    params: dict, system_prompt="answer_simple_question_benchmark.txt", top_k: Optional[int] = None
 ) -> List[dict]:
     if params.get("answering_questions"):
         logger.info("Question answering started...")
@@ -8,7 +8,6 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -31,7 +30,6 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
 
@@ -14,7 +14,7 @@ class EvalConfig(BaseSettings):
 
     # Question answering params
     answering_questions: bool = True
-    qa_engine: str = "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
+    qa_engine: str = "cognee_graph_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
 
     # Evaluation params
     evaluating_answers: bool = True
@@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
         "EM",
         "f1",
     ]  # Use only 'correctness' for DirectLLM
-    deepeval_model: str = "gpt-5-mini"
+    deepeval_model: str = "gpt-4o-mini"
 
     # Metrics params
     calculate_metrics: bool = True
@@ -2,7 +2,6 @@ import modal
 import os
 import asyncio
 import datetime
-import hashlib
 import json
 from cognee.shared.logging_utils import get_logger
 from cognee.eval_framework.eval_config import EvalConfig
@@ -10,6 +9,9 @@ from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_b
 from cognee.eval_framework.answer_generation.run_question_answering_module import (
     run_question_answering,
 )
+import pathlib
+from os import path
+from modal import Image
 from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
 from cognee.eval_framework.metrics_dashboard import create_dashboard
 
@@ -38,22 +40,19 @@ def read_and_combine_metrics(eval_params: dict) -> dict:
 
 app = modal.App("modal-run-eval")
 
-image = (
-    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
-    .copy_local_file("pyproject.toml", "pyproject.toml")
-    .copy_local_file("poetry.lock", "poetry.lock")
-    .env(
-        {
-            "ENV": os.getenv("ENV"),
-            "LLM_API_KEY": os.getenv("LLM_API_KEY"),
-            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
-        }
-    )
-    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
-)
+image = Image.from_dockerfile(
+    path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(),
+    force_build=False,
+).add_local_python_source("cognee")
 
 
-@app.function(image=image, concurrency_limit=10, timeout=86400, volumes={"/data": vol})
+@app.function(
+    image=image,
+    max_containers=10,
+    timeout=86400,
+    volumes={"/data": vol},
+    secrets=[modal.Secret.from_name("eval_secrets")],
+)
 async def modal_run_eval(eval_params=None):
     """Runs evaluation pipeline and returns combined metrics results."""
     if eval_params is None:
@@ -105,18 +104,7 @@ async def main():
     configs = [
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=10,
-            benchmark="HotPotQA",
-            qa_engine="cognee_graph_completion",
-            building_corpus_from_scratch=True,
-            answering_questions=True,
-            evaluating_answers=True,
-            calculate_metrics=True,
-            dashboard=True,
-        ),
-        EvalConfig(
-            task_getter_type="Default",
-            number_of_samples_in_corpus=10,
+            number_of_samples_in_corpus=25,
             benchmark="TwoWikiMultiHop",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
@@ -127,7 +115,7 @@ async def main():
         ),
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=10,
+            number_of_samples_in_corpus=25,
             benchmark="Musique",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
@@ -0,0 +1,3 @@
+from .dataset_database_handler_interface import DatasetDatabaseHandlerInterface
+from .supported_dataset_database_handlers import supported_dataset_database_handlers
+from .use_dataset_database_handler import use_dataset_database_handler
@@ -0,0 +1,80 @@
+from typing import Optional
+from uuid import UUID
+from abc import ABC, abstractmethod
+
+from cognee.modules.users.models.User import User
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+class DatasetDatabaseHandlerInterface(ABC):
+    @classmethod
+    @abstractmethod
+    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
+        """
+        Return a dictionary with connection/resolution info for the graph or vector database backing the given dataset.
+        The handler may also provision the actual database, but that is not required; providing connection
+        info is sufficient, and this info is used to map future connections to the dataset.
+        Needed for Cognee multi-tenant/multi-user support and backend access control.
+
+        The dictionary returned from this function is used to create a DatasetDatabase row in the relational
+        database, from which the internal mapping of dataset -> database connection info is done.
+
+        The returned dictionary is stored verbatim in the relational database and is later passed to
+        resolve_dataset_connection_info() at connection time. For safe credential handling, prefer
+        returning only references to secrets or role identifiers, not plaintext credentials.
+
+        When backend access control is enabled, each dataset must map to a unique graph or vector database
+        to keep data separated between datasets.
+
+        Args:
+            dataset_id: UUID of the dataset, if needed by the database creation logic
+            user: User object, if needed by the database creation logic
+        Returns:
+            dict: Connection info for the created graph or vector database instance.
+        """
+        pass
+
+    @classmethod
+    async def resolve_dataset_connection_info(
+        cls, dataset_database: DatasetDatabase
+    ) -> DatasetDatabase:
+        """
+        Resolve runtime connection details for a dataset's backing graph/vector database.
+        Override this method to implement custom logic for resolving connection info.
+
+        This method is invoked right before the application opens a connection for a given dataset.
+        It receives the DatasetDatabase row that was persisted when create_dataset() ran and must
+        return a modified DatasetDatabase instance with concrete connection parameters the client/driver can use.
+        Do not write these resolved values back to the relational database, to avoid storing secure credentials.
+
+        When separate graph and vector database handlers are used, each handler should implement its own
+        resolution logic and only change the parameters for its own database; the resolution functions are
+        then called one after another, each receiving the DatasetDatabase updated by the previous one.
+
+        Typical behavior:
+        - If the DatasetDatabase row already contains raw connection fields (e.g., host/port/db/user/password
+          or api_url/api_key), return them as-is.
+        - If the row stores only references (e.g., secret IDs, vault paths, cloud resource ARNs/IDs, IAM
+          roles, SSO tokens), resolve those references by calling the appropriate secret manager or provider
+          API to obtain short-lived credentials and assemble the final DatasetDatabase connection object.
+        - Do not persist any resolved or decrypted secrets back to the relational database. Return them only
+          to the caller.
+
+        Args:
+            dataset_database: DatasetDatabase row from the relational database
+        Returns:
+            DatasetDatabase: Updated instance with resolved connection info
+        """
+        return dataset_database
+
+    @classmethod
+    @abstractmethod
+    async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
+        """
+        Delete the graph or vector database for the given dataset.
+        The handler should delete the actual database itself or send a request to the proper service to
+        delete the database or mark it as no longer needed for the given dataset.
+        Needed to maintain per-dataset databases for Cognee multi-tenant/multi-user support and backend access control.
+
+        Args:
+            dataset_database: DatasetDatabase row containing connection/resolution info for the graph or vector database to delete.
+        """
+        pass
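
To make the contract above concrete, here is a minimal sketch of a custom handler. The provider name "my_graph_db", the vault-style secret reference, and the _fetch_short_lived_credentials helper are all hypothetical, invented for illustration; only DatasetDatabaseHandlerInterface, DatasetDatabase, and User come from this release.

from typing import Optional
from uuid import UUID

from cognee.infrastructure.databases.dataset_database_handler import (
    DatasetDatabaseHandlerInterface,
)
from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
from cognee.modules.users.models.User import User


def _fetch_short_lived_credentials(secret_ref: str) -> dict:
    # Hypothetical stand-in for a real secret-manager lookup.
    return {"graph_database_username": "svc-user", "graph_database_password": "short-lived-token"}


class MyGraphDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
    @classmethod
    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
        # Store only a secret reference, as the interface docstring recommends;
        # this dict is persisted verbatim as a DatasetDatabase row.
        return {
            "graph_database_provider": "my_graph_db",  # hypothetical provider name
            "graph_database_name": f"dataset_{dataset_id}",
            "graph_database_connection_info": {"secret_ref": f"vault://graph/{dataset_id}"},
        }

    @classmethod
    async def resolve_dataset_connection_info(
        cls, dataset_database: DatasetDatabase
    ) -> DatasetDatabase:
        # Called right before a connection is opened: swap the stored reference
        # for concrete credentials; the resolved values are never persisted.
        secret_ref = dataset_database.graph_database_connection_info["secret_ref"]
        dataset_database.graph_database_connection_info = _fetch_short_lived_credentials(secret_ref)
        return dataset_database

    @classmethod
    async def delete_dataset(cls, dataset_database: DatasetDatabase) -> None:
        # Tear down the per-dataset database, or ask the owning service to do so.
        ...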
@@ -0,0 +1,18 @@
+from cognee.infrastructure.databases.graph.neo4j_driver.Neo4jAuraDevDatasetDatabaseHandler import (
+    Neo4jAuraDevDatasetDatabaseHandler,
+)
+from cognee.infrastructure.databases.vector.lancedb.LanceDBDatasetDatabaseHandler import (
+    LanceDBDatasetDatabaseHandler,
+)
+from cognee.infrastructure.databases.graph.kuzu.KuzuDatasetDatabaseHandler import (
+    KuzuDatasetDatabaseHandler,
+)
+
+supported_dataset_database_handlers = {
+    "neo4j_aura_dev": {
+        "handler_instance": Neo4jAuraDevDatasetDatabaseHandler,
+        "handler_provider": "neo4j",
+    },
+    "lancedb": {"handler_instance": LanceDBDatasetDatabaseHandler, "handler_provider": "lancedb"},
+    "kuzu": {"handler_instance": KuzuDatasetDatabaseHandler, "handler_provider": "kuzu"},
+}
@@ -0,0 +1,10 @@
+from .supported_dataset_database_handlers import supported_dataset_database_handlers
+
+
+def use_dataset_database_handler(
+    dataset_database_handler_name, dataset_database_handler, dataset_database_provider
+):
+    supported_dataset_database_handlers[dataset_database_handler_name] = {
+        "handler_instance": dataset_database_handler,
+        "handler_provider": dataset_database_provider,
+    }
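
Given a handler like the sketch above, registering it is a single call against this registry; the handler name and provider string here are illustrative, following the pattern of the built-in entries:

from cognee.infrastructure.databases.dataset_database_handler import (
    use_dataset_database_handler,
)

# After this call, "my_graph_db" is selectable as a dataset database handler,
# alongside the built-in "neo4j_aura_dev", "lancedb", and "kuzu" entries.
use_dataset_database_handler("my_graph_db", MyGraphDatasetDatabaseHandler, "my_graph_db")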
@@ -47,6 +47,7 @@ class GraphConfig(BaseSettings):
     graph_filename: str = ""
     graph_model: object = KnowledgeGraph
     graph_topology: object = KnowledgeGraph
+    graph_dataset_database_handler: str = "kuzu"
     model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
 
     # Model validator updates graph_filename and path dynamically after class creation based on current database provider
@@ -97,6 +98,7 @@ class GraphConfig(BaseSettings):
             "graph_model": self.graph_model,
             "graph_topology": self.graph_topology,
             "model_config": self.model_config,
+            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
     def to_hashable_dict(self) -> dict:
@@ -121,6 +123,7 @@ class GraphConfig(BaseSettings):
             "graph_database_port": self.graph_database_port,
             "graph_database_key": self.graph_database_key,
             "graph_file_path": self.graph_file_path,
+            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
 
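Because GraphConfig is a pydantic BaseSettings class reading from .env, the new field should be selectable per deployment via an environment variable; a sketch, assuming pydantic-settings' default field-name-to-variable mapping:

# .env — choose which handler provisions per-dataset graph databases
GRAPH_DATASET_DATABASE_HANDLER=neo4j_aura_dev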
@@ -34,6 +34,7 @@ def create_graph_engine(
     graph_database_password="",
     graph_database_port="",
     graph_database_key="",
+    graph_dataset_database_handler="",
 ):
     """
     Create a graph engine based on the specified provider type.
@@ -398,3 +398,18 @@ class GraphDBInterface(ABC):
         - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
         """
        raise NotImplementedError
+
+    @abstractmethod
+    async def get_filtered_graph_data(
+        self, attribute_filters: List[Dict[str, List[Union[str, int]]]]
+    ) -> Tuple[List[Node], List[EdgeData]]:
+        """
+        Retrieve nodes and edges filtered by the provided attribute criteria.
+
+        Parameters:
+        -----------
+
+        - attribute_filters: A list of dictionaries where keys are attribute names and values
+          are lists of attribute values to filter by.
+        """
+        raise NotImplementedError
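
A brief usage sketch of the new interface method; graph_engine stands for any already-constructed adapter implementing GraphDBInterface, and the attribute name and values are illustrative:

# Each dict maps an attribute name to the list of accepted values; this asks
# for nodes and edges whose "type" is either "Person" or "Organization".
attribute_filters = [{"type": ["Person", "Organization"]}]
nodes, edges = await graph_engine.get_filtered_graph_data(attribute_filters)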
@@ -0,0 +1,81 @@
+import os
+from uuid import UUID
+from typing import Optional
+
+from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
+from cognee.base_config import get_base_config
+from cognee.modules.users.models import User
+from cognee.modules.users.models import DatasetDatabase
+from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
+
+
+class KuzuDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
+    """
+    Handler for interacting with Kuzu dataset databases.
+    """
+
+    @classmethod
+    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
+        """
+        Create a new Kuzu instance for the dataset and return connection info that will be mapped to the dataset.
+
+        Args:
+            dataset_id: Dataset UUID
+            user: User object who owns the dataset and is making the request
+
+        Returns:
+            dict: Connection details for the created Kuzu instance
+        """
+        from cognee.infrastructure.databases.graph.config import get_graph_config
+
+        graph_config = get_graph_config()
+
+        if graph_config.graph_database_provider != "kuzu":
+            raise ValueError(
+                "KuzuDatasetDatabaseHandler can only be used with the Kuzu graph database provider."
+            )
+
+        graph_db_name = f"{dataset_id}.pkl"
+        graph_db_url = graph_config.graph_database_url
+        graph_db_key = graph_config.graph_database_key
+        graph_db_username = graph_config.graph_database_username
+        graph_db_password = graph_config.graph_database_password
+
+        return {
+            "graph_database_name": graph_db_name,
+            "graph_database_url": graph_db_url,
+            "graph_database_provider": graph_config.graph_database_provider,
+            "graph_database_key": graph_db_key,
+            "graph_dataset_database_handler": "kuzu",
+            "graph_database_connection_info": {
+                "graph_database_username": graph_db_username,
+                "graph_database_password": graph_db_password,
+            },
+        }
+
+    @classmethod
+    async def delete_dataset(cls, dataset_database: DatasetDatabase):
+        base_config = get_base_config()
+        databases_directory_path = os.path.join(
+            base_config.system_root_directory, "databases", str(dataset_database.owner_id)
+        )
+        graph_file_path = os.path.join(
+            databases_directory_path, dataset_database.graph_database_name
+        )
+        graph_engine = create_graph_engine(
+            graph_database_provider=dataset_database.graph_database_provider,
+            graph_database_url=dataset_database.graph_database_url,
+            graph_database_name=dataset_database.graph_database_name,
+            graph_database_key=dataset_database.graph_database_key,
+            graph_file_path=graph_file_path,
+            graph_database_username=dataset_database.graph_database_connection_info.get(
+                "graph_database_username", ""
+            ),
+            graph_database_password=dataset_database.graph_database_connection_info.get(
+                "graph_database_password", ""
+            ),
+            graph_dataset_database_handler="",
+            graph_database_port="",
+        )
+        await graph_engine.delete_graph()