cognee 0.5.0__py3-none-any.whl → 0.5.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. cognee/api/client.py +5 -1
  2. cognee/api/v1/add/add.py +1 -2
  3. cognee/api/v1/cognify/code_graph_pipeline.py +119 -0
  4. cognee/api/v1/cognify/cognify.py +16 -24
  5. cognee/api/v1/cognify/routers/__init__.py +1 -0
  6. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +90 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +1 -3
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/ontologies.py +37 -12
  10. cognee/api/v1/ontologies/routers/get_ontology_router.py +25 -27
  11. cognee/api/v1/search/search.py +0 -4
  12. cognee/api/v1/ui/ui.py +68 -38
  13. cognee/context_global_variables.py +16 -61
  14. cognee/eval_framework/answer_generation/answer_generation_executor.py +0 -10
  15. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  16. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +2 -0
  17. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  18. cognee/eval_framework/eval_config.py +2 -2
  19. cognee/eval_framework/modal_run_eval.py +28 -16
  20. cognee/infrastructure/databases/graph/config.py +0 -3
  21. cognee/infrastructure/databases/graph/get_graph_engine.py +0 -1
  22. cognee/infrastructure/databases/graph/graph_db_interface.py +0 -15
  23. cognee/infrastructure/databases/graph/kuzu/adapter.py +0 -228
  24. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +1 -80
  25. cognee/infrastructure/databases/utils/__init__.py +0 -3
  26. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +48 -62
  27. cognee/infrastructure/databases/vector/config.py +0 -2
  28. cognee/infrastructure/databases/vector/create_vector_engine.py +0 -1
  29. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +6 -8
  30. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +7 -9
  31. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +10 -11
  32. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +544 -0
  33. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +0 -2
  34. cognee/infrastructure/databases/vector/vector_db_interface.py +0 -35
  35. cognee/infrastructure/files/storage/s3_config.py +0 -2
  36. cognee/infrastructure/llm/LLMGateway.py +2 -5
  37. cognee/infrastructure/llm/config.py +0 -35
  38. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  39. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +8 -23
  40. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +16 -17
  41. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +37 -40
  42. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +36 -39
  43. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +1 -19
  44. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +9 -11
  45. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +21 -23
  46. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +34 -42
  47. cognee/modules/cognify/config.py +0 -2
  48. cognee/modules/data/deletion/prune_system.py +2 -52
  49. cognee/modules/data/methods/delete_dataset.py +0 -26
  50. cognee/modules/engine/models/__init__.py +0 -1
  51. cognee/modules/graph/cognee_graph/CogneeGraph.py +37 -85
  52. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +3 -8
  53. cognee/modules/memify/memify.py +7 -1
  54. cognee/modules/pipelines/operations/pipeline.py +2 -18
  55. cognee/modules/retrieval/__init__.py +1 -1
  56. cognee/modules/retrieval/code_retriever.py +232 -0
  57. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -4
  58. cognee/modules/retrieval/graph_completion_cot_retriever.py +0 -4
  59. cognee/modules/retrieval/graph_completion_retriever.py +0 -10
  60. cognee/modules/retrieval/graph_summary_completion_retriever.py +0 -4
  61. cognee/modules/retrieval/temporal_retriever.py +0 -4
  62. cognee/modules/retrieval/utils/brute_force_triplet_search.py +10 -42
  63. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +1 -8
  64. cognee/modules/search/methods/get_search_type_tools.py +8 -54
  65. cognee/modules/search/methods/no_access_control_search.py +0 -4
  66. cognee/modules/search/methods/search.py +0 -21
  67. cognee/modules/search/types/SearchType.py +1 -1
  68. cognee/modules/settings/get_settings.py +0 -19
  69. cognee/modules/users/methods/get_authenticated_user.py +2 -2
  70. cognee/modules/users/models/DatasetDatabase.py +3 -15
  71. cognee/shared/logging_utils.py +0 -4
  72. cognee/tasks/code/enrich_dependency_graph_checker.py +35 -0
  73. cognee/tasks/code/get_local_dependencies_checker.py +20 -0
  74. cognee/tasks/code/get_repo_dependency_graph_checker.py +35 -0
  75. cognee/tasks/documents/__init__.py +1 -0
  76. cognee/tasks/documents/check_permissions_on_dataset.py +26 -0
  77. cognee/tasks/graph/extract_graph_from_data.py +10 -9
  78. cognee/tasks/repo_processor/__init__.py +2 -0
  79. cognee/tasks/repo_processor/get_local_dependencies.py +335 -0
  80. cognee/tasks/repo_processor/get_non_code_files.py +158 -0
  81. cognee/tasks/repo_processor/get_repo_file_dependencies.py +243 -0
  82. cognee/tasks/storage/add_data_points.py +2 -142
  83. cognee/tests/test_cognee_server_start.py +4 -2
  84. cognee/tests/test_conversation_history.py +1 -23
  85. cognee/tests/test_delete_bmw_example.py +60 -0
  86. cognee/tests/test_search_db.py +1 -37
  87. cognee/tests/unit/api/test_ontology_endpoint.py +89 -77
  88. cognee/tests/unit/infrastructure/mock_embedding_engine.py +7 -3
  89. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -0
  90. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  91. cognee/tests/unit/modules/graph/cognee_graph_test.py +0 -406
  92. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/METADATA +89 -76
  93. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/RECORD +97 -118
  94. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/WHEEL +1 -1
  95. cognee/api/v1/ui/node_setup.py +0 -360
  96. cognee/api/v1/ui/npm_utils.py +0 -50
  97. cognee/eval_framework/Dockerfile +0 -29
  98. cognee/infrastructure/databases/dataset_database_handler/__init__.py +0 -3
  99. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +0 -80
  100. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +0 -18
  101. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +0 -10
  102. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +0 -81
  103. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +0 -168
  104. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +0 -10
  105. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +0 -10
  106. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +0 -30
  107. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +0 -50
  108. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +0 -5
  109. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +0 -153
  110. cognee/memify_pipelines/create_triplet_embeddings.py +0 -53
  111. cognee/modules/engine/models/Triplet.py +0 -9
  112. cognee/modules/retrieval/register_retriever.py +0 -10
  113. cognee/modules/retrieval/registered_community_retrievers.py +0 -1
  114. cognee/modules/retrieval/triplet_retriever.py +0 -182
  115. cognee/shared/rate_limiting.py +0 -30
  116. cognee/tasks/memify/get_triplet_datapoints.py +0 -289
  117. cognee/tests/integration/retrieval/test_triplet_retriever.py +0 -84
  118. cognee/tests/integration/tasks/test_add_data_points.py +0 -139
  119. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +0 -69
  120. cognee/tests/test_dataset_database_handler.py +0 -137
  121. cognee/tests/test_dataset_delete.py +0 -76
  122. cognee/tests/test_edge_centered_payload.py +0 -170
  123. cognee/tests/test_pipeline_cache.py +0 -164
  124. cognee/tests/unit/infrastructure/llm/test_llm_config.py +0 -46
  125. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +0 -214
  126. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +0 -608
  127. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +0 -83
  128. cognee/tests/unit/tasks/storage/test_add_data_points.py +0 -288
  129. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/entry_points.txt +0 -0
  130. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/LICENSE +0 -0
  131. {cognee-0.5.0.dist-info → cognee-0.5.0.dev0.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/ui/ui.py CHANGED
@@ -15,8 +15,6 @@ import shutil
 
 from cognee.shared.logging_utils import get_logger
 from cognee.version import get_cognee_version
-from .node_setup import check_node_npm, get_nvm_dir, get_nvm_sh_path
-from .npm_utils import run_npm_command
 
 logger = get_logger()
 
@@ -287,6 +285,48 @@ def find_frontend_path() -> Optional[Path]:
     return None
 
 
+def check_node_npm() -> tuple[bool, str]:
+    """
+    Check if Node.js and npm are available.
+    Returns (is_available, error_message)
+    """
+
+    try:
+        # Check Node.js
+        result = subprocess.run(["node", "--version"], capture_output=True, text=True, timeout=10)
+        if result.returncode != 0:
+            return False, "Node.js is not installed or not in PATH"
+
+        node_version = result.stdout.strip()
+        logger.debug(f"Found Node.js version: {node_version}")
+
+        # Check npm - handle Windows PowerShell scripts
+        if platform.system() == "Windows":
+            # On Windows, npm might be a PowerShell script, so we need to use shell=True
+            result = subprocess.run(
+                ["npm", "--version"], capture_output=True, text=True, timeout=10, shell=True
+            )
+        else:
+            result = subprocess.run(
+                ["npm", "--version"], capture_output=True, text=True, timeout=10
+            )
+
+        if result.returncode != 0:
+            return False, "npm is not installed or not in PATH"
+
+        npm_version = result.stdout.strip()
+        logger.debug(f"Found npm version: {npm_version}")
+
+        return True, f"Node.js {node_version}, npm {npm_version}"
+
+    except subprocess.TimeoutExpired:
+        return False, "Timeout checking Node.js/npm installation"
+    except FileNotFoundError:
+        return False, "Node.js/npm not found. Please install Node.js from https://nodejs.org/"
+    except Exception as e:
+        return False, f"Error checking Node.js/npm: {str(e)}"
+
+
 def install_frontend_dependencies(frontend_path: Path) -> bool:
     """
     Install frontend dependencies if node_modules doesn't exist.
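The inlined check_node_npm reports failure through its return value instead of raising, which keeps startup logic simple. A minimal caller sketch, assuming only the function added above is in scope (the ensure_node_toolchain wrapper and its exit behavior are illustrative, not part of the diff):

import sys

def ensure_node_toolchain() -> None:
    # Hypothetical guard for UI startup: surface the helper's own message and bail out.
    ok, message = check_node_npm()
    if not ok:
        print(f"Cannot start UI: {message}", file=sys.stderr)
        sys.exit(1)
    # On success the message carries both versions, e.g. "Node.js v20.11.0, npm 10.2.4".
    print(f"Node toolchain available: {message}")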
@@ -301,7 +341,24 @@ def install_frontend_dependencies(frontend_path: Path) -> bool:
     logger.info("Installing frontend dependencies (this may take a few minutes)...")
 
     try:
-        result = run_npm_command(["npm", "install"], frontend_path, timeout=300)
+        # Use shell=True on Windows for npm commands
+        if platform.system() == "Windows":
+            result = subprocess.run(
+                ["npm", "install"],
+                cwd=frontend_path,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minutes timeout
+                shell=True,
+            )
+        else:
+            result = subprocess.run(
+                ["npm", "install"],
+                cwd=frontend_path,
+                capture_output=True,
+                text=True,
+                timeout=300,  # 5 minutes timeout
+            )
 
         if result.returncode == 0:
             logger.info("Frontend dependencies installed successfully")
@@ -585,21 +642,6 @@ def start_ui(
     env["HOST"] = "localhost"
     env["PORT"] = str(port)
 
-    # If nvm is installed, ensure it's available in the environment
-    nvm_path = get_nvm_sh_path()
-    if platform.system() != "Windows" and nvm_path.exists():
-        # Add nvm to PATH for the subprocess
-        nvm_dir = get_nvm_dir()
-        # Find the latest Node.js version installed via nvm
-        nvm_versions = nvm_dir / "versions" / "node"
-        if nvm_versions.exists():
-            versions = sorted(nvm_versions.iterdir(), reverse=True)
-            if versions:
-                latest_node_bin = versions[0] / "bin"
-                if latest_node_bin.exists():
-                    current_path = env.get("PATH", "")
-                    env["PATH"] = f"{latest_node_bin}:{current_path}"
-
     # Start the development server
     logger.info(f"Starting frontend server at http://localhost:{port}")
     logger.info("This may take a moment to compile and start...")
@@ -617,26 +659,14 @@ def start_ui(
             shell=True,
         )
     else:
-        # On Unix-like systems, use bash with nvm sourced if available
-        if nvm_path.exists():
-            # Use bash to source nvm and run npm
-            process = subprocess.Popen(
-                ["bash", "-c", f"source {nvm_path} && npm run dev"],
-                cwd=frontend_path,
-                env=env,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
-            )
-        else:
-            process = subprocess.Popen(
-                ["npm", "run", "dev"],
-                cwd=frontend_path,
-                env=env,
-                stdout=subprocess.PIPE,
-                stderr=subprocess.PIPE,
-                preexec_fn=os.setsid if hasattr(os, "setsid") else None,
-            )
+        process = subprocess.Popen(
+            ["npm", "run", "dev"],
+            cwd=frontend_path,
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            preexec_fn=os.setsid if hasattr(os, "setsid") else None,
+        )
 
     # Start threads to stream frontend output with prefix
     _stream_process_output(process, "stdout", "[FRONTEND]", "\033[33m")  # Yellow
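The preexec_fn=os.setsid call kept in both branches makes the dev server the leader of its own process group, which matters at shutdown: npm spawns node children, and terminating only the npm process would orphan them. A teardown sketch under that assumption (stop_dev_server is illustrative, not from the diff):

import os
import signal
import subprocess

def stop_dev_server(process: subprocess.Popen) -> None:
    # The child was started with preexec_fn=os.setsid, so signalling its
    # process group also reaches the node processes npm spawned.
    if hasattr(os, "killpg"):
        os.killpg(os.getpgid(process.pid), signal.SIGTERM)
    else:
        process.terminate()  # Windows: no setsid/process groups, best effort
    process.wait(timeout=10)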
cognee/context_global_variables.py CHANGED
@@ -4,10 +4,9 @@ from typing import Union
 from uuid import UUID
 
 from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.vector.config import get_vectordb_config
-from cognee.infrastructure.databases.graph.config import get_graph_config
+from cognee.infrastructure.databases.vector.config import get_vectordb_context_config
+from cognee.infrastructure.databases.graph.config import get_graph_context_config
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
-from cognee.infrastructure.databases.utils import resolve_dataset_database_connection_info
 from cognee.infrastructure.files.storage.config import file_storage_config
 from cognee.modules.users.methods import get_user
 
@@ -17,59 +16,22 @@ vector_db_config = ContextVar("vector_db_config", default=None)
 graph_db_config = ContextVar("graph_db_config", default=None)
 session_user = ContextVar("session_user", default=None)
 
+VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
+GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]
+
 
 async def set_session_user_context_variable(user):
     session_user.set(user)
 
 
 def multi_user_support_possible():
-    graph_db_config = get_graph_config()
-    vector_db_config = get_vectordb_config()
-
-    graph_handler = graph_db_config.graph_dataset_database_handler
-    vector_handler = vector_db_config.vector_dataset_database_handler
-    from cognee.infrastructure.databases.dataset_database_handler import (
-        supported_dataset_database_handlers,
+    graph_db_config = get_graph_context_config()
+    vector_db_config = get_vectordb_context_config()
+    return (
+        graph_db_config["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
+        and vector_db_config["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
     )
 
-    if graph_handler not in supported_dataset_database_handlers:
-        raise EnvironmentError(
-            "Unsupported graph dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected graph dataset to database handler: {graph_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if vector_handler not in supported_dataset_database_handlers:
-        raise EnvironmentError(
-            "Unsupported vector dataset to database handler configured. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected vector dataset to database handler: {vector_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if (
-        supported_dataset_database_handlers[graph_handler]["handler_provider"]
-        != graph_db_config.graph_database_provider
-    ):
-        raise EnvironmentError(
-            "The selected graph dataset to database handler does not work with the configured graph database provider. Cannot add support for multi-user access control mode. Please use a supported graph dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected graph database provider: {graph_db_config.graph_database_provider}\n"
-            f"Selected graph dataset to database handler: {graph_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    if (
-        supported_dataset_database_handlers[vector_handler]["handler_provider"]
-        != vector_db_config.vector_db_provider
-    ):
-        raise EnvironmentError(
-            "The selected vector dataset to database handler does not work with the configured vector database provider. Cannot add support for multi-user access control mode. Please use a supported vector dataset to database handler or set the environment variables ENABLE_BACKEND_ACCESS_CONTROL to false to switch off multi-user access control mode.\n"
-            f"Selected vector database provider: {vector_db_config.vector_db_provider}\n"
-            f"Selected vector dataset to database handler: {vector_handler}\n"
-            f"Supported dataset to database handlers: {list(supported_dataset_database_handlers.keys())}\n"
-        )
-
-    return True
-
 
 def backend_access_control_enabled():
     backend_access_control = os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", None)
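The left-hand side validated handler registries; the right-hand side reduces this to a provider allow-list: multi-user mode is considered possible exactly when both configured providers appear in the module-level lists. A self-contained restatement of the new check, with literal dicts standing in for what the context config getters return (key names as in the diff, values assumed):

VECTOR_DBS_WITH_MULTI_USER_SUPPORT = ["lancedb", "falkor"]
GRAPH_DBS_WITH_MULTI_USER_SUPPORT = ["kuzu", "falkor"]

graph_cfg = {"graph_database_provider": "kuzu"}  # stand-in for get_graph_context_config()
vector_cfg = {"vector_db_provider": "lancedb"}   # stand-in for get_vectordb_context_config()

multi_user_possible = (
    graph_cfg["graph_database_provider"] in GRAPH_DBS_WITH_MULTI_USER_SUPPORT
    and vector_cfg["vector_db_provider"] in VECTOR_DBS_WITH_MULTI_USER_SUPPORT
)
assert multi_user_possible  # kuzu + lancedb is a supported pairing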
@@ -79,7 +41,12 @@ def backend_access_control_enabled():
         return multi_user_support_possible()
     elif backend_access_control.lower() == "true":
         # If enabled, ensure that the current graph and vector DBs can support it
-        return multi_user_support_possible()
+        multi_user_support = multi_user_support_possible()
+        if not multi_user_support:
+            raise EnvironmentError(
+                "ENABLE_BACKEND_ACCESS_CONTROL is set to true but the current graph and/or vector databases do not support multi-user access control. Please use supported databases or disable backend access control."
+            )
+        return True
     return False
 
 
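On the right-hand side, forcing the flag on no longer silently returns the support check: if the configured databases fall outside the allow-lists, backend_access_control_enabled() raises. A usage sketch, assuming the function above is in scope and an unsupported provider pairing is configured:

import os

os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "true"
try:
    backend_access_control_enabled()
except EnvironmentError as error:
    # Misconfiguration now surfaces at startup rather than degrading silently.
    print(f"Access control misconfigured: {error}")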
@@ -109,8 +76,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
 
     # To ensure permissions are enforced properly all datasets will have their own databases
     dataset_database = await get_or_create_dataset_database(dataset, user)
-    # Ensure that all connection info is resolved properly
-    dataset_database = await resolve_dataset_database_connection_info(dataset_database)
 
     base_config = get_base_config()
     data_root_directory = os.path.join(
@@ -121,8 +86,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
     )
 
     # Set vector and graph database configuration based on dataset database information
-    # TODO: Add better handling of vector and graph config accross Cognee.
-    # LRU_CACHE takes into account order of inputs, if order of inputs is changed it will be registered as a new DB adapter
     vector_config = {
         "vector_db_provider": dataset_database.vector_database_provider,
         "vector_db_url": dataset_database.vector_database_url,
@@ -138,14 +101,6 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
         "graph_file_path": os.path.join(
            databases_directory_path, dataset_database.graph_database_name
         ),
-        "graph_database_username": dataset_database.graph_database_connection_info.get(
-            "graph_database_username", ""
-        ),
-        "graph_database_password": dataset_database.graph_database_connection_info.get(
-            "graph_database_password", ""
-        ),
-        "graph_dataset_database_handler": "",
-        "graph_database_port": "",
     }
 
     storage_config = {
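All of this hangs off the ContextVars declared at module top, so per-dataset database settings are scoped to the current async context rather than mutated globally, and concurrent requests against different datasets cannot clobber each other. A minimal sketch of the pattern (the get_effective_vector_config helper is illustrative, not part of the module):

from contextvars import ContextVar

vector_db_config: ContextVar = ContextVar("vector_db_config", default=None)

def get_effective_vector_config(global_config: dict) -> dict:
    # Readers prefer the per-context override installed by
    # set_database_global_context_variables(), falling back to global settings.
    return vector_db_config.get() or global_config

token = vector_db_config.set({"vector_db_provider": "lancedb"})
print(get_effective_vector_config({"vector_db_provider": "pgvector"}))  # per-dataset wins
vector_db_config.reset(token)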
cognee/eval_framework/answer_generation/answer_generation_executor.py CHANGED
@@ -35,16 +35,6 @@ class AnswerGeneratorExecutor:
             retrieval_context = await retriever.get_context(query_text)
             search_results = await retriever.get_completion(query_text, retrieval_context)
 
-            ############
-            #:TODO This is a quick fix until we don't structure retriever results properly but lets not leave it like this...this is needed now due to the changed combined retriever structure..
-            if isinstance(retrieval_context, list):
-                retrieval_context = await retriever.convert_retrieved_objects_to_context(
-                    triplets=retrieval_context
-                )
-
-            if isinstance(search_results, str):
-                search_results = [search_results]
-            #############
             answer = {
                 "question": query_text,
                 "answer": search_results[0],
cognee/eval_framework/answer_generation/run_question_answering_module.py CHANGED
@@ -35,7 +35,7 @@ async def create_and_insert_answers_table(questions_payload):
 
 
 async def run_question_answering(
-    params: dict, system_prompt="answer_simple_question_benchmark.txt", top_k: Optional[int] = None
+    params: dict, system_prompt="answer_simple_question.txt", top_k: Optional[int] = None
 ) -> List[dict]:
     if params.get("answering_questions"):
         logger.info("Question answering started...")
cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py CHANGED
@@ -8,6 +8,7 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )
@@ -30,6 +31,7 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py CHANGED
@@ -30,8 +30,8 @@ async def get_no_summary_tasks(
     ontology_file_path=None,
 ) -> List[Task]:
     """Returns default tasks without summarization tasks."""
-    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
     ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
@@ -51,8 +51,8 @@ async def get_just_chunks_tasks(
     chunk_size: int = None, chunker=TextChunker, user=None
 ) -> List[Task]:
     """Returns default tasks with only chunk extraction and data points addition."""
-    # Get base tasks (0=classify, 1=extract_chunks)
-    base_tasks = await get_default_tasks_by_indices([0, 1], chunk_size, chunker)
+    # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
+    base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
     add_data_points_task = Task(add_data_points, task_config={"batch_size": 10})
 
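The index bump follows mechanically from the change above: check_permissions_on_dataset now sits at position 1 in the default task list, pushing chunk extraction to position 2. A toy model of the selection logic (task names only, with get_tasks_by_indices standing in for the real helper):

old_tasks = ["classify", "extract_chunks"]
new_tasks = ["classify", "check_permissions", "extract_chunks"]

def get_tasks_by_indices(tasks: list, indices: list) -> list:
    # Stand-in for get_default_tasks_by_indices: pick tasks by position.
    return [tasks[i] for i in indices]

# Selecting [0, 1] against the new list would silently drop chunk extraction;
# [0, 1, 2] restores the intended coverage.
assert get_tasks_by_indices(new_tasks, [0, 1, 2]) == ["classify", "check_permissions", "extract_chunks"]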
cognee/eval_framework/eval_config.py CHANGED
@@ -14,7 +14,7 @@ class EvalConfig(BaseSettings):
 
     # Question answering params
     answering_questions: bool = True
-    qa_engine: str = "cognee_graph_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
+    qa_engine: str = "cognee_completion"  # Options: 'cognee_completion' or 'cognee_graph_completion' or 'cognee_graph_completion_cot' or 'cognee_graph_completion_context_extension'
 
     # Evaluation params
     evaluating_answers: bool = True
@@ -25,7 +25,7 @@ class EvalConfig(BaseSettings):
         "EM",
         "f1",
     ]  # Use only 'correctness' for DirectLLM
-    deepeval_model: str = "gpt-4o-mini"
+    deepeval_model: str = "gpt-5-mini"
 
     # Metrics params
     calculate_metrics: bool = True
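EvalConfig is a pydantic settings model, so these defaults are only starting points: main() overrides them per run through constructor keywords, and environment variables can do the same. A trimmed sketch keeping just the fields this diff touches (the pydantic_settings import is an assumption about where BaseSettings comes from):

from pydantic_settings import BaseSettings

class EvalConfig(BaseSettings):
    # Reduced stand-in for the real model, which carries many more fields.
    answering_questions: bool = True
    qa_engine: str = "cognee_completion"
    deepeval_model: str = "gpt-5-mini"

config = EvalConfig(qa_engine="cognee_graph_completion")  # per-run override, as in main()
print(config.qa_engine, config.deepeval_model)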
cognee/eval_framework/modal_run_eval.py CHANGED
@@ -2,6 +2,7 @@ import modal
 import os
 import asyncio
 import datetime
+import hashlib
 import json
 from cognee.shared.logging_utils import get_logger
 from cognee.eval_framework.eval_config import EvalConfig
@@ -9,9 +10,6 @@ from cognee.eval_framework.corpus_builder.run_corpus_builder import run_corpus_b
 from cognee.eval_framework.answer_generation.run_question_answering_module import (
     run_question_answering,
 )
-import pathlib
-from os import path
-from modal import Image
 from cognee.eval_framework.evaluation.run_evaluation_module import run_evaluation
 from cognee.eval_framework.metrics_dashboard import create_dashboard
 
@@ -40,19 +38,22 @@ def read_and_combine_metrics(eval_params: dict) -> dict:
 
 app = modal.App("modal-run-eval")
 
-image = Image.from_dockerfile(
-    path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(),
-    force_build=False,
-).add_local_python_source("cognee")
+image = (
+    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
+    .copy_local_file("pyproject.toml", "pyproject.toml")
+    .copy_local_file("poetry.lock", "poetry.lock")
+    .env(
+        {
+            "ENV": os.getenv("ENV"),
+            "LLM_API_KEY": os.getenv("LLM_API_KEY"),
+            "OPENAI_API_KEY": os.getenv("OPENAI_API_KEY"),
+        }
+    )
+    .pip_install("protobuf", "h2", "deepeval", "gdown", "plotly")
+)
 
 
-@app.function(
-    image=image,
-    max_containers=10,
-    timeout=86400,
-    volumes={"/data": vol},
-    secrets=[modal.Secret.from_name("eval_secrets")],
-)
+@app.function(image=image, concurrency_limit=10, timeout=86400, volumes={"/data": vol})
 async def modal_run_eval(eval_params=None):
     """Runs evaluation pipeline and returns combined metrics results."""
     if eval_params is None:
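The right-hand side bakes credentials into the image environment at build time via .env(), reading them from the local shell; the left-hand side instead attaches modal.Secret.from_name("eval_secrets") at runtime, which keeps keys out of image layers. Baking at build time makes a local pre-flight check worthwhile; a small sketch (the variable list mirrors the diff, the failure behavior is illustrative):

import os

required = ["ENV", "LLM_API_KEY", "OPENAI_API_KEY"]
missing = [name for name in required if not os.getenv(name)]
if missing:
    # Without these exported, .env() would bake None into the image environment.
    raise SystemExit(f"Export these before building the Modal image: {missing}")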
@@ -104,7 +105,18 @@ async def main():
     configs = [
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=25,
+            number_of_samples_in_corpus=10,
+            benchmark="HotPotQA",
+            qa_engine="cognee_graph_completion",
+            building_corpus_from_scratch=True,
+            answering_questions=True,
+            evaluating_answers=True,
+            calculate_metrics=True,
+            dashboard=True,
+        ),
+        EvalConfig(
+            task_getter_type="Default",
+            number_of_samples_in_corpus=10,
             benchmark="TwoWikiMultiHop",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
@@ -115,7 +127,7 @@ async def main():
         ),
         EvalConfig(
             task_getter_type="Default",
-            number_of_samples_in_corpus=25,
+            number_of_samples_in_corpus=10,
             benchmark="Musique",
             qa_engine="cognee_graph_completion",
             building_corpus_from_scratch=True,
cognee/infrastructure/databases/graph/config.py CHANGED
@@ -47,7 +47,6 @@ class GraphConfig(BaseSettings):
     graph_filename: str = ""
     graph_model: object = KnowledgeGraph
     graph_topology: object = KnowledgeGraph
-    graph_dataset_database_handler: str = "kuzu"
     model_config = SettingsConfigDict(env_file=".env", extra="allow", populate_by_name=True)
 
     # Model validator updates graph_filename and path dynamically after class creation based on current database provider
@@ -98,7 +97,6 @@ class GraphConfig(BaseSettings):
             "graph_model": self.graph_model,
             "graph_topology": self.graph_topology,
             "model_config": self.model_config,
-            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
     def to_hashable_dict(self) -> dict:
@@ -123,7 +121,6 @@ class GraphConfig(BaseSettings):
             "graph_database_port": self.graph_database_port,
             "graph_database_key": self.graph_database_key,
             "graph_file_path": self.graph_file_path,
-            "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
 
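to_hashable_dict() exists because plain dicts cannot key a cache, and (as the TODO removed from context_global_variables.py warned) a cache keyed on raw arguments is order-sensitive. A sketch of the caching pattern this supports, with a hypothetical adapter factory:

from functools import lru_cache

def to_cache_key(config: dict) -> tuple:
    # Sort so that two configs with the same content but different insertion
    # order map to the same key, avoiding duplicate adapter instances.
    return tuple(sorted(config.items()))

@lru_cache(maxsize=None)
def get_graph_adapter(cache_key: tuple) -> object:
    # Hypothetical factory: one adapter object per distinct configuration.
    return object()

cfg_a = {"graph_database_provider": "kuzu", "graph_database_url": ""}
cfg_b = {"graph_database_url": "", "graph_database_provider": "kuzu"}
assert get_graph_adapter(to_cache_key(cfg_a)) is get_graph_adapter(to_cache_key(cfg_b))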
cognee/infrastructure/databases/graph/get_graph_engine.py CHANGED
@@ -34,7 +34,6 @@ def create_graph_engine(
     graph_database_password="",
     graph_database_port="",
     graph_database_key="",
-    graph_dataset_database_handler="",
 ):
     """
     Create a graph engine based on the specified provider type.
cognee/infrastructure/databases/graph/graph_db_interface.py CHANGED
@@ -398,18 +398,3 @@ class GraphDBInterface(ABC):
         - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
         """
         raise NotImplementedError
-
-    @abstractmethod
-    async def get_filtered_graph_data(
-        self, attribute_filters: List[Dict[str, List[Union[str, int]]]]
-    ) -> Tuple[List[Node], List[EdgeData]]:
-        """
-        Retrieve nodes and edges filtered by the provided attribute criteria.
-
-        Parameters:
-        -----------
-
-        - attribute_filters: A list of dictionaries where keys are attribute names and values
-          are lists of attribute values to filter by.
-        """
-        raise NotImplementedError
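For reference, the removed docstring pins down the filter shape: each dict maps attribute names to the values allowed for them. A plain-Python restatement of that contract over dict-shaped nodes (real adapters implemented it in their query languages; treating multiple filter dicts as alternatives is an assumption here):

def node_matches(node: dict, attribute_filters: list) -> bool:
    # A node passes if some filter dict is fully satisfied: every listed
    # attribute must hold one of its allowed values.
    return any(
        all(node.get(attr) in allowed for attr, allowed in flt.items())
        for flt in attribute_filters
    )

nodes = [{"type": "Person", "status": 1}, {"type": "Place", "status": 2}]
print([n for n in nodes if node_matches(n, [{"type": ["Person"], "status": [1, 3]}])])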