cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223) hide show
  1. cognee/api/client.py +44 -4
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +13 -3
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
  116. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  117. cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
  118. cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
  119. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  120. cognee/modules/ingestion/data_types/TextData.py +8 -2
  121. cognee/modules/ingestion/save_data_to_file.py +1 -1
  122. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  123. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  124. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  125. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  126. cognee/modules/pipelines/models/__init__.py +1 -0
  127. cognee/modules/pipelines/operations/pipeline.py +10 -2
  128. cognee/modules/pipelines/operations/run_tasks.py +252 -20
  129. cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
  130. cognee/modules/retrieval/chunks_retriever.py +23 -1
  131. cognee/modules/retrieval/code_retriever.py +66 -9
  132. cognee/modules/retrieval/completion_retriever.py +11 -9
  133. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  134. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  135. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  136. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  137. cognee/modules/retrieval/insights_retriever.py +4 -0
  138. cognee/modules/retrieval/natural_language_retriever.py +9 -15
  139. cognee/modules/retrieval/summaries_retriever.py +23 -1
  140. cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
  141. cognee/modules/retrieval/utils/completion.py +6 -9
  142. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  143. cognee/modules/search/methods/search.py +5 -1
  144. cognee/modules/search/operations/__init__.py +1 -0
  145. cognee/modules/search/operations/select_search_type.py +42 -0
  146. cognee/modules/search/types/SearchType.py +1 -0
  147. cognee/modules/settings/get_settings.py +0 -8
  148. cognee/modules/settings/save_vector_db_config.py +1 -1
  149. cognee/shared/data_models.py +3 -1
  150. cognee/shared/logging_utils.py +0 -5
  151. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  152. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  153. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  154. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  155. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  156. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  157. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  158. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  159. cognee/tasks/graph/infer_data_ontology.py +5 -6
  160. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  161. cognee/tasks/ingestion/ingest_data.py +91 -61
  162. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  163. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  164. cognee/tasks/storage/index_data_points.py +1 -1
  165. cognee/tasks/storage/index_graph_edges.py +4 -1
  166. cognee/tasks/summarization/summarize_code.py +2 -3
  167. cognee/tasks/summarization/summarize_text.py +3 -2
  168. cognee/tests/test_cognee_server_start.py +12 -7
  169. cognee/tests/test_deduplication.py +2 -2
  170. cognee/tests/test_deletion.py +58 -17
  171. cognee/tests/test_graph_visualization_permissions.py +161 -0
  172. cognee/tests/test_neptune_analytics_graph.py +309 -0
  173. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  174. cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
  175. cognee/tests/test_pgvector.py +5 -5
  176. cognee/tests/test_s3.py +1 -6
  177. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  178. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  179. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  180. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  181. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  182. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  183. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  184. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
  185. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  186. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
  187. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
  188. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  189. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  190. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  191. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  192. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  193. cognee/modules/data/extraction/extract_categories.py +0 -14
  194. cognee/tests/test_qdrant.py +0 -99
  195. distributed/Dockerfile +0 -34
  196. distributed/app.py +0 -4
  197. distributed/entrypoint.py +0 -71
  198. distributed/entrypoint.sh +0 -5
  199. distributed/modal_image.py +0 -11
  200. distributed/queues.py +0 -5
  201. distributed/tasks/queued_add_data_points.py +0 -13
  202. distributed/tasks/queued_add_edges.py +0 -13
  203. distributed/tasks/queued_add_nodes.py +0 -13
  204. distributed/test.py +0 -28
  205. distributed/utils.py +0 -19
  206. distributed/workers/data_point_saving_worker.py +0 -93
  207. distributed/workers/graph_saving_worker.py +0 -104
  208. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  209. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  210. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  211. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  212. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  213. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  214. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  215. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  216. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  217. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  218. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  219. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  220. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  221. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
  222. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
  223. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -71,6 +71,12 @@ async def search(
71
71
  Best for: Advanced users, specific graph traversals, debugging.
72
72
  Returns: Raw graph query results.
73
73
 
74
+ **FEELING_LUCKY**:
75
+ Intelligently selects and runs the most appropriate search type.
76
+ Best for: General-purpose queries or when you're unsure which search type is best.
77
+ Returns: The results from the automatically selected search type.
78
+
79
+
74
80
  Args:
75
81
  query_text: Your question or search query in natural language.
76
82
  Examples:
@@ -119,6 +125,9 @@ async def search(
119
125
  **CODE**:
120
126
  [List of structured code information with context]
121
127
 
128
+ **FEELING_LUCKY**:
129
+ [List of results in the format of the search type that is automatically selected]
130
+
122
131
 
123
132
 
124
133
 
@@ -130,6 +139,7 @@ async def search(
130
139
  - **CHUNKS**: Fastest, pure vector similarity search without LLM
131
140
  - **SUMMARIES**: Fast, returns pre-computed summaries
132
141
  - **CODE**: Medium speed, specialized for code understanding
142
+ - **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
133
143
  - **top_k**: Start with 10, increase for comprehensive analysis (max 100)
134
144
  - **datasets**: Specify datasets to improve speed and relevance
135
145
 
@@ -30,8 +30,6 @@ class VectorDBConfigInputDTO(InDTO):
30
30
  provider: Union[
31
31
  Literal["lancedb"],
32
32
  Literal["chromadb"],
33
- Literal["qdrant"],
34
- Literal["weaviate"],
35
33
  Literal["pgvector"],
36
34
  ]
37
35
  url: str
cognee/base_config.py CHANGED
@@ -20,6 +20,7 @@ class BaseConfig(BaseSettings):
20
20
  def to_dict(self) -> dict:
21
21
  return {
22
22
  "data_root_directory": self.data_root_directory,
23
+ "system_root_directory": self.system_root_directory,
23
24
  "monitoring_tool": self.monitoring_tool,
24
25
  }
25
26
 
@@ -1,10 +1,10 @@
1
1
  from typing import Any, Dict, List
2
2
  from pydantic import BaseModel
3
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
4
3
  from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
5
- from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
6
4
  from cognee.eval_framework.eval_config import EvalConfig
7
5
 
6
+ from cognee.infrastructure.llm import LLMGateway
7
+
8
8
 
9
9
  class CorrectnessEvaluation(BaseModel):
10
10
  """Response model containing evaluation score and explanation."""
@@ -19,17 +19,16 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
19
19
  config = EvalConfig()
20
20
  self.system_prompt_path = config.direct_llm_system_prompt
21
21
  self.eval_prompt_path = config.direct_llm_eval_prompt
22
- self.llm_client = get_llm_client()
23
22
 
24
23
  async def evaluate_correctness(
25
24
  self, question: str, answer: str, golden_answer: str
26
25
  ) -> Dict[str, Any]:
27
26
  args = {"question": question, "answer": answer, "golden_answer": golden_answer}
28
27
 
29
- user_prompt = render_prompt(self.eval_prompt_path, args)
30
- system_prompt = read_query_prompt(self.system_prompt_path)
28
+ user_prompt = LLMGateway.render_prompt(self.eval_prompt_path, args)
29
+ system_prompt = LLMGateway.read_query_prompt(self.system_prompt_path)
31
30
 
32
- evaluation = await self.llm_client.acreate_structured_output(
31
+ evaluation = await LLMGateway.acreate_structured_output(
33
32
  text_input=user_prompt,
34
33
  system_prompt=system_prompt,
35
34
  response_model=CorrectnessEvaluation,
@@ -36,6 +36,7 @@ class GraphConfig(BaseSettings):
36
36
  graph_database_provider: str = Field("kuzu", env="GRAPH_DATABASE_PROVIDER")
37
37
 
38
38
  graph_database_url: str = ""
39
+ graph_database_name: str = ""
39
40
  graph_database_username: str = ""
40
41
  graph_database_password: str = ""
41
42
  graph_database_port: int = 123
@@ -105,6 +106,7 @@ class GraphConfig(BaseSettings):
105
106
  return {
106
107
  "graph_database_provider": self.graph_database_provider,
107
108
  "graph_database_url": self.graph_database_url,
109
+ "graph_database_name": self.graph_database_name,
108
110
  "graph_database_username": self.graph_database_username,
109
111
  "graph_database_password": self.graph_database_password,
110
112
  "graph_database_port": self.graph_database_port,
@@ -33,6 +33,7 @@ def create_graph_engine(
33
33
  graph_database_provider,
34
34
  graph_file_path,
35
35
  graph_database_url="",
36
+ graph_database_name="",
36
37
  graph_database_username="",
37
38
  graph_database_password="",
38
39
  graph_database_port="",
@@ -48,13 +49,13 @@ def create_graph_engine(
48
49
  -----------
49
50
 
50
51
  - graph_database_provider: The type of graph database provider to use (e.g., neo4j,
51
- falkordb, kuzu, memgraph).
52
- - graph_database_url: The URL for the graph database instance. Required for neo4j,
53
- falkordb, and memgraph providers.
52
+ falkordb, kuzu).
53
+ - graph_database_url: The URL for the graph database instance. Required for neo4j
54
+ and falkordb providers.
54
55
  - graph_database_username: The username for authentication with the graph database.
55
- Required for neo4j and memgraph providers.
56
+ Required for neo4j provider.
56
57
  - graph_database_password: The password for authentication with the graph database.
57
- Required for neo4j and memgraph providers.
58
+ Required for neo4j provider.
58
59
  - graph_database_port: The port number for the graph database connection. Required
59
60
  for the falkordb provider.
60
61
  - graph_file_path: The filesystem path to the graph file. Required for the kuzu
@@ -86,6 +87,7 @@ def create_graph_engine(
86
87
  graph_database_url=graph_database_url,
87
88
  graph_database_username=graph_database_username or None,
88
89
  graph_database_password=graph_database_password or None,
90
+ graph_database_name=graph_database_name or None,
89
91
  )
90
92
 
91
93
  elif graph_database_provider == "falkordb":
@@ -122,17 +124,61 @@ def create_graph_engine(
122
124
  username=graph_database_username,
123
125
  password=graph_database_password,
124
126
  )
127
+ elif graph_database_provider == "neptune":
128
+ try:
129
+ from langchain_aws import NeptuneAnalyticsGraph
130
+ except ImportError:
131
+ raise ImportError(
132
+ "langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
133
+ )
125
134
 
126
- elif graph_database_provider == "memgraph":
127
135
  if not graph_database_url:
128
- raise EnvironmentError("Missing required Memgraph URL.")
136
+ raise EnvironmentError("Missing Neptune endpoint.")
129
137
 
130
- from .memgraph.memgraph_adapter import MemgraphAdapter
138
+ from .neptune_driver.adapter import NeptuneGraphDB, NEPTUNE_ENDPOINT_URL
131
139
 
132
- return MemgraphAdapter(
133
- graph_database_url=graph_database_url,
134
- graph_database_username=graph_database_username or None,
135
- graph_database_password=graph_database_password or None,
140
+ if not graph_database_url.startswith(NEPTUNE_ENDPOINT_URL):
141
+ raise ValueError(
142
+ f"Neptune endpoint must have the format {NEPTUNE_ENDPOINT_URL}<GRAPH_ID>"
143
+ )
144
+
145
+ graph_identifier = graph_database_url.replace(NEPTUNE_ENDPOINT_URL, "")
146
+
147
+ return NeptuneGraphDB(
148
+ graph_id=graph_identifier,
149
+ )
150
+
151
+ elif graph_database_provider == "neptune_analytics":
152
+ """
153
+ Creates a graph DB from config
154
+ We want to use a hybrid (graph & vector) DB and we should update this
155
+ to make a single instance of the hybrid configuration (with embedder)
156
+ instead of creating the hybrid object twice.
157
+ """
158
+ try:
159
+ from langchain_aws import NeptuneAnalyticsGraph
160
+ except ImportError:
161
+ raise ImportError(
162
+ "langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
163
+ )
164
+
165
+ if not graph_database_url:
166
+ raise EnvironmentError("Missing Neptune endpoint.")
167
+
168
+ from ..hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
169
+ NeptuneAnalyticsAdapter,
170
+ NEPTUNE_ANALYTICS_ENDPOINT_URL,
171
+ )
172
+
173
+ if not graph_database_url.startswith(NEPTUNE_ANALYTICS_ENDPOINT_URL):
174
+ raise ValueError(
175
+ f"Neptune endpoint must have the format '{NEPTUNE_ANALYTICS_ENDPOINT_URL}<GRAPH_ID>'"
176
+ )
177
+
178
+ graph_identifier = graph_database_url.replace(NEPTUNE_ANALYTICS_ENDPOINT_URL, "")
179
+
180
+ return NeptuneAnalyticsAdapter(
181
+ graph_id=graph_identifier,
136
182
  )
137
183
 
138
184
  from .networkx.adapter import NetworkXAdapter
@@ -2,7 +2,7 @@ import inspect
2
2
  from functools import wraps
3
3
  from abc import abstractmethod, ABC
4
4
  from datetime import datetime, timezone
5
- from typing import Optional, Dict, Any, List, Tuple, Type
5
+ from typing import Optional, Dict, Any, List, Tuple, Type, Union
6
6
  from uuid import NAMESPACE_OID, UUID, uuid5
7
7
  from cognee.shared.logging_utils import get_logger
8
8
  from cognee.infrastructure.engine import DataPoint
@@ -173,28 +173,31 @@ class GraphDBInterface(ABC):
173
173
  raise NotImplementedError
174
174
 
175
175
  @abstractmethod
176
- async def add_node(self, node_id: str, properties: Dict[str, Any]) -> None:
176
+ async def add_node(
177
+ self, node: Union[DataPoint, str], properties: Optional[Dict[str, Any]] = None
178
+ ) -> None:
177
179
  """
178
180
  Add a single node with specified properties to the graph.
179
181
 
180
182
  Parameters:
181
183
  -----------
182
184
 
183
- - node_id (str): Unique identifier for the node being added.
184
- - properties (Dict[str, Any]): A dictionary of properties associated with the node.
185
+ - node (Union[DataPoint, str]): Either a DataPoint object or a string identifier for the node being added.
186
+ - properties (Optional[Dict[str, Any]]): A dictionary of properties associated with the node.
187
+ Required when node is a string, ignored when node is a DataPoint.
185
188
  """
186
189
  raise NotImplementedError
187
190
 
188
191
  @abstractmethod
189
192
  @record_graph_changes
190
- async def add_nodes(self, nodes: List[Node]) -> None:
193
+ async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
191
194
  """
192
195
  Add multiple nodes to the graph in a single operation.
193
196
 
194
197
  Parameters:
195
198
  -----------
196
199
 
197
- - nodes (List[Node]): A list of Node objects to be added to the graph.
200
+ - nodes (Union[List[Node], List[DataPoint]]): A list of Node objects or DataPoint objects to be added to the graph.
198
201
  """
199
202
  raise NotImplementedError
200
203
 
@@ -271,14 +274,16 @@ class GraphDBInterface(ABC):
271
274
 
272
275
  @abstractmethod
273
276
  @record_graph_changes
274
- async def add_edges(self, edges: List[EdgeData]) -> None:
277
+ async def add_edges(
278
+ self, edges: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]
279
+ ) -> None:
275
280
  """
276
281
  Add multiple edges to the graph in a single operation.
277
282
 
278
283
  Parameters:
279
284
  -----------
280
285
 
281
- - edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
286
+ - edges (Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]): A list of EdgeData objects or tuples representing edges to be added.
282
287
  """
283
288
  raise NotImplementedError
284
289
 
@@ -377,7 +382,7 @@ class GraphDBInterface(ABC):
377
382
 
378
383
  @abstractmethod
379
384
  async def get_connections(
380
- self, node_id: str
385
+ self, node_id: Union[str, UUID]
381
386
  ) -> List[Tuple[NodeData, Dict[str, Any], NodeData]]:
382
387
  """
383
388
  Get all nodes connected to a specified node and their relationship details.
@@ -385,6 +390,6 @@ class GraphDBInterface(ABC):
385
390
  Parameters:
386
391
  -----------
387
392
 
388
- - node_id (str): Unique identifier of the node for which to retrieve connections.
393
+ - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
389
394
  """
390
395
  raise NotImplementedError
@@ -42,6 +42,7 @@ class KuzuAdapter(GraphDBInterface):
42
42
  self.connection: Optional[Connection] = None
43
43
  self.executor = ThreadPoolExecutor()
44
44
  self._initialize_connection()
45
+ self.KUZU_ASYNC_LOCK = asyncio.Lock()
45
46
 
46
47
  def _initialize_connection(self) -> None:
47
48
  """Initialize the Kuzu database connection and schema."""
@@ -72,11 +73,36 @@ class KuzuAdapter(GraphDBInterface):
72
73
 
73
74
  run_sync(file_storage.ensure_directory_exists())
74
75
 
75
- self.db = Database(
76
- self.db_path,
77
- buffer_pool_size=256 * 1024 * 1024, # 256MB buffer pool
78
- max_db_size=1024 * 1024 * 1024,
79
- )
76
+ try:
77
+ self.db = Database(
78
+ self.db_path,
79
+ buffer_pool_size=2048 * 1024 * 1024, # 2048MB buffer pool
80
+ max_db_size=4096 * 1024 * 1024,
81
+ )
82
+ except RuntimeError:
83
+ from .kuzu_migrate import read_kuzu_storage_version
84
+ import kuzu
85
+
86
+ kuzu_db_version = read_kuzu_storage_version(self.db_path)
87
+ if (
88
+ kuzu_db_version == "0.9.0" or kuzu_db_version == "0.8.2"
89
+ ) and kuzu_db_version != kuzu.__version__:
90
+ # Try to migrate kuzu database to latest version
91
+ from .kuzu_migrate import kuzu_migration
92
+
93
+ kuzu_migration(
94
+ new_db=self.db_path + "_new",
95
+ old_db=self.db_path,
96
+ new_version=kuzu.__version__,
97
+ old_version=kuzu_db_version,
98
+ overwrite=True,
99
+ )
100
+
101
+ self.db = Database(
102
+ self.db_path,
103
+ buffer_pool_size=2048 * 1024 * 1024, # 2048MB buffer pool
104
+ max_db_size=4096 * 1024 * 1024,
105
+ )
80
106
 
81
107
  self.db.init_database()
82
108
  self.connection = Connection(self.db)
@@ -111,6 +137,10 @@ class KuzuAdapter(GraphDBInterface):
111
137
  from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
112
138
 
113
139
  s3_file_storage = S3FileStorage("")
140
+
141
+ async with self.KUZU_ASYNC_LOCK:
142
+ self.connection.execute("CHECKPOINT;")
143
+
114
144
  s3_file_storage.s3.put(self.temp_graph_file, self.db_path, recursive=True)
115
145
 
116
146
  async def pull_from_s3(self) -> None:
@@ -120,7 +150,7 @@ class KuzuAdapter(GraphDBInterface):
120
150
  try:
121
151
  s3_file_storage.s3.get(self.db_path, self.temp_graph_file, recursive=True)
122
152
  except FileNotFoundError:
123
- pass
153
+ logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
124
154
 
125
155
  async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
126
156
  """
@@ -1438,11 +1468,8 @@ class KuzuAdapter(GraphDBInterface):
1438
1468
  It raises exceptions for failures occurring during deletion processes.
1439
1469
  """
1440
1470
  try:
1441
- # Use DETACH DELETE to remove both nodes and their relationships in one operation
1442
- await self.query("MATCH (n:Node) DETACH DELETE n")
1443
- logger.info("Cleared all data from graph while preserving structure")
1444
-
1445
1471
  if self.connection:
1472
+ self.connection.close()
1446
1473
  self.connection = None
1447
1474
  if self.db:
1448
1475
  self.db.close()
@@ -1502,7 +1529,7 @@ class KuzuAdapter(GraphDBInterface):
1502
1529
  logger.error(f"Error during database clearing: {e}")
1503
1530
  raise
1504
1531
 
1505
- async def get_document_subgraph(self, content_hash: str):
1532
+ async def get_document_subgraph(self, data_id: str):
1506
1533
  """
1507
1534
  Get all nodes that should be deleted when removing a document.
1508
1535
 
@@ -1513,7 +1540,7 @@ class KuzuAdapter(GraphDBInterface):
1513
1540
  Parameters:
1514
1541
  -----------
1515
1542
 
1516
- - content_hash (str): The identifier for the document to query against.
1543
+ - data_id (str): The identifier for the document to query against.
1517
1544
 
1518
1545
  Returns:
1519
1546
  --------
@@ -1523,7 +1550,7 @@ class KuzuAdapter(GraphDBInterface):
1523
1550
  """
1524
1551
  query = """
1525
1552
  MATCH (doc:Node)
1526
- WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument') AND doc.name = $content_hash
1553
+ WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument' OR doc.type = 'AudioDocument' OR doc.type = 'ImageDocument' OR doc.type = 'UnstructuredDocument') AND doc.id = $data_id
1527
1554
 
1528
1555
  OPTIONAL MATCH (doc)<-[e1:EDGE]-(chunk:Node)
1529
1556
  WHERE e1.relationship_name = 'is_part_of' AND chunk.type = 'DocumentChunk'
@@ -1534,7 +1561,7 @@ class KuzuAdapter(GraphDBInterface):
1534
1561
  MATCH (entity)<-[e3:EDGE]-(otherChunk:Node)-[e4:EDGE]->(otherDoc:Node)
1535
1562
  WHERE e3.relationship_name = 'contains'
1536
1563
  AND e4.relationship_name = 'is_part_of'
1537
- AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
1564
+ AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
1538
1565
  AND otherDoc.id <> doc.id
1539
1566
  }
1540
1567
 
@@ -1550,7 +1577,7 @@ class KuzuAdapter(GraphDBInterface):
1550
1577
  AND e9.relationship_name = 'is_part_of'
1551
1578
  AND otherEntity.type = 'Entity'
1552
1579
  AND otherChunk.type = 'DocumentChunk'
1553
- AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
1580
+ AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
1554
1581
  AND otherDoc.id <> doc.id
1555
1582
  }
1556
1583
 
@@ -1561,7 +1588,7 @@ class KuzuAdapter(GraphDBInterface):
1561
1588
  COLLECT(DISTINCT made_node) as made_from_nodes,
1562
1589
  COLLECT(DISTINCT type) as orphan_types
1563
1590
  """
1564
- result = await self.query(query, {"content_hash": f"text_{content_hash}"})
1591
+ result = await self.query(query, {"data_id": f"{data_id}"})
1565
1592
  if not result or not result[0]:
1566
1593
  return None
1567
1594