cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. cognee/api/client.py +41 -3
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +11 -1
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  116. cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
  117. cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
  118. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  119. cognee/modules/ingestion/data_types/TextData.py +8 -2
  120. cognee/modules/ingestion/save_data_to_file.py +1 -1
  121. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  122. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  123. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  124. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  125. cognee/modules/pipelines/models/__init__.py +1 -0
  126. cognee/modules/pipelines/operations/pipeline.py +10 -2
  127. cognee/modules/pipelines/operations/run_tasks.py +251 -19
  128. cognee/modules/retrieval/code_retriever.py +3 -5
  129. cognee/modules/retrieval/completion_retriever.py +1 -1
  130. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  131. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  132. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  133. cognee/modules/retrieval/natural_language_retriever.py +3 -5
  134. cognee/modules/retrieval/utils/completion.py +6 -9
  135. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  136. cognee/modules/search/methods/search.py +5 -1
  137. cognee/modules/search/operations/__init__.py +1 -0
  138. cognee/modules/search/operations/select_search_type.py +42 -0
  139. cognee/modules/search/types/SearchType.py +1 -0
  140. cognee/modules/settings/get_settings.py +0 -4
  141. cognee/modules/settings/save_vector_db_config.py +1 -1
  142. cognee/shared/data_models.py +3 -1
  143. cognee/shared/logging_utils.py +0 -5
  144. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  145. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  146. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  147. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  148. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  149. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  150. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  151. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  152. cognee/tasks/graph/infer_data_ontology.py +5 -6
  153. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  154. cognee/tasks/ingestion/ingest_data.py +91 -61
  155. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  156. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  157. cognee/tasks/storage/index_data_points.py +1 -1
  158. cognee/tasks/storage/index_graph_edges.py +4 -1
  159. cognee/tasks/summarization/summarize_code.py +2 -3
  160. cognee/tasks/summarization/summarize_text.py +3 -2
  161. cognee/tests/test_cognee_server_start.py +12 -7
  162. cognee/tests/test_deduplication.py +2 -2
  163. cognee/tests/test_deletion.py +58 -17
  164. cognee/tests/test_graph_visualization_permissions.py +161 -0
  165. cognee/tests/test_neptune_analytics_graph.py +309 -0
  166. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  167. cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
  168. cognee/tests/test_pgvector.py +5 -5
  169. cognee/tests/test_s3.py +1 -6
  170. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  171. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  172. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  173. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  174. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  175. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  176. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  177. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  178. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
  179. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
  180. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  181. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  182. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  183. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  184. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  185. cognee/modules/data/extraction/extract_categories.py +0 -14
  186. distributed/Dockerfile +0 -34
  187. distributed/app.py +0 -4
  188. distributed/entrypoint.py +0 -71
  189. distributed/entrypoint.sh +0 -5
  190. distributed/modal_image.py +0 -11
  191. distributed/queues.py +0 -5
  192. distributed/tasks/queued_add_data_points.py +0 -13
  193. distributed/tasks/queued_add_edges.py +0 -13
  194. distributed/tasks/queued_add_nodes.py +0 -13
  195. distributed/test.py +0 -28
  196. distributed/utils.py +0 -19
  197. distributed/workers/data_point_saving_worker.py +0 -93
  198. distributed/workers/graph_saving_worker.py +0 -104
  199. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  200. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  201. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  202. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  203. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  204. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  205. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  206. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  207. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  208. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  209. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  210. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  211. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  212. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
  213. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
  214. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
@@ -71,6 +71,12 @@ async def search(
  Best for: Advanced users, specific graph traversals, debugging.
  Returns: Raw graph query results.

+ **FEELING_LUCKY**:
+ Intelligently selects and runs the most appropriate search type.
+ Best for: General-purpose queries or when you're unsure which search type is best.
+ Returns: The results from the automatically selected search type.
+
+
  Args:
  query_text: Your question or search query in natural language.
  Examples:
@@ -119,6 +125,9 @@ async def search(
  **CODE**:
  [List of structured code information with context]

+ **FEELING_LUCKY**:
+ [List of results in the format of the search type that is automatically selected]
+



@@ -130,6 +139,7 @@ async def search(
  - **CHUNKS**: Fastest, pure vector similarity search without LLM
  - **SUMMARIES**: Fast, returns pre-computed summaries
  - **CODE**: Medium speed, specialized for code understanding
+ - **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
  - **top_k**: Start with 10, increase for comprehensive analysis (max 100)
  - **datasets**: Specify datasets to improve speed and relevance
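For orientation, a minimal sketch of how the new FEELING_LUCKY type might be invoked from the Python API. The SearchType import path is inferred from cognee/modules/search/types/SearchType.py in the file list above, and the exact keyword names of cognee.search are assumptions based on the docstring in this hunk, not confirmed by the diff.

    import asyncio

    import cognee
    from cognee.modules.search.types import SearchType  # FEELING_LUCKY is the member added in this release


    async def main():
        # Let cognee pick the most appropriate search type for the query
        # (keyword names mirror the docstring above and are assumptions).
        results = await cognee.search(
            query_text="What does the ingestion pipeline do?",
            query_type=SearchType.FEELING_LUCKY,
        )
        print(results)


    asyncio.run(main())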
 
@@ -30,8 +30,6 @@ class VectorDBConfigInputDTO(InDTO):
  provider: Union[
  Literal["lancedb"],
  Literal["chromadb"],
- Literal["qdrant"],
- Literal["weaviate"],
  Literal["pgvector"],
  ]
  url: str
cognee/base_config.py CHANGED
@@ -20,6 +20,7 @@ class BaseConfig(BaseSettings):
  def to_dict(self) -> dict:
  return {
  "data_root_directory": self.data_root_directory,
+ "system_root_directory": self.system_root_directory,
  "monitoring_tool": self.monitoring_tool,
  }
@@ -1,10 +1,10 @@
  from typing import Any, Dict, List
  from pydantic import BaseModel
- from cognee.infrastructure.llm.get_llm_client import get_llm_client
  from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
- from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
  from cognee.eval_framework.eval_config import EvalConfig

+ from cognee.infrastructure.llm import LLMGateway
+

  class CorrectnessEvaluation(BaseModel):
  """Response model containing evaluation score and explanation."""
@@ -19,17 +19,16 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
  config = EvalConfig()
  self.system_prompt_path = config.direct_llm_system_prompt
  self.eval_prompt_path = config.direct_llm_eval_prompt
- self.llm_client = get_llm_client()

  async def evaluate_correctness(
  self, question: str, answer: str, golden_answer: str
  ) -> Dict[str, Any]:
  args = {"question": question, "answer": answer, "golden_answer": golden_answer}

- user_prompt = render_prompt(self.eval_prompt_path, args)
- system_prompt = read_query_prompt(self.system_prompt_path)
+ user_prompt = LLMGateway.render_prompt(self.eval_prompt_path, args)
+ system_prompt = LLMGateway.read_query_prompt(self.system_prompt_path)

- evaluation = await self.llm_client.acreate_structured_output(
+ evaluation = await LLMGateway.acreate_structured_output(
  text_input=user_prompt,
  system_prompt=system_prompt,
  response_model=CorrectnessEvaluation,
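As a rough sketch of the new facade, the same three LLMGateway calls used above can be made from any other call site without constructing an LLM client first. The prompt file names and the response model below are placeholders, not part of the diff.

    from pydantic import BaseModel

    from cognee.infrastructure.llm import LLMGateway


    class Answer(BaseModel):
        text: str


    async def ask(question: str) -> Answer:
        # Placeholder prompt paths; real call sites read them from their own config.
        user_prompt = LLMGateway.render_prompt("my_user_prompt.txt", {"question": question})
        system_prompt = LLMGateway.read_query_prompt("my_system_prompt.txt")
        return await LLMGateway.acreate_structured_output(
            text_input=user_prompt,
            system_prompt=system_prompt,
            response_model=Answer,
        )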
@@ -36,6 +36,7 @@ class GraphConfig(BaseSettings):
  graph_database_provider: str = Field("kuzu", env="GRAPH_DATABASE_PROVIDER")

  graph_database_url: str = ""
+ graph_database_name: str = ""
  graph_database_username: str = ""
  graph_database_password: str = ""
  graph_database_port: int = 123
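Since GraphConfig is a pydantic BaseSettings class, the new graph_database_name field can presumably be supplied through the environment alongside the existing fields. The variable names below follow pydantic's default field-to-env mapping and are assumptions, not values confirmed by this diff.

    import os

    # Assumed environment variable names for the GraphConfig fields shown above.
    os.environ["GRAPH_DATABASE_PROVIDER"] = "neo4j"
    os.environ["GRAPH_DATABASE_URL"] = "bolt://localhost:7687"
    os.environ["GRAPH_DATABASE_NAME"] = "cognee"  # new in 0.2.3, passed through to Neo4j sessions (see below)
    os.environ["GRAPH_DATABASE_USERNAME"] = "neo4j"
    os.environ["GRAPH_DATABASE_PASSWORD"] = "password"

    import cognee  # noqa: E402  -- import after the environment is prepared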
@@ -105,6 +106,7 @@ class GraphConfig(BaseSettings):
  return {
  "graph_database_provider": self.graph_database_provider,
  "graph_database_url": self.graph_database_url,
+ "graph_database_name": self.graph_database_name,
  "graph_database_username": self.graph_database_username,
  "graph_database_password": self.graph_database_password,
  "graph_database_port": self.graph_database_port,
@@ -33,6 +33,7 @@ def create_graph_engine(
  graph_database_provider,
  graph_file_path,
  graph_database_url="",
+ graph_database_name="",
  graph_database_username="",
  graph_database_password="",
  graph_database_port="",
@@ -48,13 +49,13 @@ def create_graph_engine(
  -----------

  - graph_database_provider: The type of graph database provider to use (e.g., neo4j,
- falkordb, kuzu, memgraph).
- - graph_database_url: The URL for the graph database instance. Required for neo4j,
- falkordb, and memgraph providers.
+ falkordb, kuzu).
+ - graph_database_url: The URL for the graph database instance. Required for neo4j
+ and falkordb providers.
  - graph_database_username: The username for authentication with the graph database.
- Required for neo4j and memgraph providers.
+ Required for neo4j provider.
  - graph_database_password: The password for authentication with the graph database.
- Required for neo4j and memgraph providers.
+ Required for neo4j provider.
  - graph_database_port: The port number for the graph database connection. Required
  for the falkordb provider.
  - graph_file_path: The filesystem path to the graph file. Required for the kuzu
@@ -86,6 +87,7 @@ def create_graph_engine(
  graph_database_url=graph_database_url,
  graph_database_username=graph_database_username or None,
  graph_database_password=graph_database_password or None,
+ graph_database_name=graph_database_name or None,
  )

  elif graph_database_provider == "falkordb":
@@ -122,17 +124,61 @@
  username=graph_database_username,
  password=graph_database_password,
  )
+ elif graph_database_provider == "neptune":
+ try:
+ from langchain_aws import NeptuneAnalyticsGraph
+ except ImportError:
+ raise ImportError(
+ "langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
+ )

- elif graph_database_provider == "memgraph":
  if not graph_database_url:
- raise EnvironmentError("Missing required Memgraph URL.")
+ raise EnvironmentError("Missing Neptune endpoint.")

- from .memgraph.memgraph_adapter import MemgraphAdapter
+ from .neptune_driver.adapter import NeptuneGraphDB, NEPTUNE_ENDPOINT_URL

- return MemgraphAdapter(
- graph_database_url=graph_database_url,
- graph_database_username=graph_database_username or None,
- graph_database_password=graph_database_password or None,
+ if not graph_database_url.startswith(NEPTUNE_ENDPOINT_URL):
+ raise ValueError(
+ f"Neptune endpoint must have the format {NEPTUNE_ENDPOINT_URL}<GRAPH_ID>"
+ )
+
+ graph_identifier = graph_database_url.replace(NEPTUNE_ENDPOINT_URL, "")
+
+ return NeptuneGraphDB(
+ graph_id=graph_identifier,
+ )
+
+ elif graph_database_provider == "neptune_analytics":
+ """
+ Creates a graph DB from config
+ We want to use a hybrid (graph & vector) DB and we should update this
+ to make a single instance of the hybrid configuration (with embedder)
+ instead of creating the hybrid object twice.
+ """
+ try:
+ from langchain_aws import NeptuneAnalyticsGraph
+ except ImportError:
+ raise ImportError(
+ "langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
+ )
+
+ if not graph_database_url:
+ raise EnvironmentError("Missing Neptune endpoint.")
+
+ from ..hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
+ NeptuneAnalyticsAdapter,
+ NEPTUNE_ANALYTICS_ENDPOINT_URL,
+ )
+
+ if not graph_database_url.startswith(NEPTUNE_ANALYTICS_ENDPOINT_URL):
+ raise ValueError(
+ f"Neptune endpoint must have the format '{NEPTUNE_ANALYTICS_ENDPOINT_URL}<GRAPH_ID>'"
+ )
+
+ graph_identifier = graph_database_url.replace(NEPTUNE_ANALYTICS_ENDPOINT_URL, "")
+
+ return NeptuneAnalyticsAdapter(
+ graph_id=graph_identifier,
  )

  from .networkx.adapter import NetworkXAdapter
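A hedged sketch of selecting the new Neptune Analytics provider through configuration: the endpoint prefix is a placeholder because the real value of NEPTUNE_ANALYTICS_ENDPOINT_URL lives in the new adapter and is not shown in this diff, and the environment variable names are assumptions.

    import os

    # Placeholder -- the real prefix is NEPTUNE_ANALYTICS_ENDPOINT_URL in
    # cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py.
    NEPTUNE_ANALYTICS_PREFIX = "<NEPTUNE_ANALYTICS_ENDPOINT_URL>"

    # langchain_aws must be installed, otherwise the branch above raises ImportError.
    os.environ["GRAPH_DATABASE_PROVIDER"] = "neptune_analytics"
    # The URL must start with the adapter's prefix followed by the graph id,
    # or create_graph_engine raises ValueError (see the check above).
    os.environ["GRAPH_DATABASE_URL"] = NEPTUNE_ANALYTICS_PREFIX + "my-graph-id"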
@@ -2,7 +2,7 @@ import inspect
  from functools import wraps
  from abc import abstractmethod, ABC
  from datetime import datetime, timezone
- from typing import Optional, Dict, Any, List, Tuple, Type
+ from typing import Optional, Dict, Any, List, Tuple, Type, Union
  from uuid import NAMESPACE_OID, UUID, uuid5
  from cognee.shared.logging_utils import get_logger
  from cognee.infrastructure.engine import DataPoint
@@ -173,28 +173,31 @@ class GraphDBInterface(ABC):
  raise NotImplementedError

  @abstractmethod
- async def add_node(self, node_id: str, properties: Dict[str, Any]) -> None:
+ async def add_node(
+ self, node: Union[DataPoint, str], properties: Optional[Dict[str, Any]] = None
+ ) -> None:
  """
  Add a single node with specified properties to the graph.

  Parameters:
  -----------

- - node_id (str): Unique identifier for the node being added.
- - properties (Dict[str, Any]): A dictionary of properties associated with the node.
+ - node (Union[DataPoint, str]): Either a DataPoint object or a string identifier for the node being added.
+ - properties (Optional[Dict[str, Any]]): A dictionary of properties associated with the node.
+ Required when node is a string, ignored when node is a DataPoint.
  """
  raise NotImplementedError

  @abstractmethod
  @record_graph_changes
- async def add_nodes(self, nodes: List[Node]) -> None:
+ async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
  """
  Add multiple nodes to the graph in a single operation.

  Parameters:
  -----------

- - nodes (List[Node]): A list of Node objects to be added to the graph.
+ - nodes (Union[List[Node], List[DataPoint]]): A list of Node objects or DataPoint objects to be added to the graph.
  """
  raise NotImplementedError
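A minimal sketch of the two call shapes the widened signatures now accept; graph_db and point are placeholders for any GraphDBInterface implementation and a concrete DataPoint produced elsewhere in the pipeline.

    from cognee.infrastructure.engine import DataPoint


    async def add_node_examples(graph_db, point: DataPoint) -> None:
        # String identifier plus an explicit properties dict (properties required here).
        await graph_db.add_node("node-123", {"name": "Example", "type": "Entity"})

        # A DataPoint object; the properties argument is ignored in this case.
        await graph_db.add_node(point)

        # add_nodes now also accepts a list of DataPoints (or Node objects).
        await graph_db.add_nodes([point])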
 
@@ -271,14 +274,16 @@ class GraphDBInterface(ABC):

  @abstractmethod
  @record_graph_changes
- async def add_edges(self, edges: List[EdgeData]) -> None:
+ async def add_edges(
+ self, edges: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]
+ ) -> None:
  """
  Add multiple edges to the graph in a single operation.

  Parameters:
  -----------

- - edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
+ - edges (Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]): A list of EdgeData objects or tuples representing edges to be added.
  """
  raise NotImplementedError
@@ -377,7 +382,7 @@ class GraphDBInterface(ABC):

  @abstractmethod
  async def get_connections(
- self, node_id: str
+ self, node_id: Union[str, UUID]
  ) -> List[Tuple[NodeData, Dict[str, Any], NodeData]]:
  """
  Get all nodes connected to a specified node and their relationship details.
@@ -385,6 +390,6 @@

  Parameters:
  -----------
- - node_id (str): Unique identifier of the node for which to retrieve connections.
+ - node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
  """
  raise NotImplementedError
@@ -42,6 +42,7 @@ class KuzuAdapter(GraphDBInterface):
  self.connection: Optional[Connection] = None
  self.executor = ThreadPoolExecutor()
  self._initialize_connection()
+ self.KUZU_ASYNC_LOCK = asyncio.Lock()

  def _initialize_connection(self) -> None:
  """Initialize the Kuzu database connection and schema."""
@@ -136,6 +137,10 @@ class KuzuAdapter(GraphDBInterface):
  from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage

  s3_file_storage = S3FileStorage("")
+
+ async with self.KUZU_ASYNC_LOCK:
+ self.connection.execute("CHECKPOINT;")
+
  s3_file_storage.s3.put(self.temp_graph_file, self.db_path, recursive=True)

  async def pull_from_s3(self) -> None:
@@ -145,7 +150,7 @@
  try:
  s3_file_storage.s3.get(self.db_path, self.temp_graph_file, recursive=True)
  except FileNotFoundError:
- pass
+ logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")

  async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
  """
@@ -1524,7 +1529,7 @@ class KuzuAdapter(GraphDBInterface):
  logger.error(f"Error during database clearing: {e}")
  raise

- async def get_document_subgraph(self, content_hash: str):
+ async def get_document_subgraph(self, data_id: str):
  """
  Get all nodes that should be deleted when removing a document.
@@ -1535,7 +1540,7 @@
  Parameters:
  -----------

- - content_hash (str): The identifier for the document to query against.
+ - data_id (str): The identifier for the document to query against.

  Returns:
  --------
@@ -1545,7 +1550,7 @@
  """
  query = """
  MATCH (doc:Node)
- WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument') AND doc.name = $content_hash
+ WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument' OR doc.type = 'AudioDocument' OR doc.type = 'ImageDocument' OR doc.type = 'UnstructuredDocument') AND doc.id = $data_id

  OPTIONAL MATCH (doc)<-[e1:EDGE]-(chunk:Node)
  WHERE e1.relationship_name = 'is_part_of' AND chunk.type = 'DocumentChunk'
@@ -1556,7 +1561,7 @@
  MATCH (entity)<-[e3:EDGE]-(otherChunk:Node)-[e4:EDGE]->(otherDoc:Node)
  WHERE e3.relationship_name = 'contains'
  AND e4.relationship_name = 'is_part_of'
- AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
+ AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
  AND otherDoc.id <> doc.id
  }
@@ -1572,7 +1577,7 @@
  AND e9.relationship_name = 'is_part_of'
  AND otherEntity.type = 'Entity'
  AND otherChunk.type = 'DocumentChunk'
- AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
+ AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
  AND otherDoc.id <> doc.id
  }
@@ -1583,7 +1588,7 @@
  COLLECT(DISTINCT made_node) as made_from_nodes,
  COLLECT(DISTINCT type) as orphan_types
  """
- result = await self.query(query, {"content_hash": f"text_{content_hash}"})
+ result = await self.query(query, {"data_id": f"{data_id}"})
  if not result or not result[0]:
  return None
@@ -74,7 +74,7 @@ def read_kuzu_storage_version(kuzu_db_path: str) -> int:
  if kuzu_version_mapping.get(version_code):
  return kuzu_version_mapping[version_code]
  else:
- ValueError("Could not map version_code to proper Kuzu version.")
+ raise ValueError("Could not map version_code to proper Kuzu version.")


  def ensure_env(version: str, export_dir) -> str:
@@ -50,6 +50,7 @@ class Neo4jAdapter(GraphDBInterface):
  graph_database_url: str,
  graph_database_username: Optional[str] = None,
  graph_database_password: Optional[str] = None,
+ graph_database_name: Optional[str] = None,
  driver: Optional[Any] = None,
  ):
  # Only use auth if both username and password are provided
@@ -59,7 +60,7 @@ class Neo4jAdapter(GraphDBInterface):
  elif graph_database_username or graph_database_password:
  logger = get_logger(__name__)
  logger.warning("Neo4j credentials incomplete – falling back to anonymous connection.")
-
+ self.graph_database_name = graph_database_name
  self.driver = driver or AsyncGraphDatabase.driver(
  graph_database_url,
  auth=auth,
@@ -80,7 +81,7 @@ class Neo4jAdapter(GraphDBInterface):
  """
  Get a session for database operations.
  """
- async with self.driver.session() as session:
+ async with self.driver.session(database=self.graph_database_name) as session:
  yield session

  @deadlock_retry()
@@ -410,6 +411,38 @@ class Neo4jAdapter(GraphDBInterface):

  return await self.query(query, params)

+ def _flatten_edge_properties(self, properties: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Flatten edge properties to handle nested dictionaries like weights.
+
+ Neo4j doesn't support nested dictionaries as property values, so we need to
+ flatten the 'weights' dictionary into individual properties with prefixes.
+
+ Args:
+ properties: Dictionary of edge properties that may contain nested dicts
+
+ Returns:
+ Flattened properties dictionary suitable for Neo4j storage
+ """
+ flattened = {}
+
+ for key, value in properties.items():
+ if key == "weights" and isinstance(value, dict):
+ # Flatten weights dictionary into individual properties
+ for weight_name, weight_value in value.items():
+ flattened[f"weight_{weight_name}"] = weight_value
+ elif isinstance(value, dict):
+ # For other nested dictionaries, serialize as JSON string
+ flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
+ elif isinstance(value, list):
+ # For lists, serialize as JSON string
+ flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
+ else:
+ # Keep primitive types as-is
+ flattened[key] = value
+
+ return flattened
+
  @record_graph_changes
  @override_distributed(queued_add_edges)
  async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None:
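For clarity, a standalone restatement of the flattening rules above with a worked example; plain json.dumps stands in for the adapter's custom JSONEncoder.

    import json
    from typing import Any, Dict


    def flatten_edge_properties(properties: Dict[str, Any]) -> Dict[str, Any]:
        """Mirror of Neo4jAdapter._flatten_edge_properties, for illustration only."""
        flattened: Dict[str, Any] = {}
        for key, value in properties.items():
            if key == "weights" and isinstance(value, dict):
                # Each weight becomes its own primitive property.
                for weight_name, weight_value in value.items():
                    flattened[f"weight_{weight_name}"] = weight_value
            elif isinstance(value, (dict, list)):
                # Other nested structures are stored as JSON strings.
                flattened[f"{key}_json"] = json.dumps(value)
            else:
                flattened[key] = value
        return flattened


    print(flatten_edge_properties({
        "relationship_name": "contains",
        "weights": {"relevance": 0.9},
        "metadata": {"source": "chunk-1"},
    }))
    # {'relationship_name': 'contains', 'weight_relevance': 0.9, 'metadata_json': '{"source": "chunk-1"}'}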
@@ -448,11 +481,13 @@ class Neo4jAdapter(GraphDBInterface):
  "from_node": str(edge[0]),
  "to_node": str(edge[1]),
  "relationship_name": edge[2],
- "properties": {
- **(edge[3] if edge[3] else {}),
- "source_node_id": str(edge[0]),
- "target_node_id": str(edge[1]),
- },
+ "properties": self._flatten_edge_properties(
+ {
+ **(edge[3] if edge[3] else {}),
+ "source_node_id": str(edge[0]),
+ "target_node_id": str(edge[1]),
+ }
+ ),
  }
  for edge in edges
  ]
@@ -1217,7 +1252,7 @@ class Neo4jAdapter(GraphDBInterface):

  return mandatory_metrics | optional_metrics

- async def get_document_subgraph(self, content_hash: str):
+ async def get_document_subgraph(self, data_id: str):
  """
  Retrieve a subgraph related to a document identified by its content hash, including
  related entities and chunks.
@@ -1235,21 +1270,21 @@
  """
  query = """
  MATCH (doc)
- WHERE (doc:TextDocument OR doc:PdfDocument)
- AND doc.name = 'text_' + $content_hash
+ WHERE (doc:TextDocument OR doc:PdfDocument OR doc:UnstructuredDocument OR doc:AudioDocument or doc:ImageDocument)
+ AND doc.id = $data_id

  OPTIONAL MATCH (doc)<-[:is_part_of]-(chunk:DocumentChunk)
  OPTIONAL MATCH (chunk)-[:contains]->(entity:Entity)
  WHERE NOT EXISTS {
  MATCH (entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
- WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
+ WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
  AND otherDoc.id <> doc.id
  }
  OPTIONAL MATCH (chunk)<-[:made_from]-(made_node:TextSummary)
  OPTIONAL MATCH (entity)-[:is_a]->(type:EntityType)
  WHERE NOT EXISTS {
  MATCH (type)<-[:is_a]-(otherEntity:Entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
- WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
+ WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
  AND otherDoc.id <> doc.id
  }
@@ -1260,7 +1295,7 @@ class Neo4jAdapter(GraphDBInterface):
  collect(DISTINCT made_node) as made_from_nodes,
  collect(DISTINCT type) as orphan_types
  """
- result = await self.query(query, {"content_hash": content_hash})
+ result = await self.query(query, {"data_id": data_id})
  return result[0] if result else None

  async def get_degree_one_nodes(self, node_type: str):
@@ -0,0 +1,15 @@
+ """Neptune Analytics Driver Module
+
+ This module provides the Neptune Analytics adapter and utilities for interacting
+ with Amazon Neptune Analytics graph databases.
+ """
+
+ from .adapter import NeptuneGraphDB
+ from . import neptune_utils
+ from . import exceptions
+
+ __all__ = [
+ "NeptuneGraphDB",
+ "neptune_utils",
+ "exceptions",
+ ]