cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes and reflects the changes between the two versions as published.
Files changed (214)
  1. cognee/api/client.py +41 -3
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +11 -1
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  116. cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
  117. cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
  118. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  119. cognee/modules/ingestion/data_types/TextData.py +8 -2
  120. cognee/modules/ingestion/save_data_to_file.py +1 -1
  121. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  122. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  123. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  124. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  125. cognee/modules/pipelines/models/__init__.py +1 -0
  126. cognee/modules/pipelines/operations/pipeline.py +10 -2
  127. cognee/modules/pipelines/operations/run_tasks.py +251 -19
  128. cognee/modules/retrieval/code_retriever.py +3 -5
  129. cognee/modules/retrieval/completion_retriever.py +1 -1
  130. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  131. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  132. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  133. cognee/modules/retrieval/natural_language_retriever.py +3 -5
  134. cognee/modules/retrieval/utils/completion.py +6 -9
  135. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  136. cognee/modules/search/methods/search.py +5 -1
  137. cognee/modules/search/operations/__init__.py +1 -0
  138. cognee/modules/search/operations/select_search_type.py +42 -0
  139. cognee/modules/search/types/SearchType.py +1 -0
  140. cognee/modules/settings/get_settings.py +0 -4
  141. cognee/modules/settings/save_vector_db_config.py +1 -1
  142. cognee/shared/data_models.py +3 -1
  143. cognee/shared/logging_utils.py +0 -5
  144. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  145. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  146. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  147. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  148. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  149. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  150. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  151. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  152. cognee/tasks/graph/infer_data_ontology.py +5 -6
  153. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  154. cognee/tasks/ingestion/ingest_data.py +91 -61
  155. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  156. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  157. cognee/tasks/storage/index_data_points.py +1 -1
  158. cognee/tasks/storage/index_graph_edges.py +4 -1
  159. cognee/tasks/summarization/summarize_code.py +2 -3
  160. cognee/tasks/summarization/summarize_text.py +3 -2
  161. cognee/tests/test_cognee_server_start.py +12 -7
  162. cognee/tests/test_deduplication.py +2 -2
  163. cognee/tests/test_deletion.py +58 -17
  164. cognee/tests/test_graph_visualization_permissions.py +161 -0
  165. cognee/tests/test_neptune_analytics_graph.py +309 -0
  166. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  167. cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
  168. cognee/tests/test_pgvector.py +5 -5
  169. cognee/tests/test_s3.py +1 -6
  170. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  171. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  172. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  173. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  174. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  175. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  176. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  177. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  178. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
  179. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
  180. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  181. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  182. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  183. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  184. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  185. cognee/modules/data/extraction/extract_categories.py +0 -14
  186. distributed/Dockerfile +0 -34
  187. distributed/app.py +0 -4
  188. distributed/entrypoint.py +0 -71
  189. distributed/entrypoint.sh +0 -5
  190. distributed/modal_image.py +0 -11
  191. distributed/queues.py +0 -5
  192. distributed/tasks/queued_add_data_points.py +0 -13
  193. distributed/tasks/queued_add_edges.py +0 -13
  194. distributed/tasks/queued_add_nodes.py +0 -13
  195. distributed/test.py +0 -28
  196. distributed/utils.py +0 -19
  197. distributed/workers/data_point_saving_worker.py +0 -93
  198. distributed/workers/graph_saving_worker.py +0 -104
  199. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  200. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  201. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  202. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  203. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  204. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  205. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  206. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  207. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  208. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  209. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  210. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  211. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  212. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
  213. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
  214. {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
cognee/tests/test_cognee_server_start.py
@@ -6,6 +6,7 @@ import signal
 import requests
 from pathlib import Path
 import sys
+import uuid


 class TestCogneeServerStart(unittest.TestCase):
@@ -47,7 +48,7 @@ class TestCogneeServerStart(unittest.TestCase):
         """Test that the server is running and can accept connections."""
         # Test health endpoint
         health_response = requests.get("http://localhost:8000/health", timeout=15)
-        self.assertEqual(health_response.status_code, 200)
+        self.assertIn(health_response.status_code, [200, 503])

         # Test root endpoint
         root_response = requests.get("http://localhost:8000/", timeout=15)
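
The health check above now tolerates 503 because 0.2.3 introduces a dedicated health module (cognee/api/health.py, +332 lines) that can report degraded dependencies instead of always returning 200. A minimal sketch of that pattern, assuming a FastAPI-style handler; the code below is illustrative, not the actual contents of health.py:

# Illustrative only: a health endpoint that returns 200 when all dependency
# checks pass and 503 when any fail, which is why the test accepts both codes.
from fastapi import FastAPI
from fastapi.responses import JSONResponse

app = FastAPI()

@app.get("/health")
async def health() -> JSONResponse:
    checks = {"relational_db": True, "vector_db": True, "graph_db": False}  # example probe results
    healthy = all(checks.values())
    return JSONResponse(
        status_code=200 if healthy else 503,
        content={"status": "healthy" if healthy else "degraded", "checks": checks},
    )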
@@ -74,7 +75,8 @@ class TestCogneeServerStart(unittest.TestCase):
         file_path = Path(os.path.join(Path(__file__).parent, "test_data/example.png"))
         headers = {"Authorization": auth_var}

-        form_data = {"datasetName": "test"}
+        dataset_name = f"test_{uuid.uuid4().hex[:8]}"
+        form_data = {"datasetName": dataset_name}

         file = {
             "data": (
@@ -83,8 +85,11 @@ class TestCogneeServerStart(unittest.TestCase):
             )
         }

+        payload = {"datasets": [dataset_name]}
+
         add_response = requests.post(url, headers=headers, data=form_data, files=file, timeout=50)
-        add_response.raise_for_status()  # raise if HTTP 4xx/5xx
+        if add_response.status_code not in [200, 201, 409]:
+            add_response.raise_for_status()

         # Cognify request
         url = "http://127.0.0.1:8000/api/v1/cognify"
@@ -93,10 +98,9 @@ class TestCogneeServerStart(unittest.TestCase):
             "Content-Type": "application/json",
         }

-        payload = {"datasets": ["test"]}
-
         cognify_response = requests.post(url, headers=headers, json=payload, timeout=150)
-        cognify_response.raise_for_status()  # raises on HTTP 4xx/5xx
+        if cognify_response.status_code not in [200, 201, 409]:
+            cognify_response.raise_for_status()

         # TODO: Add test to verify cognify pipeline is complete before testing search

@@ -111,7 +115,8 @@ class TestCogneeServerStart(unittest.TestCase):
         payload = {"searchType": "GRAPH_COMPLETION", "query": "What's in the document?"}

         search_response = requests.post(url, headers=headers, json=payload, timeout=50)
-        search_response.raise_for_status()  # raises on HTTP 4xx/5xx
+        if search_response.status_code not in [200, 201, 409]:
+            search_response.raise_for_status()


 if __name__ == "__main__":
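
The same tolerate-200/201/409 pattern now appears after the add, cognify, and search requests. A small helper could express it once; this is a sketch, not code from the diff:

# Hypothetical helper (not in the diff) capturing the repeated check above.
import requests

def raise_unless_expected(response: requests.Response, expected=(200, 201, 409)) -> None:
    """Raise for HTTP 4xx/5xx unless the status code is explicitly tolerated."""
    if response.status_code not in expected:
        response.raise_for_status()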
cognee/tests/test_deduplication.py
@@ -26,8 +26,8 @@ async def test_deduplication():
     explanation_file_path2 = os.path.join(
         pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
     )
-    await cognee.add([explanation_file_path], dataset_name)
-    await cognee.add([explanation_file_path2], dataset_name2)
+    await cognee.add([explanation_file_path], dataset_name, incremental_loading=False)
+    await cognee.add([explanation_file_path2], dataset_name2, incremental_loading=False)

     result = await relational_engine.get_all_data_from_table("data")
     assert len(result) == 1, "More than one data entity was found."
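
The new incremental_loading flag comes from the updated add API (cognee/api/v1/add/add.py, +5 -2). Passing False sends both copies through the full ingestion path so deduplication by content hash can be asserted. A hedged usage sketch based only on the calls shown above; the file path and dataset name are placeholders:

# Usage sketch inferred from the test above; defaults for incremental_loading
# live in add.py, which this diff summarizes but does not show in full.
import asyncio
import cognee

async def ingest():
    await cognee.add(["./test_data/example.txt"], "my_dataset", incremental_loading=False)

asyncio.run(ingest())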
cognee/tests/test_deletion.py
@@ -12,7 +12,21 @@ async def main():
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)

-    text_1 = """
+    pdf_document = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/artificial-intelligence.pdf"
+    )
+
+    txt_document = os.path.join(
+        pathlib.Path(__file__).parent, "test_data/Natural_language_processing_copy.txt"
+    )
+
+    audio_document = os.path.join(pathlib.Path(__file__).parent, "test_data/text_to_speech.mp3")
+
+    image_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.png")
+
+    unstructured_document = os.path.join(pathlib.Path(__file__).parent, "test_data/example.pptx")
+
+    text_document_as_literal = """
     1. Audi
     Audi is known for its modern designs and advanced technology. Founded in the early 1900s, the brand has earned a reputation for precision engineering and innovation. With features like the Quattro all-wheel-drive system, Audi offers a range of vehicles from stylish sedans to high-performance sports cars.

@@ -31,27 +45,54 @@ async def main():
     Each of these car manufacturer contributes to Germany's reputation as a leader in the global automotive industry, showcasing a blend of innovation, performance, and design excellence.
     """

-    text_2 = """
-    1. Apple
-    Apple is renowned for its innovative consumer electronics and software. Its product lineup includes the iPhone, iPad, Mac computers, and wearables like the Apple Watch. Known for its emphasis on sleek design and user-friendly interfaces, Apple has built a loyal customer base and created a seamless ecosystem that integrates hardware, software, and services.
+    ################### HARD DELETE

-    2. Google
-    Founded in 1998, Google started as a search engine and quickly became the go-to resource for finding information online. Over the years, the company has diversified its offerings to include digital advertising, cloud computing, mobile operating systems (Android), and various web services like Gmail and Google Maps. Google's innovations have played a major role in shaping the internet landscape.
+    # Add documents and get dataset information
+    add_result = await cognee.add(
+        [
+            pdf_document,
+            txt_document,
+            text_document_as_literal,
+            unstructured_document,
+            audio_document,
+            image_document,
+        ]
+    )
+    dataset_id = add_result.dataset_id

-    3. Microsoft
-    Microsoft Corporation has been a dominant force in software for decades. Its Windows operating system and Microsoft Office suite are staples in both business and personal computing. In recent years, Microsoft has expanded into cloud computing with Azure, gaming with the Xbox platform, and even hardware through products like the Surface line. This evolution has helped the company maintain its relevance in a rapidly changing tech world.
+    await cognee.cognify()

-    4. Amazon
-    What began as an online bookstore has grown into one of the largest e-commerce platforms globally. Amazon is known for its vast online marketplace, but its influence extends far beyond retail. With Amazon Web Services (AWS), the company has become a leader in cloud computing, offering robust solutions that power websites, applications, and businesses around the world. Amazon's constant drive for innovation continues to reshape both retail and technology sectors.
+    from cognee.infrastructure.databases.graph import get_graph_engine

-    5. Meta
-    Meta, originally known as Facebook, revolutionized social media by connecting billions of people worldwide. Beyond its core social networking service, Meta is investing in the next generation of digital experiences through virtual and augmented reality technologies, with projects like Oculus. The company's efforts signal a commitment to evolving digital interaction and building the metaverse—a shared virtual space where users can connect and collaborate.
+    graph_engine = await get_graph_engine()
+    nodes, edges = await graph_engine.get_graph_data()
+    assert len(nodes) > 10 and len(edges) > 10, "Graph database is not loaded."

-    Each of these companies has significantly impacted the technology landscape, driving innovation and transforming everyday life through their groundbreaking products and services.
-    """
+    # Get the data IDs from the dataset
+    dataset_data = await get_dataset_data(dataset_id)
+    assert len(dataset_data) > 0, "Dataset should contain data"
+
+    # Delete each document using its ID
+    for data_item in dataset_data:
+        await cognee.delete(data_item.id, dataset_id, mode="hard")
+
+    nodes, edges = await graph_engine.get_graph_data()
+
+    assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with hard delete."
+
+    ################### SOFT DELETE

     # Add documents and get dataset information
-    add_result = await cognee.add([text_1, text_2])
+    add_result = await cognee.add(
+        [
+            pdf_document,
+            txt_document,
+            text_document_as_literal,
+            unstructured_document,
+            audio_document,
+            image_document,
+        ]
+    )
     dataset_id = add_result.dataset_id

     await cognee.cognify()
@@ -68,11 +109,11 @@ async def main():

     # Delete each document using its ID
     for data_item in dataset_data:
-        await cognee.delete(data_item.id, dataset_id, mode="hard")
+        await cognee.delete(data_item.id, dataset_id, mode="soft")

     nodes, edges = await graph_engine.get_graph_data()

-    assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted."
+    assert len(nodes) == 0 and len(edges) == 0, "Document is not deleted with soft delete."


 if __name__ == "__main__":
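
The deletion test now exercises both modes of cognee.delete (cognee/api/v1/delete/delete.py, +16 -12) and asserts that each leaves the graph empty. A condensed sketch of the call pattern, using only the signature visible above:

# Condensed from the test above: delete every data item in a dataset in a given mode.
import cognee

async def delete_dataset_items(dataset_data, dataset_id, mode: str = "hard"):
    # mode is "hard" or "soft", matching the two test sections above.
    for data_item in dataset_data:
        await cognee.delete(data_item.id, dataset_id, mode=mode)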
cognee/tests/test_graph_visualization_permissions.py
@@ -0,0 +1,161 @@
+import asyncio
+import os
+import pathlib
+
+import pytest
+import pytest_asyncio
+from httpx import ASGITransport, AsyncClient
+
+import cognee
+from cognee.api.client import app
+from cognee.modules.users.methods import create_user, get_default_user
+from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
+
+# Use pytest-asyncio to handle all async tests
+pytestmark = pytest.mark.asyncio
+
+
+@pytest.fixture(scope="module")
+def event_loop():
+    """Create an instance of the default event loop for our test module."""
+    policy = asyncio.get_event_loop_policy()
+    loop = policy.new_event_loop()
+    yield loop
+    loop.close()
+
+
+@pytest_asyncio.fixture(scope="module")
+async def client():
+    """Create an async HTTP client for testing."""
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as client:
+        yield client
+
+
+@pytest_asyncio.fixture(scope="module")
+async def setup_environment():
+    """
+    Set up a clean environment for the test, creating necessary users and datasets.
+    This fixture runs once before all tests and cleans up afterwards.
+    """
+    # 1. Enable permissions feature
+    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
+
+    # 2. Set up an independent test directory
+    base_dir = pathlib.Path(__file__).parent
+    cognee.config.data_root_directory(str(base_dir / ".data_storage/test_graph_viz"))
+    cognee.config.system_root_directory(str(base_dir / ".cognee_system/test_graph_viz"))
+
+    # 3. Clean up old data
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # 4. Add document for default user
+    explanation_file_path = os.path.join(base_dir, "test_data/Natural_language_processing.txt")
+    await cognee.add([explanation_file_path], dataset_name="NLP")
+    default_user = await get_default_user()
+    nlp_cognify_result = await cognee.cognify(["NLP"], user=default_user)
+
+    def extract_dataset_id_from_cognify(cognify_result):
+        """Extract the dataset_id from the cognify output dictionary."""
+        for dataset_id, pipeline_result in cognify_result.items():
+            return dataset_id
+        return None
+
+    dataset_id = extract_dataset_id_from_cognify(nlp_cognify_result)
+
+    yield dataset_id
+
+    # 5. Clean up data after tests are finished
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+
+async def get_authentication_headers(client: AsyncClient, email: str, password: str) -> dict:
+    """Authenticates and returns the Authorization header."""
+    login_data = {"username": email, "password": password}
+    response = await client.post("/api/v1/auth/login", data=login_data, timeout=15)
+
+    assert response.status_code == 200, "Failed to log in and get token"
+
+    token_data = response.json()
+    access_token = token_data["access_token"]
+
+    return {"Authorization": f"Bearer {access_token}"}
+
+
+async def test_owner_can_access_graph(client: AsyncClient, setup_environment: int):
+    """
+    Test Case 1: The dataset owner should be able to access the graph data successfully.
+    """
+    dataset_id = setup_environment
+    default_user_email = "default_user@example.com"
+    default_user_password = "default_password"
+
+    response = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, default_user_email, default_user_password),
+    )
+    assert response.status_code == 200, (
+        f"Owner failed to get the knowledge graph visualization. Response: {response.json()}"
+    )
+    data = response.json()
+    assert len(data) > 1, "The graph data is not valid."
+
+    print("✅ Owner can access the graph visualization successfully.")
+
+
+async def test_granting_permission_enables_access(client: AsyncClient, setup_environment: int):
+    """
+    Test Case 2: A user without any permissions should be denied access (403 Forbidden).
+    After being granted permission, the user should be able to access the graph data.
+    """
+    dataset_id = setup_environment
+    # Create a user without any permissions on the dataset
+    test_user_email = "test_user@example.com"
+    test_user_password = "test_password"
+    test_user = await create_user(test_user_email, test_user_password)
+
+    # Test access to the graph visualization for the test user without any permissions
+    response = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, test_user_email, test_user_password),
+    )
+    assert response.status_code == 403, (
+        "Access to graph visualization should be denied without READ permission."
+    )
+    assert (
+        response.json()["detail"]
+        == "Request owner does not have necessary permission: [read] for all datasets requested. [PermissionDeniedError]"
+    )
+    print("✅ Access to graph visualization should be denied without READ permission.")
+
+    # Grant permission to the test user
+    default_user = await get_default_user()
+    await authorized_give_permission_on_datasets(
+        test_user.id, [dataset_id], "read", default_user.id
+    )
+
+    # Test access to the graph visualization for the test user
+    response_for_test_user = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, test_user_email, test_user_password),
+    )
+    assert response_for_test_user.status_code == 200, (
+        "Access to graph visualization should succeed for a user who has been granted read permission"
+    )
+    print(
+        "✅ Access to graph visualization should succeed for a user who has been granted read permission"
+    )
+
+    # Test that the graph data is the same for the test user and the default user
+    default_user_email = "default_user@example.com"
+    default_user_password = "default_password"
+    response_for_default_user = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, default_user_email, default_user_password),
+    )
+    assert response_for_test_user.json() == response_for_default_user.json(), (
+        "The graph data for the test user and the default user is not the same."
+    )
+    print("✅ The graph data for the test user and the default user is the same.")
cognee/tests/test_neptune_analytics_graph.py
@@ -0,0 +1,309 @@
+import os
+from dotenv import load_dotenv
+import asyncio
+from cognee.infrastructure.databases.graph.neptune_driver import NeptuneGraphDB
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.engine.models import Entity, EntityType
+from cognee.modules.data.processing.document_types import TextDocument
+
+# Set up Amazon credentials in a .env file and read the values from environment variables
+load_dotenv()
+graph_id = os.getenv("GRAPH_ID", "")
+
+na_adapter = NeptuneGraphDB(graph_id)
+
+
+def setup():
+    # Define nodes data before the main function
+    # These nodes were generated with OpenAI from the following prompt:
+
+    # Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads
+    # that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It
+    # complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load
+    # the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's
+    # stored in Amazon S3.
+
+    document = TextDocument(
+        name="text_test.txt",
+        raw_data_location="git/cognee/examples/database_examples/data_storage/data/text_test.txt",
+        external_metadata="{}",
+        mime_type="text/plain",
+    )
+    document_chunk = DocumentChunk(
+        text="Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads \n that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It \n complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load \n the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's \n stored in Amazon S3.\n ",
+        chunk_size=187,
+        chunk_index=0,
+        cut_type="paragraph_end",
+        is_part_of=document,
+    )
+
+    graph_database = EntityType(name="graph database", description="graph database")
+    neptune_analytics_entity = Entity(
+        name="neptune analytics",
+        description="A memory-optimized graph database engine for analytics that processes large amounts of graph data quickly.",
+    )
+    neptune_database_entity = Entity(
+        name="amazon neptune database",
+        description="A popular managed graph database that complements Neptune Analytics.",
+    )
+
+    storage = EntityType(name="storage", description="storage")
+    storage_entity = Entity(
+        name="amazon s3",
+        description="A storage service provided by Amazon Web Services that allows storing graph data.",
+    )
+
+    nodes_data = [
+        document,
+        document_chunk,
+        graph_database,
+        neptune_analytics_entity,
+        neptune_database_entity,
+        storage,
+        storage_entity,
+    ]
+
+    edges_data = [
+        (
+            str(document_chunk.id),
+            str(storage_entity.id),
+            "contains",
+        ),
+        (
+            str(storage_entity.id),
+            str(storage.id),
+            "is_a",
+        ),
+        (
+            str(document_chunk.id),
+            str(neptune_database_entity.id),
+            "contains",
+        ),
+        (
+            str(neptune_database_entity.id),
+            str(graph_database.id),
+            "is_a",
+        ),
+        (
+            str(document_chunk.id),
+            str(document.id),
+            "is_part_of",
+        ),
+        (
+            str(document_chunk.id),
+            str(neptune_analytics_entity.id),
+            "contains",
+        ),
+        (
+            str(neptune_analytics_entity.id),
+            str(graph_database.id),
+            "is_a",
+        ),
+    ]
+
+    return nodes_data, edges_data
+
+
+async def pipeline_method():
+    """
+    Example script exercising Neptune Analytics with small sample data.
+
+    This example demonstrates how to add nodes to Neptune Analytics.
+    """
+
+    print("------TRUNCATE GRAPH-------")
+    await na_adapter.delete_graph()
+
+    print("------SETUP DATA-------")
+    nodes, edges = setup()
+
+    print("------ADD NODES-------")
+    await na_adapter.add_node(nodes[0])
+    await na_adapter.add_nodes(nodes[1:])
+
+    print("------GET NODES FROM DATA-------")
+    node_ids = [str(node.id) for node in nodes]
+    db_nodes = await na_adapter.get_nodes(node_ids)
+
+    print("------RESULTS:-------")
+    for n in db_nodes:
+        print(n)
+
+    print("------ADD EDGES-------")
+    await na_adapter.add_edge(edges[0][0], edges[0][1], edges[0][2])
+    await na_adapter.add_edges(edges[1:])
+
+    print("------HAS EDGES-------")
+    has_edge = await na_adapter.has_edge(
+        edges[0][0],
+        edges[0][1],
+        edges[0][2],
+    )
+    if has_edge:
+        print(f"found edge ({edges[0][0]})-[{edges[0][2]}]->({edges[0][1]})")
+
+    has_edges = await na_adapter.has_edges(edges)
+    if len(has_edges) > 0:
+        print(f"found edges: {len(has_edges)} (expected: {len(edges)})")
+    else:
+        print(f"no edges found (expected: {len(edges)})")
+
+    print("------GET GRAPH-------")
+    all_nodes, all_edges = await na_adapter.get_graph_data()
+    print(f"found {len(all_nodes)} nodes and found {len(all_edges)} edges")
+
+    print("------NEIGHBORING NODES-------")
+    center_node = nodes[2]
+    neighbors = await na_adapter.get_neighbors(str(center_node.id))
+    print(f'found {len(neighbors)} neighbors for node "{center_node.name}"')
+    for neighbor in neighbors:
+        print(neighbor)
+
+    print("------NEIGHBORING EDGES-------")
+    center_node = nodes[2]
+    neighbouring_edges = await na_adapter.get_edges(str(center_node.id))
+    print(f'found {len(neighbouring_edges)} edges neighbouring node "{center_node.name}"')
+    for edge in neighbouring_edges:
+        print(edge)
+
+    print("------GET CONNECTIONS (SOURCE NODE)-------")
+    document_chunk_node = nodes[0]
+    connections = await na_adapter.get_connections(str(document_chunk_node.id))
+    print(f'found {len(connections)} connections for node "{document_chunk_node.type}"')
+    for connection in connections:
+        src, relationship, tgt = connection
+        src = src.get("name", src.get("type", "unknown"))
+        relationship = relationship["relationship_name"]
+        tgt = tgt.get("name", tgt.get("type", "unknown"))
+        print(f'"{src}"-[{relationship}]->"{tgt}"')
+
+    print("------GET CONNECTIONS (TARGET NODE)-------")
+    connections = await na_adapter.get_connections(str(center_node.id))
+    print(f'found {len(connections)} connections for node "{center_node.name}"')
+    for connection in connections:
+        src, relationship, tgt = connection
+        src = src.get("name", src.get("type", "unknown"))
+        relationship = relationship["relationship_name"]
+        tgt = tgt.get("name", tgt.get("type", "unknown"))
+        print(f'"{src}"-[{relationship}]->"{tgt}"')
+
+    print("------SUBGRAPH-------")
+    node_names = ["neptune analytics", "amazon neptune database"]
+    subgraph_nodes, subgraph_edges = await na_adapter.get_nodeset_subgraph(Entity, node_names)
+    print(
+        f"found {len(subgraph_nodes)} nodes and {len(subgraph_edges)} edges in the subgraph around {node_names}"
+    )
+    for subgraph_node in subgraph_nodes:
+        print(subgraph_node)
+    for subgraph_edge in subgraph_edges:
+        print(subgraph_edge)
+
+    print("------STAT-------")
+    stat = await na_adapter.get_graph_metrics(include_optional=True)
+    assert type(stat) is dict
+    assert stat["num_nodes"] == 7
+    assert stat["num_edges"] == 7
+    assert stat["mean_degree"] == 2.0
+    assert round(stat["edge_density"], 3) == 0.167
+    assert stat["num_connected_components"] == [7]
+    assert stat["sizes_of_connected_components"] == 1
+    assert stat["num_selfloops"] == 0
+    # Unsupported optional metrics
+    assert stat["diameter"] == -1
+    assert stat["avg_shortest_path_length"] == -1
+    assert stat["avg_clustering"] == -1
+
+    print("------DELETE-------")
+    # delete all nodes and edges:
+    await na_adapter.delete_graph()
+
+    # delete all nodes by node id
+    # node_ids = [str(node.id) for node in nodes]
+    # await na_adapter.delete_nodes(node_ids)
+
+    has_edges = await na_adapter.has_edges(edges)
+    if len(has_edges) == 0:
+        print("Delete successful")
+    else:
+        print("Delete failed")
+
+
+async def misc_methods():
+    print("------TRUNCATE GRAPH-------")
+    await na_adapter.delete_graph()
+
+    print("------SETUP TEST ENV-------")
+    nodes, edges = setup()
+    await na_adapter.add_nodes(nodes)
+    await na_adapter.add_edges(edges)
+
+    print("------GET GRAPH-------")
+    all_nodes, all_edges = await na_adapter.get_graph_data()
+    print(f"found {len(all_nodes)} nodes and found {len(all_edges)} edges")
+
+    print("------GET DISCONNECTED-------")
+    nodes_disconnected = await na_adapter.get_disconnected_nodes()
+    print(nodes_disconnected)
+    assert len(nodes_disconnected) == 0
+
+    print("------Get Labels (Node)-------")
+    node_labels = await na_adapter.get_node_labels_string()
+    print(node_labels)
+
+    print("------Get Labels (Edge)-------")
+    edge_labels = await na_adapter.get_relationship_labels_string()
+    print(edge_labels)
+
+    print("------Get Filtered Graph-------")
+    filtered_nodes, filtered_edges = await na_adapter.get_filtered_graph_data(
+        [{"name": ["text_test.txt"]}]
+    )
+    print(filtered_nodes, filtered_edges)
+
+    print("------Get Degree one nodes-------")
+    degree_one_nodes = await na_adapter.get_degree_one_nodes("EntityType")
+    print(degree_one_nodes)
+
+    print("------Get Doc sub-graph-------")
+    doc_sub_graph = await na_adapter.get_document_subgraph("test.txt")
+    print(doc_sub_graph)
+
+    print("------Fetch and Remove connections (Predecessors)-------")
+    # Fetch test edge
+    (src_id, dest_id, relationship) = edges[0]
+    nodes_predecessors = await na_adapter.get_predecessors(node_id=dest_id, edge_label=relationship)
+    assert len(nodes_predecessors) > 0
+
+    await na_adapter.remove_connection_to_predecessors_of(
+        node_ids=[src_id], edge_label=relationship
+    )
+    nodes_predecessors_after = await na_adapter.get_predecessors(
+        node_id=dest_id, edge_label=relationship
+    )
+    # Returns empty after the relationship has been deleted.
+    assert len(nodes_predecessors_after) == 0
+
+    print("------Fetch and Remove connections (Successors)-------")
+    _, edges_suc = await na_adapter.get_graph_data()
+    (src_id, dest_id, relationship, _) = edges_suc[0]
+
+    nodes_successors = await na_adapter.get_successors(node_id=src_id, edge_label=relationship)
+    assert len(nodes_successors) > 0
+
+    await na_adapter.remove_connection_to_successors_of(node_ids=[dest_id], edge_label=relationship)
+    nodes_successors_after = await na_adapter.get_successors(
+        node_id=src_id, edge_label=relationship
+    )
+    assert len(nodes_successors_after) == 0
+
+    # no-op
+    await na_adapter.project_entire_graph()
+    await na_adapter.drop_graph()
+    await na_adapter.graph_exists()
+
+    pass
+
+
+if __name__ == "__main__":
+    asyncio.run(pipeline_method())
+    asyncio.run(misc_methods())
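
This test drives the new NeptuneGraphDB adapter directly (cognee/infrastructure/databases/graph/neptune_driver/, added in this release alongside the hybrid NeptuneAnalyticsAdapter). A minimal standalone sketch, condensed from the script above; GRAPH_ID is the Neptune Analytics graph identifier the script expects in the environment:

# Condensed from the test script above: connect, inspect, and clean up.
import asyncio
import os
from cognee.infrastructure.databases.graph.neptune_driver import NeptuneGraphDB

async def main():
    adapter = NeptuneGraphDB(os.getenv("GRAPH_ID", ""))
    nodes, edges = await adapter.get_graph_data()
    print(f"{len(nodes)} nodes, {len(edges)} edges")
    await adapter.delete_graph()  # truncate, as the test does between runs

asyncio.run(main())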