cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (223)
  1. cognee/api/client.py +44 -4
  2. cognee/api/health.py +332 -0
  3. cognee/api/v1/add/add.py +5 -2
  4. cognee/api/v1/add/routers/get_add_router.py +3 -0
  5. cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
  6. cognee/api/v1/cognify/cognify.py +8 -0
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
  8. cognee/api/v1/config/config.py +3 -1
  9. cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
  10. cognee/api/v1/delete/delete.py +16 -12
  11. cognee/api/v1/responses/routers/get_responses_router.py +3 -1
  12. cognee/api/v1/search/search.py +10 -0
  13. cognee/api/v1/settings/routers/get_settings_router.py +0 -2
  14. cognee/base_config.py +1 -0
  15. cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
  16. cognee/infrastructure/databases/graph/config.py +2 -0
  17. cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
  18. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
  19. cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
  20. cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
  21. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
  22. cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
  23. cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
  24. cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
  25. cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
  26. cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
  27. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
  28. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
  29. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
  30. cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
  31. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
  32. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
  33. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
  34. cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
  35. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
  36. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
  37. cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
  38. cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
  39. cognee/infrastructure/files/utils/guess_file_type.py +2 -2
  40. cognee/infrastructure/files/utils/open_data_file.py +4 -23
  41. cognee/infrastructure/llm/LLMGateway.py +137 -0
  42. cognee/infrastructure/llm/__init__.py +14 -4
  43. cognee/infrastructure/llm/config.py +29 -1
  44. cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
  45. cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
  46. cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
  47. cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
  48. cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
  49. cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
  50. cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
  51. cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
  52. cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
  53. cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
  54. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
  55. cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
  56. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
  57. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
  58. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
  59. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
  60. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
  61. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
  62. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
  64. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
  65. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
  66. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
  67. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
  68. cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
  69. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
  70. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
  71. cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
  72. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
  73. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
  74. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
  75. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
  76. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
  77. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
  78. cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
  79. cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
  80. cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
  81. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
  82. cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
  83. cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
  84. cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
  85. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
  86. cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
  87. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
  88. cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
  89. cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
  90. cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
  91. cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
  92. cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
  93. cognee/infrastructure/llm/utils.py +3 -1
  94. cognee/infrastructure/loaders/LoaderEngine.py +156 -0
  95. cognee/infrastructure/loaders/LoaderInterface.py +73 -0
  96. cognee/infrastructure/loaders/__init__.py +18 -0
  97. cognee/infrastructure/loaders/core/__init__.py +7 -0
  98. cognee/infrastructure/loaders/core/audio_loader.py +98 -0
  99. cognee/infrastructure/loaders/core/image_loader.py +114 -0
  100. cognee/infrastructure/loaders/core/text_loader.py +90 -0
  101. cognee/infrastructure/loaders/create_loader_engine.py +32 -0
  102. cognee/infrastructure/loaders/external/__init__.py +22 -0
  103. cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
  104. cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
  105. cognee/infrastructure/loaders/get_loader_engine.py +18 -0
  106. cognee/infrastructure/loaders/supported_loaders.py +18 -0
  107. cognee/infrastructure/loaders/use_loader.py +21 -0
  108. cognee/infrastructure/loaders/utils/__init__.py +0 -0
  109. cognee/modules/data/methods/__init__.py +1 -0
  110. cognee/modules/data/methods/get_authorized_dataset.py +23 -0
  111. cognee/modules/data/models/Data.py +13 -3
  112. cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
  113. cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
  114. cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
  115. cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
  116. cognee/modules/engine/utils/generate_edge_id.py +5 -0
  117. cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
  118. cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
  119. cognee/modules/graph/utils/get_graph_from_model.py +93 -101
  120. cognee/modules/ingestion/data_types/TextData.py +8 -2
  121. cognee/modules/ingestion/save_data_to_file.py +1 -1
  122. cognee/modules/pipelines/exceptions/__init__.py +1 -0
  123. cognee/modules/pipelines/exceptions/exceptions.py +12 -0
  124. cognee/modules/pipelines/models/DataItemStatus.py +5 -0
  125. cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
  126. cognee/modules/pipelines/models/__init__.py +1 -0
  127. cognee/modules/pipelines/operations/pipeline.py +10 -2
  128. cognee/modules/pipelines/operations/run_tasks.py +252 -20
  129. cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
  130. cognee/modules/retrieval/chunks_retriever.py +23 -1
  131. cognee/modules/retrieval/code_retriever.py +66 -9
  132. cognee/modules/retrieval/completion_retriever.py +11 -9
  133. cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
  134. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
  135. cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
  136. cognee/modules/retrieval/graph_completion_retriever.py +1 -1
  137. cognee/modules/retrieval/insights_retriever.py +4 -0
  138. cognee/modules/retrieval/natural_language_retriever.py +9 -15
  139. cognee/modules/retrieval/summaries_retriever.py +23 -1
  140. cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
  141. cognee/modules/retrieval/utils/completion.py +6 -9
  142. cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
  143. cognee/modules/search/methods/search.py +5 -1
  144. cognee/modules/search/operations/__init__.py +1 -0
  145. cognee/modules/search/operations/select_search_type.py +42 -0
  146. cognee/modules/search/types/SearchType.py +1 -0
  147. cognee/modules/settings/get_settings.py +0 -8
  148. cognee/modules/settings/save_vector_db_config.py +1 -1
  149. cognee/shared/data_models.py +3 -1
  150. cognee/shared/logging_utils.py +0 -5
  151. cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
  152. cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
  153. cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
  154. cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
  155. cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
  156. cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
  157. cognee/tasks/graph/extract_graph_from_code.py +3 -2
  158. cognee/tasks/graph/extract_graph_from_data.py +4 -3
  159. cognee/tasks/graph/infer_data_ontology.py +5 -6
  160. cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
  161. cognee/tasks/ingestion/ingest_data.py +91 -61
  162. cognee/tasks/ingestion/resolve_data_directories.py +3 -0
  163. cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
  164. cognee/tasks/storage/index_data_points.py +1 -1
  165. cognee/tasks/storage/index_graph_edges.py +4 -1
  166. cognee/tasks/summarization/summarize_code.py +2 -3
  167. cognee/tasks/summarization/summarize_text.py +3 -2
  168. cognee/tests/test_cognee_server_start.py +12 -7
  169. cognee/tests/test_deduplication.py +2 -2
  170. cognee/tests/test_deletion.py +58 -17
  171. cognee/tests/test_graph_visualization_permissions.py +161 -0
  172. cognee/tests/test_neptune_analytics_graph.py +309 -0
  173. cognee/tests/test_neptune_analytics_hybrid.py +176 -0
  174. cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
  175. cognee/tests/test_pgvector.py +5 -5
  176. cognee/tests/test_s3.py +1 -6
  177. cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
  178. cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
  179. cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
  180. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
  181. cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
  182. cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
  183. cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
  184. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
  185. cognee/tests/unit/modules/search/search_methods_test.py +55 -0
  186. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
  187. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
  188. cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
  189. cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
  190. cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
  191. cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
  192. cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
  193. cognee/modules/data/extraction/extract_categories.py +0 -14
  194. cognee/tests/test_qdrant.py +0 -99
  195. distributed/Dockerfile +0 -34
  196. distributed/app.py +0 -4
  197. distributed/entrypoint.py +0 -71
  198. distributed/entrypoint.sh +0 -5
  199. distributed/modal_image.py +0 -11
  200. distributed/queues.py +0 -5
  201. distributed/tasks/queued_add_data_points.py +0 -13
  202. distributed/tasks/queued_add_edges.py +0 -13
  203. distributed/tasks/queued_add_nodes.py +0 -13
  204. distributed/test.py +0 -28
  205. distributed/utils.py +0 -19
  206. distributed/workers/data_point_saving_worker.py +0 -93
  207. distributed/workers/graph_saving_worker.py +0 -104
  208. /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
  209. /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
  210. /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
  211. /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
  212. /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
  213. /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
  214. /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
  215. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
  216. /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
  217. /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
  218. {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
  219. {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
  220. /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
  221. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
  222. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
  223. {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,115 @@
1
+ """Neptune Analytics Exceptions
2
+
3
+ This module defines custom exceptions for Neptune Analytics operations.
4
+ """
5
+
6
+ from cognee.exceptions import CogneeApiError
7
+ from fastapi import status
8
+
9
+
10
class NeptuneAnalyticsError(CogneeApiError):
    """Base exception for Neptune Analytics operations.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to ``CogneeApiError``.
        - status_code (int): HTTP status code associated with the error;
          defaults to 500 Internal Server Error.
    """

    def __init__(
        self,
        message: str = "Neptune Analytics error.",
        name: str = "NeptuneAnalyticsError",
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
    ) -> None:
        # All three values are forwarded positionally to the parent constructor.
        super().__init__(message, name, status_code)
20
+
21
+
22
class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError):
    """Exception raised when connection to Neptune Analytics fails.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 404 Not Found.
    """

    def __init__(
        self,
        message: str = "Unable to connect to Neptune Analytics. Please check the endpoint and network connectivity.",
        name: str = "NeptuneAnalyticsConnectionError",
        # NOTE(review): a connectivity failure defaults to 404 here, the same
        # code used for a missing resource — confirm this is intentional.
        status_code: int = status.HTTP_404_NOT_FOUND,
    ) -> None:
        super().__init__(message, name, status_code)
32
+
33
+
34
class NeptuneAnalyticsQueryError(NeptuneAnalyticsError):
    """Exception raised when a query execution fails.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 400 Bad Request.
    """

    def __init__(
        self,
        message: str = "The query execution failed due to invalid syntax or semantic issues.",
        name: str = "NeptuneAnalyticsQueryError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ) -> None:
        super().__init__(message, name, status_code)
44
+
45
+
46
class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError):
    """Exception raised when authentication with Neptune Analytics fails.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 401 Unauthorized.
    """

    def __init__(
        self,
        message: str = "Authentication with Neptune Analytics failed. Please verify your credentials.",
        name: str = "NeptuneAnalyticsAuthenticationError",
        status_code: int = status.HTTP_401_UNAUTHORIZED,
    ) -> None:
        super().__init__(message, name, status_code)
56
+
57
+
58
class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError):
    """Exception raised when Neptune Analytics configuration is invalid.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 500 Internal Server Error.
    """

    def __init__(
        self,
        message: str = "Neptune Analytics configuration is invalid or incomplete. Please review your setup.",
        name: str = "NeptuneAnalyticsConfigurationError",
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
    ) -> None:
        super().__init__(message, name, status_code)
68
+
69
+
70
class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError):
    """Exception raised when a Neptune Analytics operation times out.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 504 Gateway Timeout.
    """

    def __init__(
        self,
        message: str = "The operation timed out while communicating with Neptune Analytics.",
        name: str = "NeptuneAnalyticsTimeoutError",
        status_code: int = status.HTTP_504_GATEWAY_TIMEOUT,
    ) -> None:
        super().__init__(message, name, status_code)
80
+
81
+
82
class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError):
    """Exception raised when requests are throttled by Neptune Analytics.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 429 Too Many Requests.
    """

    def __init__(
        self,
        message: str = "Request was throttled by Neptune Analytics due to exceeding rate limits.",
        name: str = "NeptuneAnalyticsThrottlingError",
        status_code: int = status.HTTP_429_TOO_MANY_REQUESTS,
    ) -> None:
        super().__init__(message, name, status_code)
92
+
93
+
94
class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError):
    """Exception raised when a Neptune Analytics resource is not found.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 404 Not Found.
    """

    def __init__(
        self,
        message: str = "The requested Neptune Analytics resource could not be found.",
        name: str = "NeptuneAnalyticsResourceNotFoundError",
        status_code: int = status.HTTP_404_NOT_FOUND,
    ) -> None:
        super().__init__(message, name, status_code)
104
+
105
+
106
class NeptuneAnalyticsInvalidParameterError(NeptuneAnalyticsError):
    """Exception raised when invalid parameters are provided to Neptune Analytics.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Error name forwarded to the parent exception.
        - status_code (int): HTTP status code associated with the error;
          defaults to 400 Bad Request.
    """

    def __init__(
        self,
        message: str = "One or more parameters provided to Neptune Analytics are invalid or missing.",
        name: str = "NeptuneAnalyticsInvalidParameterError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ) -> None:
        super().__init__(message, name, status_code)
@@ -0,0 +1,224 @@
1
+ """Neptune Utilities
2
+
3
+ This module provides utility functions for Neptune Analytics operations including
4
+ connection management, URL parsing, and Neptune-specific configurations.
5
+ """
6
+
7
+ import re
8
+ from typing import Optional, Dict, Any, Tuple
9
+ from urllib.parse import urlparse
10
+
11
+ from cognee.shared.logging_utils import get_logger
12
+
13
+ logger = get_logger("NeptuneUtils")
14
+
15
+
16
def parse_neptune_url(url: str) -> Tuple[str, str]:
    """
    Parse a Neptune Analytics URL to extract graph ID and region.

    Expected format: neptune-graph://<GRAPH_ID>?region=<REGION>
    or neptune-graph://<GRAPH_ID> (defaults to us-east-1)

    Parameters:
    -----------
        - url (str): The Neptune Analytics URL to parse

    Returns:
    --------
        - Tuple[str, str]: A tuple containing (graph_id, region)

    Raises:
    -------
        - ValueError: If the URL format is invalid. The underlying error is
          attached as ``__cause__``.
    """
    try:
        parsed = urlparse(url)

        if parsed.scheme != "neptune-graph":
            raise ValueError(f"Invalid scheme: {parsed.scheme}. Expected 'neptune-graph'")

        # `hostname` is lower-cased by urllib; fall back to the path for
        # URLs that carry the graph ID without a network location.
        graph_id = parsed.hostname or parsed.path.lstrip("/")
        if not graph_id:
            raise ValueError("Graph ID not found in URL")

        # Extract region from query parameters
        region = "us-east-1"  # default region
        if parsed.query:
            # Split on the first "=" only, so a value that itself contains
            # "=" does not make dict() fail and reject the whole URL.
            query_params = dict(
                param.split("=", 1) for param in parsed.query.split("&") if "=" in param
            )
            region = query_params.get("region", region)

        return graph_id, region

    except Exception as e:
        # Any failure (including non-string input) is reported as ValueError;
        # chain the cause so the original traceback is not lost.
        raise ValueError(f"Failed to parse Neptune Analytics URL '{url}': {str(e)}") from e
57
+
58
+
59
def validate_graph_id(graph_id: str) -> bool:
    """
    Validate a Neptune Analytics graph ID format.

    A graph ID is accepted when it is 1-63 characters long, starts with an
    ASCII letter or digit, and otherwise contains only ASCII letters, digits
    and hyphens.

    Parameters:
    -----------
        - graph_id (str): The graph ID to validate

    Returns:
    --------
        - bool: True if the graph ID is valid, False otherwise
    """
    if not graph_id:
        return False

    # fullmatch is used instead of match + "$": "$" also matches just before
    # a trailing newline, which would let "my-graph\n" pass validation.
    pattern = r"[a-zA-Z0-9][a-zA-Z0-9\-]{0,62}"
    return bool(re.fullmatch(pattern, graph_id))
80
+
81
+
82
def validate_aws_region(region: str) -> bool:
    """
    Validate an AWS region format.

    Only the shape of the name is checked; the function does not verify that
    the region actually exists.

    Parameters:
    -----------
        - region (str): The AWS region to validate

    Returns:
    --------
        - bool: True if the region format is valid, False otherwise
    """
    if not region:
        return False

    # AWS regions follow the pattern: us-east-1, eu-west-1, etc. One or more
    # middle segments are allowed so partition-qualified names such as
    # us-gov-west-1 are accepted too. fullmatch rejects a trailing newline,
    # which match + "$" would let through.
    pattern = r"[a-z]{2,3}(?:-[a-z]+)+-\d+"
    return bool(re.fullmatch(pattern, region))
100
+
101
+
102
def build_neptune_config(
    graph_id: str,
    region: Optional[str],
    aws_access_key_id: Optional[str] = None,
    aws_secret_access_key: Optional[str] = None,
    aws_session_token: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """
    Assemble the connection settings dictionary for Neptune Analytics.

    The result always contains "graph_id" and "service_name". The region and
    each credential are added only when a truthy value is supplied. Extra
    keyword arguments are merged last, so they take precedence over the keys
    set here. No validation of the values is performed.

    Parameters:
    -----------
        - graph_id (str): The Neptune Analytics graph identifier
        - region (Optional[str]): AWS region where the graph is located
        - aws_access_key_id (Optional[str]): AWS access key ID
        - aws_secret_access_key (Optional[str]): AWS secret access key
        - aws_session_token (Optional[str]): AWS session token for temporary credentials
        - **kwargs: Additional configuration parameters

    Returns:
    --------
        - Dict[str, Any]: Configuration dictionary for Neptune Analytics
    """
    settings: Dict[str, Any] = {"graph_id": graph_id, "service_name": "neptune-graph"}

    # Optional entries, in the order they should appear in the result.
    optional_settings = (
        ("region", region),
        ("aws_access_key_id", aws_access_key_id),
        ("aws_secret_access_key", aws_secret_access_key),
        ("aws_session_token", aws_session_token),
    )
    settings.update({key: value for key, value in optional_settings if value})

    # Caller-supplied extras are applied last.
    settings.update(kwargs)
    return settings
152
+
153
+
154
def get_neptune_endpoint_url(graph_id: str, region: str) -> str:
    """
    Build the HTTPS endpoint URL for a graph in a given region.

    Parameters:
    -----------
        - graph_id (str): The Neptune Analytics graph identifier
        - region (str): AWS region where the graph is located

    Returns:
    --------
        - str: The Neptune Analytics endpoint URL
    """
    host = f"neptune-graph.{region}.amazonaws.com"
    return f"https://{host}/graphs/{graph_id}"
168
+
169
+
170
def format_neptune_error(error: Exception) -> str:
    """
    Turn a Neptune Analytics exception into a more readable message.

    The string form of the exception is searched for known error markers.
    The first marker found (in the order listed below) selects a friendly
    hint, which is prepended to the original text. When no marker is found,
    the original text is returned unchanged.

    Parameters:
    -----------
        - error (Exception): The exception to format

    Returns:
    --------
        - str: Formatted error message
    """
    raw_message = str(error)

    # (marker, hint) pairs; order decides which hint wins on multiple hits.
    known_errors = (
        ("AccessDenied", "Access denied. Please check your AWS credentials and permissions."),
        ("GraphNotFound", "Graph not found. Please verify the graph ID and region."),
        ("InvalidParameter", "Invalid parameter provided. Please check your request parameters."),
        ("ThrottlingException", "Request was throttled. Please retry with exponential backoff."),
        ("InternalServerError", "Internal server error occurred. Please try again later."),
    )

    hint = next((text for marker, text in known_errors if marker in raw_message), None)
    if hint is None:
        return raw_message
    return f"{hint} Original error: {raw_message}"
198
+
199
+
200
def get_default_query_timeout() -> int:
    """
    Return the default query timeout for Neptune Analytics operations.

    Returns:
    --------
        - int: Default timeout in seconds
    """
    timeout_minutes = 5
    return timeout_minutes * 60
209
+
210
+
211
def get_default_connection_config() -> Dict[str, Any]:
    """
    Return the default connection settings for Neptune Analytics.

    A new dictionary is built on every call, so callers may modify the
    result freely.

    Returns:
    --------
        - Dict[str, Any]: Default connection configuration
    """
    defaults: Dict[str, Any] = {}
    defaults["query_timeout"] = get_default_query_timeout()
    defaults["max_retries"] = 3
    defaults["retry_delay"] = 1.0
    defaults["preferred_query_language"] = "openCypher"
    return defaults
@@ -826,7 +826,7 @@ class NetworkXAdapter(GraphDBInterface):
826
826
 
827
827
  return mandatory_metrics | optional_metrics
828
828
 
829
- async def get_document_subgraph(self, content_hash: str):
829
+ async def get_document_subgraph(self, data_id: str):
830
830
  """
831
831
  Retrieve all relevant nodes when a document is being deleted, including chunks and
832
832
  orphaned entities.
@@ -834,7 +834,7 @@ class NetworkXAdapter(GraphDBInterface):
834
834
  Parameters:
835
835
  -----------
836
836
 
837
- - content_hash (str): The hash identifying the content of the document to fetch
837
+ - data_id(str): The data id identifying the document to fetch
838
838
  related nodes for.
839
839
 
840
840
  Returns:
@@ -853,7 +853,7 @@ class NetworkXAdapter(GraphDBInterface):
853
853
  for node_id, attrs in self.graph.nodes(data=True):
854
854
  if (
855
855
  attrs.get("type") in ["TextDocument", "PdfDocument"]
856
- and attrs.get("name") == f"text_{content_hash}"
856
+ and attrs.get("id") == f"{data_id}"
857
857
  ):
858
858
  document = {"id": str(node_id), **attrs} # Convert UUID to string for consistency
859
859
  document_node_id = node_id # Keep the original UUID