cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Neptune Analytics Exceptions
|
|
2
|
+
|
|
3
|
+
This module defines custom exceptions for Neptune Analytics operations.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from cognee.exceptions import CogneeApiError
|
|
7
|
+
from fastapi import status
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NeptuneAnalyticsError(CogneeApiError):
    """Root of the Neptune Analytics exception hierarchy.

    Every Neptune Analytics specific exception in this module derives from
    this class, so callers can catch this single type to handle all of them.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          500 Internal Server Error.
    """

    def __init__(
        self,
        message: str = "Neptune Analytics error.",
        name: str = "NeptuneAnalyticsError",
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
    ) -> None:
        # All three values are forwarded positionally to CogneeApiError.
        super().__init__(message, name, status_code)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NeptuneAnalyticsConnectionError(NeptuneAnalyticsError):
    """Signals that a connection to Neptune Analytics could not be established.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          404 Not Found.
    """

    def __init__(
        self,
        message: str = "Unable to connect to Neptune Analytics. Please check the endpoint and network connectivity.",
        name: str = "NeptuneAnalyticsConnectionError",
        # NOTE(review): 404 is an unusual default for a connectivity failure
        # (503 may describe it better) - confirm before changing, callers may
        # rely on the current value.
        status_code: int = status.HTTP_404_NOT_FOUND,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class NeptuneAnalyticsQueryError(NeptuneAnalyticsError):
    """Signals that executing a query against Neptune Analytics failed.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          400 Bad Request.
    """

    def __init__(
        self,
        message: str = "The query execution failed due to invalid syntax or semantic issues.",
        name: str = "NeptuneAnalyticsQueryError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NeptuneAnalyticsAuthenticationError(NeptuneAnalyticsError):
    """Signals that authenticating with Neptune Analytics failed.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          401 Unauthorized.
    """

    def __init__(
        self,
        message: str = "Authentication with Neptune Analytics failed. Please verify your credentials.",
        name: str = "NeptuneAnalyticsAuthenticationError",
        status_code: int = status.HTTP_401_UNAUTHORIZED,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class NeptuneAnalyticsConfigurationError(NeptuneAnalyticsError):
    """Signals that the Neptune Analytics configuration is invalid or incomplete.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          500 Internal Server Error.
    """

    def __init__(
        self,
        message: str = "Neptune Analytics configuration is invalid or incomplete. Please review your setup.",
        name: str = "NeptuneAnalyticsConfigurationError",
        status_code: int = status.HTTP_500_INTERNAL_SERVER_ERROR,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class NeptuneAnalyticsTimeoutError(NeptuneAnalyticsError):
    """Signals that a Neptune Analytics operation timed out.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          504 Gateway Timeout.
    """

    def __init__(
        self,
        message: str = "The operation timed out while communicating with Neptune Analytics.",
        name: str = "NeptuneAnalyticsTimeoutError",
        status_code: int = status.HTTP_504_GATEWAY_TIMEOUT,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class NeptuneAnalyticsThrottlingError(NeptuneAnalyticsError):
    """Signals that Neptune Analytics throttled the request.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          429 Too Many Requests.
    """

    def __init__(
        self,
        message: str = "Request was throttled by Neptune Analytics due to exceeding rate limits.",
        name: str = "NeptuneAnalyticsThrottlingError",
        status_code: int = status.HTTP_429_TOO_MANY_REQUESTS,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class NeptuneAnalyticsResourceNotFoundError(NeptuneAnalyticsError):
    """Signals that a requested Neptune Analytics resource does not exist.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          404 Not Found.
    """

    def __init__(
        self,
        message: str = "The requested Neptune Analytics resource could not be found.",
        name: str = "NeptuneAnalyticsResourceNotFoundError",
        status_code: int = status.HTTP_404_NOT_FOUND,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class NeptuneAnalyticsInvalidParameterError(NeptuneAnalyticsError):
    """Signals that invalid or missing parameters were passed to Neptune Analytics.

    Parameters:
    -----------
        - message (str): Human-readable description of the failure.
        - name (str): Identifier of the error type handed to the parent class.
        - status_code: HTTP status associated with the error; defaults to
          400 Bad Request.
    """

    def __init__(
        self,
        message: str = "One or more parameters provided to Neptune Analytics are invalid or missing.",
        name: str = "NeptuneAnalyticsInvalidParameterError",
        status_code: int = status.HTTP_400_BAD_REQUEST,
    ) -> None:
        super().__init__(message, name, status_code)
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"""Neptune Utilities
|
|
2
|
+
|
|
3
|
+
This module provides utility functions for Neptune Analytics operations including
|
|
4
|
+
connection management, URL parsing, and Neptune-specific configurations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Optional, Dict, Any, Tuple
|
|
9
|
+
from urllib.parse import urlparse
|
|
10
|
+
|
|
11
|
+
from cognee.shared.logging_utils import get_logger
|
|
12
|
+
|
|
13
|
+
# Module-level logger, registered under the name "NeptuneUtils".
logger = get_logger("NeptuneUtils")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def parse_neptune_url(url: str) -> Tuple[str, str]:
    """
    Parse a Neptune Analytics URL to extract graph ID and region.

    Expected format: neptune-graph://<GRAPH_ID>?region=<REGION>
    or neptune-graph://<GRAPH_ID> (defaults to us-east-1)

    The query string is decoded with the standard library parser, so values
    may be percent-encoded or contain "=" characters. When "region" appears
    more than once the last occurrence wins; a missing or blank "region"
    falls back to the default.

    Parameters:
    -----------
        - url (str): The Neptune Analytics URL to parse

    Returns:
    --------
        - Tuple[str, str]: A tuple containing (graph_id, region)

    Raises:
    -------
        - ValueError: If the URL format is invalid. Any underlying exception
          is attached as ``__cause__``.
    """
    # Local import: the module only imports ``urlparse`` at file level.
    from urllib.parse import parse_qs

    try:
        parsed = urlparse(url)

        if parsed.scheme != "neptune-graph":
            raise ValueError(f"Invalid scheme: {parsed.scheme}. Expected 'neptune-graph'")

        # ``hostname`` is the lower-cased network location; fall back to the
        # path for URLs written without the "//" authority part.
        graph_id = parsed.hostname or parsed.path.lstrip("/")
        if not graph_id:
            raise ValueError("Graph ID not found in URL")

        region = "us-east-1"  # default region
        region_values = parse_qs(parsed.query).get("region")
        if region_values:
            region = region_values[-1]

        return graph_id, region

    except Exception as e:
        # Every failure is reported as ValueError; keep the original error
        # chained so the root cause stays visible in tracebacks.
        raise ValueError(f"Failed to parse Neptune Analytics URL '{url}': {str(e)}") from e
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def validate_graph_id(graph_id: str) -> bool:
    """
    Validate a Neptune Analytics graph ID format.

    A graph ID is accepted when it is 1-63 characters long, starts with an
    ASCII letter or digit, and otherwise contains only ASCII letters, digits
    and hyphens.

    Parameters:
    -----------
        - graph_id (str): The graph ID to validate

    Returns:
    --------
        - bool: True if the graph ID is valid, False otherwise
    """
    if not graph_id:
        return False

    # fullmatch is required here: with re.match, a trailing "$" also matches
    # just before a final newline, so "abc\n" would be accepted.
    pattern = r"[a-zA-Z0-9][a-zA-Z0-9\-]{0,62}"
    return re.fullmatch(pattern, graph_id) is not None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def validate_aws_region(region: str) -> bool:
    """
    Validate an AWS region format.

    Only the shape of the name is checked, not whether the region exists.
    Accepted names consist of a 2-3 letter prefix, one or more lowercase
    word segments and a trailing number, e.g. "us-east-1", "eu-west-1" or
    "us-gov-west-1".

    Parameters:
    -----------
        - region (str): The AWS region to validate

    Returns:
    --------
        - bool: True if the region format is valid, False otherwise
    """
    if not region:
        return False

    # "(?:-[a-z]+)+" allows multi-segment names such as us-gov-west-1;
    # fullmatch rejects a trailing newline that "$" with re.match would allow.
    pattern = r"[a-z]{2,3}(?:-[a-z]+)+-\d+"
    return re.fullmatch(pattern, region) is not None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def build_neptune_config(
    graph_id: str,
    region: Optional[str],
    aws_access_key_id: Optional[str] = None,
    aws_secret_access_key: Optional[str] = None,
    aws_session_token: Optional[str] = None,
    **kwargs,
) -> Dict[str, Any]:
    """
    Assemble the connection settings for a Neptune Analytics graph.

    The result always contains "graph_id" and "service_name". The region and
    the AWS credential entries are included only when a truthy value was
    supplied. Extra keyword arguments are merged in last, so they override
    any key set before them.

    Parameters:
    -----------
        - graph_id (str): The Neptune Analytics graph identifier
        - region (Optional[str]): AWS region where the graph is located
        - aws_access_key_id (Optional[str]): AWS access key ID
        - aws_secret_access_key (Optional[str]): AWS secret access key
        - aws_session_token (Optional[str]): AWS session token for temporary credentials
        - **kwargs: Additional configuration parameters

    Returns:
    --------
        - Dict[str, Any]: Configuration dictionary for Neptune Analytics
    """
    settings: Dict[str, Any] = {"graph_id": graph_id, "service_name": "neptune-graph"}

    # Optional entries, listed in the order they are inserted.
    optional_entries = (
        ("region", region),
        ("aws_access_key_id", aws_access_key_id),
        ("aws_secret_access_key", aws_secret_access_key),
        ("aws_session_token", aws_session_token),
    )
    for key, value in optional_entries:
        if value:
            settings[key] = value

    # Caller-supplied extras go last and take precedence.
    settings.update(kwargs)
    return settings
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def get_neptune_endpoint_url(graph_id: str, region: str) -> str:
    """
    Build the HTTPS endpoint URL of a Neptune Analytics graph.

    Neither argument is validated or escaped; both are interpolated as given.

    Parameters:
    -----------
        - graph_id (str): The Neptune Analytics graph identifier
        - region (str): AWS region where the graph is located

    Returns:
    --------
        - str: The Neptune Analytics endpoint URL
    """
    host = f"neptune-graph.{region}.amazonaws.com"
    return f"https://{host}/graphs/{graph_id}"
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def format_neptune_error(error: Exception) -> str:
    """
    Turn an exception into a more readable Neptune Analytics error message.

    The text of the exception is searched for a set of known error markers.
    For the first marker found (in declaration order), a friendly hint is
    returned followed by the original text; otherwise the original text is
    returned unchanged.

    Parameters:
    -----------
        - error (Exception): The exception to format

    Returns:
    --------
        - str: Formatted error message
    """
    raw_text = str(error)

    # Marker substring -> hint shown to the user.
    hints = {
        "AccessDenied": "Access denied. Please check your AWS credentials and permissions.",
        "GraphNotFound": "Graph not found. Please verify the graph ID and region.",
        "InvalidParameter": "Invalid parameter provided. Please check your request parameters.",
        "ThrottlingException": "Request was throttled. Please retry with exponential backoff.",
        "InternalServerError": "Internal server error occurred. Please try again later.",
    }

    matching_hint = next(
        (hint for marker, hint in hints.items() if marker in raw_text),
        None,
    )
    if matching_hint is None:
        return raw_text
    return f"{matching_hint} Original error: {raw_text}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def get_default_query_timeout() -> int:
    """
    Return the default timeout applied to Neptune Analytics queries.

    Returns:
    --------
        - int: Default timeout in seconds
    """
    five_minutes_in_seconds = 300
    return five_minutes_in_seconds
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def get_default_connection_config() -> Dict[str, Any]:
    """
    Return the default connection settings for Neptune Analytics.

    A new dictionary is created on every call. Its "query_timeout" entry is
    taken from get_default_query_timeout().

    Returns:
    --------
        - Dict[str, Any]: Default connection configuration
    """
    defaults: Dict[str, Any] = dict(
        query_timeout=get_default_query_timeout(),
        max_retries=3,
        retry_delay=1.0,
        preferred_query_language="openCypher",
    )
    return defaults
|
|
@@ -826,7 +826,7 @@ class NetworkXAdapter(GraphDBInterface):
|
|
|
826
826
|
|
|
827
827
|
return mandatory_metrics | optional_metrics
|
|
828
828
|
|
|
829
|
-
async def get_document_subgraph(self,
|
|
829
|
+
async def get_document_subgraph(self, data_id: str):
|
|
830
830
|
"""
|
|
831
831
|
Retrieve all relevant nodes when a document is being deleted, including chunks and
|
|
832
832
|
orphaned entities.
|
|
@@ -834,7 +834,7 @@ class NetworkXAdapter(GraphDBInterface):
|
|
|
834
834
|
Parameters:
|
|
835
835
|
-----------
|
|
836
836
|
|
|
837
|
-
-
|
|
837
|
+
- data_id(str): The data id identifying the document to fetch
|
|
838
838
|
related nodes for.
|
|
839
839
|
|
|
840
840
|
Returns:
|
|
@@ -853,7 +853,7 @@ class NetworkXAdapter(GraphDBInterface):
|
|
|
853
853
|
for node_id, attrs in self.graph.nodes(data=True):
|
|
854
854
|
if (
|
|
855
855
|
attrs.get("type") in ["TextDocument", "PdfDocument"]
|
|
856
|
-
and attrs.get("
|
|
856
|
+
and attrs.get("id") == f"{data_id}"
|
|
857
857
|
):
|
|
858
858
|
document = {"id": str(node_id), **attrs} # Convert UUID to string for consistency
|
|
859
859
|
document_node_id = node_id # Keep the original UUID
|