cognee 0.2.2.dev0__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
- cognee/api/client.py +41 -3
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +1 -7
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +12 -7
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +1 -1
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +48 -13
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +1 -0
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -15
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +11 -1
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +9 -18
- cognee/modules/graph/methods/get_formatted_graph_data.py +7 -1
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +251 -19
- cognee/modules/retrieval/code_retriever.py +3 -5
- cognee/modules/retrieval/completion_retriever.py +1 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/natural_language_retriever.py +3 -5
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -4
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_qdrant.py → test_neptune_analytics_vector.py} +86 -16
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/METADATA +12 -6
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/RECORD +195 -156
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/WHEEL +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.2.dev0.dist-info → cognee-0.2.3.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/search/search.py
CHANGED
@@ -71,6 +71,12 @@ async def search(
 Best for: Advanced users, specific graph traversals, debugging.
 Returns: Raw graph query results.
 
+**FEELING_LUCKY**:
+Intelligently selects and runs the most appropriate search type.
+Best for: General-purpose queries or when you're unsure which search type is best.
+Returns: The results from the automatically selected search type.
+
+
 Args:
 query_text: Your question or search query in natural language.
 Examples:
@@ -119,6 +125,9 @@ async def search(
 **CODE**:
 [List of structured code information with context]
 
+**FEELING_LUCKY**:
+[List of results in the format of the search type that is automatically selected]
+
 
 
 
@@ -130,6 +139,7 @@ async def search(
 - **CHUNKS**: Fastest, pure vector similarity search without LLM
 - **SUMMARIES**: Fast, returns pre-computed summaries
 - **CODE**: Medium speed, specialized for code understanding
+- **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
 - **top_k**: Start with 10, increase for comprehensive analysis (max 100)
 - **datasets**: Specify datasets to improve speed and relevance
 
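For orientation, here is a minimal usage sketch of the new search type. It assumes the top-level `cognee.search` coroutine accepts a `query_type` keyword and that `SearchType` is importable from `cognee.modules.search.types`; neither detail is shown in this hunk, so treat the exact names as assumptions.

```python
import asyncio

import cognee
from cognee.modules.search.types import SearchType  # FEELING_LUCKY is added in this release


async def main():
    # FEELING_LUCKY asks the LLM to pick the most appropriate search type for the
    # query (via the new select_search_type operation and search_type_selector prompt),
    # then runs that search and returns its results.
    results = await cognee.search(
        query_text="What does the ingestion pipeline do with audio files?",
        query_type=SearchType.FEELING_LUCKY,  # assumed keyword name
        top_k=10,
    )
    print(results)


asyncio.run(main())
```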
cognee/base_config.py
CHANGED

cognee/eval_framework/evaluation/direct_llm_eval_adapter.py
CHANGED
@@ -1,10 +1,10 @@
 from typing import Any, Dict, List
 from pydantic import BaseModel
-from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
-from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
 from cognee.eval_framework.eval_config import EvalConfig
 
+from cognee.infrastructure.llm import LLMGateway
+
 
 class CorrectnessEvaluation(BaseModel):
 """Response model containing evaluation score and explanation."""
@@ -19,17 +19,16 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
 config = EvalConfig()
 self.system_prompt_path = config.direct_llm_system_prompt
 self.eval_prompt_path = config.direct_llm_eval_prompt
-self.llm_client = get_llm_client()
 
 async def evaluate_correctness(
 self, question: str, answer: str, golden_answer: str
 ) -> Dict[str, Any]:
 args = {"question": question, "answer": answer, "golden_answer": golden_answer}
 
-user_prompt = render_prompt(self.eval_prompt_path, args)
-system_prompt = read_query_prompt(self.system_prompt_path)
+user_prompt = LLMGateway.render_prompt(self.eval_prompt_path, args)
+system_prompt = LLMGateway.read_query_prompt(self.system_prompt_path)
 
-evaluation = await
+evaluation = await LLMGateway.acreate_structured_output(
 text_input=user_prompt,
 system_prompt=system_prompt,
 response_model=CorrectnessEvaluation,
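For orientation, a minimal sketch of the gateway calls this adapter now relies on. The `LLMGateway` methods and keyword arguments are taken directly from the hunk above; the prompt file names used here are hypothetical placeholders (the adapter reads its real paths from `EvalConfig`).

```python
from pydantic import BaseModel

from cognee.infrastructure.llm import LLMGateway


class Verdict(BaseModel):
    score: float
    explanation: str


async def judge(question: str, answer: str, golden_answer: str) -> Verdict:
    # Hypothetical prompt template names; only the call pattern mirrors the diff.
    user_prompt = LLMGateway.render_prompt(
        "eval_user_prompt.txt",
        {"question": question, "answer": answer, "golden_answer": golden_answer},
    )
    system_prompt = LLMGateway.read_query_prompt("eval_system_prompt.txt")

    return await LLMGateway.acreate_structured_output(
        text_input=user_prompt,
        system_prompt=system_prompt,
        response_model=Verdict,
    )
```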

cognee/infrastructure/databases/graph/config.py
CHANGED
@@ -36,6 +36,7 @@ class GraphConfig(BaseSettings):
 graph_database_provider: str = Field("kuzu", env="GRAPH_DATABASE_PROVIDER")
 
 graph_database_url: str = ""
+graph_database_name: str = ""
 graph_database_username: str = ""
 graph_database_password: str = ""
 graph_database_port: int = 123
@@ -105,6 +106,7 @@ class GraphConfig(BaseSettings):
 return {
 "graph_database_provider": self.graph_database_provider,
 "graph_database_url": self.graph_database_url,
+"graph_database_name": self.graph_database_name,
 "graph_database_username": self.graph_database_username,
 "graph_database_password": self.graph_database_password,
 "graph_database_port": self.graph_database_port,
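A small sketch of supplying the new field. The environment variable name is assumed from the field name under the usual pydantic-settings mapping; the diff does not show an explicit `env=` for `graph_database_name`.

```python
import os

# Assumed env var name (field name upper-cased); no explicit mapping is shown in the diff.
os.environ["GRAPH_DATABASE_NAME"] = "cognee"

from cognee.infrastructure.databases.graph.config import GraphConfig

config = GraphConfig()
print(config.graph_database_name)  # "cognee" if the default mapping applies, otherwise ""
```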

cognee/infrastructure/databases/graph/get_graph_engine.py
CHANGED
@@ -33,6 +33,7 @@ def create_graph_engine(
 graph_database_provider,
 graph_file_path,
 graph_database_url="",
+graph_database_name="",
 graph_database_username="",
 graph_database_password="",
 graph_database_port="",
@@ -48,13 +49,13 @@ def create_graph_engine(
 -----------
 
 - graph_database_provider: The type of graph database provider to use (e.g., neo4j,
-falkordb, kuzu
-- graph_database_url: The URL for the graph database instance. Required for neo4j
-
+falkordb, kuzu).
+- graph_database_url: The URL for the graph database instance. Required for neo4j
+and falkordb providers.
 - graph_database_username: The username for authentication with the graph database.
-Required for neo4j
+Required for neo4j provider.
 - graph_database_password: The password for authentication with the graph database.
-Required for neo4j
+Required for neo4j provider.
 - graph_database_port: The port number for the graph database connection. Required
 for the falkordb provider.
 - graph_file_path: The filesystem path to the graph file. Required for the kuzu
@@ -86,6 +87,7 @@ def create_graph_engine(
 graph_database_url=graph_database_url,
 graph_database_username=graph_database_username or None,
 graph_database_password=graph_database_password or None,
+graph_database_name=graph_database_name or None,
 )
 
 elif graph_database_provider == "falkordb":
@@ -122,17 +124,61 @@ def create_graph_engine(
 username=graph_database_username,
 password=graph_database_password,
 )
+elif graph_database_provider == "neptune":
+try:
+from langchain_aws import NeptuneAnalyticsGraph
+except ImportError:
+raise ImportError(
+"langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
+)
 
-elif graph_database_provider == "memgraph":
 if not graph_database_url:
-raise EnvironmentError("Missing
+raise EnvironmentError("Missing Neptune endpoint.")
 
-from .
+from .neptune_driver.adapter import NeptuneGraphDB, NEPTUNE_ENDPOINT_URL
 
-
-
-
-
+if not graph_database_url.startswith(NEPTUNE_ENDPOINT_URL):
+raise ValueError(
+f"Neptune endpoint must have the format {NEPTUNE_ENDPOINT_URL}<GRAPH_ID>"
+)
+
+graph_identifier = graph_database_url.replace(NEPTUNE_ENDPOINT_URL, "")
+
+return NeptuneGraphDB(
+graph_id=graph_identifier,
+)
+
+elif graph_database_provider == "neptune_analytics":
+"""
+Creates a graph DB from config
+We want to use a hybrid (graph & vector) DB and we should update this
+to make a single instance of the hybrid configuration (with embedder)
+instead of creating the hybrid object twice.
+"""
+try:
+from langchain_aws import NeptuneAnalyticsGraph
+except ImportError:
+raise ImportError(
+"langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
+)
+
+if not graph_database_url:
+raise EnvironmentError("Missing Neptune endpoint.")
+
+from ..hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
+NeptuneAnalyticsAdapter,
+NEPTUNE_ANALYTICS_ENDPOINT_URL,
+)
+
+if not graph_database_url.startswith(NEPTUNE_ANALYTICS_ENDPOINT_URL):
+raise ValueError(
+f"Neptune endpoint must have the format '{NEPTUNE_ANALYTICS_ENDPOINT_URL}<GRAPH_ID>'"
+)
+
+graph_identifier = graph_database_url.replace(NEPTUNE_ANALYTICS_ENDPOINT_URL, "")
+
+return NeptuneAnalyticsAdapter(
+graph_id=graph_identifier,
 )
 
 from .networkx.adapter import NetworkXAdapter
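A minimal sketch of selecting the new Neptune Analytics branch. It assumes `langchain_aws` is installed (per the ImportError guard above) and that `create_graph_engine` can be called directly with the keyword parameters shown in its signature; the graph identifier below is hypothetical, and the literal value of `NEPTUNE_ANALYTICS_ENDPOINT_URL` is not shown in this diff.

```python
from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
from cognee.infrastructure.databases.hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
    NEPTUNE_ANALYTICS_ENDPOINT_URL,
)

# The URL must start with the endpoint prefix, followed by the graph identifier,
# which create_graph_engine strips off and passes as graph_id.
graph_engine = create_graph_engine(
    graph_database_provider="neptune_analytics",
    graph_file_path="",  # unused by this provider
    graph_database_url=f"{NEPTUNE_ANALYTICS_ENDPOINT_URL}g-0123456789",  # hypothetical graph id
)
```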

cognee/infrastructure/databases/graph/graph_db_interface.py
CHANGED
@@ -2,7 +2,7 @@ import inspect
 from functools import wraps
 from abc import abstractmethod, ABC
 from datetime import datetime, timezone
-from typing import Optional, Dict, Any, List, Tuple, Type
+from typing import Optional, Dict, Any, List, Tuple, Type, Union
 from uuid import NAMESPACE_OID, UUID, uuid5
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.engine import DataPoint
@@ -173,28 +173,31 @@ class GraphDBInterface(ABC):
 raise NotImplementedError
 
 @abstractmethod
-async def add_node(
+async def add_node(
+self, node: Union[DataPoint, str], properties: Optional[Dict[str, Any]] = None
+) -> None:
 """
 Add a single node with specified properties to the graph.
 
 Parameters:
 -----------
 
--
-- properties (Dict[str, Any]): A dictionary of properties associated with the node.
+- node (Union[DataPoint, str]): Either a DataPoint object or a string identifier for the node being added.
+- properties (Optional[Dict[str, Any]]): A dictionary of properties associated with the node.
+Required when node is a string, ignored when node is a DataPoint.
 """
 raise NotImplementedError
 
 @abstractmethod
 @record_graph_changes
-async def add_nodes(self, nodes: List[Node]) -> None:
+async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
 """
 Add multiple nodes to the graph in a single operation.
 
 Parameters:
 -----------
 
-- nodes (List[Node]): A list of Node objects to be added to the graph.
+- nodes (Union[List[Node], List[DataPoint]]): A list of Node objects or DataPoint objects to be added to the graph.
 """
 raise NotImplementedError
 
@@ -271,14 +274,16 @@ class GraphDBInterface(ABC):
 
 @abstractmethod
 @record_graph_changes
-async def add_edges(
+async def add_edges(
+self, edges: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]
+) -> None:
 """
 Add multiple edges to the graph in a single operation.
 
 Parameters:
 -----------
 
-- edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
+- edges (Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]): A list of EdgeData objects or tuples representing edges to be added.
 """
 raise NotImplementedError
 
@@ -377,7 +382,7 @@ class GraphDBInterface(ABC):
 
 @abstractmethod
 async def get_connections(
-self, node_id: str
+self, node_id: Union[str, UUID]
 ) -> List[Tuple[NodeData, Dict[str, Any], NodeData]]:
 """
 Get all nodes connected to a specified node and their relationship details.
@@ -385,6 +390,6 @@ class GraphDBInterface(ABC):
 Parameters:
 -----------
 
-- node_id (str): Unique identifier of the node for which to retrieve connections.
+- node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
 """
 raise NotImplementedError
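A short sketch of what the widened signatures allow on a concrete adapter. `graph_db` stands for any adapter implementing `GraphDBInterface`, and the node identifiers are hypothetical.

```python
from cognee.infrastructure.engine import DataPoint


async def add_examples(graph_db, point: DataPoint) -> None:
    # DataPoint form: node identity and properties come from the DataPoint itself.
    await graph_db.add_node(point)

    # String-id form: properties must be passed explicitly.
    await graph_db.add_node("node-123", properties={"name": "Example node"})

    # Edges may now also be plain tuples:
    # (source_id, target_id, relationship_name, optional properties dict).
    await graph_db.add_edges([("node-123", "node-456", "related_to", {"weight": 0.5})])
```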

cognee/infrastructure/databases/graph/kuzu/adapter.py
CHANGED
@@ -42,6 +42,7 @@ class KuzuAdapter(GraphDBInterface):
 self.connection: Optional[Connection] = None
 self.executor = ThreadPoolExecutor()
 self._initialize_connection()
+self.KUZU_ASYNC_LOCK = asyncio.Lock()
 
 def _initialize_connection(self) -> None:
 """Initialize the Kuzu database connection and schema."""
@@ -136,6 +137,10 @@ class KuzuAdapter(GraphDBInterface):
 from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
 
 s3_file_storage = S3FileStorage("")
+
+async with self.KUZU_ASYNC_LOCK:
+self.connection.execute("CHECKPOINT;")
+
 s3_file_storage.s3.put(self.temp_graph_file, self.db_path, recursive=True)
 
 async def pull_from_s3(self) -> None:
@@ -145,7 +150,7 @@ class KuzuAdapter(GraphDBInterface):
 try:
 s3_file_storage.s3.get(self.db_path, self.temp_graph_file, recursive=True)
 except FileNotFoundError:
-
+logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
 
 async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
 """
@@ -1524,7 +1529,7 @@ class KuzuAdapter(GraphDBInterface):
 logger.error(f"Error during database clearing: {e}")
 raise
 
-async def get_document_subgraph(self,
+async def get_document_subgraph(self, data_id: str):
 """
 Get all nodes that should be deleted when removing a document.
 
@@ -1535,7 +1540,7 @@ class KuzuAdapter(GraphDBInterface):
 Parameters:
 -----------
 
--
+- data_id (str): The identifier for the document to query against.
 
 Returns:
 --------
@@ -1545,7 +1550,7 @@ class KuzuAdapter(GraphDBInterface):
 """
 query = """
 MATCH (doc:Node)
-WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument') AND doc.
+WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument' OR doc.type = 'AudioDocument' OR doc.type = 'ImageDocument' OR doc.type = 'UnstructuredDocument') AND doc.id = $data_id
 
 OPTIONAL MATCH (doc)<-[e1:EDGE]-(chunk:Node)
 WHERE e1.relationship_name = 'is_part_of' AND chunk.type = 'DocumentChunk'
@@ -1556,7 +1561,7 @@ class KuzuAdapter(GraphDBInterface):
 MATCH (entity)<-[e3:EDGE]-(otherChunk:Node)-[e4:EDGE]->(otherDoc:Node)
 WHERE e3.relationship_name = 'contains'
 AND e4.relationship_name = 'is_part_of'
-AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
+AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
 AND otherDoc.id <> doc.id
 }
 
@@ -1572,7 +1577,7 @@ class KuzuAdapter(GraphDBInterface):
 AND e9.relationship_name = 'is_part_of'
 AND otherEntity.type = 'Entity'
 AND otherChunk.type = 'DocumentChunk'
-AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
+AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
 AND otherDoc.id <> doc.id
 }
 
@@ -1583,7 +1588,7 @@ class KuzuAdapter(GraphDBInterface):
 COLLECT(DISTINCT made_node) as made_from_nodes,
 COLLECT(DISTINCT type) as orphan_types
 """
-result = await self.query(query, {"
+result = await self.query(query, {"data_id": f"{data_id}"})
 if not result or not result[0]:
 return None
 
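The checkpoint-before-upload pattern above, restated standalone as a sketch. It assumes the `kuzu` Python package and a local database directory; the adapter's S3 wiring and instance attributes are omitted.

```python
import asyncio

import kuzu


async def checkpoint_then_copy(db_path: str) -> None:
    """Fold Kuzu's write-ahead log into the database files before copying them elsewhere."""
    lock = asyncio.Lock()  # stands in for KuzuAdapter.KUZU_ASYNC_LOCK
    connection = kuzu.Connection(kuzu.Database(db_path))

    async with lock:
        # Same statement the adapter runs before s3.put(...): it serializes pending
        # writes so the copied directory is self-consistent.
        connection.execute("CHECKPOINT;")

    # ...upload or copy db_path here...


asyncio.run(checkpoint_then_copy("./kuzu_graph_db"))
```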

cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py
CHANGED
@@ -74,7 +74,7 @@ def read_kuzu_storage_version(kuzu_db_path: str) -> int:
 if kuzu_version_mapping.get(version_code):
 return kuzu_version_mapping[version_code]
 else:
-ValueError("Could not map version_code to proper Kuzu version.")
+raise ValueError("Could not map version_code to proper Kuzu version.")
 
 
 def ensure_env(version: str, export_dir) -> str:

cognee/infrastructure/databases/graph/neo4j_driver/adapter.py
CHANGED
@@ -50,6 +50,7 @@ class Neo4jAdapter(GraphDBInterface):
 graph_database_url: str,
 graph_database_username: Optional[str] = None,
 graph_database_password: Optional[str] = None,
+graph_database_name: Optional[str] = None,
 driver: Optional[Any] = None,
 ):
 # Only use auth if both username and password are provided
@@ -59,7 +60,7 @@ class Neo4jAdapter(GraphDBInterface):
 elif graph_database_username or graph_database_password:
 logger = get_logger(__name__)
 logger.warning("Neo4j credentials incomplete – falling back to anonymous connection.")
-
+self.graph_database_name = graph_database_name
 self.driver = driver or AsyncGraphDatabase.driver(
 graph_database_url,
 auth=auth,
@@ -80,7 +81,7 @@ class Neo4jAdapter(GraphDBInterface):
 """
 Get a session for database operations.
 """
-async with self.driver.session() as session:
+async with self.driver.session(database=self.graph_database_name) as session:
 yield session
 
 @deadlock_retry()
@@ -410,6 +411,38 @@ class Neo4jAdapter(GraphDBInterface):
 
 return await self.query(query, params)
 
+def _flatten_edge_properties(self, properties: Dict[str, Any]) -> Dict[str, Any]:
+"""
+Flatten edge properties to handle nested dictionaries like weights.
+
+Neo4j doesn't support nested dictionaries as property values, so we need to
+flatten the 'weights' dictionary into individual properties with prefixes.
+
+Args:
+properties: Dictionary of edge properties that may contain nested dicts
+
+Returns:
+Flattened properties dictionary suitable for Neo4j storage
+"""
+flattened = {}
+
+for key, value in properties.items():
+if key == "weights" and isinstance(value, dict):
+# Flatten weights dictionary into individual properties
+for weight_name, weight_value in value.items():
+flattened[f"weight_{weight_name}"] = weight_value
+elif isinstance(value, dict):
+# For other nested dictionaries, serialize as JSON string
+flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
+elif isinstance(value, list):
+# For lists, serialize as JSON string
+flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
+else:
+# Keep primitive types as-is
+flattened[key] = value
+
+return flattened
+
 @record_graph_changes
 @override_distributed(queued_add_edges)
 async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None:
@@ -448,11 +481,13 @@ class Neo4jAdapter(GraphDBInterface):
 "from_node": str(edge[0]),
 "to_node": str(edge[1]),
 "relationship_name": edge[2],
-"properties":
-
-
-
-
+"properties": self._flatten_edge_properties(
+{
+**(edge[3] if edge[3] else {}),
+"source_node_id": str(edge[0]),
+"target_node_id": str(edge[1]),
+}
+),
 }
 for edge in edges
 ]
@@ -1217,7 +1252,7 @@ class Neo4jAdapter(GraphDBInterface):
 
 return mandatory_metrics | optional_metrics
 
-async def get_document_subgraph(self,
+async def get_document_subgraph(self, data_id: str):
 """
 Retrieve a subgraph related to a document identified by its content hash, including
 related entities and chunks.
@@ -1235,21 +1270,21 @@ class Neo4jAdapter(GraphDBInterface):
 """
 query = """
 MATCH (doc)
-WHERE (doc:TextDocument OR doc:PdfDocument)
-AND doc.
+WHERE (doc:TextDocument OR doc:PdfDocument OR doc:UnstructuredDocument OR doc:AudioDocument or doc:ImageDocument)
+AND doc.id = $data_id
 
 OPTIONAL MATCH (doc)<-[:is_part_of]-(chunk:DocumentChunk)
 OPTIONAL MATCH (chunk)-[:contains]->(entity:Entity)
 WHERE NOT EXISTS {
 MATCH (entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
-WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
+WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
 AND otherDoc.id <> doc.id
 }
 OPTIONAL MATCH (chunk)<-[:made_from]-(made_node:TextSummary)
 OPTIONAL MATCH (entity)-[:is_a]->(type:EntityType)
 WHERE NOT EXISTS {
 MATCH (type)<-[:is_a]-(otherEntity:Entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
-WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
+WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
 AND otherDoc.id <> doc.id
 }
 
@@ -1260,7 +1295,7 @@ class Neo4jAdapter(GraphDBInterface):
 collect(DISTINCT made_node) as made_from_nodes,
 collect(DISTINCT type) as orphan_types
 """
-result = await self.query(query, {"
+result = await self.query(query, {"data_id": data_id})
 return result[0] if result else None
 
 async def get_degree_one_nodes(self, node_type: str):
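To make the flattening concrete, a standalone restatement of the helper's behaviour. Plain `json.dumps` is used here in place of the adapter's custom `JSONEncoder`.

```python
import json
from typing import Any, Dict


def flatten_edge_properties(properties: Dict[str, Any]) -> Dict[str, Any]:
    # Mirrors Neo4jAdapter._flatten_edge_properties from the hunk above.
    flattened: Dict[str, Any] = {}
    for key, value in properties.items():
        if key == "weights" and isinstance(value, dict):
            # Nested weights become individual weight_<name> properties.
            for weight_name, weight_value in value.items():
                flattened[f"weight_{weight_name}"] = weight_value
        elif isinstance(value, (dict, list)):
            # Other nested structures are stored as JSON strings.
            flattened[f"{key}_json"] = json.dumps(value)  # the adapter passes cls=JSONEncoder
        else:
            flattened[key] = value
    return flattened


print(flatten_edge_properties({
    "relationship_name": "contains",
    "weights": {"similarity": 0.87, "recency": 0.4},
    "metadata": {"source": "chunk-1"},
}))
# {'relationship_name': 'contains', 'weight_similarity': 0.87, 'weight_recency': 0.4,
#  'metadata_json': '{"source": "chunk-1"}'}
```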

cognee/infrastructure/databases/graph/neptune_driver/__init__.py
ADDED
@@ -0,0 +1,15 @@
+"""Neptune Analytics Driver Module
+
+This module provides the Neptune Analytics adapter and utilities for interacting
+with Amazon Neptune Analytics graph databases.
+"""
+
+from .adapter import NeptuneGraphDB
+from . import neptune_utils
+from . import exceptions
+
+__all__ = [
+"NeptuneGraphDB",
+"neptune_utils",
+"exceptions",
+]
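A short import sketch showing how the new package-level exports are meant to be consumed; the paths come from this diff, and no constructor arguments are shown here.

```python
# Package-level names exposed by the new __init__.py above.
from cognee.infrastructure.databases.graph.neptune_driver import (
    NeptuneGraphDB,
    exceptions,
    neptune_utils,
)

# The endpoint-prefix constant used by create_graph_engine lives in the adapter module.
from cognee.infrastructure.databases.graph.neptune_driver.adapter import NEPTUNE_ENDPOINT_URL
```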