cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/search/search.py
CHANGED
|
@@ -71,6 +71,12 @@ async def search(
|
|
|
71
71
|
Best for: Advanced users, specific graph traversals, debugging.
|
|
72
72
|
Returns: Raw graph query results.
|
|
73
73
|
|
|
74
|
+
**FEELING_LUCKY**:
|
|
75
|
+
Intelligently selects and runs the most appropriate search type.
|
|
76
|
+
Best for: General-purpose queries or when you're unsure which search type is best.
|
|
77
|
+
Returns: The results from the automatically selected search type.
|
|
78
|
+
|
|
79
|
+
|
|
74
80
|
Args:
|
|
75
81
|
query_text: Your question or search query in natural language.
|
|
76
82
|
Examples:
|
|
@@ -119,6 +125,9 @@ async def search(
|
|
|
119
125
|
**CODE**:
|
|
120
126
|
[List of structured code information with context]
|
|
121
127
|
|
|
128
|
+
**FEELING_LUCKY**:
|
|
129
|
+
[List of results in the format of the search type that is automatically selected]
|
|
130
|
+
|
|
122
131
|
|
|
123
132
|
|
|
124
133
|
|
|
@@ -130,6 +139,7 @@ async def search(
|
|
|
130
139
|
- **CHUNKS**: Fastest, pure vector similarity search without LLM
|
|
131
140
|
- **SUMMARIES**: Fast, returns pre-computed summaries
|
|
132
141
|
- **CODE**: Medium speed, specialized for code understanding
|
|
142
|
+
- **FEELING_LUCKY**: Variable speed, uses LLM + search type selection intelligently
|
|
133
143
|
- **top_k**: Start with 10, increase for comprehensive analysis (max 100)
|
|
134
144
|
- **datasets**: Specify datasets to improve speed and relevance
|
|
135
145
|
|
cognee/base_config.py
CHANGED
|
cognee/eval_framework/evaluation/direct_llm_eval_adapter.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from typing import Any, Dict, List
|
|
2
2
|
from pydantic import BaseModel
|
|
3
|
-
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
|
4
3
|
from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
|
|
5
|
-
from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt
|
|
6
4
|
from cognee.eval_framework.eval_config import EvalConfig
|
|
7
5
|
|
|
6
|
+
from cognee.infrastructure.llm import LLMGateway
|
|
7
|
+
|
|
8
8
|
|
|
9
9
|
class CorrectnessEvaluation(BaseModel):
|
|
10
10
|
"""Response model containing evaluation score and explanation."""
|
|
@@ -19,17 +19,16 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
|
|
|
19
19
|
config = EvalConfig()
|
|
20
20
|
self.system_prompt_path = config.direct_llm_system_prompt
|
|
21
21
|
self.eval_prompt_path = config.direct_llm_eval_prompt
|
|
22
|
-
self.llm_client = get_llm_client()
|
|
23
22
|
|
|
24
23
|
async def evaluate_correctness(
|
|
25
24
|
self, question: str, answer: str, golden_answer: str
|
|
26
25
|
) -> Dict[str, Any]:
|
|
27
26
|
args = {"question": question, "answer": answer, "golden_answer": golden_answer}
|
|
28
27
|
|
|
29
|
-
user_prompt = render_prompt(self.eval_prompt_path, args)
|
|
30
|
-
system_prompt = read_query_prompt(self.system_prompt_path)
|
|
28
|
+
user_prompt = LLMGateway.render_prompt(self.eval_prompt_path, args)
|
|
29
|
+
system_prompt = LLMGateway.read_query_prompt(self.system_prompt_path)
|
|
31
30
|
|
|
32
|
-
evaluation = await
|
|
31
|
+
evaluation = await LLMGateway.acreate_structured_output(
|
|
33
32
|
text_input=user_prompt,
|
|
34
33
|
system_prompt=system_prompt,
|
|
35
34
|
response_model=CorrectnessEvaluation,
|
|
cognee/infrastructure/databases/graph/config.py
CHANGED
|
@@ -36,6 +36,7 @@ class GraphConfig(BaseSettings):
|
|
|
36
36
|
graph_database_provider: str = Field("kuzu", env="GRAPH_DATABASE_PROVIDER")
|
|
37
37
|
|
|
38
38
|
graph_database_url: str = ""
|
|
39
|
+
graph_database_name: str = ""
|
|
39
40
|
graph_database_username: str = ""
|
|
40
41
|
graph_database_password: str = ""
|
|
41
42
|
graph_database_port: int = 123
|
|
@@ -105,6 +106,7 @@ class GraphConfig(BaseSettings):
|
|
|
105
106
|
return {
|
|
106
107
|
"graph_database_provider": self.graph_database_provider,
|
|
107
108
|
"graph_database_url": self.graph_database_url,
|
|
109
|
+
"graph_database_name": self.graph_database_name,
|
|
108
110
|
"graph_database_username": self.graph_database_username,
|
|
109
111
|
"graph_database_password": self.graph_database_password,
|
|
110
112
|
"graph_database_port": self.graph_database_port,
|
|
cognee/infrastructure/databases/graph/get_graph_engine.py
CHANGED
|
@@ -33,6 +33,7 @@ def create_graph_engine(
|
|
|
33
33
|
graph_database_provider,
|
|
34
34
|
graph_file_path,
|
|
35
35
|
graph_database_url="",
|
|
36
|
+
graph_database_name="",
|
|
36
37
|
graph_database_username="",
|
|
37
38
|
graph_database_password="",
|
|
38
39
|
graph_database_port="",
|
|
@@ -48,13 +49,13 @@ def create_graph_engine(
|
|
|
48
49
|
-----------
|
|
49
50
|
|
|
50
51
|
- graph_database_provider: The type of graph database provider to use (e.g., neo4j,
|
|
51
|
-
falkordb, kuzu
|
|
52
|
-
- graph_database_url: The URL for the graph database instance. Required for neo4j
|
|
53
|
-
|
|
52
|
+
falkordb, kuzu).
|
|
53
|
+
- graph_database_url: The URL for the graph database instance. Required for neo4j
|
|
54
|
+
and falkordb providers.
|
|
54
55
|
- graph_database_username: The username for authentication with the graph database.
|
|
55
|
-
Required for neo4j
|
|
56
|
+
Required for neo4j provider.
|
|
56
57
|
- graph_database_password: The password for authentication with the graph database.
|
|
57
|
-
Required for neo4j
|
|
58
|
+
Required for neo4j provider.
|
|
58
59
|
- graph_database_port: The port number for the graph database connection. Required
|
|
59
60
|
for the falkordb provider.
|
|
60
61
|
- graph_file_path: The filesystem path to the graph file. Required for the kuzu
|
|
@@ -86,6 +87,7 @@ def create_graph_engine(
|
|
|
86
87
|
graph_database_url=graph_database_url,
|
|
87
88
|
graph_database_username=graph_database_username or None,
|
|
88
89
|
graph_database_password=graph_database_password or None,
|
|
90
|
+
graph_database_name=graph_database_name or None,
|
|
89
91
|
)
|
|
90
92
|
|
|
91
93
|
elif graph_database_provider == "falkordb":
|
|
@@ -122,17 +124,61 @@ def create_graph_engine(
|
|
|
122
124
|
username=graph_database_username,
|
|
123
125
|
password=graph_database_password,
|
|
124
126
|
)
|
|
127
|
+
elif graph_database_provider == "neptune":
|
|
128
|
+
try:
|
|
129
|
+
from langchain_aws import NeptuneAnalyticsGraph
|
|
130
|
+
except ImportError:
|
|
131
|
+
raise ImportError(
|
|
132
|
+
"langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
|
|
133
|
+
)
|
|
125
134
|
|
|
126
|
-
elif graph_database_provider == "memgraph":
|
|
127
135
|
if not graph_database_url:
|
|
128
|
-
raise EnvironmentError("Missing
|
|
136
|
+
raise EnvironmentError("Missing Neptune endpoint.")
|
|
129
137
|
|
|
130
|
-
from .
|
|
138
|
+
from .neptune_driver.adapter import NeptuneGraphDB, NEPTUNE_ENDPOINT_URL
|
|
131
139
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
140
|
+
if not graph_database_url.startswith(NEPTUNE_ENDPOINT_URL):
|
|
141
|
+
raise ValueError(
|
|
142
|
+
f"Neptune endpoint must have the format {NEPTUNE_ENDPOINT_URL}<GRAPH_ID>"
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
graph_identifier = graph_database_url.replace(NEPTUNE_ENDPOINT_URL, "")
|
|
146
|
+
|
|
147
|
+
return NeptuneGraphDB(
|
|
148
|
+
graph_id=graph_identifier,
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
elif graph_database_provider == "neptune_analytics":
|
|
152
|
+
"""
|
|
153
|
+
Creates a graph DB from config
|
|
154
|
+
We want to use a hybrid (graph & vector) DB and we should update this
|
|
155
|
+
to make a single instance of the hybrid configuration (with embedder)
|
|
156
|
+
instead of creating the hybrid object twice.
|
|
157
|
+
"""
|
|
158
|
+
try:
|
|
159
|
+
from langchain_aws import NeptuneAnalyticsGraph
|
|
160
|
+
except ImportError:
|
|
161
|
+
raise ImportError(
|
|
162
|
+
"langchain_aws is not installed. Please install it with 'pip install langchain_aws'"
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
if not graph_database_url:
|
|
166
|
+
raise EnvironmentError("Missing Neptune endpoint.")
|
|
167
|
+
|
|
168
|
+
from ..hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
|
|
169
|
+
NeptuneAnalyticsAdapter,
|
|
170
|
+
NEPTUNE_ANALYTICS_ENDPOINT_URL,
|
|
171
|
+
)
|
|
172
|
+
|
|
173
|
+
if not graph_database_url.startswith(NEPTUNE_ANALYTICS_ENDPOINT_URL):
|
|
174
|
+
raise ValueError(
|
|
175
|
+
f"Neptune endpoint must have the format '{NEPTUNE_ANALYTICS_ENDPOINT_URL}<GRAPH_ID>'"
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
graph_identifier = graph_database_url.replace(NEPTUNE_ANALYTICS_ENDPOINT_URL, "")
|
|
179
|
+
|
|
180
|
+
return NeptuneAnalyticsAdapter(
|
|
181
|
+
graph_id=graph_identifier,
|
|
136
182
|
)
|
|
137
183
|
|
|
138
184
|
from .networkx.adapter import NetworkXAdapter
|
|
cognee/infrastructure/databases/graph/graph_db_interface.py
CHANGED
|
@@ -2,7 +2,7 @@ import inspect
|
|
|
2
2
|
from functools import wraps
|
|
3
3
|
from abc import abstractmethod, ABC
|
|
4
4
|
from datetime import datetime, timezone
|
|
5
|
-
from typing import Optional, Dict, Any, List, Tuple, Type
|
|
5
|
+
from typing import Optional, Dict, Any, List, Tuple, Type, Union
|
|
6
6
|
from uuid import NAMESPACE_OID, UUID, uuid5
|
|
7
7
|
from cognee.shared.logging_utils import get_logger
|
|
8
8
|
from cognee.infrastructure.engine import DataPoint
|
|
@@ -173,28 +173,31 @@ class GraphDBInterface(ABC):
|
|
|
173
173
|
raise NotImplementedError
|
|
174
174
|
|
|
175
175
|
@abstractmethod
|
|
176
|
-
async def add_node(
|
|
176
|
+
async def add_node(
|
|
177
|
+
self, node: Union[DataPoint, str], properties: Optional[Dict[str, Any]] = None
|
|
178
|
+
) -> None:
|
|
177
179
|
"""
|
|
178
180
|
Add a single node with specified properties to the graph.
|
|
179
181
|
|
|
180
182
|
Parameters:
|
|
181
183
|
-----------
|
|
182
184
|
|
|
183
|
-
-
|
|
184
|
-
- properties (Dict[str, Any]): A dictionary of properties associated with the node.
|
|
185
|
+
- node (Union[DataPoint, str]): Either a DataPoint object or a string identifier for the node being added.
|
|
186
|
+
- properties (Optional[Dict[str, Any]]): A dictionary of properties associated with the node.
|
|
187
|
+
Required when node is a string, ignored when node is a DataPoint.
|
|
185
188
|
"""
|
|
186
189
|
raise NotImplementedError
|
|
187
190
|
|
|
188
191
|
@abstractmethod
|
|
189
192
|
@record_graph_changes
|
|
190
|
-
async def add_nodes(self, nodes: List[Node]) -> None:
|
|
193
|
+
async def add_nodes(self, nodes: Union[List[Node], List[DataPoint]]) -> None:
|
|
191
194
|
"""
|
|
192
195
|
Add multiple nodes to the graph in a single operation.
|
|
193
196
|
|
|
194
197
|
Parameters:
|
|
195
198
|
-----------
|
|
196
199
|
|
|
197
|
-
- nodes (List[Node]): A list of Node objects to be added to the graph.
|
|
200
|
+
- nodes (Union[List[Node], List[DataPoint]]): A list of Node objects or DataPoint objects to be added to the graph.
|
|
198
201
|
"""
|
|
199
202
|
raise NotImplementedError
|
|
200
203
|
|
|
@@ -271,14 +274,16 @@ class GraphDBInterface(ABC):
|
|
|
271
274
|
|
|
272
275
|
@abstractmethod
|
|
273
276
|
@record_graph_changes
|
|
274
|
-
async def add_edges(
|
|
277
|
+
async def add_edges(
|
|
278
|
+
self, edges: Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]
|
|
279
|
+
) -> None:
|
|
275
280
|
"""
|
|
276
281
|
Add multiple edges to the graph in a single operation.
|
|
277
282
|
|
|
278
283
|
Parameters:
|
|
279
284
|
-----------
|
|
280
285
|
|
|
281
|
-
- edges (List[EdgeData]): A list of EdgeData objects representing edges to be added.
|
|
286
|
+
- edges (Union[List[EdgeData], List[Tuple[str, str, str, Optional[Dict[str, Any]]]]]): A list of EdgeData objects or tuples representing edges to be added.
|
|
282
287
|
"""
|
|
283
288
|
raise NotImplementedError
|
|
284
289
|
|
|
@@ -377,7 +382,7 @@ class GraphDBInterface(ABC):
|
|
|
377
382
|
|
|
378
383
|
@abstractmethod
|
|
379
384
|
async def get_connections(
|
|
380
|
-
self, node_id: str
|
|
385
|
+
self, node_id: Union[str, UUID]
|
|
381
386
|
) -> List[Tuple[NodeData, Dict[str, Any], NodeData]]:
|
|
382
387
|
"""
|
|
383
388
|
Get all nodes connected to a specified node and their relationship details.
|
|
@@ -385,6 +390,6 @@ class GraphDBInterface(ABC):
|
|
|
385
390
|
Parameters:
|
|
386
391
|
-----------
|
|
387
392
|
|
|
388
|
-
- node_id (str): Unique identifier of the node for which to retrieve connections.
|
|
393
|
+
- node_id (Union[str, UUID]): Unique identifier of the node for which to retrieve connections.
|
|
389
394
|
"""
|
|
390
395
|
raise NotImplementedError
|
|
cognee/infrastructure/databases/graph/kuzu/adapter.py
CHANGED
|
@@ -42,6 +42,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
42
42
|
self.connection: Optional[Connection] = None
|
|
43
43
|
self.executor = ThreadPoolExecutor()
|
|
44
44
|
self._initialize_connection()
|
|
45
|
+
self.KUZU_ASYNC_LOCK = asyncio.Lock()
|
|
45
46
|
|
|
46
47
|
def _initialize_connection(self) -> None:
|
|
47
48
|
"""Initialize the Kuzu database connection and schema."""
|
|
@@ -72,11 +73,36 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
72
73
|
|
|
73
74
|
run_sync(file_storage.ensure_directory_exists())
|
|
74
75
|
|
|
75
|
-
|
|
76
|
-
self.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
76
|
+
try:
|
|
77
|
+
self.db = Database(
|
|
78
|
+
self.db_path,
|
|
79
|
+
buffer_pool_size=2048 * 1024 * 1024, # 2048MB buffer pool
|
|
80
|
+
max_db_size=4096 * 1024 * 1024,
|
|
81
|
+
)
|
|
82
|
+
except RuntimeError:
|
|
83
|
+
from .kuzu_migrate import read_kuzu_storage_version
|
|
84
|
+
import kuzu
|
|
85
|
+
|
|
86
|
+
kuzu_db_version = read_kuzu_storage_version(self.db_path)
|
|
87
|
+
if (
|
|
88
|
+
kuzu_db_version == "0.9.0" or kuzu_db_version == "0.8.2"
|
|
89
|
+
) and kuzu_db_version != kuzu.__version__:
|
|
90
|
+
# Try to migrate kuzu database to latest version
|
|
91
|
+
from .kuzu_migrate import kuzu_migration
|
|
92
|
+
|
|
93
|
+
kuzu_migration(
|
|
94
|
+
new_db=self.db_path + "_new",
|
|
95
|
+
old_db=self.db_path,
|
|
96
|
+
new_version=kuzu.__version__,
|
|
97
|
+
old_version=kuzu_db_version,
|
|
98
|
+
overwrite=True,
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
self.db = Database(
|
|
102
|
+
self.db_path,
|
|
103
|
+
buffer_pool_size=2048 * 1024 * 1024, # 2048MB buffer pool
|
|
104
|
+
max_db_size=4096 * 1024 * 1024,
|
|
105
|
+
)
|
|
80
106
|
|
|
81
107
|
self.db.init_database()
|
|
82
108
|
self.connection = Connection(self.db)
|
|
@@ -111,6 +137,10 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
111
137
|
from cognee.infrastructure.files.storage.S3FileStorage import S3FileStorage
|
|
112
138
|
|
|
113
139
|
s3_file_storage = S3FileStorage("")
|
|
140
|
+
|
|
141
|
+
async with self.KUZU_ASYNC_LOCK:
|
|
142
|
+
self.connection.execute("CHECKPOINT;")
|
|
143
|
+
|
|
114
144
|
s3_file_storage.s3.put(self.temp_graph_file, self.db_path, recursive=True)
|
|
115
145
|
|
|
116
146
|
async def pull_from_s3(self) -> None:
|
|
@@ -120,7 +150,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
120
150
|
try:
|
|
121
151
|
s3_file_storage.s3.get(self.db_path, self.temp_graph_file, recursive=True)
|
|
122
152
|
except FileNotFoundError:
|
|
123
|
-
|
|
153
|
+
logger.warning(f"Kuzu S3 storage file not found: {self.db_path}")
|
|
124
154
|
|
|
125
155
|
async def query(self, query: str, params: Optional[dict] = None) -> List[Tuple]:
|
|
126
156
|
"""
|
|
@@ -1438,11 +1468,8 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1438
1468
|
It raises exceptions for failures occurring during deletion processes.
|
|
1439
1469
|
"""
|
|
1440
1470
|
try:
|
|
1441
|
-
# Use DETACH DELETE to remove both nodes and their relationships in one operation
|
|
1442
|
-
await self.query("MATCH (n:Node) DETACH DELETE n")
|
|
1443
|
-
logger.info("Cleared all data from graph while preserving structure")
|
|
1444
|
-
|
|
1445
1471
|
if self.connection:
|
|
1472
|
+
self.connection.close()
|
|
1446
1473
|
self.connection = None
|
|
1447
1474
|
if self.db:
|
|
1448
1475
|
self.db.close()
|
|
@@ -1502,7 +1529,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1502
1529
|
logger.error(f"Error during database clearing: {e}")
|
|
1503
1530
|
raise
|
|
1504
1531
|
|
|
1505
|
-
async def get_document_subgraph(self,
|
|
1532
|
+
async def get_document_subgraph(self, data_id: str):
|
|
1506
1533
|
"""
|
|
1507
1534
|
Get all nodes that should be deleted when removing a document.
|
|
1508
1535
|
|
|
@@ -1513,7 +1540,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1513
1540
|
Parameters:
|
|
1514
1541
|
-----------
|
|
1515
1542
|
|
|
1516
|
-
-
|
|
1543
|
+
- data_id (str): The identifier for the document to query against.
|
|
1517
1544
|
|
|
1518
1545
|
Returns:
|
|
1519
1546
|
--------
|
|
@@ -1523,7 +1550,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1523
1550
|
"""
|
|
1524
1551
|
query = """
|
|
1525
1552
|
MATCH (doc:Node)
|
|
1526
|
-
WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument') AND doc.
|
|
1553
|
+
WHERE (doc.type = 'TextDocument' OR doc.type = 'PdfDocument' OR doc.type = 'AudioDocument' OR doc.type = 'ImageDocument' OR doc.type = 'UnstructuredDocument') AND doc.id = $data_id
|
|
1527
1554
|
|
|
1528
1555
|
OPTIONAL MATCH (doc)<-[e1:EDGE]-(chunk:Node)
|
|
1529
1556
|
WHERE e1.relationship_name = 'is_part_of' AND chunk.type = 'DocumentChunk'
|
|
@@ -1534,7 +1561,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1534
1561
|
MATCH (entity)<-[e3:EDGE]-(otherChunk:Node)-[e4:EDGE]->(otherDoc:Node)
|
|
1535
1562
|
WHERE e3.relationship_name = 'contains'
|
|
1536
1563
|
AND e4.relationship_name = 'is_part_of'
|
|
1537
|
-
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
|
|
1564
|
+
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
|
|
1538
1565
|
AND otherDoc.id <> doc.id
|
|
1539
1566
|
}
|
|
1540
1567
|
|
|
@@ -1550,7 +1577,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1550
1577
|
AND e9.relationship_name = 'is_part_of'
|
|
1551
1578
|
AND otherEntity.type = 'Entity'
|
|
1552
1579
|
AND otherChunk.type = 'DocumentChunk'
|
|
1553
|
-
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument')
|
|
1580
|
+
AND (otherDoc.type = 'TextDocument' OR otherDoc.type = 'PdfDocument' OR otherDoc.type = 'AudioDocument' OR otherDoc.type = 'ImageDocument' OR otherDoc.type = 'UnstructuredDocument')
|
|
1554
1581
|
AND otherDoc.id <> doc.id
|
|
1555
1582
|
}
|
|
1556
1583
|
|
|
@@ -1561,7 +1588,7 @@ class KuzuAdapter(GraphDBInterface):
|
|
|
1561
1588
|
COLLECT(DISTINCT made_node) as made_from_nodes,
|
|
1562
1589
|
COLLECT(DISTINCT type) as orphan_types
|
|
1563
1590
|
"""
|
|
1564
|
-
result = await self.query(query, {"
|
|
1591
|
+
result = await self.query(query, {"data_id": f"{data_id}"})
|
|
1565
1592
|
if not result or not result[0]:
|
|
1566
1593
|
return None
|
|
1567
1594
|
|