cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +44 -4
- cognee/api/health.py +332 -0
- cognee/api/v1/add/add.py +5 -2
- cognee/api/v1/add/routers/get_add_router.py +3 -0
- cognee/api/v1/cognify/code_graph_pipeline.py +3 -1
- cognee/api/v1/cognify/cognify.py +8 -0
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -1
- cognee/api/v1/config/config.py +3 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +2 -8
- cognee/api/v1/delete/delete.py +16 -12
- cognee/api/v1/responses/routers/get_responses_router.py +3 -1
- cognee/api/v1/search/search.py +10 -0
- cognee/api/v1/settings/routers/get_settings_router.py +0 -2
- cognee/base_config.py +1 -0
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +5 -6
- cognee/infrastructure/databases/graph/config.py +2 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +58 -12
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -10
- cognee/infrastructure/databases/graph/kuzu/adapter.py +43 -16
- cognee/infrastructure/databases/graph/kuzu/kuzu_migrate.py +281 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +151 -77
- cognee/infrastructure/databases/graph/neptune_driver/__init__.py +15 -0
- cognee/infrastructure/databases/graph/neptune_driver/adapter.py +1427 -0
- cognee/infrastructure/databases/graph/neptune_driver/exceptions.py +115 -0
- cognee/infrastructure/databases/graph/neptune_driver/neptune_utils.py +224 -0
- cognee/infrastructure/databases/graph/networkx/adapter.py +3 -3
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +449 -0
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +8 -3
- cognee/infrastructure/databases/vector/create_vector_engine.py +31 -23
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +3 -1
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +21 -6
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +4 -3
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +3 -1
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +22 -16
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +36 -34
- cognee/infrastructure/databases/vector/vector_db_interface.py +78 -7
- cognee/infrastructure/files/utils/get_data_file_path.py +39 -0
- cognee/infrastructure/files/utils/guess_file_type.py +2 -2
- cognee/infrastructure/files/utils/open_data_file.py +4 -23
- cognee/infrastructure/llm/LLMGateway.py +137 -0
- cognee/infrastructure/llm/__init__.py +14 -4
- cognee/infrastructure/llm/config.py +29 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_hotpot_using_cognee_search.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question.txt +1 -1
- cognee/infrastructure/llm/prompts/answer_simple_question_restricted.txt +1 -1
- cognee/infrastructure/llm/prompts/categorize_categories.txt +1 -1
- cognee/infrastructure/llm/prompts/classify_content.txt +1 -1
- cognee/infrastructure/llm/prompts/context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/graph_context_for_question.txt +1 -1
- cognee/infrastructure/llm/prompts/natural_language_retriever_system.txt +1 -1
- cognee/infrastructure/llm/prompts/patch_gen_instructions.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +130 -0
- cognee/infrastructure/llm/prompts/summarize_code.txt +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +57 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +533 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/config.py +94 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/globals.py +37 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +21 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +131 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +266 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +137 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +550 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/tracing.py +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +962 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +52 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +166 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +109 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +343 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/baml/baml_src}/extraction/__init__.py +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +89 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +33 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +18 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/__init__.py +3 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/extraction/extract_categories.py +12 -0
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/extract_summary.py +16 -7
- cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/extract_content_graph.py +7 -6
- cognee/infrastructure/llm/{anthropic → structured_output_framework/litellm_instructor/llm/anthropic}/adapter.py +10 -4
- cognee/infrastructure/llm/{gemini → structured_output_framework/litellm_instructor/llm/gemini}/adapter.py +6 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/__init__.py +0 -0
- cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/litellm_instructor/llm/generic_llm_api}/adapter.py +7 -3
- cognee/infrastructure/llm/{get_llm_client.py → structured_output_framework/litellm_instructor/llm/get_llm_client.py} +18 -6
- cognee/infrastructure/llm/{llm_interface.py → structured_output_framework/litellm_instructor/llm/llm_interface.py} +2 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/__init__.py +0 -0
- cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor/llm/ollama}/adapter.py +4 -2
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/__init__.py +0 -0
- cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm/openai}/adapter.py +6 -4
- cognee/infrastructure/llm/{rate_limiter.py → structured_output_framework/litellm_instructor/llm/rate_limiter.py} +0 -5
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +4 -2
- cognee/infrastructure/llm/tokenizer/TikToken/adapter.py +7 -3
- cognee/infrastructure/llm/tokenizer/__init__.py +4 -0
- cognee/infrastructure/llm/utils.py +3 -1
- cognee/infrastructure/loaders/LoaderEngine.py +156 -0
- cognee/infrastructure/loaders/LoaderInterface.py +73 -0
- cognee/infrastructure/loaders/__init__.py +18 -0
- cognee/infrastructure/loaders/core/__init__.py +7 -0
- cognee/infrastructure/loaders/core/audio_loader.py +98 -0
- cognee/infrastructure/loaders/core/image_loader.py +114 -0
- cognee/infrastructure/loaders/core/text_loader.py +90 -0
- cognee/infrastructure/loaders/create_loader_engine.py +32 -0
- cognee/infrastructure/loaders/external/__init__.py +22 -0
- cognee/infrastructure/loaders/external/pypdf_loader.py +96 -0
- cognee/infrastructure/loaders/external/unstructured_loader.py +127 -0
- cognee/infrastructure/loaders/get_loader_engine.py +18 -0
- cognee/infrastructure/loaders/supported_loaders.py +18 -0
- cognee/infrastructure/loaders/use_loader.py +21 -0
- cognee/infrastructure/loaders/utils/__init__.py +0 -0
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/get_authorized_dataset.py +23 -0
- cognee/modules/data/models/Data.py +13 -3
- cognee/modules/data/processing/document_types/AudioDocument.py +2 -2
- cognee/modules/data/processing/document_types/ImageDocument.py +2 -2
- cognee/modules/data/processing/document_types/PdfDocument.py +4 -11
- cognee/modules/data/processing/document_types/UnstructuredDocument.py +2 -5
- cognee/modules/engine/utils/generate_edge_id.py +5 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +45 -35
- cognee/modules/graph/methods/get_formatted_graph_data.py +8 -2
- cognee/modules/graph/utils/get_graph_from_model.py +93 -101
- cognee/modules/ingestion/data_types/TextData.py +8 -2
- cognee/modules/ingestion/save_data_to_file.py +1 -1
- cognee/modules/pipelines/exceptions/__init__.py +1 -0
- cognee/modules/pipelines/exceptions/exceptions.py +12 -0
- cognee/modules/pipelines/models/DataItemStatus.py +5 -0
- cognee/modules/pipelines/models/PipelineRunInfo.py +6 -0
- cognee/modules/pipelines/models/__init__.py +1 -0
- cognee/modules/pipelines/operations/pipeline.py +10 -2
- cognee/modules/pipelines/operations/run_tasks.py +252 -20
- cognee/modules/pipelines/operations/run_tasks_distributed.py +1 -1
- cognee/modules/retrieval/chunks_retriever.py +23 -1
- cognee/modules/retrieval/code_retriever.py +66 -9
- cognee/modules/retrieval/completion_retriever.py +11 -9
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +0 -2
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +0 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +8 -9
- cognee/modules/retrieval/graph_completion_retriever.py +1 -1
- cognee/modules/retrieval/insights_retriever.py +4 -0
- cognee/modules/retrieval/natural_language_retriever.py +9 -15
- cognee/modules/retrieval/summaries_retriever.py +23 -1
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +23 -4
- cognee/modules/retrieval/utils/completion.py +6 -9
- cognee/modules/retrieval/utils/description_to_codepart_search.py +2 -3
- cognee/modules/search/methods/search.py +5 -1
- cognee/modules/search/operations/__init__.py +1 -0
- cognee/modules/search/operations/select_search_type.py +42 -0
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/settings/get_settings.py +0 -8
- cognee/modules/settings/save_vector_db_config.py +1 -1
- cognee/shared/data_models.py +3 -1
- cognee/shared/logging_utils.py +0 -5
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/documents/extract_chunks_from_documents.py +10 -12
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +4 -6
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +6 -7
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +4 -7
- cognee/tasks/graph/extract_graph_from_code.py +3 -2
- cognee/tasks/graph/extract_graph_from_data.py +4 -3
- cognee/tasks/graph/infer_data_ontology.py +5 -6
- cognee/tasks/ingestion/data_item_to_text_file.py +79 -0
- cognee/tasks/ingestion/ingest_data.py +91 -61
- cognee/tasks/ingestion/resolve_data_directories.py +3 -0
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +3 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +4 -1
- cognee/tasks/summarization/summarize_code.py +2 -3
- cognee/tasks/summarization/summarize_text.py +3 -2
- cognee/tests/test_cognee_server_start.py +12 -7
- cognee/tests/test_deduplication.py +2 -2
- cognee/tests/test_deletion.py +58 -17
- cognee/tests/test_graph_visualization_permissions.py +161 -0
- cognee/tests/test_neptune_analytics_graph.py +309 -0
- cognee/tests/test_neptune_analytics_hybrid.py +176 -0
- cognee/tests/{test_weaviate.py → test_neptune_analytics_vector.py} +86 -11
- cognee/tests/test_pgvector.py +5 -5
- cognee/tests/test_s3.py +1 -6
- cognee/tests/unit/infrastructure/databases/test_rate_limiter.py +11 -10
- cognee/tests/unit/infrastructure/databases/vector/__init__.py +0 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +1 -1
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +5 -5
- cognee/tests/unit/infrastructure/test_rate_limiting_realistic.py +6 -4
- cognee/tests/unit/infrastructure/test_rate_limiting_retry.py +1 -1
- cognee/tests/unit/interfaces/graph/get_graph_from_model_unit_test.py +61 -3
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +84 -9
- cognee/tests/unit/modules/search/search_methods_test.py +55 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/METADATA +13 -9
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/RECORD +203 -164
- cognee/infrastructure/databases/vector/pinecone/adapter.py +0 -8
- cognee/infrastructure/databases/vector/qdrant/QDrantAdapter.py +0 -514
- cognee/infrastructure/databases/vector/qdrant/__init__.py +0 -2
- cognee/infrastructure/databases/vector/weaviate_db/WeaviateAdapter.py +0 -527
- cognee/infrastructure/databases/vector/weaviate_db/__init__.py +0 -1
- cognee/modules/data/extraction/extract_categories.py +0 -14
- cognee/tests/test_qdrant.py +0 -99
- distributed/Dockerfile +0 -34
- distributed/app.py +0 -4
- distributed/entrypoint.py +0 -71
- distributed/entrypoint.sh +0 -5
- distributed/modal_image.py +0 -11
- distributed/queues.py +0 -5
- distributed/tasks/queued_add_data_points.py +0 -13
- distributed/tasks/queued_add_edges.py +0 -13
- distributed/tasks/queued_add_nodes.py +0 -13
- distributed/test.py +0 -28
- distributed/utils.py +0 -19
- distributed/workers/data_point_saving_worker.py +0 -93
- distributed/workers/graph_saving_worker.py +0 -104
- /cognee/infrastructure/databases/{graph/memgraph → hybrid/neptune_analytics}/__init__.py +0 -0
- /cognee/infrastructure/{llm → databases/vector/embeddings}/embedding_rate_limiter.py +0 -0
- /cognee/infrastructure/{databases/vector/pinecone → llm/structured_output_framework}/__init__.py +0 -0
- /cognee/infrastructure/llm/{anthropic → structured_output_framework/baml/baml_src}/__init__.py +0 -0
- /cognee/infrastructure/llm/{gemini/__init__.py → structured_output_framework/baml/baml_src/extraction/extract_categories.py} +0 -0
- /cognee/infrastructure/llm/{generic_llm_api → structured_output_framework/baml/baml_src/extraction/knowledge_graph}/__init__.py +0 -0
- /cognee/infrastructure/llm/{ollama → structured_output_framework/litellm_instructor}/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/knowledge_graph/__init__.py +0 -0
- /cognee/{modules/data → infrastructure/llm/structured_output_framework/litellm_instructor}/extraction/texts.json +0 -0
- /cognee/infrastructure/llm/{openai → structured_output_framework/litellm_instructor/llm}/__init__.py +0 -0
- {distributed → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic}/__init__.py +0 -0
- {distributed/tasks → cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini}/__init__.py +0 -0
- /cognee/modules/data/{extraction/knowledge_graph → methods}/add_model_class_to_graph.py +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/WHEEL +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.2.1.dev7.dist-info → cognee-0.2.2.dev1.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Kuzu Database Migration Script
|
|
4
|
+
|
|
5
|
+
This script migrates Kuzu databases between different versions by:
|
|
6
|
+
1. Setting up isolated Python environments for each Kuzu version
|
|
7
|
+
2. Exporting data from the source database using the old version
|
|
8
|
+
3. Importing data into the target database using the new version
|
|
9
|
+
4. If overwrite is enabled target database will replace source database and source database will have the prefix _old
|
|
10
|
+
5. If delete-old is enabled target database will be renamed to source database and source database will be deleted
|
|
11
|
+
|
|
12
|
+
The script automatically handles:
|
|
13
|
+
- Environment setup (creates virtual environments as needed)
|
|
14
|
+
- Export/import validation
|
|
15
|
+
- Error handling and reporting
|
|
16
|
+
|
|
17
|
+
Usage Examples:
|
|
18
|
+
# Basic migration from 0.9.0 to 0.11.0
|
|
19
|
+
python kuzu_migrate.py --old-version 0.9.0 --new-version 0.11.0 --old-db /path/to/old/database --new-db /path/to/new/database
|
|
20
|
+
|
|
21
|
+
Requirements:
|
|
22
|
+
- Python 3.7+
|
|
23
|
+
- Internet connection (to download Kuzu packages)
|
|
24
|
+
- Sufficient disk space for virtual environments and temporary exports
|
|
25
|
+
|
|
26
|
+
Notes:
|
|
27
|
+
- Can only be used to migrate to newer Kuzu versions, from 0.11.0 onwards
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
import tempfile
|
|
31
|
+
import sys
|
|
32
|
+
import struct
|
|
33
|
+
import shutil
|
|
34
|
+
import subprocess
|
|
35
|
+
import argparse
|
|
36
|
+
import os
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Maps the on-disk storage version code (the little-endian integer read from
# the database header / catalog.kz file) to the Kuzu release that wrote it.
# Used by read_kuzu_storage_version() for automatic source-version detection.
kuzu_version_mapping = {
    34: "0.7.0",
    35: "0.7.1",
    36: "0.8.2",
    37: "0.9.0",
    38: "0.10.1",
    39: "0.11.0",
}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def read_kuzu_storage_version(kuzu_db_path: str) -> str:
    """
    Read the Kùzu storage version code from a database and map it to a release string.

    For directory-based databases the code lives in the "catalog.kz" file;
    for file-based databases it is at the start of the database file itself.
    The layout is: 3-byte magic "KUZ", one byte of padding, then a
    little-endian unsigned 64-bit storage version code.

    :param kuzu_db_path: Path to the Kuzu database file/directory.
    :return: Kuzu release version string (e.g. "0.9.0") for the stored code.
        (The original annotation said ``int``, but the mapped release string
        is what is returned and what ensure_env() expects.)
    :raises FileNotFoundError: If a directory database has no catalog.kz file.
    :raises ValueError: If the header is truncated or the code is unknown.
    """
    if os.path.isdir(kuzu_db_path):
        kuzu_version_file_path = os.path.join(kuzu_db_path, "catalog.kz")
        if not os.path.isfile(kuzu_version_file_path):
            # Missing catalog file means the version cannot be auto-detected.
            # (Was FileExistsError, which has the opposite meaning.)
            raise FileNotFoundError("Kuzu catalog.kz file does not exist")
    else:
        kuzu_version_file_path = kuzu_db_path

    with open(kuzu_version_file_path, "rb") as f:
        # Skip the 3-byte magic "KUZ" and one byte of padding.
        f.seek(4)
        # Read the next 8 bytes as a little-endian unsigned 64-bit integer.
        data = f.read(8)
        if len(data) < 8:
            raise ValueError(
                f"File '{kuzu_version_file_path}' does not contain a storage version code."
            )
        version_code = struct.unpack("<Q", data)[0]

    try:
        return kuzu_version_mapping[version_code]
    except KeyError:
        raise ValueError("Could not map version_code to proper Kuzu version.") from None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def ensure_env(version: str, export_dir) -> str:
    """
    Create a venv at {export_dir}/.kuzu_envs/{version} and install kuzu=={version}.

    Any pre-existing environment for the same version is removed first so the
    install always starts from a clean slate.

    :param version: Kuzu release to install (e.g. "0.9.0").
    :param export_dir: Temporary directory in which the venv tree is created.
    :return: Path to the venv's python executable.
    """
    # Use temp directory to create venv
    kuzu_envs_dir = os.path.join(export_dir, ".kuzu_envs")

    # venv base under the temp directory
    base = os.path.join(kuzu_envs_dir, version)
    # The venv interpreter lives under "Scripts" on Windows and "bin" elsewhere.
    if os.name == "nt":
        py_bin = os.path.join(base, "Scripts", "python.exe")
    else:
        py_bin = os.path.join(base, "bin", "python")

    # If environment already exists clean it so stale packages can't leak in
    if os.path.isfile(py_bin):
        shutil.rmtree(base)

    print(f"→ Setting up venv for Kùzu {version}...", file=sys.stderr)
    # Create venv
    # NOTE: Running python in debug mode can cause issues with creating a virtual environment from that python instance
    subprocess.run([sys.executable, "-m", "venv", base], check=True)
    # Install the specific Kùzu version into the isolated environment
    subprocess.run([py_bin, "-m", "pip", "install", "--upgrade", "pip"], check=True)
    subprocess.run([py_bin, "-m", "pip", "install", f"kuzu=={version}"], check=True)
    return py_bin
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def run_migration_step(python_exe: str, db_path: str, cypher: str):
    """
    Execute a single Cypher command against a Kùzu database.

    A small driver program is generated and run with ``python_exe`` (one of
    the per-version venv interpreters), so each step uses the right Kuzu
    release. On failure, the child's stderr is reported and the script exits
    with the child's return code.
    """
    driver_code = f"""
import kuzu
db = kuzu.Database(r"{db_path}")
conn = kuzu.Connection(db)
conn.execute(r\"\"\"{cypher}\"\"\")
"""
    result = subprocess.run([python_exe, "-c", driver_code], capture_output=True, text=True)
    if result.returncode == 0:
        return
    print(f"[ERROR] {cypher} failed:\n{result.stderr}", file=sys.stderr)
    sys.exit(result.returncode)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def kuzu_migration(new_db, old_db, new_version, old_version=None, overwrite=None, delete_old=None):
    """
    Main migration function that handles the complete migration process.

    Exports the source database with an isolated venv running ``old_version``
    of Kuzu, imports the export into ``new_db`` with ``new_version``, and
    optionally swaps the migrated database into the old database's place.

    :param new_db: Target database path; must not already exist.
    :param old_db: Source database path (file or directory).
    :param new_version: Kuzu version to migrate to (e.g. "0.11.0").
    :param old_version: Source Kuzu version; auto-detected from the database
        header when omitted.
    :param overwrite: Replace old_db with the migrated database, keeping the
        original as a *_old_<version> backup.
    :param delete_old: Replace old_db with the migrated database and delete
        the original instead of keeping a backup.
    :raises FileExistsError: If new_db already exists.
    :raises ValueError: If the export produced no usable schema file.
    """
    print(f"🔄 Migrating Kuzu database from {old_version} to {new_version}", file=sys.stderr)
    print(f"📂 Source: {old_db}", file=sys.stderr)
    print("", file=sys.stderr)

    # Check if old database exists. This must happen BEFORE version
    # auto-detection, otherwise a missing database crashes while reading the
    # header instead of producing this friendly message.
    if not os.path.exists(old_db):
        print(f"Source database '{old_db}' does not exist.", file=sys.stderr)
        sys.exit(1)

    # If version of old kuzu db is not provided try to determine it based on file info
    if not old_version:
        old_version = read_kuzu_storage_version(old_db)

    # Prepare target - ensure parent directory exists but refuse to clobber
    # an existing database at the target path.
    parent_dir = os.path.dirname(new_db)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    if os.path.exists(new_db):
        raise FileExistsError(
            "File already exists at new database location, remove file or change new database file path to continue"
        )

    # Use temp directory for all processing, it will be cleaned up after with statement
    with tempfile.TemporaryDirectory() as export_dir:
        # Set up isolated environments for both Kuzu versions.
        print(f"Setting up Kuzu {old_version} environment...", file=sys.stderr)
        old_py = ensure_env(old_version, export_dir)
        print(f"Setting up Kuzu {new_version} environment...", file=sys.stderr)
        new_py = ensure_env(new_version, export_dir)

        export_file = os.path.join(export_dir, "kuzu_export")
        print(f"Exporting old DB → {export_dir}", file=sys.stderr)
        run_migration_step(old_py, old_db, f"EXPORT DATABASE '{export_file}'")
        print("Export complete.", file=sys.stderr)

        # Sanity-check that the export produced a non-empty schema file
        # before attempting the import.
        schema_file = os.path.join(export_file, "schema.cypher")
        if not os.path.exists(schema_file) or os.path.getsize(schema_file) == 0:
            raise ValueError(f"Schema file not found: {schema_file}")

        print(f"Importing into new DB at {new_db}", file=sys.stderr)
        run_migration_step(new_py, new_db, f"IMPORT DATABASE '{export_file}'")
        print("Import complete.", file=sys.stderr)

        # Rename new kuzu database to old kuzu database name if enabled
        if overwrite or delete_old:
            # Remove kuzu lock from migrated DB before moving it into place.
            lock_file = new_db + ".lock"
            if os.path.exists(lock_file):
                os.remove(lock_file)
            rename_databases(old_db, old_version, new_db, delete_old)

    print("✅ Kuzu graph database migration finished successfully!")
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def rename_databases(old_db: str, old_version: str, new_db: str, delete_old: bool):
    """
    When overwrite is enabled, back up the original old_db (file with .lock and .wal or directory)
    by renaming it to *_old, and replace it with the newly imported new_db files.

    When delete_old is enabled replace the old database with the new one and delete old database

    :param old_db: Path to the original (pre-migration) database file or directory.
    :param old_version: Source Kuzu version string; embedded in the backup name.
    :param new_db: Path to the freshly migrated database to move into place.
    :param delete_old: If True the original database is deleted instead of
        being kept as a backup.
    """
    base_dir = os.path.dirname(old_db)
    name = os.path.basename(old_db.rstrip(os.sep))
    # Add _old_ and version info to backup graph database
    # e.g. "graph_db" migrated from 0.9.0 backs up as "graph_db_old_0_9_0".
    backup_database_name = f"{name}_old_" + old_version.replace(".", "_")
    backup_base = os.path.join(base_dir, backup_database_name)

    if os.path.isfile(old_db):
        # File-based database: handle main file and accompanying lock/WAL
        for ext in ["", ".wal"]:
            src = old_db + ext
            dst = backup_base + ext
            if os.path.exists(src):
                if delete_old:
                    os.remove(src)
                else:
                    os.rename(src, dst)
                    print(f"Renamed '{src}' to '{dst}'", file=sys.stderr)
    elif os.path.isdir(old_db):
        # Directory-based Kuzu database
        backup_dir = backup_base
        if delete_old:
            shutil.rmtree(old_db)
        else:
            os.rename(old_db, backup_dir)
            print(f"Renamed directory '{old_db}' to '{backup_dir}'", file=sys.stderr)
    else:
        # Neither a file nor a directory: nothing to back up, abort.
        print(f"Original database path '{old_db}' not found for renaming.", file=sys.stderr)
        sys.exit(1)

    # Now move new files into place
    # NOTE(review): this assumes the migrated database is file-based (main
    # file plus optional .wal) — confirm for directory-based targets.
    for ext in ["", ".wal"]:
        src_new = new_db + ext
        dst_new = os.path.join(base_dir, name + ext)
        if os.path.exists(src_new):
            os.rename(src_new, dst_new)
            print(f"Renamed '{src_new}' to '{dst_new}'", file=sys.stderr)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def main():
    """Command-line entry point: parse arguments and run the migration."""
    parser = argparse.ArgumentParser(
        description="Migrate Kùzu DB via PyPI versions",
        epilog="""
Examples:
  %(prog)s --old-version 0.9.0 --new-version 0.11.0 \\
    --old-db /path/to/old/db --new-db /path/to/new/db --overwrite

Note: This script will create temporary virtual environments in .kuzu_envs/ directory
to isolate different Kuzu versions.
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--old-version",
        required=False,
        default=None,
        help="Source Kuzu version (e.g., 0.9.0). If not provided automatic kuzu version detection will be attempted.",
    )
    parser.add_argument("--new-version", required=True, help="Target Kuzu version (e.g., 0.11.0)")
    parser.add_argument("--old-db", required=True, help="Path to source database directory")
    parser.add_argument(
        "--new-db",
        required=True,
        help="Path to target database directory, it can't be the same path as the old database. Use the overwrite flag if you want to replace the old database with the new one.",
    )
    parser.add_argument(
        "--overwrite",
        required=False,
        action="store_true",
        default=False,
        help="Rename new-db to the old-db name and location, keeps old-db as backup if delete-old is not True",
    )
    parser.add_argument(
        "--delete-old",
        required=False,
        action="store_true",
        default=False,
        help="When overwrite and delete-old is True old-db will not be stored as backup",
    )

    cli_args = parser.parse_args()

    kuzu_migration(
        new_db=cli_args.new_db,
        old_db=cli_args.old_db,
        new_version=cli_args.new_version,
        old_version=cli_args.old_version,
        overwrite=cli_args.overwrite,
        delete_old=cli_args.delete_old,
    )


if __name__ == "__main__":
    main()
|
|
@@ -33,7 +33,7 @@ from .neo4j_metrics_utils import (
|
|
|
33
33
|
from .deadlock_retry import deadlock_retry
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
logger = get_logger("Neo4jAdapter"
|
|
36
|
+
logger = get_logger("Neo4jAdapter")
|
|
37
37
|
|
|
38
38
|
BASE_LABEL = "__Node__"
|
|
39
39
|
|
|
@@ -50,6 +50,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
50
50
|
graph_database_url: str,
|
|
51
51
|
graph_database_username: Optional[str] = None,
|
|
52
52
|
graph_database_password: Optional[str] = None,
|
|
53
|
+
graph_database_name: Optional[str] = None,
|
|
53
54
|
driver: Optional[Any] = None,
|
|
54
55
|
):
|
|
55
56
|
# Only use auth if both username and password are provided
|
|
@@ -59,7 +60,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
59
60
|
elif graph_database_username or graph_database_password:
|
|
60
61
|
logger = get_logger(__name__)
|
|
61
62
|
logger.warning("Neo4j credentials incomplete – falling back to anonymous connection.")
|
|
62
|
-
|
|
63
|
+
self.graph_database_name = graph_database_name
|
|
63
64
|
self.driver = driver or AsyncGraphDatabase.driver(
|
|
64
65
|
graph_database_url,
|
|
65
66
|
auth=auth,
|
|
@@ -80,7 +81,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
80
81
|
"""
|
|
81
82
|
Get a session for database operations.
|
|
82
83
|
"""
|
|
83
|
-
async with self.driver.session() as session:
|
|
84
|
+
async with self.driver.session(database=self.graph_database_name) as session:
|
|
84
85
|
yield session
|
|
85
86
|
|
|
86
87
|
@deadlock_retry()
|
|
@@ -410,6 +411,38 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
410
411
|
|
|
411
412
|
return await self.query(query, params)
|
|
412
413
|
|
|
414
|
+
def _flatten_edge_properties(self, properties: Dict[str, Any]) -> Dict[str, Any]:
|
|
415
|
+
"""
|
|
416
|
+
Flatten edge properties to handle nested dictionaries like weights.
|
|
417
|
+
|
|
418
|
+
Neo4j doesn't support nested dictionaries as property values, so we need to
|
|
419
|
+
flatten the 'weights' dictionary into individual properties with prefixes.
|
|
420
|
+
|
|
421
|
+
Args:
|
|
422
|
+
properties: Dictionary of edge properties that may contain nested dicts
|
|
423
|
+
|
|
424
|
+
Returns:
|
|
425
|
+
Flattened properties dictionary suitable for Neo4j storage
|
|
426
|
+
"""
|
|
427
|
+
flattened = {}
|
|
428
|
+
|
|
429
|
+
for key, value in properties.items():
|
|
430
|
+
if key == "weights" and isinstance(value, dict):
|
|
431
|
+
# Flatten weights dictionary into individual properties
|
|
432
|
+
for weight_name, weight_value in value.items():
|
|
433
|
+
flattened[f"weight_{weight_name}"] = weight_value
|
|
434
|
+
elif isinstance(value, dict):
|
|
435
|
+
# For other nested dictionaries, serialize as JSON string
|
|
436
|
+
flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
|
|
437
|
+
elif isinstance(value, list):
|
|
438
|
+
# For lists, serialize as JSON string
|
|
439
|
+
flattened[f"{key}_json"] = json.dumps(value, cls=JSONEncoder)
|
|
440
|
+
else:
|
|
441
|
+
# Keep primitive types as-is
|
|
442
|
+
flattened[key] = value
|
|
443
|
+
|
|
444
|
+
return flattened
|
|
445
|
+
|
|
413
446
|
@record_graph_changes
|
|
414
447
|
@override_distributed(queued_add_edges)
|
|
415
448
|
async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None:
|
|
@@ -448,11 +481,13 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
448
481
|
"from_node": str(edge[0]),
|
|
449
482
|
"to_node": str(edge[1]),
|
|
450
483
|
"relationship_name": edge[2],
|
|
451
|
-
"properties":
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
484
|
+
"properties": self._flatten_edge_properties(
|
|
485
|
+
{
|
|
486
|
+
**(edge[3] if edge[3] else {}),
|
|
487
|
+
"source_node_id": str(edge[0]),
|
|
488
|
+
"target_node_id": str(edge[1]),
|
|
489
|
+
}
|
|
490
|
+
),
|
|
456
491
|
}
|
|
457
492
|
for edge in edges
|
|
458
493
|
]
|
|
@@ -870,34 +905,52 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
870
905
|
|
|
871
906
|
A tuple containing two lists: nodes and edges with their properties.
|
|
872
907
|
"""
|
|
873
|
-
|
|
908
|
+
import time
|
|
874
909
|
|
|
875
|
-
|
|
910
|
+
start_time = time.time()
|
|
876
911
|
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
912
|
+
try:
|
|
913
|
+
# Retrieve nodes
|
|
914
|
+
query = "MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties"
|
|
915
|
+
result = await self.query(query)
|
|
916
|
+
|
|
917
|
+
nodes = []
|
|
918
|
+
for record in result:
|
|
919
|
+
nodes.append(
|
|
920
|
+
(
|
|
921
|
+
record["properties"]["id"],
|
|
922
|
+
record["properties"],
|
|
923
|
+
)
|
|
924
|
+
)
|
|
925
|
+
|
|
926
|
+
# Retrieve edges
|
|
927
|
+
query = """
|
|
928
|
+
MATCH (n)-[r]->(m)
|
|
929
|
+
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
|
|
930
|
+
"""
|
|
931
|
+
result = await self.query(query)
|
|
932
|
+
|
|
933
|
+
edges = []
|
|
934
|
+
for record in result:
|
|
935
|
+
edges.append(
|
|
936
|
+
(
|
|
937
|
+
record["properties"]["source_node_id"],
|
|
938
|
+
record["properties"]["target_node_id"],
|
|
939
|
+
record["type"],
|
|
940
|
+
record["properties"],
|
|
941
|
+
)
|
|
942
|
+
)
|
|
943
|
+
|
|
944
|
+
retrieval_time = time.time() - start_time
|
|
945
|
+
logger.info(
|
|
946
|
+
f"Retrieved {len(nodes)} nodes and {len(edges)} edges in {retrieval_time:.2f} seconds"
|
|
881
947
|
)
|
|
882
|
-
for record in result
|
|
883
|
-
]
|
|
884
948
|
|
|
885
|
-
|
|
886
|
-
MATCH (n)-[r]->(m)
|
|
887
|
-
RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties
|
|
888
|
-
"""
|
|
889
|
-
result = await self.query(query)
|
|
890
|
-
edges = [
|
|
891
|
-
(
|
|
892
|
-
record["properties"]["source_node_id"],
|
|
893
|
-
record["properties"]["target_node_id"],
|
|
894
|
-
record["type"],
|
|
895
|
-
record["properties"],
|
|
896
|
-
)
|
|
897
|
-
for record in result
|
|
898
|
-
]
|
|
949
|
+
return (nodes, edges)
|
|
899
950
|
|
|
900
|
-
|
|
951
|
+
except Exception as e:
|
|
952
|
+
logger.error(f"Error during graph data retrieval: {str(e)}")
|
|
953
|
+
raise
|
|
901
954
|
|
|
902
955
|
async def get_nodeset_subgraph(
|
|
903
956
|
self, node_type: Type[Any], node_name: List[str]
|
|
@@ -918,50 +971,71 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
918
971
|
- Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]}: A tuple
|
|
919
972
|
containing nodes and edges in the requested subgraph.
|
|
920
973
|
"""
|
|
921
|
-
|
|
974
|
+
import time
|
|
922
975
|
|
|
923
|
-
|
|
924
|
-
UNWIND $names AS wantedName
|
|
925
|
-
MATCH (n:`{label}`)
|
|
926
|
-
WHERE n.name = wantedName
|
|
927
|
-
WITH collect(DISTINCT n) AS primary
|
|
928
|
-
UNWIND primary AS p
|
|
929
|
-
OPTIONAL MATCH (p)--(nbr)
|
|
930
|
-
WITH primary, collect(DISTINCT nbr) AS nbrs
|
|
931
|
-
WITH primary + nbrs AS nodelist
|
|
932
|
-
UNWIND nodelist AS node
|
|
933
|
-
WITH collect(DISTINCT node) AS nodes
|
|
934
|
-
MATCH (a)-[r]-(b)
|
|
935
|
-
WHERE a IN nodes AND b IN nodes
|
|
936
|
-
WITH nodes, collect(DISTINCT r) AS rels
|
|
937
|
-
RETURN
|
|
938
|
-
[n IN nodes |
|
|
939
|
-
{{ id: n.id,
|
|
940
|
-
properties: properties(n) }}] AS rawNodes,
|
|
941
|
-
[r IN rels |
|
|
942
|
-
{{ type: type(r),
|
|
943
|
-
properties: properties(r) }}] AS rawRels
|
|
944
|
-
"""
|
|
976
|
+
start_time = time.time()
|
|
945
977
|
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
return [], []
|
|
978
|
+
try:
|
|
979
|
+
label = node_type.__name__
|
|
949
980
|
|
|
950
|
-
|
|
951
|
-
|
|
981
|
+
query = f"""
|
|
982
|
+
UNWIND $names AS wantedName
|
|
983
|
+
MATCH (n:`{label}`)
|
|
984
|
+
WHERE n.name = wantedName
|
|
985
|
+
WITH collect(DISTINCT n) AS primary
|
|
986
|
+
UNWIND primary AS p
|
|
987
|
+
OPTIONAL MATCH (p)--(nbr)
|
|
988
|
+
WITH primary, collect(DISTINCT nbr) AS nbrs
|
|
989
|
+
WITH primary + nbrs AS nodelist
|
|
990
|
+
UNWIND nodelist AS node
|
|
991
|
+
WITH collect(DISTINCT node) AS nodes
|
|
992
|
+
MATCH (a)-[r]-(b)
|
|
993
|
+
WHERE a IN nodes AND b IN nodes
|
|
994
|
+
WITH nodes, collect(DISTINCT r) AS rels
|
|
995
|
+
RETURN
|
|
996
|
+
[n IN nodes |
|
|
997
|
+
{{ id: n.id,
|
|
998
|
+
properties: properties(n) }}] AS rawNodes,
|
|
999
|
+
[r IN rels |
|
|
1000
|
+
{{ type: type(r),
|
|
1001
|
+
properties: properties(r) }}] AS rawRels
|
|
1002
|
+
"""
|
|
952
1003
|
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
1004
|
+
result = await self.query(query, {"names": node_name})
|
|
1005
|
+
|
|
1006
|
+
if not result:
|
|
1007
|
+
return [], []
|
|
1008
|
+
|
|
1009
|
+
raw_nodes = result[0]["rawNodes"]
|
|
1010
|
+
raw_rels = result[0]["rawRels"]
|
|
1011
|
+
|
|
1012
|
+
# Process nodes
|
|
1013
|
+
nodes = []
|
|
1014
|
+
for n in raw_nodes:
|
|
1015
|
+
nodes.append((n["properties"]["id"], n["properties"]))
|
|
1016
|
+
|
|
1017
|
+
# Process edges
|
|
1018
|
+
edges = []
|
|
1019
|
+
for r in raw_rels:
|
|
1020
|
+
edges.append(
|
|
1021
|
+
(
|
|
1022
|
+
r["properties"]["source_node_id"],
|
|
1023
|
+
r["properties"]["target_node_id"],
|
|
1024
|
+
r["type"],
|
|
1025
|
+
r["properties"],
|
|
1026
|
+
)
|
|
1027
|
+
)
|
|
1028
|
+
|
|
1029
|
+
retrieval_time = time.time() - start_time
|
|
1030
|
+
logger.info(
|
|
1031
|
+
f"Retrieved {len(nodes)} nodes and {len(edges)} edges for {node_type.__name__} in {retrieval_time:.2f} seconds"
|
|
960
1032
|
)
|
|
961
|
-
for r in raw_rels
|
|
962
|
-
]
|
|
963
1033
|
|
|
964
|
-
|
|
1034
|
+
return nodes, edges
|
|
1035
|
+
|
|
1036
|
+
except Exception as e:
|
|
1037
|
+
logger.error(f"Error during nodeset subgraph retrieval: {str(e)}")
|
|
1038
|
+
raise
|
|
965
1039
|
|
|
966
1040
|
async def get_filtered_graph_data(self, attribute_filters):
|
|
967
1041
|
"""
|
|
@@ -1011,8 +1085,8 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
1011
1085
|
|
|
1012
1086
|
edges = [
|
|
1013
1087
|
(
|
|
1014
|
-
record["
|
|
1015
|
-
record["
|
|
1088
|
+
record["properties"]["source_node_id"],
|
|
1089
|
+
record["properties"]["target_node_id"],
|
|
1016
1090
|
record["type"],
|
|
1017
1091
|
record["properties"],
|
|
1018
1092
|
)
|
|
@@ -1178,7 +1252,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
1178
1252
|
|
|
1179
1253
|
return mandatory_metrics | optional_metrics
|
|
1180
1254
|
|
|
1181
|
-
async def get_document_subgraph(self,
|
|
1255
|
+
async def get_document_subgraph(self, data_id: str):
|
|
1182
1256
|
"""
|
|
1183
1257
|
Retrieve a subgraph related to a document identified by its content hash, including
|
|
1184
1258
|
related entities and chunks.
|
|
@@ -1196,21 +1270,21 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
1196
1270
|
"""
|
|
1197
1271
|
query = """
|
|
1198
1272
|
MATCH (doc)
|
|
1199
|
-
WHERE (doc:TextDocument OR doc:PdfDocument)
|
|
1200
|
-
AND doc.
|
|
1273
|
+
WHERE (doc:TextDocument OR doc:PdfDocument OR doc:UnstructuredDocument OR doc:AudioDocument or doc:ImageDocument)
|
|
1274
|
+
AND doc.id = $data_id
|
|
1201
1275
|
|
|
1202
1276
|
OPTIONAL MATCH (doc)<-[:is_part_of]-(chunk:DocumentChunk)
|
|
1203
1277
|
OPTIONAL MATCH (chunk)-[:contains]->(entity:Entity)
|
|
1204
1278
|
WHERE NOT EXISTS {
|
|
1205
1279
|
MATCH (entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
|
|
1206
|
-
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
|
|
1280
|
+
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
|
|
1207
1281
|
AND otherDoc.id <> doc.id
|
|
1208
1282
|
}
|
|
1209
1283
|
OPTIONAL MATCH (chunk)<-[:made_from]-(made_node:TextSummary)
|
|
1210
1284
|
OPTIONAL MATCH (entity)-[:is_a]->(type:EntityType)
|
|
1211
1285
|
WHERE NOT EXISTS {
|
|
1212
1286
|
MATCH (type)<-[:is_a]-(otherEntity:Entity)<-[:contains]-(otherChunk:DocumentChunk)-[:is_part_of]->(otherDoc)
|
|
1213
|
-
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument)
|
|
1287
|
+
WHERE (otherDoc:TextDocument OR otherDoc:PdfDocument OR otherDoc:UnstructuredDocument OR otherDoc:AudioDocument or otherDoc:ImageDocument)
|
|
1214
1288
|
AND otherDoc.id <> doc.id
|
|
1215
1289
|
}
|
|
1216
1290
|
|
|
@@ -1221,7 +1295,7 @@ class Neo4jAdapter(GraphDBInterface):
|
|
|
1221
1295
|
collect(DISTINCT made_node) as made_from_nodes,
|
|
1222
1296
|
collect(DISTINCT type) as orphan_types
|
|
1223
1297
|
"""
|
|
1224
|
-
result = await self.query(query, {"
|
|
1298
|
+
result = await self.query(query, {"data_id": data_id})
|
|
1225
1299
|
return result[0] if result else None
|
|
1226
1300
|
|
|
1227
1301
|
async def get_degree_one_nodes(self, node_type: str):
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Neptune Analytics Driver Module
|
|
2
|
+
|
|
3
|
+
This module provides the Neptune Analytics adapter and utilities for interacting
|
|
4
|
+
with Amazon Neptune Analytics graph databases.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .adapter import NeptuneGraphDB
|
|
8
|
+
from . import neptune_utils
|
|
9
|
+
from . import exceptions
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"NeptuneGraphDB",
|
|
13
|
+
"neptune_utils",
|
|
14
|
+
"exceptions",
|
|
15
|
+
]
|