cognee 0.3.4.dev4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/api/client.py +16 -7
- cognee/api/health.py +5 -9
- cognee/api/v1/add/add.py +3 -1
- cognee/api/v1/cognify/cognify.py +44 -7
- cognee/api/v1/permissions/routers/get_permissions_router.py +8 -4
- cognee/api/v1/search/search.py +3 -0
- cognee/api/v1/ui/__init__.py +1 -1
- cognee/api/v1/ui/ui.py +215 -150
- cognee/api/v1/update/__init__.py +1 -0
- cognee/api/v1/update/routers/__init__.py +1 -0
- cognee/api/v1/update/routers/get_update_router.py +90 -0
- cognee/api/v1/update/update.py +100 -0
- cognee/base_config.py +5 -2
- cognee/cli/_cognee.py +28 -10
- cognee/cli/commands/delete_command.py +34 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +2 -2
- cognee/eval_framework/evaluation/direct_llm_eval_adapter.py +3 -2
- cognee/eval_framework/modal_eval_dashboard.py +9 -1
- cognee/infrastructure/databases/graph/config.py +9 -9
- cognee/infrastructure/databases/graph/get_graph_engine.py +4 -21
- cognee/infrastructure/databases/graph/kuzu/adapter.py +60 -9
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +3 -3
- cognee/infrastructure/databases/relational/config.py +4 -4
- cognee/infrastructure/databases/relational/create_relational_engine.py +11 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +7 -3
- cognee/infrastructure/databases/vector/config.py +7 -7
- cognee/infrastructure/databases/vector/create_vector_engine.py +7 -15
- cognee/infrastructure/databases/vector/embeddings/EmbeddingEngine.py +9 -0
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +19 -2
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -0
- cognee/infrastructure/databases/vector/embeddings/config.py +8 -0
- cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py +5 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +11 -10
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +48 -38
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -4
- cognee/infrastructure/files/storage/S3FileStorage.py +15 -5
- cognee/infrastructure/files/storage/s3_config.py +1 -0
- cognee/infrastructure/files/utils/open_data_file.py +7 -14
- cognee/infrastructure/llm/LLMGateway.py +19 -117
- cognee/infrastructure/llm/config.py +28 -13
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_categories.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_event_entities.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/extract_summary.py +3 -2
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_content_graph.py +2 -1
- cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/extract_event_graph.py +3 -2
- cognee/infrastructure/llm/prompts/read_query_prompt.py +3 -2
- cognee/infrastructure/llm/prompts/show_prompt.py +35 -0
- cognee/infrastructure/llm/prompts/test.txt +1 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/__init__.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/async_client.py +50 -397
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/inlinedbaml.py +2 -3
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/parser.py +8 -88
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/runtime.py +78 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/stream_types.py +2 -99
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/sync_client.py +49 -401
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_builder.py +19 -882
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/type_map.py +2 -34
- cognee/infrastructure/llm/structured_output_framework/baml/baml_client/types.py +2 -107
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/acreate_structured_output.baml +26 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/__init__.py +1 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +76 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/create_dynamic_baml_type.py +122 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/generators.baml +3 -3
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +0 -32
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +107 -98
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +5 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +0 -26
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +17 -67
- cognee/infrastructure/llm/tokenizer/Gemini/adapter.py +8 -7
- cognee/infrastructure/llm/utils.py +4 -4
- cognee/infrastructure/loaders/LoaderEngine.py +5 -2
- cognee/infrastructure/loaders/external/__init__.py +7 -0
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +244 -0
- cognee/infrastructure/loaders/supported_loaders.py +7 -0
- cognee/modules/data/methods/create_authorized_dataset.py +9 -0
- cognee/modules/data/methods/get_authorized_dataset.py +1 -1
- cognee/modules/data/methods/get_authorized_dataset_by_name.py +11 -0
- cognee/modules/data/methods/get_deletion_counts.py +92 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +1 -1
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +22 -8
- cognee/modules/graph/utils/retrieve_existing_edges.py +0 -2
- cognee/modules/ingestion/data_types/TextData.py +0 -1
- cognee/modules/observability/get_observe.py +14 -0
- cognee/modules/observability/observers.py +1 -0
- cognee/modules/ontology/base_ontology_resolver.py +42 -0
- cognee/modules/ontology/get_default_ontology_resolver.py +41 -0
- cognee/modules/ontology/matching_strategies.py +53 -0
- cognee/modules/ontology/models.py +20 -0
- cognee/modules/ontology/ontology_config.py +24 -0
- cognee/modules/ontology/ontology_env_config.py +45 -0
- cognee/modules/ontology/rdf_xml/{OntologyResolver.py → RDFLibOntologyResolver.py} +20 -28
- cognee/modules/pipelines/layers/resolve_authorized_user_dataset.py +21 -24
- cognee/modules/pipelines/layers/resolve_authorized_user_datasets.py +3 -3
- cognee/modules/retrieval/code_retriever.py +2 -1
- cognee/modules/retrieval/context_providers/TripletSearchContextProvider.py +1 -4
- cognee/modules/retrieval/graph_completion_cot_retriever.py +6 -5
- cognee/modules/retrieval/graph_completion_retriever.py +0 -3
- cognee/modules/retrieval/insights_retriever.py +1 -1
- cognee/modules/retrieval/jaccard_retrival.py +60 -0
- cognee/modules/retrieval/lexical_retriever.py +123 -0
- cognee/modules/retrieval/natural_language_retriever.py +2 -1
- cognee/modules/retrieval/temporal_retriever.py +3 -2
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +2 -12
- cognee/modules/retrieval/utils/completion.py +4 -7
- cognee/modules/search/methods/get_search_type_tools.py +7 -0
- cognee/modules/search/methods/no_access_control_search.py +1 -1
- cognee/modules/search/methods/search.py +32 -13
- cognee/modules/search/types/SearchType.py +1 -0
- cognee/modules/users/permissions/methods/authorized_give_permission_on_datasets.py +12 -0
- cognee/modules/users/permissions/methods/check_permission_on_dataset.py +11 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +10 -0
- cognee/modules/users/permissions/methods/get_document_ids_for_user.py +10 -0
- cognee/modules/users/permissions/methods/get_principal.py +9 -0
- cognee/modules/users/permissions/methods/get_principal_datasets.py +11 -0
- cognee/modules/users/permissions/methods/get_role.py +10 -0
- cognee/modules/users/permissions/methods/get_specific_user_permission_datasets.py +3 -3
- cognee/modules/users/permissions/methods/get_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_role.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_tenant.py +9 -0
- cognee/modules/users/permissions/methods/give_default_permission_to_user.py +9 -0
- cognee/modules/users/permissions/methods/give_permission_on_dataset.py +10 -0
- cognee/modules/users/roles/methods/add_user_to_role.py +11 -0
- cognee/modules/users/roles/methods/create_role.py +12 -1
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +12 -0
- cognee/modules/users/tenants/methods/create_tenant.py +12 -1
- cognee/modules/visualization/cognee_network_visualization.py +13 -9
- cognee/shared/data_models.py +0 -1
- cognee/shared/utils.py +0 -32
- cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py +2 -2
- cognee/tasks/codingagents/coding_rule_associations.py +3 -2
- cognee/tasks/entity_completion/entity_extractors/llm_entity_extractor.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_content_nodes_and_relationship_names.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_edge_triplets.py +3 -2
- cognee/tasks/graph/cascade_extract/utils/extract_nodes.py +3 -2
- cognee/tasks/graph/extract_graph_from_code.py +2 -2
- cognee/tasks/graph/extract_graph_from_data.py +55 -12
- cognee/tasks/graph/extract_graph_from_data_v2.py +16 -4
- cognee/tasks/ingestion/migrate_relational_database.py +132 -41
- cognee/tasks/ingestion/resolve_data_directories.py +4 -1
- cognee/tasks/schema/ingest_database_schema.py +134 -0
- cognee/tasks/schema/models.py +40 -0
- cognee/tasks/storage/index_data_points.py +1 -1
- cognee/tasks/storage/index_graph_edges.py +3 -1
- cognee/tasks/summarization/summarize_code.py +2 -2
- cognee/tasks/summarization/summarize_text.py +2 -2
- cognee/tasks/temporal_graph/enrich_events.py +2 -2
- cognee/tasks/temporal_graph/extract_events_and_entities.py +2 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +13 -4
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +13 -3
- cognee/tests/test_advanced_pdf_loader.py +141 -0
- cognee/tests/test_chromadb.py +40 -0
- cognee/tests/test_cognee_server_start.py +6 -1
- cognee/tests/test_data/Quantum_computers.txt +9 -0
- cognee/tests/test_lancedb.py +211 -0
- cognee/tests/test_pgvector.py +40 -0
- cognee/tests/test_relational_db_migration.py +76 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +2 -1
- cognee/tests/unit/modules/ontology/test_ontology_adapter.py +330 -13
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -4
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +0 -4
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/METADATA +92 -96
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/RECORD +173 -159
- distributed/pyproject.toml +0 -1
- cognee/infrastructure/data/utils/extract_keywords.py +0 -48
- cognee/infrastructure/databases/hybrid/falkordb/FalkorDBAdapter.py +0 -1227
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_categories.baml +0 -109
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extract_content_graph.baml +0 -343
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_categories.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/extract_summary.py +0 -89
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/__init__.py +0 -0
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/knowledge_graph/extract_content_graph.py +0 -44
- cognee/tasks/graph/infer_data_ontology.py +0 -309
- cognee/tests/test_falkordb.py +0 -174
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/knowledge_graph/__init__.py +0 -0
- /cognee/infrastructure/llm/{structured_output_framework/litellm_instructor/extraction → extraction}/texts.json +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/WHEEL +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.4.dev4.dist-info → cognee-0.3.5.dist-info}/licenses/NOTICE.md +0 -0
cognee/api/v1/update/update.py
ADDED
@@ -0,0 +1,100 @@
+from uuid import UUID
+from typing import Union, BinaryIO, List, Optional
+
+from cognee.modules.users.models import User
+from cognee.api.v1.delete import delete
+from cognee.api.v1.add import add
+from cognee.api.v1.cognify import cognify
+
+
+async def update(
+    data_id: UUID,
+    data: Union[BinaryIO, list[BinaryIO], str, list[str]],
+    user: User = None,
+    node_set: Optional[List[str]] = None,
+    dataset_id: Optional[UUID] = None,
+    vector_db_config: dict = None,
+    graph_db_config: dict = None,
+    preferred_loaders: List[str] = None,
+    incremental_loading: bool = True,
+):
+    """
+    Update existing data in Cognee.
+
+    Supported Input Types:
+    - **Text strings**: Direct text content (str) - any string not starting with "/" or "file://"
+    - **File paths**: Local file paths as strings in these formats:
+        * Absolute paths: "/path/to/document.pdf"
+        * File URLs: "file:///path/to/document.pdf" or "file://relative/path.txt"
+        * S3 paths: "s3://bucket-name/path/to/file.pdf"
+    - **Binary file objects**: File handles/streams (BinaryIO)
+    - **Lists**: Multiple files or text strings in a single call
+
+    Supported File Formats:
+    - Text files (.txt, .md, .csv)
+    - PDFs (.pdf)
+    - Images (.png, .jpg, .jpeg) - extracted via OCR/vision models
+    - Audio files (.mp3, .wav) - transcribed to text
+    - Code files (.py, .js, .ts, etc.) - parsed for structure and content
+    - Office documents (.docx, .pptx)
+
+    Workflow:
+    1. **Data Resolution**: Resolves file paths and validates accessibility
+    2. **Content Extraction**: Extracts text content from various file formats
+    3. **Dataset Storage**: Stores processed content in the specified dataset
+    4. **Metadata Tracking**: Records file metadata, timestamps, and user permissions
+    5. **Permission Assignment**: Grants user read/write/delete/share permissions on dataset
+
+    Args:
+        data_id: UUID of existing data to update
+        data: The latest version of the data. Can be:
+            - Single text string: "Your text content here"
+            - Absolute file path: "/path/to/document.pdf"
+            - File URL: "file:///absolute/path/to/document.pdf" or "file://relative/path.txt"
+            - S3 path: "s3://my-bucket/documents/file.pdf"
+            - List of mixed types: ["text content", "/path/file.pdf", "file://doc.txt", file_handle]
+            - Binary file object: open("file.txt", "rb")
+        dataset_name: Name of the dataset to store data in. Defaults to "main_dataset".
+            Create separate datasets to organize different knowledge domains.
+        user: User object for authentication and permissions. Uses default user if None.
+            Default user: "default_user@example.com" (created automatically on first use).
+            Users can only access datasets they have permissions for.
+        node_set: Optional list of node identifiers for graph organization and access control.
+            Used for grouping related data points in the knowledge graph.
+        vector_db_config: Optional configuration for vector database (for custom setups).
+        graph_db_config: Optional configuration for graph database (for custom setups).
+        dataset_id: Optional specific dataset UUID to use instead of dataset_name.
+
+    Returns:
+        PipelineRunInfo: Information about the ingestion pipeline execution including:
+        - Pipeline run ID for tracking
+        - Dataset ID where data was stored
+        - Processing status and any errors
+        - Execution timestamps and metadata
+    """
+    await delete(
+        data_id=data_id,
+        dataset_id=dataset_id,
+        user=user,
+    )
+
+    await add(
+        data=data,
+        dataset_id=dataset_id,
+        user=user,
+        node_set=node_set,
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
+        preferred_loaders=preferred_loaders,
+        incremental_loading=incremental_loading,
+    )
+
+    cognify_run = await cognify(
+        datasets=[dataset_id],
+        user=user,
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
+        incremental_loading=incremental_loading,
+    )
+
+    return cognify_run
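For orientation, a minimal usage sketch of the new update API. The UUIDs and file path below are placeholders, and update is assumed to be re-exported from cognee.api.v1.update (the new __init__.py in the file list suggests this):

import asyncio
from uuid import UUID

from cognee.api.v1.update import update


async def main():
    # Placeholder IDs: use the data_id and dataset_id produced by an earlier add()/cognify() run.
    data_id = UUID("00000000-0000-0000-0000-000000000001")
    dataset_id = UUID("00000000-0000-0000-0000-000000000002")

    # update() deletes the old entry, re-adds the new content, then re-runs cognify on the dataset.
    run_info = await update(
        data_id=data_id,
        data="/path/to/updated_document.pdf",
        dataset_id=dataset_id,
    )
    print(run_info)


asyncio.run(main())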
cognee/base_config.py
CHANGED
@@ -11,7 +11,7 @@ class BaseConfig(BaseSettings):
     data_root_directory: str = get_absolute_path(".data_storage")
     system_root_directory: str = get_absolute_path(".cognee_system")
     cache_root_directory: str = get_absolute_path(".cognee_cache")
-    monitoring_tool: object = Observer.LANGFUSE
+    monitoring_tool: object = Observer.NONE
 
     @pydantic.model_validator(mode="after")
     def validate_paths(self):
@@ -30,7 +30,10 @@ class BaseConfig(BaseSettings):
         # Require absolute paths for root directories
         self.data_root_directory = ensure_absolute_path(self.data_root_directory)
         self.system_root_directory = ensure_absolute_path(self.system_root_directory)
-
+        # Set monitoring tool based on available keys
+        if self.langfuse_public_key and self.langfuse_secret_key:
+            self.monitoring_tool = Observer.LANGFUSE
+
         return self
 
     langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
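A short sketch of the new monitoring default: monitoring_tool now starts as Observer.NONE and is switched to Langfuse only when both keys are configured. The key values below are placeholders, and the variables must be set before cognee reads its settings:

import os

# Placeholder credentials; both must be present for Langfuse monitoring to activate.
os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."

from cognee.base_config import get_base_config
from cognee.modules.observability.observers import Observer

config = get_base_config()
print(config.monitoring_tool)  # Observer.LANGFUSE here; Observer.NONE if either key is missing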
cognee/cli/_cognee.py
CHANGED
@@ -183,10 +183,20 @@ def main() -> int:
 
         for pid in spawned_pids:
             try:
-
-
-
-
+                if hasattr(os, "killpg"):
+                    # Unix-like systems: Use process groups
+                    pgid = os.getpgid(pid)
+                    os.killpg(pgid, signal.SIGTERM)
+                    fmt.success(f"✓ Process group {pgid} (PID {pid}) terminated.")
+                else:
+                    # Windows: Use taskkill to terminate process and its children
+                    subprocess.run(
+                        ["taskkill", "/F", "/T", "/PID", str(pid)],
+                        capture_output=True,
+                        check=False,
+                    )
+                    fmt.success(f"✓ Process {pid} and its children terminated.")
+            except (OSError, ProcessLookupError, subprocess.SubprocessError) as e:
                 fmt.warning(f"Could not terminate process {pid}: {e}")
 
         sys.exit(0)
@@ -204,19 +214,27 @@ def main() -> int:
             nonlocal spawned_pids
             spawned_pids.append(pid)
 
+        frontend_port = 3000
+        start_backend, backend_port = True, 8000
+        start_mcp, mcp_port = True, 8001
         server_process = start_ui(
-
-            port=
+            pid_callback=pid_callback,
+            port=frontend_port,
             open_browser=True,
-            start_backend=True,
             auto_download=True,
-
+            start_backend=start_backend,
+            backend_port=backend_port,
+            start_mcp=start_mcp,
+            mcp_port=mcp_port,
        )
 
        if server_process:
            fmt.success("UI server started successfully!")
-            fmt.echo("The interface is available at: http://localhost:
-
+            fmt.echo(f"The interface is available at: http://localhost:{frontend_port}")
+            if start_backend:
+                fmt.echo(f"The API backend is available at: http://localhost:{backend_port}")
+            if start_mcp:
+                fmt.echo(f"The MCP server is available at: http://localhost:{mcp_port}")
            fmt.note("Press Ctrl+C to stop the server...")
cognee/cli/commands/delete_command.py
CHANGED
@@ -6,6 +6,7 @@ from cognee.cli.reference import SupportsCliCommand
 from cognee.cli import DEFAULT_DOCS_URL
 import cognee.cli.echo as fmt
 from cognee.cli.exceptions import CliCommandException, CliCommandInnerException
+from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts
 
 
 class DeleteCommand(SupportsCliCommand):
@@ -41,7 +42,34 @@ Be careful with deletion operations as they are irreversible.
             fmt.error("Please specify what to delete: --dataset-name, --user-id, or --all")
             return
 
-        #
+        # If --force is used, skip the preview and go straight to deletion
+        if not args.force:
+            # --- START PREVIEW LOGIC ---
+            fmt.echo("Gathering data for preview...")
+            try:
+                preview_data = asyncio.run(
+                    get_deletion_counts(
+                        dataset_name=args.dataset_name,
+                        user_id=args.user_id,
+                        all_data=args.all,
+                    )
+                )
+            except CliCommandException as e:
+                fmt.error(f"Error occured when fetching preview data: {str(e)}")
+                return
+
+            if not preview_data:
+                fmt.success("No data found to delete.")
+                return
+
+            fmt.echo("You are about to delete:")
+            fmt.echo(
+                f"Datasets: {preview_data.datasets}\nEntries: {preview_data.entries}\nUsers: {preview_data.users}"
+            )
+            fmt.echo("-" * 20)
+            # --- END PREVIEW LOGIC ---
+
+        # Build operation message for success/failure logging
         if args.all:
             confirm_msg = "Delete ALL data from cognee?"
             operation = "all data"
@@ -51,8 +79,9 @@ Be careful with deletion operations as they are irreversible.
         elif args.user_id:
             confirm_msg = f"Delete all data for user '{args.user_id}'?"
             operation = f"data for user '{args.user_id}'"
+        else:
+            operation = "data"
 
-        # Confirm deletion unless forced
         if not args.force:
             fmt.warning("This operation is irreversible!")
             if not fmt.confirm(confirm_msg):
@@ -64,6 +93,8 @@ Be careful with deletion operations as they are irreversible.
             # Run the async delete function
             async def run_delete():
                 try:
+                    # NOTE: The underlying cognee.delete() function is currently not working as expected.
+                    # This is a separate bug that this preview feature helps to expose.
                     if args.all:
                         await cognee.delete(dataset_name=None, user_id=args.user_id)
                     else:
@@ -72,6 +103,7 @@ Be careful with deletion operations as they are irreversible.
                     raise CliCommandInnerException(f"Failed to delete: {str(e)}")
 
             asyncio.run(run_delete())
+            # This success message may be inaccurate due to the underlying bug, but we leave it for now.
             fmt.success(f"Successfully deleted {operation}")
 
         except Exception as e:
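The preview helper added above can also be called directly; a sketch assuming get_deletion_counts returns an object exposing datasets, entries, and users counts (as the fmt.echo call in the CLI code suggests) or a falsy value when nothing matches:

import asyncio

from cognee.modules.data.methods.get_deletion_counts import get_deletion_counts


async def preview():
    # Mirrors what `cognee delete --dataset-name my_dataset` now prints before asking for confirmation.
    counts = await get_deletion_counts(dataset_name="my_dataset", user_id=None, all_data=False)
    if counts:
        print(f"Datasets: {counts.datasets}, Entries: {counts.entries}, Users: {counts.users}")
    else:
        print("No data found to delete.")


asyncio.run(preview())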
cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py
CHANGED
@@ -5,7 +5,7 @@ from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.tasks.graph import extract_graph_from_data
 from cognee.tasks.storage import add_data_points
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.ontology.rdf_xml.RDFLibOntologyResolver import RDFLibOntologyResolver
 
 
 async def get_default_tasks_by_indices(
@@ -33,7 +33,7 @@ async def get_no_summary_tasks(
     # Get base tasks (0=classify, 1=check_permissions, 2=extract_chunks)
     base_tasks = await get_default_tasks_by_indices([0, 1, 2], chunk_size, chunker)
 
-    ontology_adapter = OntologyResolver(ontology_file=ontology_file_path)
+    ontology_adapter = RDFLibOntologyResolver(ontology_file=ontology_file_path)
 
     graph_task = Task(
         extract_graph_from_data,
cognee/eval_framework/evaluation/direct_llm_eval_adapter.py
CHANGED
@@ -3,6 +3,7 @@ from pydantic import BaseModel
 from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
 from cognee.eval_framework.eval_config import EvalConfig
 
+from cognee.infrastructure.llm.prompts import render_prompt, read_query_prompt
 from cognee.infrastructure.llm import LLMGateway
 
 
@@ -25,8 +26,8 @@ class DirectLLMEvalAdapter(BaseEvalAdapter):
     ) -> Dict[str, Any]:
         args = {"question": question, "answer": answer, "golden_answer": golden_answer}
 
-        user_prompt =
-        system_prompt =
+        user_prompt = render_prompt(self.eval_prompt_path, args)
+        system_prompt = read_query_prompt(self.system_prompt_path)
 
         evaluation = await LLMGateway.acreate_structured_output(
             text_input=user_prompt,
cognee/eval_framework/modal_eval_dashboard.py
CHANGED
@@ -1,6 +1,6 @@
 import os
 import json
-import pandas as pd
+
 import subprocess
 import modal
 import streamlit as st
@@ -78,6 +78,14 @@ def main():
         }
     )
 
+    try:
+        import pandas as pd
+    except ImportError:
+        st.error(
+            "Pandas is required for the evaluation dashboard. Install with 'pip install cognee\"[evals]\"' to use this feature."
+        )
+        return
+
     df = pd.DataFrame(records)
     if df.empty:
         st.warning("No JSON files found in the volume.")
cognee/infrastructure/databases/graph/config.py
CHANGED
@@ -50,26 +50,26 @@ class GraphConfig(BaseSettings):
     # Model validator updates graph_filename and path dynamically after class creation based on current database provider
     # If no specific graph_filename or path are provided
     @pydantic.model_validator(mode="after")
-    def fill_derived(
-        provider =
+    def fill_derived(self):
+        provider = self.graph_database_provider.lower()
         base_config = get_base_config()
 
         # Set default filename if no filename is provided
-        if not
-
+        if not self.graph_filename:
+            self.graph_filename = f"cognee_graph_{provider}"
 
         # Handle graph file path
-        if
+        if self.graph_file_path:
             # Check if absolute path is provided
-
-            os.path.join(
+            self.graph_file_path = ensure_absolute_path(
+                os.path.join(self.graph_file_path, self.graph_filename)
             )
         else:
             # Default path
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
-
+            self.graph_file_path = os.path.join(databases_directory_path, self.graph_filename)
 
-        return
+        return self
 
     def to_dict(self) -> dict:
         """
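The reconstructed fill_derived validator in action, as a sketch; get_graph_config is the accessor this config module is assumed to expose, and the printed values depend on the configured provider and system root:

from cognee.infrastructure.databases.graph.config import get_graph_config

config = get_graph_config()
# With provider "kuzu" and no explicit filename or path, fill_derived yields:
#   graph_filename  -> "cognee_graph_kuzu"
#   graph_file_path -> <system_root_directory>/databases/cognee_graph_kuzu
print(config.graph_filename)
print(config.graph_file_path)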
cognee/infrastructure/databases/graph/get_graph_engine.py
CHANGED
@@ -44,16 +44,14 @@ def create_graph_engine(
     Parameters:
     -----------
 
-    - graph_database_provider: The type of graph database provider to use (e.g., neo4j,
-
-    - graph_database_url: The URL for the graph database instance. Required for neo4j
-      and falkordb providers.
+    - graph_database_provider: The type of graph database provider to use (e.g., neo4j, falkor, kuzu).
+    - graph_database_url: The URL for the graph database instance. Required for neo4j and falkordb providers.
     - graph_database_username: The username for authentication with the graph database.
       Required for neo4j provider.
     - graph_database_password: The password for authentication with the graph database.
       Required for neo4j provider.
     - graph_database_port: The port number for the graph database connection. Required
-      for the falkordb provider
+      for the falkordb provider
     - graph_file_path: The filesystem path to the graph file. Required for the kuzu
       provider.
@@ -86,21 +84,6 @@ def create_graph_engine(
         graph_database_name=graph_database_name or None,
     )
 
-    elif graph_database_provider == "falkordb":
-        if not (graph_database_url and graph_database_port):
-            raise EnvironmentError("Missing required FalkorDB credentials.")
-
-        from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
-        from cognee.infrastructure.databases.hybrid.falkordb.FalkorDBAdapter import FalkorDBAdapter
-
-        embedding_engine = get_embedding_engine()
-
-        return FalkorDBAdapter(
-            database_url=graph_database_url,
-            database_port=graph_database_port,
-            embedding_engine=embedding_engine,
-        )
-
     elif graph_database_provider == "kuzu":
         if not graph_file_path:
             raise EnvironmentError("Missing required Kuzu database path.")
@@ -179,5 +162,5 @@ def create_graph_engine(
 
     raise EnvironmentError(
         f"Unsupported graph database provider: {graph_database_provider}. "
-        f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', '
+        f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['neo4j', 'kuzu', 'kuzu-remote', 'memgraph', 'neptune', 'neptune_analytics'])}"
     )
cognee/infrastructure/databases/graph/kuzu/adapter.py
CHANGED
@@ -48,6 +48,29 @@ class KuzuAdapter(GraphDBInterface):
 
     def _initialize_connection(self) -> None:
         """Initialize the Kuzu database connection and schema."""
+
+        def _install_json_extension():
+            """
+            Function handles installing of the json extension for the current Kuzu version.
+            This has to be done with an empty graph db before connecting to an existing database otherwise
+            missing json extension errors will be raised.
+            """
+            try:
+                with tempfile.NamedTemporaryFile(mode="w", delete=True) as temp_file:
+                    temp_graph_file = temp_file.name
+                    tmp_db = Database(
+                        temp_graph_file,
+                        buffer_pool_size=2048 * 1024 * 1024,  # 2048MB buffer pool
+                        max_db_size=4096 * 1024 * 1024,
+                    )
+                    tmp_db.init_database()
+                    connection = Connection(tmp_db)
+                    connection.execute("INSTALL JSON;")
+            except Exception as e:
+                logger.info(f"JSON extension already installed or not needed: {e}")
+
+        _install_json_extension()
+
         try:
             if "s3://" in self.db_path:
                 with tempfile.NamedTemporaryFile(mode="w", delete=False) as temp_file:
@@ -109,11 +132,6 @@ class KuzuAdapter(GraphDBInterface):
             self.db.init_database()
             self.connection = Connection(self.db)
 
-            try:
-                self.connection.execute("INSTALL JSON;")
-            except Exception as e:
-                logger.info(f"JSON extension already installed or not needed: {e}")
-
             try:
                 self.connection.execute("LOAD EXTENSION JSON;")
                 logger.info("Loaded JSON extension")
@@ -1277,7 +1295,6 @@ class KuzuAdapter(GraphDBInterface):
         A tuple containing a list of filtered node properties and a list of filtered edge
         properties.
         """
-
         where_clauses = []
         params = {}
@@ -1288,16 +1305,50 @@ class KuzuAdapter(GraphDBInterface):
             params[param_name] = values
 
         where_clause = " AND ".join(where_clauses)
-        nodes_query =
+        nodes_query = (
+            f"MATCH (n:Node) WHERE {where_clause} RETURN n.id, {{properties: n.properties}}"
+        )
         edges_query = f"""
             MATCH (n1:Node)-[r:EDGE]->(n2:Node)
             WHERE {where_clause.replace("n.", "n1.")} AND {where_clause.replace("n.", "n2.")}
-            RETURN properties
+            RETURN n1.id, n2.id, r.relationship_name, r.properties
         """
         nodes, edges = await asyncio.gather(
             self.query(nodes_query, params), self.query(edges_query, params)
         )
-
+        formatted_nodes = []
+        for n in nodes:
+            if n[0]:
+                node_id = str(n[0])
+                props = n[1]
+                if props.get("properties"):
+                    try:
+                        additional_props = json.loads(props["properties"])
+                        props.update(additional_props)
+                        del props["properties"]
+                    except json.JSONDecodeError:
+                        logger.warning(f"Failed to parse properties JSON for node {node_id}")
+                formatted_nodes.append((node_id, props))
+        if not formatted_nodes:
+            logger.warning("No nodes found in the database")
+            return [], []
+
+        formatted_edges = []
+        for e in edges:
+            if e and len(e) >= 3:
+                source_id = str(e[0])
+                target_id = str(e[1])
+                rel_type = str(e[2])
+                props = {}
+                if len(e) > 3 and e[3]:
+                    try:
+                        props = json.loads(e[3])
+                    except (json.JSONDecodeError, TypeError):
+                        logger.warning(
+                            f"Failed to parse edge properties for {source_id}->{target_id}"
+                        )
+                formatted_edges.append((source_id, target_id, rel_type, props))
+        return formatted_nodes, formatted_edges
 
     async def get_graph_metrics(self, include_optional=False) -> Dict[str, Any]:
         """
cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py
CHANGED
@@ -234,7 +234,7 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
         collection_name: str,
         query_text: Optional[str] = None,
         query_vector: Optional[List[float]] = None,
-        limit: int = None,
+        limit: Optional[int] = None,
         with_vector: bool = False,
     ):
         """
@@ -265,10 +265,10 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
             "Use this option only when vector data is required."
         )
 
-        # In the case of excessive limit, or zero / negative value, limit will be set to 10.
+        # In the case of excessive limit, or None / zero / negative value, limit will be set to 10.
         if not limit or limit <= self._TOPK_LOWER_BOUND or limit > self._TOPK_UPPER_BOUND:
             logger.warning(
-                "Provided limit (%s) is invalid (zero, negative, or exceeds maximum). "
+                "Provided limit (%s) is invalid (None, zero, negative, or exceeds maximum). "
                 "Defaulting to limit=10.",
                 limit,
             )
cognee/infrastructure/databases/relational/config.py
CHANGED
@@ -23,14 +23,14 @@ class RelationalConfig(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     @pydantic.model_validator(mode="after")
-    def fill_derived(
+    def fill_derived(self):
         # Set file path based on graph database provider if no file path is provided
-        if not
+        if not self.db_path:
             base_config = get_base_config()
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
-
+            self.db_path = databases_directory_path
 
-        return
+        return self
 
     def to_dict(self) -> dict:
         """
cognee/infrastructure/databases/relational/create_relational_engine.py
CHANGED
@@ -39,8 +39,16 @@ def create_relational_engine(
     connection_string = f"sqlite+aiosqlite:///{db_path}/{db_name}"
 
     if db_provider == "postgres":
-
-
-
+        try:
+            # Test if asyncpg is available
+            import asyncpg
+
+            connection_string = (
+                f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
+            )
+        except ImportError:
+            raise ImportError(
+                "PostgreSQL dependencies are not installed. Please install with 'pip install cognee\"[postgres]\"' or 'pip install cognee\"[postgres-binary]\"' to use PostgreSQL functionality."
+            )
 
     return SQLAlchemyAdapter(connection_string)
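For reference, a sketch of the two connection strings this factory builds (credentials and paths below are placeholders). The try/except around the asyncpg import makes a missing Postgres driver fail fast with an install hint instead of surfacing later inside SQLAlchemy:

# SQLite (default): async driver over a local database file
db_path, db_name = "/tmp/cognee/databases", "cognee_db"
sqlite_url = f"sqlite+aiosqlite:///{db_path}/{db_name}"

# Postgres: requires the asyncpg extra, e.g. pip install "cognee[postgres]"
db_username, db_password, db_host, db_port = "cognee", "cognee", "127.0.0.1", 5432
postgres_url = f"postgresql+asyncpg://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"

print(sqlite_url)    # sqlite+aiosqlite:////tmp/cognee/databases/cognee_db
print(postgres_url)  # postgresql+asyncpg://cognee:cognee@127.0.0.1:5432/cognee_db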
cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py
CHANGED
@@ -352,7 +352,7 @@ class ChromaDBAdapter(VectorDBInterface):
         collection_name: str,
         query_text: str = None,
         query_vector: List[float] = None,
-        limit: int = 15,
+        limit: Optional[int] = 15,
         with_vector: bool = False,
         normalized: bool = True,
     ):
@@ -386,9 +386,13 @@ class ChromaDBAdapter(VectorDBInterface):
         try:
             collection = await self.get_collection(collection_name)
 
-            if limit
+            if limit is None:
                 limit = await collection.count()
 
+            # If limit is still 0, no need to do the search, just return empty results
+            if limit <= 0:
+                return []
+
             results = await collection.query(
                 query_embeddings=[query_vector],
                 include=["metadatas", "distances", "embeddings"]
@@ -428,7 +432,7 @@ class ChromaDBAdapter(VectorDBInterface):
                 for row in vector_list
             ]
         except Exception as e:
-            logger.
+            logger.warning(f"Error in search: {str(e)}")
             return []
 
     async def batch_search(
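The new limit handling in search, condensed into a standalone sketch (collection stands in for the adapter's ChromaDB collection handle):

async def resolve_search_limit(collection, limit):
    """Mirror of the adapter's new behavior, for illustration only."""
    # limit=None now means "search the whole collection": fall back to its size.
    if limit is None:
        limit = await collection.count()
    # A limit of 0 (or less) short-circuits: the adapter returns [] without querying.
    return limit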
cognee/infrastructure/databases/vector/config.py
CHANGED
@@ -30,21 +30,21 @@ class VectorConfig(BaseSettings):
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
     @pydantic.model_validator(mode="after")
-    def validate_paths(
+    def validate_paths(self):
         base_config = get_base_config()
 
         # If vector_db_url is provided and is not a path skip checking if path is absolute (as it can also be a url)
-        if
+        if self.vector_db_url and Path(self.vector_db_url).exists():
             # Relative path to absolute
-
-
+            self.vector_db_url = ensure_absolute_path(
+                self.vector_db_url,
             )
-        elif not
+        elif not self.vector_db_url:
             # Default path
             databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
-
+            self.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
 
-        return
+        return self
 
     def to_dict(self) -> dict:
         """