cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +9 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +158 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +86 -9
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +5 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +37 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
- cognee/modules/retrieval/graph_completion_retriever.py +14 -1
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +13 -2
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +26 -3
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +3 -2
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +24 -3
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +6 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -3
- cognee/tests/test_conversation_history.py +68 -5
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +44 -2
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
cognee/infrastructure/databases/graph/neo4j_driver/adapter.py

```diff
@@ -8,7 +8,7 @@ from neo4j import AsyncSession
 from neo4j import AsyncGraphDatabase
 from neo4j.exceptions import Neo4jError
 from contextlib import asynccontextmanager
-from typing import Optional, Any, List, Dict, Type, Tuple
+from typing import Optional, Any, List, Dict, Type, Tuple, Coroutine
 
 from cognee.infrastructure.engine import DataPoint
 from cognee.modules.engine.utils.generate_timestamp_datapoint import date_to_int
```
```diff
@@ -964,6 +964,63 @@ class Neo4jAdapter(GraphDBInterface):
             logger.error(f"Error during graph data retrieval: {str(e)}")
             raise
 
+    async def get_id_filtered_graph_data(self, target_ids: list[str]):
+        """
+        Retrieve graph data filtered by specific node IDs, including their direct neighbors
+        and only edges where one endpoint matches those IDs.
+
+        This version uses a single Cypher query for efficiency.
+        """
+        import time
+
+        start_time = time.time()
+
+        try:
+            if not target_ids:
+                logger.warning("No target IDs provided for ID-filtered graph retrieval.")
+                return [], []
+
+            query = """
+            MATCH ()-[r]-()
+            WHERE startNode(r).id IN $target_ids
+               OR endNode(r).id IN $target_ids
+            WITH DISTINCT r, startNode(r) AS a, endNode(r) AS b
+            RETURN
+                properties(a) AS n_properties,
+                properties(b) AS m_properties,
+                type(r) AS type,
+                properties(r) AS properties
+            """
+
+            result = await self.query(query, {"target_ids": target_ids})
+
+            nodes_dict = {}
+            edges = []
+
+            for record in result:
+                n_props = record["n_properties"]
+                m_props = record["m_properties"]
+                r_props = record["properties"]
+                r_type = record["type"]
+
+                nodes_dict[n_props["id"]] = (n_props["id"], n_props)
+                nodes_dict[m_props["id"]] = (m_props["id"], m_props)
+
+                source_id = r_props.get("source_node_id", n_props["id"])
+                target_id = r_props.get("target_node_id", m_props["id"])
+                edges.append((source_id, target_id, r_type, r_props))
+
+            retrieval_time = time.time() - start_time
+            logger.info(
+                f"ID-filtered retrieval: {len(nodes_dict)} nodes and {len(edges)} edges in {retrieval_time:.2f}s"
+            )
+
+            return list(nodes_dict.values()), edges
+
+        except Exception as e:
+            logger.error(f"Error during ID-filtered graph data retrieval: {str(e)}")
+            raise
+
     async def get_nodeset_subgraph(
         self, node_type: Type[Any], node_name: List[str]
     ) -> Tuple[List[Tuple[int, dict]], List[Tuple[int, int, str, dict]]]:
```
```diff
@@ -1470,3 +1527,25 @@ class Neo4jAdapter(GraphDBInterface):
         time_ids_list = [item["id"] for item in time_nodes if "id" in item]
 
         return ", ".join(f"'{uid}'" for uid in time_ids_list)
+
+    async def get_triplets_batch(self, offset: int, limit: int) -> list[dict[str, Any]]:
+        """
+        Retrieve a batch of triplets (start_node, relationship, end_node) from the graph.
+
+        Parameters:
+        -----------
+        - offset (int): Number of triplets to skip before returning results.
+        - limit (int): Maximum number of triplets to return.
+
+        Returns:
+        --------
+        - list[dict[str, Any]]: A list of triplets.
+        """
+        query = f"""
+        MATCH (start_node:`{BASE_LABEL}`)-[relationship]->(end_node:`{BASE_LABEL}`)
+        RETURN start_node, properties(relationship) AS relationship_properties, end_node
+        SKIP $offset LIMIT $limit
+        """
+        results = await self.query(query, {"offset": offset, "limit": limit})
+
+        return results
```
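The new `get_triplets_batch` invites straightforward pagination. A minimal sketch of draining the whole graph page by page, assuming an already-connected `Neo4jAdapter` instance (the helper name and batch size below are illustrative, not part of the package):

```python
async def iterate_all_triplets(adapter, batch_size: int = 500):
    """Yield every (start_node, relationship, end_node) record in pages.

    `adapter` is assumed to be a connected Neo4jAdapter; batch_size is arbitrary.
    """
    offset = 0
    while True:
        batch = await adapter.get_triplets_batch(offset=offset, limit=batch_size)
        if not batch:
            break  # no more triplets left
        for triplet in batch:
            yield triplet
        offset += batch_size

# usage: async for triplet in iterate_all_triplets(adapter): ...
```

One caveat worth noting: Cypher's `SKIP`/`LIMIT` re-traverses the skipped rows on every call, so each page gets more expensive as the offset grows; that is fine for one-shot batch export but poor for random access.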
cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py

```diff
@@ -416,6 +416,15 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
         self._client.query(f"MATCH (n :{self._VECTOR_NODE_LABEL}) DETACH DELETE n")
         pass
 
+    async def is_empty(self) -> bool:
+        query = """
+        MATCH (n)
+        RETURN true
+        LIMIT 1;
+        """
+        query_result = await self._client.query(query)
+        return len(query_result) == 0
+
     @staticmethod
     def _get_scored_result(
         item: dict, with_vector: bool = False, with_score: bool = False
```
cognee/infrastructure/databases/utils/__init__.py

```diff
@@ -1 +1,4 @@
 from .get_or_create_dataset_database import get_or_create_dataset_database
+from .resolve_dataset_database_connection_info import resolve_dataset_database_connection_info
+from .get_graph_dataset_database_handler import get_graph_dataset_database_handler
+from .get_vector_dataset_database_handler import get_vector_dataset_database_handler
```
cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py (new file)

```diff
@@ -0,0 +1,10 @@
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+def get_graph_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
+    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+        supported_dataset_database_handlers,
+    )
+
+    handler = supported_dataset_database_handlers[dataset_database.graph_dataset_database_handler]
+    return handler
```
cognee/infrastructure/databases/utils/get_or_create_dataset_database.py

```diff
@@ -1,16 +1,65 @@
 from uuid import UUID
-from typing import Union
+from typing import Union, Optional
 
 from sqlalchemy import select
 from sqlalchemy.exc import IntegrityError
-from cognee.modules.data.methods import create_dataset
 
+from cognee.modules.data.methods import create_dataset
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.infrastructure.databases.vector import get_vectordb_config
+from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.modules.data.methods import get_unique_dataset_id
 from cognee.modules.users.models import DatasetDatabase
 from cognee.modules.users.models import User
 
 
+async def _get_vector_db_info(dataset_id: UUID, user: User) -> dict:
+    vector_config = get_vectordb_config()
+
+    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+        supported_dataset_database_handlers,
+    )
+
+    handler = supported_dataset_database_handlers[vector_config.vector_dataset_database_handler]
+    return await handler["handler_instance"].create_dataset(dataset_id, user)
+
+
+async def _get_graph_db_info(dataset_id: UUID, user: User) -> dict:
+    graph_config = get_graph_config()
+
+    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+        supported_dataset_database_handlers,
+    )
+
+    handler = supported_dataset_database_handlers[graph_config.graph_dataset_database_handler]
+    return await handler["handler_instance"].create_dataset(dataset_id, user)
+
+
+async def _existing_dataset_database(
+    dataset_id: UUID,
+    user: User,
+) -> Optional[DatasetDatabase]:
+    """
+    Check if a DatasetDatabase row already exists for the given owner + dataset.
+    Return None if it doesn't exist, return the row if it does.
+    Args:
+        dataset_id:
+        user:
+
+    Returns:
+        DatasetDatabase or None
+    """
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        stmt = select(DatasetDatabase).where(
+            DatasetDatabase.owner_id == user.id,
+            DatasetDatabase.dataset_id == dataset_id,
+        )
+        existing: DatasetDatabase = await session.scalar(stmt)
+        return existing
+
+
 async def get_or_create_dataset_database(
     dataset: Union[str, UUID],
     user: User,
@@ -21,6 +70,8 @@ async def get_or_create_dataset_database(
     • If the row already exists, it is fetched and returned.
     • Otherwise a new one is created atomically and returned.
 
+    DatasetDatabase row contains connection and provider info for vector and graph databases.
+
     Parameters
     ----------
     user : User
@@ -32,29 +83,26 @@ async def get_or_create_dataset_database(
 
     dataset_id = await get_unique_dataset_id(dataset, user)
 
-
-
+    # If dataset is given as name make sure the dataset is created first
+    if isinstance(dataset, str):
+        async with db_engine.get_async_session() as session:
+            await create_dataset(dataset, user, session)
 
-
-
-
-
+    # If dataset database already exists return it
+    existing_dataset_database = await _existing_dataset_database(dataset_id, user)
+    if existing_dataset_database:
+        return existing_dataset_database
 
-
-
-            DatasetDatabase.owner_id == user.id,
-            DatasetDatabase.dataset_id == dataset_id,
-        )
-        existing: DatasetDatabase = await session.scalar(stmt)
-        if existing:
-            return existing
+    graph_config_dict = await _get_graph_db_info(dataset_id, user)
+    vector_config_dict = await _get_vector_db_info(dataset_id, user)
 
+    async with db_engine.get_async_session() as session:
         # If there are no existing rows build a new row
         record = DatasetDatabase(
            owner_id=user.id,
             dataset_id=dataset_id,
-
-
+            **graph_config_dict,  # Unpack graph db config
+            **vector_config_dict,  # Unpack vector db config
         )
 
         try:
```
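Taken together, the new helpers make `get_or_create_dataset_database` the single entry point that turns a dataset name or UUID into per-dataset database connection info. A hedged usage sketch (`get_default_user` is assumed to be exported from `cognee.modules.users.methods`, a module this release also touches):

```python
import asyncio

from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.methods import get_default_user  # assumed export

async def main():
    user = await get_default_user()
    # First call creates the DatasetDatabase row (and, through the configured
    # handlers, the per-dataset vector and graph databases); subsequent calls
    # find the existing row via _existing_dataset_database and return it.
    dataset_db = await get_or_create_dataset_database("my_dataset", user)
    print(dataset_db.vector_database_url)

asyncio.run(main())
```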
cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py (new file)

```diff
@@ -0,0 +1,10 @@
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+def get_vector_dataset_database_handler(dataset_database: DatasetDatabase) -> dict:
+    from cognee.infrastructure.databases.dataset_database_handler.supported_dataset_database_handlers import (
+        supported_dataset_database_handlers,
+    )
+
+    handler = supported_dataset_database_handlers[dataset_database.vector_dataset_database_handler]
+    return handler
```
cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py (new file)

```diff
@@ -0,0 +1,30 @@
+from cognee.infrastructure.databases.utils.get_graph_dataset_database_handler import (
+    get_graph_dataset_database_handler,
+)
+from cognee.infrastructure.databases.utils.get_vector_dataset_database_handler import (
+    get_vector_dataset_database_handler,
+)
+from cognee.modules.users.models.DatasetDatabase import DatasetDatabase
+
+
+async def resolve_dataset_database_connection_info(
+    dataset_database: DatasetDatabase,
+) -> DatasetDatabase:
+    """
+    Resolve the connection info for the given DatasetDatabase instance.
+    Resolve both vector and graph database connection info and return the updated DatasetDatabase instance.
+
+    Args:
+        dataset_database: DatasetDatabase instance
+    Returns:
+        DatasetDatabase instance with resolved connection info
+    """
+    vector_dataset_database_handler = get_vector_dataset_database_handler(dataset_database)
+    graph_dataset_database_handler = get_graph_dataset_database_handler(dataset_database)
+    dataset_database = await vector_dataset_database_handler[
+        "handler_instance"
+    ].resolve_dataset_connection_info(dataset_database)
+    dataset_database = await graph_dataset_database_handler[
+        "handler_instance"
+    ].resolve_dataset_connection_info(dataset_database)
+    return dataset_database
```
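The `resolve_dataset_connection_info` hook gives each handler a chance to fill in connection details that are not persisted in the relational row, credentials being the obvious case. A hypothetical handler sketch showing the shape this implies; everything here except the method name and the `DatasetDatabase` type is invented for illustration:

```python
import os

from cognee.modules.users.models.DatasetDatabase import DatasetDatabase


class ExampleDatasetDatabaseHandler:
    """Hypothetical handler: re-attaches a secret kept out of the relational DB."""

    @classmethod
    async def resolve_dataset_connection_info(
        cls, dataset_database: DatasetDatabase
    ) -> DatasetDatabase:
        # Assumption: the key is sourced from the environment rather than stored.
        if not dataset_database.vector_database_key:
            dataset_database.vector_database_key = os.environ.get("VECTOR_DB_KEY", "")
        return dataset_database
```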
cognee/infrastructure/databases/vector/config.py

```diff
@@ -18,14 +18,17 @@ class VectorConfig(BaseSettings):
     Instance variables:
     - vector_db_url: The URL of the vector database.
     - vector_db_port: The port for the vector database.
+    - vector_db_name: The name of the vector database.
     - vector_db_key: The key for accessing the vector database.
     - vector_db_provider: The provider for the vector database.
     """
 
     vector_db_url: str = ""
     vector_db_port: int = 1234
+    vector_db_name: str = ""
     vector_db_key: str = ""
     vector_db_provider: str = "lancedb"
+    vector_dataset_database_handler: str = "lancedb"
 
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
@@ -58,8 +61,10 @@ class VectorConfig(BaseSettings):
         return {
             "vector_db_url": self.vector_db_url,
             "vector_db_port": self.vector_db_port,
+            "vector_db_name": self.vector_db_name,
             "vector_db_key": self.vector_db_key,
             "vector_db_provider": self.vector_db_provider,
+            "vector_dataset_database_handler": self.vector_dataset_database_handler,
         }
 
 
```
cognee/infrastructure/databases/vector/create_vector_engine.py

```diff
@@ -1,5 +1,6 @@
 from .supported_databases import supported_databases
 from .embeddings import get_embedding_engine
+from cognee.infrastructure.databases.graph.config import get_graph_context_config
 
 from functools import lru_cache
 
@@ -8,8 +9,10 @@ from functools import lru_cache
 def create_vector_engine(
     vector_db_provider: str,
     vector_db_url: str,
+    vector_db_name: str,
     vector_db_port: str = "",
     vector_db_key: str = "",
+    vector_dataset_database_handler: str = "",
 ):
     """
     Create a vector database engine based on the specified provider.
@@ -27,6 +30,7 @@ def create_vector_engine(
     - vector_db_url (str): The URL for the vector database instance.
     - vector_db_port (str): The port for the vector database instance. Required for some
       providers.
+    - vector_db_name (str): The name of the vector database instance.
     - vector_db_key (str): The API key or access token for the vector database instance.
     - vector_db_provider (str): The name of the vector database provider to use (e.g.,
       'pgvector').
@@ -45,6 +49,7 @@ def create_vector_engine(
             url=vector_db_url,
             api_key=vector_db_key,
             embedding_engine=embedding_engine,
+            database_name=vector_db_name,
         )
 
     if vector_db_provider.lower() == "pgvector":
@@ -133,6 +138,6 @@ def create_vector_engine(
 
     else:
         raise EnvironmentError(
-            f"Unsupported
+            f"Unsupported vector database provider: {vector_db_provider}. "
             f"Supported providers are: {', '.join(list(supported_databases.keys()) + ['LanceDB', 'PGVector', 'neptune_analytics', 'ChromaDB'])}"
         )
```
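Note that `vector_db_name` lands as a required positional parameter ahead of the defaulted ones, so any existing positional call sites break; keyword arguments sidestep the hazard. A call-style sketch (the path is illustrative):

```python
from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine

# Keyword arguments keep the call site robust against parameter reordering.
engine = create_vector_engine(
    vector_db_provider="lancedb",
    vector_db_url="/tmp/cognee/databases/my_dataset.lance.db",  # illustrative
    vector_db_name="my_dataset.lance.db",
    vector_db_key="",
)
```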
cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py

```diff
@@ -17,6 +17,7 @@ from cognee.infrastructure.databases.exceptions import EmbeddingException
 from cognee.infrastructure.llm.tokenizer.TikToken import (
     TikTokenTokenizer,
 )
+from cognee.shared.rate_limiting import embedding_rate_limiter_context_manager
 
 litellm.set_verbose = False
 logger = get_logger("FastembedEmbeddingEngine")
@@ -68,7 +69,7 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(
+        wait=wait_exponential_jitter(8, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
@@ -96,11 +97,12 @@ class FastembedEmbeddingEngine(EmbeddingEngine):
         if self.mock:
             return [[0.0] * self.dimensions for _ in text]
         else:
-
-
-
-
-
+            async with embedding_rate_limiter_context_manager():
+                embeddings = self.embedding_model.embed(
+                    text,
+                    batch_size=len(text),
+                    parallel=None,
+                )
 
             return list(embeddings)
```
cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py

```diff
@@ -25,6 +25,7 @@ from cognee.infrastructure.llm.tokenizer.Mistral import (
 from cognee.infrastructure.llm.tokenizer.TikToken import (
     TikTokenTokenizer,
 )
+from cognee.shared.rate_limiting import embedding_rate_limiter_context_manager
 
 litellm.set_verbose = False
 logger = get_logger("LiteLLMEmbeddingEngine")
@@ -109,13 +110,14 @@ class LiteLLMEmbeddingEngine(EmbeddingEngine):
             response = {"data": [{"embedding": [0.0] * self.dimensions} for _ in text]}
             return [data["embedding"] for data in response["data"]]
         else:
-
-
-
-
-
-
-
+            async with embedding_rate_limiter_context_manager():
+                response = await litellm.aembedding(
+                    model=self.model,
+                    input=text,
+                    api_key=self.api_key,
+                    api_base=self.endpoint,
+                    api_version=self.api_version,
+                )
 
             return [data["embedding"] for data in response.data]
```
cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py

```diff
@@ -18,10 +18,7 @@ from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import Em
 from cognee.infrastructure.llm.tokenizer.HuggingFace import (
     HuggingFaceTokenizer,
 )
-from cognee.
-    embedding_rate_limit_async,
-    embedding_sleep_and_retry_async,
-)
+from cognee.shared.rate_limiting import embedding_rate_limiter_context_manager
 from cognee.shared.utils import create_secure_ssl_context
 
 logger = get_logger("OllamaEmbeddingEngine")
@@ -101,7 +98,7 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
 
     @retry(
         stop=stop_after_delay(128),
-        wait=wait_exponential_jitter(
+        wait=wait_exponential_jitter(8, 128),
         retry=retry_if_not_exception_type(litellm.exceptions.NotFoundError),
         before_sleep=before_sleep_log(logger, logging.DEBUG),
         reraise=True,
@@ -120,14 +117,15 @@ class OllamaEmbeddingEngine(EmbeddingEngine):
         ssl_context = create_secure_ssl_context()
         connector = aiohttp.TCPConnector(ssl=ssl_context)
         async with aiohttp.ClientSession(connector=connector) as session:
-            async with
-
-
-
-
-
-
-
+            async with embedding_rate_limiter_context_manager():
+                async with session.post(
+                    self.endpoint, json=payload, headers=headers, timeout=60.0
+                ) as response:
+                    data = await response.json()
+                    if "embeddings" in data:
+                        return data["embeddings"][0]
+                    else:
+                        return data["data"][0]["embedding"]
 
     def get_vector_size(self) -> int:
         """
```
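All three embedding engines now wrap their provider calls in `embedding_rate_limiter_context_manager` from the new 30-line `cognee/shared/rate_limiting.py`, which replaces the deleted 544-line `embedding_rate_limiter.py` decorator module. The implementation is not shown in this diff; a minimal sketch of the async-context-manager shape it implies, using a process-wide semaphore (the cap and the mechanism are assumptions, not the package's code):

```python
import asyncio
from contextlib import asynccontextmanager

# Assumed: a global cap on concurrent embedding calls. The real module may
# instead enforce requests-per-interval or read its limits from configuration.
_embedding_semaphore = asyncio.Semaphore(8)


@asynccontextmanager
async def embedding_rate_limiter_context_manager():
    async with _embedding_semaphore:
        yield
```

A context manager composes more cleanly than the old decorator pair (`embedding_rate_limit_async`, `embedding_sleep_and_retry_async`) because the throttled region can be narrowed to exactly the network call, as the Ollama change above does.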
cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py

```diff
@@ -193,6 +193,8 @@ class LanceDBAdapter(VectorDBInterface):
             for (data_point_index, data_point) in enumerate(data_points)
         ]
 
+        lance_data_points = list({dp.id: dp for dp in lance_data_points}.values())
+
         async with self.VECTOR_DB_LOCK:
             await (
                 collection.merge_insert("id")
```
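The added line deduplicates the batch by `id` before `merge_insert`, keeping the last occurrence of each key since later dict assignments overwrite earlier ones; without it, duplicate ids within a single batch could collide in the upsert. The idiom in isolation, with invented sample data:

```python
from dataclasses import dataclass


@dataclass
class Point:
    id: str
    payload: str


points = [Point("a", "old"), Point("b", "x"), Point("a", "new")]
# Dict keys preserve first-insertion order; values keep the last assignment.
deduped = list({p.id: p for p in points}.values())
print(deduped)  # [Point(id='a', payload='new'), Point(id='b', payload='x')]
```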
cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py (new file)

```diff
@@ -0,0 +1,50 @@
+import os
+from uuid import UUID
+from typing import Optional
+
+from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
+from cognee.modules.users.models import User
+from cognee.modules.users.models import DatasetDatabase
+from cognee.base_config import get_base_config
+from cognee.infrastructure.databases.vector import get_vectordb_config
+from cognee.infrastructure.databases.dataset_database_handler import DatasetDatabaseHandlerInterface
+
+
+class LanceDBDatasetDatabaseHandler(DatasetDatabaseHandlerInterface):
+    """
+    Handler for interacting with LanceDB Dataset databases.
+    """
+
+    @classmethod
+    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
+        vector_config = get_vectordb_config()
+        base_config = get_base_config()
+
+        if vector_config.vector_db_provider != "lancedb":
+            raise ValueError(
+                "LanceDBDatasetDatabaseHandler can only be used with LanceDB vector database provider."
+            )
+
+        databases_directory_path = os.path.join(
+            base_config.system_root_directory, "databases", str(user.id)
+        )
+
+        vector_db_name = f"{dataset_id}.lance.db"
+
+        return {
+            "vector_database_provider": vector_config.vector_db_provider,
+            "vector_database_url": os.path.join(databases_directory_path, vector_db_name),
+            "vector_database_key": vector_config.vector_db_key,
+            "vector_database_name": vector_db_name,
+            "vector_dataset_database_handler": "lancedb",
+        }
+
+    @classmethod
+    async def delete_dataset(cls, dataset_database: DatasetDatabase):
+        vector_engine = create_vector_engine(
+            vector_db_provider=dataset_database.vector_database_provider,
+            vector_db_url=dataset_database.vector_database_url,
+            vector_db_key=dataset_database.vector_database_key,
+            vector_db_name=dataset_database.vector_database_name,
+        )
+        await vector_engine.prune()
```
cognee/infrastructure/databases/vector/vector_db_interface.py

```diff
@@ -2,6 +2,8 @@ from typing import List, Protocol, Optional, Union, Any
 from abc import abstractmethod
 from cognee.infrastructure.engine import DataPoint
 from .models.PayloadSchema import PayloadSchema
+from uuid import UUID
+from cognee.modules.users.models import User
 
 
 class VectorDBInterface(Protocol):
@@ -217,3 +219,36 @@ class VectorDBInterface(Protocol):
         - Any: The schema object suitable for this vector database
         """
         return model_type
+
+    @classmethod
+    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
+        """
+        Return a dictionary with connection info for a vector database for the given dataset.
+        Function can auto handle deploying of the actual database if needed, but is not necessary.
+        Only providing connection info is sufficient, this info will be mapped when trying to connect to the provided dataset in the future.
+        Needed for Cognee multi-tenant/multi-user and backend access control support.
+
+        Dictionary returned from this function will be used to create a DatasetDatabase row in the relational database.
+        From which internal mapping of dataset -> database connection info will be done.
+
+        Each dataset needs to map to a unique vector database when backend access control is enabled to facilitate a separation of concern for data.
+
+        Args:
+            dataset_id: UUID of the dataset if needed by the database creation logic
+            user: User object if needed by the database creation logic
+        Returns:
+            dict: Connection info for the created vector database instance.
+        """
+        pass
+
+    async def delete_dataset(self, dataset_id: UUID, user: User) -> None:
+        """
+        Delete the vector database for the given dataset.
+        Function should auto handle deleting of the actual database or send a request to the proper service to delete the database.
+        Needed for maintaining a database for Cognee multi-tenant/multi-user and backend access control.
+
+        Args:
+            dataset_id: UUID of the dataset
+            user: User object
+        """
+        pass
```
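Concrete adapters are expected to flesh out these hooks; the LanceDB handler above is the shipped example. A skeletal sketch of what a third-party adapter might return, mirroring the dictionary keys the LanceDB handler uses (the class, provider name, and URL scheme are hypothetical):

```python
from typing import Optional
from uuid import UUID

from cognee.modules.users.models import User


class MyVectorAdapter:
    """Hypothetical adapter fragment implementing the new dataset hooks."""

    @classmethod
    async def create_dataset(cls, dataset_id: Optional[UUID], user: Optional[User]) -> dict:
        # Per the interface docstring, returning connection info is enough;
        # actually provisioning the database here is optional.
        return {
            "vector_database_provider": "my_provider",  # hypothetical
            "vector_database_url": f"https://vectors.example.com/{dataset_id}",
            "vector_database_key": "",
            "vector_database_name": str(dataset_id),
            "vector_dataset_database_handler": "my_provider",
        }

    async def delete_dataset(self, dataset_id: UUID, user: User) -> None:
        ...  # ask the provider's control plane to drop the per-dataset database
```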
cognee/infrastructure/engine/models/Edge.py

```diff
@@ -1,4 +1,4 @@
-from pydantic import BaseModel
+from pydantic import BaseModel, field_validator
 from typing import Optional, Any, Dict
 
 
@@ -18,9 +18,21 @@ class Edge(BaseModel):
 
     # Mixed usage
     has_items: (Edge(weight=0.5, weights={"confidence": 0.9}), list[Item])
+
+    # With edge_text for rich embedding representation
+    contains: (Edge(relationship_type="contains", edge_text="relationship_name: contains; entity_description: Alice"), Entity)
     """
 
     weight: Optional[float] = None
     weights: Optional[Dict[str, float]] = None
     relationship_type: Optional[str] = None
     properties: Optional[Dict[str, Any]] = None
+    edge_text: Optional[str] = None
+
+    @field_validator("edge_text", mode="before")
+    @classmethod
+    def ensure_edge_text(cls, v, info):
+        """Auto-populate edge_text from relationship_type if not explicitly provided."""
+        if v is None and info.data.get("relationship_type"):
+            return info.data["relationship_type"]
+        return v
```
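Because `relationship_type` is declared before `edge_text`, it is already present in `info.data` when the before-validator runs, so the fallback works. One pydantic v2 subtlety worth flagging: validators do not run for fields that are simply omitted and fall back to their default (unless `validate_default=True`), so the auto-population is only guaranteed when `edge_text` is passed explicitly, even as `None`. A behavior sketch:

```python
from cognee.infrastructure.engine.models.Edge import Edge

# Explicit None triggers the before-validator, which copies relationship_type.
edge = Edge(relationship_type="contains", edge_text=None)
print(edge.edge_text)  # "contains"

# An explicit value always wins over the fallback.
edge = Edge(relationship_type="contains", edge_text="relationship_name: contains")
print(edge.edge_text)  # "relationship_name: contains"
```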
cognee/infrastructure/files/storage/s3_config.py

```diff
@@ -9,6 +9,8 @@ class S3Config(BaseSettings):
     aws_access_key_id: Optional[str] = None
     aws_secret_access_key: Optional[str] = None
     aws_session_token: Optional[str] = None
+    aws_profile_name: Optional[str] = None
+    aws_bedrock_runtime_endpoint: Optional[str] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")
 
 
```
cognee/infrastructure/files/utils/guess_file_type.py

```diff
@@ -55,6 +55,10 @@ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type
         file_type = Type("text/plain", "txt")
         return file_type
 
+    if ext in [".csv"]:
+        file_type = Type("text/csv", "csv")
+        return file_type
+
     file_type = filetype.guess(file)
 
     # If file type could not be determined consider it a plain text file as they don't have magic number encoding
```
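CSV needs this short-circuit because, as plain text, it has no magic number for `filetype.guess` to detect. A quick illustration, assuming `ext` is derived from the `name` argument as the surrounding code suggests:

```python
import io

from cognee.infrastructure.files.utils.guess_file_type import guess_file_type

csv_bytes = io.BytesIO(b"name,age\nAlice,30\n")
file_type = guess_file_type(csv_bytes, name="people.csv")
print(file_type.mime, file_type.extension)  # expected: text/csv csv
```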
cognee/infrastructure/llm/LLMGateway.py

```diff
@@ -11,7 +11,7 @@ class LLMGateway:
 
     @staticmethod
     def acreate_structured_output(
-        text_input: str, system_prompt: str, response_model: Type[BaseModel]
+        text_input: str, system_prompt: str, response_model: Type[BaseModel], **kwargs
     ) -> Coroutine:
         llm_config = get_llm_config()
         if llm_config.structured_output_framework.upper() == "BAML":
@@ -31,7 +31,10 @@ class LLMGateway:
 
         llm_client = get_llm_client()
         return llm_client.acreate_structured_output(
-            text_input=text_input,
+            text_input=text_input,
+            system_prompt=system_prompt,
+            response_model=response_model,
+            **kwargs,
         )
 
     @staticmethod
```
|