cognee 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +1 -0
- cognee/api/client.py +9 -5
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +3 -1
- cognee/api/v1/cognify/cognify.py +24 -16
- cognee/api/v1/cognify/routers/__init__.py +0 -1
- cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
- cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
- cognee/api/v1/ontologies/__init__.py +4 -0
- cognee/api/v1/ontologies/ontologies.py +158 -0
- cognee/api/v1/ontologies/routers/__init__.py +0 -0
- cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
- cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
- cognee/api/v1/search/search.py +4 -0
- cognee/api/v1/ui/node_setup.py +360 -0
- cognee/api/v1/ui/npm_utils.py +50 -0
- cognee/api/v1/ui/ui.py +38 -68
- cognee/cli/commands/cognify_command.py +8 -1
- cognee/cli/config.py +1 -1
- cognee/context_global_variables.py +86 -9
- cognee/eval_framework/Dockerfile +29 -0
- cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
- cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
- cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
- cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
- cognee/eval_framework/eval_config.py +2 -2
- cognee/eval_framework/modal_run_eval.py +16 -28
- cognee/infrastructure/databases/cache/config.py +3 -1
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
- cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
- cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
- cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
- cognee/infrastructure/databases/graph/config.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
- cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
- cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
- cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
- cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
- cognee/infrastructure/databases/utils/__init__.py +3 -0
- cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
- cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
- cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
- cognee/infrastructure/databases/vector/config.py +5 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
- cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -10
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
- cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
- cognee/infrastructure/engine/models/Edge.py +13 -1
- cognee/infrastructure/files/storage/s3_config.py +2 -0
- cognee/infrastructure/files/utils/guess_file_type.py +4 -0
- cognee/infrastructure/llm/LLMGateway.py +5 -2
- cognee/infrastructure/llm/config.py +37 -0
- cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
- cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
- cognee/infrastructure/loaders/LoaderEngine.py +1 -0
- cognee/infrastructure/loaders/core/__init__.py +2 -1
- cognee/infrastructure/loaders/core/csv_loader.py +93 -0
- cognee/infrastructure/loaders/core/text_loader.py +1 -2
- cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
- cognee/infrastructure/loaders/supported_loaders.py +2 -1
- cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
- cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
- cognee/modules/chunking/CsvChunker.py +35 -0
- cognee/modules/chunking/models/DocumentChunk.py +2 -1
- cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/deletion/prune_system.py +52 -2
- cognee/modules/data/methods/__init__.py +1 -0
- cognee/modules/data/methods/create_dataset.py +4 -2
- cognee/modules/data/methods/delete_dataset.py +26 -0
- cognee/modules/data/methods/get_dataset_ids.py +5 -1
- cognee/modules/data/methods/get_unique_data_id.py +68 -0
- cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
- cognee/modules/data/models/Dataset.py +2 -0
- cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
- cognee/modules/data/processing/document_types/__init__.py +1 -0
- cognee/modules/engine/models/Triplet.py +9 -0
- cognee/modules/engine/models/__init__.py +1 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
- cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
- cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
- cognee/modules/ingestion/identify.py +4 -4
- cognee/modules/memify/memify.py +1 -7
- cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
- cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
- cognee/modules/pipelines/operations/pipeline.py +18 -2
- cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
- cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
- cognee/modules/retrieval/__init__.py +1 -1
- cognee/modules/retrieval/base_graph_retriever.py +7 -3
- cognee/modules/retrieval/base_retriever.py +7 -3
- cognee/modules/retrieval/completion_retriever.py +11 -4
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
- cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
- cognee/modules/retrieval/graph_completion_retriever.py +14 -1
- cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
- cognee/modules/retrieval/register_retriever.py +10 -0
- cognee/modules/retrieval/registered_community_retrievers.py +1 -0
- cognee/modules/retrieval/temporal_retriever.py +13 -2
- cognee/modules/retrieval/triplet_retriever.py +182 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
- cognee/modules/retrieval/utils/completion.py +2 -22
- cognee/modules/run_custom_pipeline/__init__.py +1 -0
- cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
- cognee/modules/search/methods/get_search_type_tools.py +54 -8
- cognee/modules/search/methods/no_access_control_search.py +4 -0
- cognee/modules/search/methods/search.py +26 -3
- cognee/modules/search/types/SearchType.py +1 -1
- cognee/modules/settings/get_settings.py +19 -0
- cognee/modules/users/methods/create_user.py +12 -27
- cognee/modules/users/methods/get_authenticated_user.py +3 -2
- cognee/modules/users/methods/get_default_user.py +4 -2
- cognee/modules/users/methods/get_user.py +1 -1
- cognee/modules/users/methods/get_user_by_email.py +1 -1
- cognee/modules/users/models/DatasetDatabase.py +24 -3
- cognee/modules/users/models/Tenant.py +6 -7
- cognee/modules/users/models/User.py +6 -5
- cognee/modules/users/models/UserTenant.py +12 -0
- cognee/modules/users/models/__init__.py +1 -0
- cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
- cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
- cognee/modules/users/tenants/methods/__init__.py +1 -0
- cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
- cognee/modules/users/tenants/methods/create_tenant.py +22 -8
- cognee/modules/users/tenants/methods/select_tenant.py +62 -0
- cognee/shared/logging_utils.py +6 -0
- cognee/shared/rate_limiting.py +30 -0
- cognee/tasks/chunks/__init__.py +1 -0
- cognee/tasks/chunks/chunk_by_row.py +94 -0
- cognee/tasks/documents/__init__.py +0 -1
- cognee/tasks/documents/classify_documents.py +2 -0
- cognee/tasks/feedback/generate_improved_answers.py +3 -3
- cognee/tasks/graph/extract_graph_from_data.py +9 -10
- cognee/tasks/ingestion/ingest_data.py +1 -1
- cognee/tasks/memify/__init__.py +2 -0
- cognee/tasks/memify/cognify_session.py +41 -0
- cognee/tasks/memify/extract_user_sessions.py +73 -0
- cognee/tasks/memify/get_triplet_datapoints.py +289 -0
- cognee/tasks/storage/add_data_points.py +142 -2
- cognee/tasks/storage/index_data_points.py +33 -22
- cognee/tasks/storage/index_graph_edges.py +37 -57
- cognee/tests/integration/documents/CsvDocument_test.py +70 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
- cognee/tests/integration/tasks/test_add_data_points.py +139 -0
- cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
- cognee/tests/integration/web_url_crawler/test_default_url_crawler.py +1 -1
- cognee/tests/integration/web_url_crawler/test_tavily_crawler.py +1 -1
- cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +13 -27
- cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
- cognee/tests/test_add_docling_document.py +2 -2
- cognee/tests/test_cognee_server_start.py +84 -3
- cognee/tests/test_conversation_history.py +68 -5
- cognee/tests/test_data/example_with_header.csv +3 -0
- cognee/tests/test_dataset_database_handler.py +137 -0
- cognee/tests/test_dataset_delete.py +76 -0
- cognee/tests/test_edge_centered_payload.py +170 -0
- cognee/tests/test_edge_ingestion.py +27 -0
- cognee/tests/test_feedback_enrichment.py +1 -1
- cognee/tests/test_library.py +6 -4
- cognee/tests/test_load.py +62 -0
- cognee/tests/test_multi_tenancy.py +165 -0
- cognee/tests/test_parallel_databases.py +2 -0
- cognee/tests/test_pipeline_cache.py +164 -0
- cognee/tests/test_relational_db_migration.py +54 -2
- cognee/tests/test_search_db.py +44 -2
- cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
- cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
- cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
- cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
- cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
- cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
- cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
- cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
- cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
- cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
- cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
- cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
- cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
- cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
- cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
- cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
- {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -6
- {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/RECORD +215 -163
- {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/WHEEL +1 -1
- {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
- cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
- cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
- cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
- cognee/modules/retrieval/code_retriever.py +0 -232
- cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
- cognee/tasks/code/get_local_dependencies_checker.py +0 -20
- cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
- cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
- cognee/tasks/repo_processor/__init__.py +0 -2
- cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
- cognee/tasks/repo_processor/get_non_code_files.py +0 -158
- cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
- {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.4.0.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -5,14 +5,15 @@ from ..models import User
|
|
|
5
5
|
from ..get_fastapi_users import get_fastapi_users
|
|
6
6
|
from .get_default_user import get_default_user
|
|
7
7
|
from cognee.shared.logging_utils import get_logger
|
|
8
|
+
from cognee.context_global_variables import backend_access_control_enabled
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
logger = get_logger("get_authenticated_user")
|
|
11
12
|
|
|
12
13
|
# Check environment variable to determine authentication requirement
|
|
13
14
|
REQUIRE_AUTHENTICATION = (
|
|
14
|
-
os.getenv("REQUIRE_AUTHENTICATION", "
|
|
15
|
-
or os.
|
|
15
|
+
os.getenv("REQUIRE_AUTHENTICATION", "true").lower() == "true"
|
|
16
|
+
or os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", "true").lower() == "true"
|
|
16
17
|
)
|
|
17
18
|
|
|
18
19
|
fastapi_users = get_fastapi_users()
|
|
@@ -10,7 +10,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
|
|
|
10
10
|
from cognee.modules.users.methods.create_default_user import create_default_user
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
async def get_default_user() ->
|
|
13
|
+
async def get_default_user() -> User:
|
|
14
14
|
db_engine = get_relational_engine()
|
|
15
15
|
base_config = get_base_config()
|
|
16
16
|
default_email = base_config.default_user_email or "default_user@example.com"
|
|
@@ -18,7 +18,9 @@ async def get_default_user() -> SimpleNamespace:
|
|
|
18
18
|
try:
|
|
19
19
|
async with db_engine.get_async_session() as session:
|
|
20
20
|
query = (
|
|
21
|
-
select(User)
|
|
21
|
+
select(User)
|
|
22
|
+
.options(selectinload(User.roles), selectinload(User.tenants))
|
|
23
|
+
.where(User.email == default_email)
|
|
22
24
|
)
|
|
23
25
|
|
|
24
26
|
result = await session.execute(query)
|
|
@@ -14,7 +14,7 @@ async def get_user(user_id: UUID):
|
|
|
14
14
|
user = (
|
|
15
15
|
await session.execute(
|
|
16
16
|
select(User)
|
|
17
|
-
.options(selectinload(User.roles), selectinload(User.
|
|
17
|
+
.options(selectinload(User.roles), selectinload(User.tenants))
|
|
18
18
|
.where(User.id == user_id)
|
|
19
19
|
)
|
|
20
20
|
).scalar()
|
|
@@ -13,7 +13,7 @@ async def get_user_by_email(user_email: str):
|
|
|
13
13
|
user = (
|
|
14
14
|
await session.execute(
|
|
15
15
|
select(User)
|
|
16
|
-
.options(joinedload(User.roles), joinedload(User.
|
|
16
|
+
.options(joinedload(User.roles), joinedload(User.tenants))
|
|
17
17
|
.where(User.email == user_email)
|
|
18
18
|
)
|
|
19
19
|
).scalar()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from datetime import datetime, timezone
|
|
2
2
|
|
|
3
|
-
from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
|
|
3
|
+
from sqlalchemy import Column, DateTime, String, UUID, ForeignKey, JSON, text
|
|
4
4
|
from cognee.infrastructure.databases.relational import Base
|
|
5
5
|
|
|
6
6
|
|
|
@@ -12,8 +12,29 @@ class DatasetDatabase(Base):
|
|
|
12
12
|
UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
vector_database_name = Column(String, unique=
|
|
16
|
-
graph_database_name = Column(String, unique=
|
|
15
|
+
vector_database_name = Column(String, unique=False, nullable=False)
|
|
16
|
+
graph_database_name = Column(String, unique=False, nullable=False)
|
|
17
|
+
|
|
18
|
+
vector_database_provider = Column(String, unique=False, nullable=False)
|
|
19
|
+
graph_database_provider = Column(String, unique=False, nullable=False)
|
|
20
|
+
|
|
21
|
+
graph_dataset_database_handler = Column(String, unique=False, nullable=False)
|
|
22
|
+
vector_dataset_database_handler = Column(String, unique=False, nullable=False)
|
|
23
|
+
|
|
24
|
+
vector_database_url = Column(String, unique=False, nullable=True)
|
|
25
|
+
graph_database_url = Column(String, unique=False, nullable=True)
|
|
26
|
+
|
|
27
|
+
vector_database_key = Column(String, unique=False, nullable=True)
|
|
28
|
+
graph_database_key = Column(String, unique=False, nullable=True)
|
|
29
|
+
|
|
30
|
+
# configuration details for different database types. This would make it more flexible to add new database types
|
|
31
|
+
# without changing the database schema.
|
|
32
|
+
graph_database_connection_info = Column(
|
|
33
|
+
JSON, unique=False, nullable=False, server_default=text("'{}'")
|
|
34
|
+
)
|
|
35
|
+
vector_database_connection_info = Column(
|
|
36
|
+
JSON, unique=False, nullable=False, server_default=text("'{}'")
|
|
37
|
+
)
|
|
17
38
|
|
|
18
39
|
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
|
19
40
|
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from sqlalchemy.orm import relationship
|
|
1
|
+
from sqlalchemy.orm import relationship, Mapped
|
|
2
2
|
from sqlalchemy import Column, String, ForeignKey, UUID
|
|
3
3
|
from .Principal import Principal
|
|
4
|
-
from .
|
|
4
|
+
from .UserTenant import UserTenant
|
|
5
5
|
from .Role import Role
|
|
6
6
|
|
|
7
7
|
|
|
@@ -13,14 +13,13 @@ class Tenant(Principal):
|
|
|
13
13
|
|
|
14
14
|
owner_id = Column(UUID, index=True)
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
users = relationship(
|
|
16
|
+
users: Mapped[list["User"]] = relationship( # noqa: F821
|
|
18
17
|
"User",
|
|
19
|
-
|
|
20
|
-
|
|
18
|
+
secondary=UserTenant.__tablename__,
|
|
19
|
+
back_populates="tenants",
|
|
21
20
|
)
|
|
22
21
|
|
|
23
|
-
# One-to-Many relationship with Role
|
|
22
|
+
# One-to-Many relationship with Role
|
|
24
23
|
roles = relationship(
|
|
25
24
|
"Role",
|
|
26
25
|
back_populates="tenant",
|
|
@@ -6,8 +6,10 @@ from sqlalchemy import ForeignKey, Column, UUID
|
|
|
6
6
|
from sqlalchemy.orm import relationship, Mapped
|
|
7
7
|
|
|
8
8
|
from .Principal import Principal
|
|
9
|
+
from .UserTenant import UserTenant
|
|
9
10
|
from .UserRole import UserRole
|
|
10
11
|
from .Role import Role
|
|
12
|
+
from .Tenant import Tenant
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class User(SQLAlchemyBaseUserTableUUID, Principal):
|
|
@@ -15,7 +17,7 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
|
|
|
15
17
|
|
|
16
18
|
id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), primary_key=True)
|
|
17
19
|
|
|
18
|
-
# Foreign key to Tenant (Many-to-One relationship)
|
|
20
|
+
# Foreign key to current Tenant (Many-to-One relationship)
|
|
19
21
|
tenant_id = Column(UUID, ForeignKey("tenants.id"))
|
|
20
22
|
|
|
21
23
|
# Many-to-Many Relationship with Roles
|
|
@@ -25,11 +27,11 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
|
|
|
25
27
|
back_populates="users",
|
|
26
28
|
)
|
|
27
29
|
|
|
28
|
-
# Relationship
|
|
29
|
-
|
|
30
|
+
# Many-to-Many Relationship with Tenants user is a part of
|
|
31
|
+
tenants: Mapped[list["Tenant"]] = relationship(
|
|
30
32
|
"Tenant",
|
|
33
|
+
secondary=UserTenant.__tablename__,
|
|
31
34
|
back_populates="users",
|
|
32
|
-
foreign_keys=[tenant_id],
|
|
33
35
|
)
|
|
34
36
|
|
|
35
37
|
# ACL Relationship (One-to-Many)
|
|
@@ -46,7 +48,6 @@ class UserRead(schemas.BaseUser[uuid_UUID]):
|
|
|
46
48
|
|
|
47
49
|
|
|
48
50
|
class UserCreate(schemas.BaseUserCreate):
|
|
49
|
-
tenant_id: Optional[uuid_UUID] = None
|
|
50
51
|
is_verified: bool = True
|
|
51
52
|
|
|
52
53
|
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from datetime import datetime, timezone
|
|
2
|
+
from sqlalchemy import Column, ForeignKey, DateTime, UUID
|
|
3
|
+
from cognee.infrastructure.databases.relational import Base
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class UserTenant(Base):
    """Association row linking one user to one tenant (table ``user_tenants``).

    ``user_id`` and ``tenant_id`` together form the composite primary key, so a
    given user can be linked to a given tenant at most once.
    """

    __tablename__ = "user_tenants"

    # Timezone-aware UTC timestamp, generated at insert time when no value is supplied.
    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))

    # Both foreign keys are flagged primary_key=True, i.e. a composite key over the pair.
    user_id = Column(UUID, ForeignKey("users.id"), primary_key=True)
    tenant_id = Column(UUID, ForeignKey("tenants.id"), primary_key=True)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from .User import User
|
|
2
2
|
from .Role import Role
|
|
3
3
|
from .UserRole import UserRole
|
|
4
|
+
from .UserTenant import UserTenant
|
|
4
5
|
from .DatasetDatabase import DatasetDatabase
|
|
5
6
|
from .RoleDefaultPermissions import RoleDefaultPermissions
|
|
6
7
|
from .UserDefaultPermissions import UserDefaultPermissions
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
from types import SimpleNamespace
|
|
2
|
-
|
|
3
1
|
from cognee.shared.logging_utils import get_logger
|
|
4
2
|
|
|
5
3
|
from ...models.User import User
|
|
6
4
|
from cognee.modules.data.models.Dataset import Dataset
|
|
7
5
|
from cognee.modules.users.permissions.methods import get_principal_datasets
|
|
8
|
-
from cognee.modules.users.permissions.methods import get_role, get_tenant
|
|
9
6
|
|
|
10
7
|
logger = get_logger()
|
|
11
8
|
|
|
@@ -25,17 +22,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
|
|
|
25
22
|
# Get all datasets User has explicit access to
|
|
26
23
|
datasets.extend(await get_principal_datasets(user, permission_type))
|
|
27
24
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
25
|
+
# Get all tenants user is a part of
|
|
26
|
+
tenants = await user.awaitable_attrs.tenants
|
|
27
|
+
for tenant in tenants:
|
|
28
|
+
# Get all datasets all tenant members have access to
|
|
31
29
|
datasets.extend(await get_principal_datasets(tenant, permission_type))
|
|
32
30
|
|
|
33
|
-
# Get all datasets
|
|
34
|
-
|
|
35
|
-
# If simple namespace use roles defined in user
|
|
36
|
-
roles = user.roles
|
|
37
|
-
else:
|
|
38
|
-
roles = await user.awaitable_attrs.roles
|
|
31
|
+
# Get all datasets accessible by roles user is a part of
|
|
32
|
+
roles = await user.awaitable_attrs.roles
|
|
39
33
|
for role in roles:
|
|
40
34
|
datasets.extend(await get_principal_datasets(role, permission_type))
|
|
41
35
|
|
|
@@ -45,4 +39,10 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
|
|
|
45
39
|
# If the dataset id key already exists, leave the dictionary unchanged.
|
|
46
40
|
unique.setdefault(dataset.id, dataset)
|
|
47
41
|
|
|
48
|
-
|
|
42
|
+
# Filter out dataset that aren't part of the selected user's tenant
|
|
43
|
+
filtered_datasets = []
|
|
44
|
+
for dataset in list(unique.values()):
|
|
45
|
+
if dataset.tenant_id == user.tenant_id:
|
|
46
|
+
filtered_datasets.append(dataset)
|
|
47
|
+
|
|
48
|
+
return filtered_datasets
|
|
@@ -42,11 +42,13 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
|
|
|
42
42
|
.first()
|
|
43
43
|
)
|
|
44
44
|
|
|
45
|
+
user_tenants = await user.awaitable_attrs.tenants
|
|
46
|
+
|
|
45
47
|
if not user:
|
|
46
48
|
raise UserNotFoundError
|
|
47
49
|
elif not role:
|
|
48
50
|
raise RoleNotFoundError
|
|
49
|
-
elif
|
|
51
|
+
elif role.tenant_id not in [tenant.id for tenant in user_tenants]:
|
|
50
52
|
raise TenantNotFoundError(
|
|
51
53
|
message="User tenant does not match role tenant. User cannot be added to role."
|
|
52
54
|
)
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from uuid import UUID
|
|
2
3
|
from sqlalchemy.exc import IntegrityError
|
|
4
|
+
from sqlalchemy import insert
|
|
3
5
|
|
|
4
6
|
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
|
|
5
7
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
8
|
+
from cognee.modules.users.models.UserTenant import UserTenant
|
|
6
9
|
from cognee.modules.users.methods import get_user
|
|
7
10
|
from cognee.modules.users.permissions.methods import get_tenant
|
|
8
11
|
from cognee.modules.users.exceptions import (
|
|
@@ -12,14 +15,19 @@ from cognee.modules.users.exceptions import (
|
|
|
12
15
|
)
|
|
13
16
|
|
|
14
17
|
|
|
15
|
-
async def add_user_to_tenant(
|
|
18
|
+
async def add_user_to_tenant(
|
|
19
|
+
user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = False
|
|
20
|
+
):
|
|
16
21
|
"""
|
|
17
22
|
Add a user with the given id to the tenant with the given id.
|
|
18
23
|
This can only be successful if the request owner with the given id is the tenant owner.
|
|
24
|
+
|
|
25
|
+
If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant.
|
|
19
26
|
Args:
|
|
20
27
|
user_id: Id of the user.
|
|
21
28
|
tenant_id: Id of the tenant.
|
|
22
29
|
owner_id: Id of the request owner.
|
|
30
|
+
set_as_active_tenant: If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant.
|
|
23
31
|
|
|
24
32
|
Returns:
|
|
25
33
|
None
|
|
@@ -40,17 +48,18 @@ async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
|
|
|
40
48
|
message="Only tenant owner can add other users to organization."
|
|
41
49
|
)
|
|
42
50
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
user.tenant_id = tenant_id
|
|
46
|
-
elif user.tenant_id == tenant_id:
|
|
47
|
-
return
|
|
48
|
-
else:
|
|
49
|
-
raise IntegrityError
|
|
50
|
-
|
|
51
|
+
if set_as_active_tenant:
|
|
52
|
+
user.tenant_id = tenant_id
|
|
51
53
|
await session.merge(user)
|
|
52
54
|
await session.commit()
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
# Add association directly to the association table
|
|
58
|
+
create_user_tenant_statement = insert(UserTenant).values(
|
|
59
|
+
user_id=user_id, tenant_id=tenant_id
|
|
56
60
|
)
|
|
61
|
+
await session.execute(create_user_tenant_statement)
|
|
62
|
+
await session.commit()
|
|
63
|
+
|
|
64
|
+
except IntegrityError:
|
|
65
|
+
raise EntityAlreadyExistsError(message="User is already part of group.")
|
|
@@ -1,19 +1,25 @@
|
|
|
1
1
|
from uuid import UUID
|
|
2
|
+
from sqlalchemy import insert
|
|
2
3
|
from sqlalchemy.exc import IntegrityError
|
|
4
|
+
from typing import Optional
|
|
3
5
|
|
|
6
|
+
from cognee.modules.users.models.UserTenant import UserTenant
|
|
4
7
|
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
|
|
5
8
|
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
6
9
|
from cognee.modules.users.models import Tenant
|
|
7
10
|
from cognee.modules.users.methods import get_user
|
|
8
11
|
|
|
9
12
|
|
|
10
|
-
async def create_tenant(
|
|
13
|
+
async def create_tenant(
|
|
14
|
+
tenant_name: str, user_id: UUID, set_as_active_tenant: Optional[bool] = True
|
|
15
|
+
) -> UUID:
|
|
11
16
|
"""
|
|
12
17
|
Create a new tenant with the given name, for the user with the given id.
|
|
13
18
|
This user is the owner of the tenant.
|
|
14
19
|
Args:
|
|
15
20
|
tenant_name: Name of the new tenant.
|
|
16
21
|
user_id: Id of the user.
|
|
22
|
+
set_as_active_tenant: If true, set the newly created tenant as the active tenant for the user.
|
|
17
23
|
|
|
18
24
|
Returns:
|
|
19
25
|
None
|
|
@@ -22,18 +28,26 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
|
|
|
22
28
|
async with db_engine.get_async_session() as session:
|
|
23
29
|
try:
|
|
24
30
|
user = await get_user(user_id)
|
|
25
|
-
if user.tenant_id:
|
|
26
|
-
raise EntityAlreadyExistsError(
|
|
27
|
-
message="User already has a tenant. New tenant cannot be created."
|
|
28
|
-
)
|
|
29
31
|
|
|
30
32
|
tenant = Tenant(name=tenant_name, owner_id=user_id)
|
|
31
33
|
session.add(tenant)
|
|
32
34
|
await session.flush()
|
|
33
35
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
if set_as_active_tenant:
|
|
37
|
+
user.tenant_id = tenant.id
|
|
38
|
+
await session.merge(user)
|
|
39
|
+
await session.commit()
|
|
40
|
+
|
|
41
|
+
try:
|
|
42
|
+
# Add association directly to the association table
|
|
43
|
+
create_user_tenant_statement = insert(UserTenant).values(
|
|
44
|
+
user_id=user_id, tenant_id=tenant.id
|
|
45
|
+
)
|
|
46
|
+
await session.execute(create_user_tenant_statement)
|
|
47
|
+
await session.commit()
|
|
48
|
+
except IntegrityError:
|
|
49
|
+
raise EntityAlreadyExistsError(message="User is already part of tenant.")
|
|
50
|
+
|
|
37
51
|
return tenant.id
|
|
38
52
|
except IntegrityError as e:
|
|
39
53
|
raise EntityAlreadyExistsError(message="Tenant already exists.") from e
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
from typing import Union
|
|
3
|
+
|
|
4
|
+
import sqlalchemy.exc
|
|
5
|
+
from sqlalchemy import select
|
|
6
|
+
|
|
7
|
+
from cognee.infrastructure.databases.relational import get_relational_engine
|
|
8
|
+
from cognee.modules.users.methods.get_user import get_user
|
|
9
|
+
from cognee.modules.users.models.UserTenant import UserTenant
|
|
10
|
+
from cognee.modules.users.models.User import User
|
|
11
|
+
from cognee.modules.users.permissions.methods import get_tenant
|
|
12
|
+
from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User:
    """
    Set the user's active tenant to the provided tenant.

    If tenant_id is None, the user's active tenant is cleared, which puts the
    user back on the default single user-tenant.

    Args:
        user_id: UUID of the user.
        tenant_id: Id of the tenant to activate, or None to clear the active tenant.

    Returns:
        The user object with its ``tenant_id`` updated.

    Raises:
        UserNotFoundError: If no user was found for ``user_id``.
        TenantNotFoundError: If no tenant was found for ``tenant_id``, or the
            user is not a member of that tenant.
    """
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        user = await get_user(user_id)

        if tenant_id is None:
            # Guard before touching attributes, so a missing user surfaces as
            # UserNotFoundError rather than an AttributeError on None.
            if not user:
                raise UserNotFoundError

            # If no tenant_id is provided set current Tenant to the single user-tenant
            user.tenant_id = None
            await session.merge(user)
            await session.commit()
            return user

        tenant = await get_tenant(tenant_id)

        if not user:
            raise UserNotFoundError
        elif not tenant:
            raise TenantNotFoundError

        # Check if User is part of Tenant
        membership = await session.execute(
            select(UserTenant)
            .where(UserTenant.user_id == user.id)
            .where(UserTenant.tenant_id == tenant_id)
        )

        try:
            # scalar_one() raises NoResultFound when there is no membership row;
            # the returned row itself is not needed.
            membership.scalar_one()
        except sqlalchemy.exc.NoResultFound as e:
            raise TenantNotFoundError("User is not part of the tenant.") from e

        # User is part of tenant: update current tenant of user
        user.tenant_id = tenant_id
        await session.merge(user)
        await session.commit()
        return user
|
cognee/shared/logging_utils.py
CHANGED
|
@@ -450,6 +450,8 @@ def setup_logging(log_level=None, name=None):
|
|
|
450
450
|
try:
|
|
451
451
|
msg = self.format(record)
|
|
452
452
|
stream = self.stream
|
|
453
|
+
if hasattr(stream, "closed") and stream.closed:
|
|
454
|
+
return
|
|
453
455
|
stream.write("\n" + msg + self.terminator)
|
|
454
456
|
self.flush()
|
|
455
457
|
except Exception:
|
|
@@ -532,6 +534,10 @@ def setup_logging(log_level=None, name=None):
|
|
|
532
534
|
# Get a configured logger and log system information
|
|
533
535
|
logger = structlog.get_logger(name if name else __name__)
|
|
534
536
|
|
|
537
|
+
logger.warning(
|
|
538
|
+
"From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation."
|
|
539
|
+
)
|
|
540
|
+
|
|
535
541
|
if logs_dir is not None:
|
|
536
542
|
logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
|
|
537
543
|
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from aiolimiter import AsyncLimiter
|
|
2
|
+
from contextlib import nullcontext
|
|
3
|
+
from cognee.infrastructure.llm.config import get_llm_config
|
|
4
|
+
|
|
5
|
+
# Configuration is read once, at import time; both limiters below are module-level
# objects built from that snapshot.
llm_config = get_llm_config()

# Limiter for LLM calls: AsyncLimiter(max_rate, time_period).
# The LLM limiter uses the LLM interval (previously it was built with the embedding
# interval, which tied the LLM window to the embedding setting).
# NOTE(review): assumes the config exposes `llm_rate_limit_interval` alongside
# `llm_rate_limit_requests` - confirm against the LLM config class.
llm_rate_limiter = AsyncLimiter(
    llm_config.llm_rate_limit_requests, llm_config.llm_rate_limit_interval
)
# Limiter for embedding calls, built from the embedding-specific settings.
embedding_rate_limiter = AsyncLimiter(
    llm_config.embedding_rate_limit_requests, llm_config.embedding_rate_limit_interval
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def llm_rate_limiter_context_manager():
    """
    Return the context manager to wrap around an LLM call.

    Returns the module-level ``llm_rate_limiter`` when
    ``llm_config.llm_rate_limit_enabled`` is truthy; otherwise returns a new
    ``nullcontext()`` so callers can use the same ``with`` shape either way.
    """
    # NOTE(review): presumably entered with `async with`; nullcontext only supports
    # the async protocol on Python 3.10+ - confirm the supported Python versions.
    if not llm_config.llm_rate_limit_enabled:
        # Rate limiting is disabled: hand back a no-op context manager
        return nullcontext()
    return llm_rate_limiter
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def embedding_rate_limiter_context_manager():
    """
    Return the context manager to wrap around an embedding call.

    Returns the module-level ``embedding_rate_limiter`` when
    ``llm_config.embedding_rate_limit_enabled`` is truthy; otherwise returns a new
    ``nullcontext()`` so callers can use the same ``with`` shape either way.
    """
    # NOTE(review): presumably entered with `async with`; nullcontext only supports
    # the async protocol on Python 3.10+ - confirm the supported Python versions.
    if not llm_config.embedding_rate_limit_enabled:
        # Rate limiting is disabled: hand back a no-op context manager
        return nullcontext()
    return embedding_rate_limiter
|
cognee/tasks/chunks/__init__.py
CHANGED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from typing import Any, Dict, Iterator
|
|
2
|
+
from uuid import NAMESPACE_OID, uuid5
|
|
3
|
+
|
|
4
|
+
from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def _get_pair_size(pair_text: str) -> int:
    """
    Return the size, in tokens, of one key:value pair text.

    The embedding engine's tokenizer is used to count tokens when the engine has one.
    When no tokenizer is available, a fixed size of 3 is returned for the pair.

    Parameters:
    -----------

        - pair_text (str): The key:value pair text whose token size is wanted.

    Returns:
    --------

        - int: The tokenizer's token count for the text, or 3 when the embedding
          engine has no tokenizer.
    """
    tokenizer = get_embedding_engine().tokenizer
    if not tokenizer:
        # No tokenizer to consult: fall back to the fixed per-pair size
        return 3
    return tokenizer.count_tokens(pair_text)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def chunk_by_row(
    data: str,
    max_chunk_size,
) -> Iterator[Dict[str, Any]]:
    """
    Split row-oriented text into chunks bounded by a size budget.

    The input is split into rows on blank lines ("\n\n") and each row into pairs on
    ", ". Pairs are accumulated, in order, into the current chunk; whenever adding the
    next pair to a non-empty chunk would push its size above max_chunk_size, the
    current chunk is yielded with cut_type "row_cut" and a new chunk is started with
    that pair. A single pair larger than max_chunk_size is still emitted as its own
    chunk. Whatever is left after the last pair is yielded with cut_type "row_end",
    provided its text is non-empty. Pairs inside a chunk are joined with ", ".

    Each yielded dict has the keys "text", "chunk_size", "chunk_id" (a uuid5 of the
    chunk text in NAMESPACE_OID), "chunk_index" and "cut_type".

    Parameters:
    -----------

        - data (str): The input text to be chunked.
        - max_chunk_size: The maximum accumulated size for a chunk, in the unit
          returned by _get_pair_size.
    """
    pending_pairs = []
    pending_size = 0
    next_index = 0

    def _build_chunk(cut_type: str) -> Dict[str, Any]:
        # Snapshot the pairs accumulated so far as one chunk record.
        text = ", ".join(pending_pairs)
        return {
            "text": text,
            "chunk_size": pending_size,
            "chunk_id": uuid5(NAMESPACE_OID, text),
            "chunk_index": next_index,
            "cut_type": cut_type,
        }

    for row in data.split("\n\n"):
        for pair in row.split(", "):
            size = _get_pair_size(pair)
            if pending_size > 0 and (pending_size + size > max_chunk_size):
                # Budget would be exceeded: emit what we have, then start over
                yield _build_chunk("row_cut")
                pending_pairs = []
                pending_size = 0
                next_index += 1

            pending_pairs.append(pair)
            pending_size += size

    # Flush the remainder, skipping it when its text is empty
    if ", ".join(pending_pairs):
        yield _build_chunk("row_end")
|
|
@@ -7,6 +7,7 @@ from cognee.modules.data.processing.document_types import (
|
|
|
7
7
|
ImageDocument,
|
|
8
8
|
TextDocument,
|
|
9
9
|
UnstructuredDocument,
|
|
10
|
+
CsvDocument,
|
|
10
11
|
)
|
|
11
12
|
from cognee.modules.engine.models.node_set import NodeSet
|
|
12
13
|
from cognee.modules.engine.utils.generate_node_id import generate_node_id
|
|
@@ -15,6 +16,7 @@ from cognee.tasks.documents.exceptions import WrongDataDocumentInputError
|
|
|
15
16
|
EXTENSION_TO_DOCUMENT_CLASS = {
|
|
16
17
|
"pdf": PdfDocument, # Text documents
|
|
17
18
|
"txt": TextDocument,
|
|
19
|
+
"csv": CsvDocument,
|
|
18
20
|
"docx": UnstructuredDocument,
|
|
19
21
|
"doc": UnstructuredDocument,
|
|
20
22
|
"odt": UnstructuredDocument,
|
|
@@ -61,7 +61,7 @@ async def _generate_improved_answer_for_single_interaction(
|
|
|
61
61
|
)
|
|
62
62
|
|
|
63
63
|
retrieved_context = await retriever.get_context(query_text)
|
|
64
|
-
completion = await retriever.
|
|
64
|
+
completion = await retriever.get_completion(
|
|
65
65
|
query=query_text,
|
|
66
66
|
context=retrieved_context,
|
|
67
67
|
response_model=ImprovedAnswerResponse,
|
|
@@ -70,9 +70,9 @@ async def _generate_improved_answer_for_single_interaction(
|
|
|
70
70
|
new_context_text = await retriever.resolve_edges_to_text(retrieved_context)
|
|
71
71
|
|
|
72
72
|
if completion:
|
|
73
|
-
enrichment.improved_answer = completion.answer
|
|
73
|
+
enrichment.improved_answer = completion[0].answer
|
|
74
74
|
enrichment.new_context = new_context_text
|
|
75
|
-
enrichment.explanation = completion.explanation
|
|
75
|
+
enrichment.explanation = completion[0].explanation
|
|
76
76
|
return enrichment
|
|
77
77
|
else:
|
|
78
78
|
logger.warning(
|