cognee 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. cognee/__init__.py +1 -0
  2. cognee/api/client.py +9 -5
  3. cognee/api/v1/add/add.py +2 -1
  4. cognee/api/v1/add/routers/get_add_router.py +3 -1
  5. cognee/api/v1/cognify/cognify.py +24 -16
  6. cognee/api/v1/cognify/routers/__init__.py +0 -1
  7. cognee/api/v1/cognify/routers/get_cognify_router.py +30 -1
  8. cognee/api/v1/datasets/routers/get_datasets_router.py +3 -3
  9. cognee/api/v1/ontologies/__init__.py +4 -0
  10. cognee/api/v1/ontologies/ontologies.py +158 -0
  11. cognee/api/v1/ontologies/routers/__init__.py +0 -0
  12. cognee/api/v1/ontologies/routers/get_ontology_router.py +109 -0
  13. cognee/api/v1/permissions/routers/get_permissions_router.py +41 -1
  14. cognee/api/v1/search/search.py +4 -0
  15. cognee/api/v1/ui/node_setup.py +360 -0
  16. cognee/api/v1/ui/npm_utils.py +50 -0
  17. cognee/api/v1/ui/ui.py +38 -68
  18. cognee/cli/commands/cognify_command.py +8 -1
  19. cognee/cli/config.py +1 -1
  20. cognee/context_global_variables.py +86 -9
  21. cognee/eval_framework/Dockerfile +29 -0
  22. cognee/eval_framework/answer_generation/answer_generation_executor.py +10 -0
  23. cognee/eval_framework/answer_generation/run_question_answering_module.py +1 -1
  24. cognee/eval_framework/corpus_builder/task_getters/get_cascade_graph_tasks.py +0 -2
  25. cognee/eval_framework/corpus_builder/task_getters/get_default_tasks_by_indices.py +4 -4
  26. cognee/eval_framework/eval_config.py +2 -2
  27. cognee/eval_framework/modal_run_eval.py +16 -28
  28. cognee/infrastructure/databases/cache/config.py +3 -1
  29. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +151 -0
  30. cognee/infrastructure/databases/cache/get_cache_engine.py +20 -10
  31. cognee/infrastructure/databases/dataset_database_handler/__init__.py +3 -0
  32. cognee/infrastructure/databases/dataset_database_handler/dataset_database_handler_interface.py +80 -0
  33. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +18 -0
  34. cognee/infrastructure/databases/dataset_database_handler/use_dataset_database_handler.py +10 -0
  35. cognee/infrastructure/databases/exceptions/exceptions.py +16 -0
  36. cognee/infrastructure/databases/graph/config.py +7 -0
  37. cognee/infrastructure/databases/graph/get_graph_engine.py +3 -0
  38. cognee/infrastructure/databases/graph/graph_db_interface.py +15 -0
  39. cognee/infrastructure/databases/graph/kuzu/KuzuDatasetDatabaseHandler.py +81 -0
  40. cognee/infrastructure/databases/graph/kuzu/adapter.py +228 -0
  41. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +168 -0
  42. cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +80 -1
  43. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +9 -0
  44. cognee/infrastructure/databases/utils/__init__.py +3 -0
  45. cognee/infrastructure/databases/utils/get_graph_dataset_database_handler.py +10 -0
  46. cognee/infrastructure/databases/utils/get_or_create_dataset_database.py +66 -18
  47. cognee/infrastructure/databases/utils/get_vector_dataset_database_handler.py +10 -0
  48. cognee/infrastructure/databases/utils/resolve_dataset_database_connection_info.py +30 -0
  49. cognee/infrastructure/databases/vector/config.py +5 -0
  50. cognee/infrastructure/databases/vector/create_vector_engine.py +6 -1
  51. cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py +8 -6
  52. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +9 -7
  53. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +11 -13
  54. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +2 -0
  55. cognee/infrastructure/databases/vector/lancedb/LanceDBDatasetDatabaseHandler.py +50 -0
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +35 -0
  57. cognee/infrastructure/engine/models/Edge.py +13 -1
  58. cognee/infrastructure/files/storage/s3_config.py +2 -0
  59. cognee/infrastructure/files/utils/guess_file_type.py +4 -0
  60. cognee/infrastructure/llm/LLMGateway.py +5 -2
  61. cognee/infrastructure/llm/config.py +37 -0
  62. cognee/infrastructure/llm/extraction/knowledge_graph/extract_content_graph.py +2 -2
  63. cognee/infrastructure/llm/structured_output_framework/baml/baml_src/extraction/acreate_structured_output.py +23 -8
  64. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +22 -18
  65. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/__init__.py +5 -0
  66. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/bedrock/adapter.py +153 -0
  67. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +47 -38
  68. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +46 -37
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +20 -10
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +23 -11
  71. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +36 -23
  72. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +47 -36
  73. cognee/infrastructure/loaders/LoaderEngine.py +1 -0
  74. cognee/infrastructure/loaders/core/__init__.py +2 -1
  75. cognee/infrastructure/loaders/core/csv_loader.py +93 -0
  76. cognee/infrastructure/loaders/core/text_loader.py +1 -2
  77. cognee/infrastructure/loaders/external/advanced_pdf_loader.py +0 -9
  78. cognee/infrastructure/loaders/supported_loaders.py +2 -1
  79. cognee/memify_pipelines/create_triplet_embeddings.py +53 -0
  80. cognee/memify_pipelines/persist_sessions_in_knowledge_graph.py +55 -0
  81. cognee/modules/chunking/CsvChunker.py +35 -0
  82. cognee/modules/chunking/models/DocumentChunk.py +2 -1
  83. cognee/modules/chunking/text_chunker_with_overlap.py +124 -0
  84. cognee/modules/cognify/config.py +2 -0
  85. cognee/modules/data/deletion/prune_system.py +52 -2
  86. cognee/modules/data/methods/__init__.py +1 -0
  87. cognee/modules/data/methods/create_dataset.py +4 -2
  88. cognee/modules/data/methods/delete_dataset.py +26 -0
  89. cognee/modules/data/methods/get_dataset_ids.py +5 -1
  90. cognee/modules/data/methods/get_unique_data_id.py +68 -0
  91. cognee/modules/data/methods/get_unique_dataset_id.py +66 -4
  92. cognee/modules/data/models/Dataset.py +2 -0
  93. cognee/modules/data/processing/document_types/CsvDocument.py +33 -0
  94. cognee/modules/data/processing/document_types/__init__.py +1 -0
  95. cognee/modules/engine/models/Triplet.py +9 -0
  96. cognee/modules/engine/models/__init__.py +1 -0
  97. cognee/modules/graph/cognee_graph/CogneeGraph.py +89 -39
  98. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +8 -3
  99. cognee/modules/graph/utils/expand_with_nodes_and_edges.py +19 -2
  100. cognee/modules/graph/utils/resolve_edges_to_text.py +48 -49
  101. cognee/modules/ingestion/identify.py +4 -4
  102. cognee/modules/memify/memify.py +1 -7
  103. cognee/modules/notebooks/operations/run_in_local_sandbox.py +3 -0
  104. cognee/modules/ontology/rdf_xml/RDFLibOntologyResolver.py +55 -23
  105. cognee/modules/pipelines/operations/pipeline.py +18 -2
  106. cognee/modules/pipelines/operations/run_tasks_data_item.py +1 -1
  107. cognee/modules/retrieval/EntityCompletionRetriever.py +10 -3
  108. cognee/modules/retrieval/__init__.py +1 -1
  109. cognee/modules/retrieval/base_graph_retriever.py +7 -3
  110. cognee/modules/retrieval/base_retriever.py +7 -3
  111. cognee/modules/retrieval/completion_retriever.py +11 -4
  112. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +10 -2
  113. cognee/modules/retrieval/graph_completion_cot_retriever.py +18 -51
  114. cognee/modules/retrieval/graph_completion_retriever.py +14 -1
  115. cognee/modules/retrieval/graph_summary_completion_retriever.py +4 -0
  116. cognee/modules/retrieval/register_retriever.py +10 -0
  117. cognee/modules/retrieval/registered_community_retrievers.py +1 -0
  118. cognee/modules/retrieval/temporal_retriever.py +13 -2
  119. cognee/modules/retrieval/triplet_retriever.py +182 -0
  120. cognee/modules/retrieval/utils/brute_force_triplet_search.py +43 -11
  121. cognee/modules/retrieval/utils/completion.py +2 -22
  122. cognee/modules/run_custom_pipeline/__init__.py +1 -0
  123. cognee/modules/run_custom_pipeline/run_custom_pipeline.py +76 -0
  124. cognee/modules/search/methods/get_search_type_tools.py +54 -8
  125. cognee/modules/search/methods/no_access_control_search.py +4 -0
  126. cognee/modules/search/methods/search.py +26 -3
  127. cognee/modules/search/types/SearchType.py +1 -1
  128. cognee/modules/settings/get_settings.py +19 -0
  129. cognee/modules/users/methods/create_user.py +12 -27
  130. cognee/modules/users/methods/get_authenticated_user.py +3 -2
  131. cognee/modules/users/methods/get_default_user.py +4 -2
  132. cognee/modules/users/methods/get_user.py +1 -1
  133. cognee/modules/users/methods/get_user_by_email.py +1 -1
  134. cognee/modules/users/models/DatasetDatabase.py +24 -3
  135. cognee/modules/users/models/Tenant.py +6 -7
  136. cognee/modules/users/models/User.py +6 -5
  137. cognee/modules/users/models/UserTenant.py +12 -0
  138. cognee/modules/users/models/__init__.py +1 -0
  139. cognee/modules/users/permissions/methods/get_all_user_permission_datasets.py +13 -13
  140. cognee/modules/users/roles/methods/add_user_to_role.py +3 -1
  141. cognee/modules/users/tenants/methods/__init__.py +1 -0
  142. cognee/modules/users/tenants/methods/add_user_to_tenant.py +21 -12
  143. cognee/modules/users/tenants/methods/create_tenant.py +22 -8
  144. cognee/modules/users/tenants/methods/select_tenant.py +62 -0
  145. cognee/shared/logging_utils.py +6 -0
  146. cognee/shared/rate_limiting.py +30 -0
  147. cognee/tasks/chunks/__init__.py +1 -0
  148. cognee/tasks/chunks/chunk_by_row.py +94 -0
  149. cognee/tasks/documents/__init__.py +0 -1
  150. cognee/tasks/documents/classify_documents.py +2 -0
  151. cognee/tasks/feedback/generate_improved_answers.py +3 -3
  152. cognee/tasks/graph/extract_graph_from_data.py +9 -10
  153. cognee/tasks/ingestion/ingest_data.py +1 -1
  154. cognee/tasks/memify/__init__.py +2 -0
  155. cognee/tasks/memify/cognify_session.py +41 -0
  156. cognee/tasks/memify/extract_user_sessions.py +73 -0
  157. cognee/tasks/memify/get_triplet_datapoints.py +289 -0
  158. cognee/tasks/storage/add_data_points.py +142 -2
  159. cognee/tasks/storage/index_data_points.py +33 -22
  160. cognee/tasks/storage/index_graph_edges.py +37 -57
  161. cognee/tests/integration/documents/CsvDocument_test.py +70 -0
  162. cognee/tests/integration/retrieval/test_triplet_retriever.py +84 -0
  163. cognee/tests/integration/tasks/test_add_data_points.py +139 -0
  164. cognee/tests/integration/tasks/test_get_triplet_datapoints.py +69 -0
  165. cognee/tests/tasks/entity_extraction/entity_extraction_test.py +1 -1
  166. cognee/tests/test_add_docling_document.py +2 -2
  167. cognee/tests/test_cognee_server_start.py +84 -3
  168. cognee/tests/test_conversation_history.py +68 -5
  169. cognee/tests/test_data/example_with_header.csv +3 -0
  170. cognee/tests/test_dataset_database_handler.py +137 -0
  171. cognee/tests/test_dataset_delete.py +76 -0
  172. cognee/tests/test_edge_centered_payload.py +170 -0
  173. cognee/tests/test_edge_ingestion.py +27 -0
  174. cognee/tests/test_feedback_enrichment.py +1 -1
  175. cognee/tests/test_library.py +6 -4
  176. cognee/tests/test_load.py +62 -0
  177. cognee/tests/test_multi_tenancy.py +165 -0
  178. cognee/tests/test_parallel_databases.py +2 -0
  179. cognee/tests/test_pipeline_cache.py +164 -0
  180. cognee/tests/test_relational_db_migration.py +54 -2
  181. cognee/tests/test_search_db.py +44 -2
  182. cognee/tests/unit/api/test_conditional_authentication_endpoints.py +12 -3
  183. cognee/tests/unit/api/test_ontology_endpoint.py +252 -0
  184. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +5 -0
  185. cognee/tests/unit/infrastructure/databases/test_index_data_points.py +27 -0
  186. cognee/tests/unit/infrastructure/databases/test_index_graph_edges.py +14 -16
  187. cognee/tests/unit/infrastructure/llm/test_llm_config.py +46 -0
  188. cognee/tests/unit/infrastructure/mock_embedding_engine.py +3 -7
  189. cognee/tests/unit/infrastructure/test_embedding_rate_limiting_realistic.py +0 -5
  190. cognee/tests/unit/modules/chunking/test_text_chunker.py +248 -0
  191. cognee/tests/unit/modules/chunking/test_text_chunker_with_overlap.py +324 -0
  192. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +2 -2
  193. cognee/tests/unit/modules/graph/cognee_graph_test.py +406 -0
  194. cognee/tests/unit/modules/memify_tasks/test_cognify_session.py +111 -0
  195. cognee/tests/unit/modules/memify_tasks/test_extract_user_sessions.py +175 -0
  196. cognee/tests/unit/modules/memify_tasks/test_get_triplet_datapoints.py +214 -0
  197. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +0 -51
  198. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +1 -0
  199. cognee/tests/unit/modules/retrieval/structured_output_test.py +204 -0
  200. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +1 -1
  201. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +0 -1
  202. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +608 -0
  203. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +83 -0
  204. cognee/tests/unit/modules/users/test_conditional_authentication.py +0 -63
  205. cognee/tests/unit/processing/chunks/chunk_by_row_test.py +52 -0
  206. cognee/tests/unit/tasks/storage/test_add_data_points.py +288 -0
  207. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/METADATA +11 -7
  208. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/RECORD +212 -160
  209. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/entry_points.txt +0 -1
  210. cognee/api/v1/cognify/code_graph_pipeline.py +0 -119
  211. cognee/api/v1/cognify/routers/get_code_pipeline_router.py +0 -90
  212. cognee/infrastructure/databases/vector/embeddings/embedding_rate_limiter.py +0 -544
  213. cognee/modules/retrieval/code_retriever.py +0 -232
  214. cognee/tasks/code/enrich_dependency_graph_checker.py +0 -35
  215. cognee/tasks/code/get_local_dependencies_checker.py +0 -20
  216. cognee/tasks/code/get_repo_dependency_graph_checker.py +0 -35
  217. cognee/tasks/documents/check_permissions_on_dataset.py +0 -26
  218. cognee/tasks/repo_processor/__init__.py +0 -2
  219. cognee/tasks/repo_processor/get_local_dependencies.py +0 -335
  220. cognee/tasks/repo_processor/get_non_code_files.py +0 -158
  221. cognee/tasks/repo_processor/get_repo_file_dependencies.py +0 -243
  222. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/WHEEL +0 -0
  223. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/LICENSE +0 -0
  224. {cognee-0.4.1.dist-info → cognee-0.5.0.dist-info}/licenses/NOTICE.md +0 -0
@@ -5,14 +5,15 @@ from ..models import User
5
5
  from ..get_fastapi_users import get_fastapi_users
6
6
  from .get_default_user import get_default_user
7
7
  from cognee.shared.logging_utils import get_logger
8
+ from cognee.context_global_variables import backend_access_control_enabled
8
9
 
9
10
 
10
11
  logger = get_logger("get_authenticated_user")
11
12
 
12
13
  # Check environment variable to determine authentication requirement
13
14
  REQUIRE_AUTHENTICATION = (
14
- os.getenv("REQUIRE_AUTHENTICATION", "false").lower() == "true"
15
- or os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
15
+ os.getenv("REQUIRE_AUTHENTICATION", "true").lower() == "true"
16
+ or os.environ.get("ENABLE_BACKEND_ACCESS_CONTROL", "true").lower() == "true"
16
17
  )
17
18
 
18
19
  fastapi_users = get_fastapi_users()
@@ -10,7 +10,7 @@ from cognee.infrastructure.databases.relational import get_relational_engine
10
10
  from cognee.modules.users.methods.create_default_user import create_default_user
11
11
 
12
12
 
13
- async def get_default_user() -> SimpleNamespace:
13
+ async def get_default_user() -> User:
14
14
  db_engine = get_relational_engine()
15
15
  base_config = get_base_config()
16
16
  default_email = base_config.default_user_email or "default_user@example.com"
@@ -18,7 +18,9 @@ async def get_default_user() -> SimpleNamespace:
18
18
  try:
19
19
  async with db_engine.get_async_session() as session:
20
20
  query = (
21
- select(User).options(selectinload(User.roles)).where(User.email == default_email)
21
+ select(User)
22
+ .options(selectinload(User.roles), selectinload(User.tenants))
23
+ .where(User.email == default_email)
22
24
  )
23
25
 
24
26
  result = await session.execute(query)
@@ -14,7 +14,7 @@ async def get_user(user_id: UUID):
14
14
  user = (
15
15
  await session.execute(
16
16
  select(User)
17
- .options(selectinload(User.roles), selectinload(User.tenant))
17
+ .options(selectinload(User.roles), selectinload(User.tenants))
18
18
  .where(User.id == user_id)
19
19
  )
20
20
  ).scalar()
@@ -13,7 +13,7 @@ async def get_user_by_email(user_email: str):
13
13
  user = (
14
14
  await session.execute(
15
15
  select(User)
16
- .options(joinedload(User.roles), joinedload(User.tenant))
16
+ .options(joinedload(User.roles), joinedload(User.tenants))
17
17
  .where(User.email == user_email)
18
18
  )
19
19
  ).scalar()
@@ -1,6 +1,6 @@
1
1
  from datetime import datetime, timezone
2
2
 
3
- from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
3
+ from sqlalchemy import Column, DateTime, String, UUID, ForeignKey, JSON, text
4
4
  from cognee.infrastructure.databases.relational import Base
5
5
 
6
6
 
@@ -12,8 +12,29 @@ class DatasetDatabase(Base):
12
12
  UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
13
13
  )
14
14
 
15
- vector_database_name = Column(String, unique=True, nullable=False)
16
- graph_database_name = Column(String, unique=True, nullable=False)
15
+ vector_database_name = Column(String, unique=False, nullable=False)
16
+ graph_database_name = Column(String, unique=False, nullable=False)
17
+
18
+ vector_database_provider = Column(String, unique=False, nullable=False)
19
+ graph_database_provider = Column(String, unique=False, nullable=False)
20
+
21
+ graph_dataset_database_handler = Column(String, unique=False, nullable=False)
22
+ vector_dataset_database_handler = Column(String, unique=False, nullable=False)
23
+
24
+ vector_database_url = Column(String, unique=False, nullable=True)
25
+ graph_database_url = Column(String, unique=False, nullable=True)
26
+
27
+ vector_database_key = Column(String, unique=False, nullable=True)
28
+ graph_database_key = Column(String, unique=False, nullable=True)
29
+
30
+ # configuration details for different database types. This would make it more flexible to add new database types
31
+ # without changing the database schema.
32
+ graph_database_connection_info = Column(
33
+ JSON, unique=False, nullable=False, server_default=text("'{}'")
34
+ )
35
+ vector_database_connection_info = Column(
36
+ JSON, unique=False, nullable=False, server_default=text("'{}'")
37
+ )
17
38
 
18
39
  created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
19
40
  updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
@@ -1,7 +1,7 @@
1
- from sqlalchemy.orm import relationship
1
+ from sqlalchemy.orm import relationship, Mapped
2
2
  from sqlalchemy import Column, String, ForeignKey, UUID
3
3
  from .Principal import Principal
4
- from .User import User
4
+ from .UserTenant import UserTenant
5
5
  from .Role import Role
6
6
 
7
7
 
@@ -13,14 +13,13 @@ class Tenant(Principal):
13
13
 
14
14
  owner_id = Column(UUID, index=True)
15
15
 
16
- # One-to-Many relationship with User; specify the join via User.tenant_id
17
- users = relationship(
16
+ users: Mapped[list["User"]] = relationship( # noqa: F821
18
17
  "User",
19
- back_populates="tenant",
20
- foreign_keys=lambda: [User.tenant_id],
18
+ secondary=UserTenant.__tablename__,
19
+ back_populates="tenants",
21
20
  )
22
21
 
23
- # One-to-Many relationship with Role (if needed; similar fix)
22
+ # One-to-Many relationship with Role
24
23
  roles = relationship(
25
24
  "Role",
26
25
  back_populates="tenant",
@@ -6,8 +6,10 @@ from sqlalchemy import ForeignKey, Column, UUID
6
6
  from sqlalchemy.orm import relationship, Mapped
7
7
 
8
8
  from .Principal import Principal
9
+ from .UserTenant import UserTenant
9
10
  from .UserRole import UserRole
10
11
  from .Role import Role
12
+ from .Tenant import Tenant
11
13
 
12
14
 
13
15
  class User(SQLAlchemyBaseUserTableUUID, Principal):
@@ -15,7 +17,7 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
15
17
 
16
18
  id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), primary_key=True)
17
19
 
18
- # Foreign key to Tenant (Many-to-One relationship)
20
+ # Foreign key to current Tenant (Many-to-One relationship)
19
21
  tenant_id = Column(UUID, ForeignKey("tenants.id"))
20
22
 
21
23
  # Many-to-Many Relationship with Roles
@@ -25,11 +27,11 @@ class User(SQLAlchemyBaseUserTableUUID, Principal):
25
27
  back_populates="users",
26
28
  )
27
29
 
28
- # Relationship to Tenant
29
- tenant = relationship(
30
+ # Many-to-Many Relationship with Tenants user is a part of
31
+ tenants: Mapped[list["Tenant"]] = relationship(
30
32
  "Tenant",
33
+ secondary=UserTenant.__tablename__,
31
34
  back_populates="users",
32
- foreign_keys=[tenant_id],
33
35
  )
34
36
 
35
37
  # ACL Relationship (One-to-Many)
@@ -46,7 +48,6 @@ class UserRead(schemas.BaseUser[uuid_UUID]):
46
48
 
47
49
 
48
50
  class UserCreate(schemas.BaseUserCreate):
49
- tenant_id: Optional[uuid_UUID] = None
50
51
  is_verified: bool = True
51
52
 
52
53
 
@@ -0,0 +1,12 @@
1
+ from datetime import datetime, timezone
2
+ from sqlalchemy import Column, ForeignKey, DateTime, UUID
3
+ from cognee.infrastructure.databases.relational import Base
4
+
5
+
6
+ class UserTenant(Base):
7
+ __tablename__ = "user_tenants"
8
+
9
+ created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
10
+
11
+ user_id = Column(UUID, ForeignKey("users.id"), primary_key=True)
12
+ tenant_id = Column(UUID, ForeignKey("tenants.id"), primary_key=True)
@@ -1,6 +1,7 @@
1
1
  from .User import User
2
2
  from .Role import Role
3
3
  from .UserRole import UserRole
4
+ from .UserTenant import UserTenant
4
5
  from .DatasetDatabase import DatasetDatabase
5
6
  from .RoleDefaultPermissions import RoleDefaultPermissions
6
7
  from .UserDefaultPermissions import UserDefaultPermissions
@@ -1,11 +1,8 @@
1
- from types import SimpleNamespace
2
-
3
1
  from cognee.shared.logging_utils import get_logger
4
2
 
5
3
  from ...models.User import User
6
4
  from cognee.modules.data.models.Dataset import Dataset
7
5
  from cognee.modules.users.permissions.methods import get_principal_datasets
8
- from cognee.modules.users.permissions.methods import get_role, get_tenant
9
6
 
10
7
  logger = get_logger()
11
8
 
@@ -25,17 +22,14 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
25
22
  # Get all datasets User has explicit access to
26
23
  datasets.extend(await get_principal_datasets(user, permission_type))
27
24
 
28
- if user.tenant_id:
29
- # Get all datasets all tenants have access to
30
- tenant = await get_tenant(user.tenant_id)
25
+ # Get all tenants user is a part of
26
+ tenants = await user.awaitable_attrs.tenants
27
+ for tenant in tenants:
28
+ # Get all datasets all tenant members have access to
31
29
  datasets.extend(await get_principal_datasets(tenant, permission_type))
32
30
 
33
- # Get all datasets Users roles have access to
34
- if isinstance(user, SimpleNamespace):
35
- # If simple namespace use roles defined in user
36
- roles = user.roles
37
- else:
38
- roles = await user.awaitable_attrs.roles
31
+ # Get all datasets accessible by roles user is a part of
32
+ roles = await user.awaitable_attrs.roles
39
33
  for role in roles:
40
34
  datasets.extend(await get_principal_datasets(role, permission_type))
41
35
 
@@ -45,4 +39,10 @@ async def get_all_user_permission_datasets(user: User, permission_type: str) ->
45
39
  # If the dataset id key already exists, leave the dictionary unchanged.
46
40
  unique.setdefault(dataset.id, dataset)
47
41
 
48
- return list(unique.values())
42
+ # Filter out dataset that aren't part of the selected user's tenant
43
+ filtered_datasets = []
44
+ for dataset in list(unique.values()):
45
+ if dataset.tenant_id == user.tenant_id:
46
+ filtered_datasets.append(dataset)
47
+
48
+ return filtered_datasets
@@ -42,11 +42,13 @@ async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
42
42
  .first()
43
43
  )
44
44
 
45
+ user_tenants = await user.awaitable_attrs.tenants
46
+
45
47
  if not user:
46
48
  raise UserNotFoundError
47
49
  elif not role:
48
50
  raise RoleNotFoundError
49
- elif user.tenant_id != role.tenant_id:
51
+ elif role.tenant_id not in [tenant.id for tenant in user_tenants]:
50
52
  raise TenantNotFoundError(
51
53
  message="User tenant does not match role tenant. User cannot be added to role."
52
54
  )
@@ -1,2 +1,3 @@
1
1
  from .create_tenant import create_tenant
2
2
  from .add_user_to_tenant import add_user_to_tenant
3
+ from .select_tenant import select_tenant
@@ -1,8 +1,11 @@
1
+ from typing import Optional
1
2
  from uuid import UUID
2
3
  from sqlalchemy.exc import IntegrityError
4
+ from sqlalchemy import insert
3
5
 
4
6
  from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
5
7
  from cognee.infrastructure.databases.relational import get_relational_engine
8
+ from cognee.modules.users.models.UserTenant import UserTenant
6
9
  from cognee.modules.users.methods import get_user
7
10
  from cognee.modules.users.permissions.methods import get_tenant
8
11
  from cognee.modules.users.exceptions import (
@@ -12,14 +15,19 @@ from cognee.modules.users.exceptions import (
12
15
  )
13
16
 
14
17
 
15
- async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
18
+ async def add_user_to_tenant(
19
+ user_id: UUID, tenant_id: UUID, owner_id: UUID, set_as_active_tenant: Optional[bool] = False
20
+ ):
16
21
  """
17
22
  Add a user with the given id to the tenant with the given id.
18
23
  This can only be successful if the request owner with the given id is the tenant owner.
24
+
25
+ If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant.
19
26
  Args:
20
27
  user_id: Id of the user.
21
28
  tenant_id: Id of the tenant.
22
29
  owner_id: Id of the request owner.
30
+ set_as_active_tenant: If set_as_active_tenant is true it will automatically set the users active tenant to provided tenant.
23
31
 
24
32
  Returns:
25
33
  None
@@ -40,17 +48,18 @@ async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
40
48
  message="Only tenant owner can add other users to organization."
41
49
  )
42
50
 
43
- try:
44
- if user.tenant_id is None:
45
- user.tenant_id = tenant_id
46
- elif user.tenant_id == tenant_id:
47
- return
48
- else:
49
- raise IntegrityError
50
-
51
+ if set_as_active_tenant:
52
+ user.tenant_id = tenant_id
51
53
  await session.merge(user)
52
54
  await session.commit()
53
- except IntegrityError:
54
- raise EntityAlreadyExistsError(
55
- message="User is already part of a tenant. Only one tenant can be assigned to user."
55
+
56
+ try:
57
+ # Add association directly to the association table
58
+ create_user_tenant_statement = insert(UserTenant).values(
59
+ user_id=user_id, tenant_id=tenant_id
56
60
  )
61
+ await session.execute(create_user_tenant_statement)
62
+ await session.commit()
63
+
64
+ except IntegrityError:
65
+ raise EntityAlreadyExistsError(message="User is already part of group.")
@@ -1,19 +1,25 @@
1
1
  from uuid import UUID
2
+ from sqlalchemy import insert
2
3
  from sqlalchemy.exc import IntegrityError
4
+ from typing import Optional
3
5
 
6
+ from cognee.modules.users.models.UserTenant import UserTenant
4
7
  from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
5
8
  from cognee.infrastructure.databases.relational import get_relational_engine
6
9
  from cognee.modules.users.models import Tenant
7
10
  from cognee.modules.users.methods import get_user
8
11
 
9
12
 
10
- async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
13
+ async def create_tenant(
14
+ tenant_name: str, user_id: UUID, set_as_active_tenant: Optional[bool] = True
15
+ ) -> UUID:
11
16
  """
12
17
  Create a new tenant with the given name, for the user with the given id.
13
18
  This user is the owner of the tenant.
14
19
  Args:
15
20
  tenant_name: Name of the new tenant.
16
21
  user_id: Id of the user.
22
+ set_as_active_tenant: If true, set the newly created tenant as the active tenant for the user.
17
23
 
18
24
  Returns:
19
25
  None
@@ -22,18 +28,26 @@ async def create_tenant(tenant_name: str, user_id: UUID) -> UUID:
22
28
  async with db_engine.get_async_session() as session:
23
29
  try:
24
30
  user = await get_user(user_id)
25
- if user.tenant_id:
26
- raise EntityAlreadyExistsError(
27
- message="User already has a tenant. New tenant cannot be created."
28
- )
29
31
 
30
32
  tenant = Tenant(name=tenant_name, owner_id=user_id)
31
33
  session.add(tenant)
32
34
  await session.flush()
33
35
 
34
- user.tenant_id = tenant.id
35
- await session.merge(user)
36
- await session.commit()
36
+ if set_as_active_tenant:
37
+ user.tenant_id = tenant.id
38
+ await session.merge(user)
39
+ await session.commit()
40
+
41
+ try:
42
+ # Add association directly to the association table
43
+ create_user_tenant_statement = insert(UserTenant).values(
44
+ user_id=user_id, tenant_id=tenant.id
45
+ )
46
+ await session.execute(create_user_tenant_statement)
47
+ await session.commit()
48
+ except IntegrityError:
49
+ raise EntityAlreadyExistsError(message="User is already part of tenant.")
50
+
37
51
  return tenant.id
38
52
  except IntegrityError as e:
39
53
  raise EntityAlreadyExistsError(message="Tenant already exists.") from e
@@ -0,0 +1,62 @@
1
+ from uuid import UUID
2
+ from typing import Union
3
+
4
+ import sqlalchemy.exc
5
+ from sqlalchemy import select
6
+
7
+ from cognee.infrastructure.databases.relational import get_relational_engine
8
+ from cognee.modules.users.methods.get_user import get_user
9
+ from cognee.modules.users.models.UserTenant import UserTenant
10
+ from cognee.modules.users.models.User import User
11
+ from cognee.modules.users.permissions.methods import get_tenant
12
+ from cognee.modules.users.exceptions import UserNotFoundError, TenantNotFoundError
13
+
14
+
15
+ async def select_tenant(user_id: UUID, tenant_id: Union[UUID, None]) -> User:
16
+ """
17
+ Set the users active tenant to provided tenant.
18
+
19
+ If None tenant_id is provided set current Tenant to the default single user-tenant
20
+ Args:
21
+ user_id: UUID of the user.
22
+ tenant_id: Id of the tenant.
23
+
24
+ Returns:
25
+ None
26
+
27
+ """
28
+ db_engine = get_relational_engine()
29
+ async with db_engine.get_async_session() as session:
30
+ user = await get_user(user_id)
31
+ if tenant_id is None:
32
+ # If no tenant_id is provided set current Tenant to the single user-tenant
33
+ user.tenant_id = None
34
+ await session.merge(user)
35
+ await session.commit()
36
+ return user
37
+
38
+ tenant = await get_tenant(tenant_id)
39
+
40
+ if not user:
41
+ raise UserNotFoundError
42
+ elif not tenant:
43
+ raise TenantNotFoundError
44
+
45
+ # Check if User is part of Tenant
46
+ result = await session.execute(
47
+ select(UserTenant)
48
+ .where(UserTenant.user_id == user.id)
49
+ .where(UserTenant.tenant_id == tenant_id)
50
+ )
51
+
52
+ try:
53
+ result = result.scalar_one()
54
+ except sqlalchemy.exc.NoResultFound as e:
55
+ raise TenantNotFoundError("User is not part of the tenant.") from e
56
+
57
+ if result:
58
+ # If user is part of tenant update current tenant of user
59
+ user.tenant_id = tenant_id
60
+ await session.merge(user)
61
+ await session.commit()
62
+ return user
@@ -450,6 +450,8 @@ def setup_logging(log_level=None, name=None):
450
450
  try:
451
451
  msg = self.format(record)
452
452
  stream = self.stream
453
+ if hasattr(stream, "closed") and stream.closed:
454
+ return
453
455
  stream.write("\n" + msg + self.terminator)
454
456
  self.flush()
455
457
  except Exception:
@@ -532,6 +534,10 @@ def setup_logging(log_level=None, name=None):
532
534
  # Get a configured logger and log system information
533
535
  logger = structlog.get_logger(name if name else __name__)
534
536
 
537
+ logger.warning(
538
+ "From version 0.5.0 onwards, Cognee will run with multi-user access control mode set to on by default. Data isolation between different users and datasets will be enforced and data created before multi-user access control mode was turned on won't be accessible by default. To disable multi-user access control mode and regain access to old data set the environment variable ENABLE_BACKEND_ACCESS_CONTROL to false before starting Cognee. For more information, please refer to the Cognee documentation."
539
+ )
540
+
535
541
  if logs_dir is not None:
536
542
  logger.info(f"Log file created at: {log_file_path}", log_file=log_file_path)
537
543
 
@@ -0,0 +1,30 @@
1
+ from aiolimiter import AsyncLimiter
2
+ from contextlib import nullcontext
3
+ from cognee.infrastructure.llm.config import get_llm_config
4
+
5
# Resolve the LLM configuration once, at import time; the limiters below are
# module-level objects built from it.
llm_config = get_llm_config()

# Each limiter is constructed as AsyncLimiter(<requests>, <interval>).
# The LLM limiter must be paired with the LLM interval; it previously used the
# embedding interval, which tied LLM throttling to the embedding setting.
llm_rate_limiter = AsyncLimiter(
    llm_config.llm_rate_limit_requests, llm_config.llm_rate_limit_interval
)
embedding_rate_limiter = AsyncLimiter(
    llm_config.embedding_rate_limit_requests, llm_config.embedding_rate_limit_interval
)
13
+
14
+
15
def llm_rate_limiter_context_manager():
    """
    Return the context manager to use around an LLM call.

    Returns the module-level ``llm_rate_limiter`` when
    ``llm_config.llm_rate_limit_enabled`` is truthy, otherwise a new
    ``nullcontext()`` that does nothing.
    """
    if not llm_config.llm_rate_limit_enabled:
        # Rate limiting is disabled: hand back a no-op context manager
        return nullcontext()
    return llm_rate_limiter
22
+
23
+
24
def embedding_rate_limiter_context_manager():
    """
    Return the context manager to use around an embedding call.

    Returns the module-level ``embedding_rate_limiter`` when
    ``llm_config.embedding_rate_limit_enabled`` is truthy, otherwise a new
    ``nullcontext()`` that does nothing.
    """
    if not llm_config.embedding_rate_limit_enabled:
        # Rate limiting is disabled: hand back a no-op context manager
        return nullcontext()
    return embedding_rate_limiter
@@ -1,4 +1,5 @@
1
1
  from .chunk_by_word import chunk_by_word
2
2
  from .chunk_by_sentence import chunk_by_sentence
3
3
  from .chunk_by_paragraph import chunk_by_paragraph
4
+ from .chunk_by_row import chunk_by_row
4
5
  from .remove_disconnected_chunks import remove_disconnected_chunks
@@ -0,0 +1,94 @@
1
+ from typing import Any, Dict, Iterator
2
+ from uuid import NAMESPACE_OID, uuid5
3
+
4
+ from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
5
+
6
+
7
def _get_pair_size(pair_text: str) -> int:
    """
    Return the size of a single key:value pair, measured in tokens.

    When the embedding engine exposes a tokenizer, the pair is measured with the
    tokenizer's ``count_tokens``. When no tokenizer is available, every pair is
    assumed to have a fixed size of 3.

    Parameters:
    -----------

        - pair_text (str): The key:value pair text whose size is to be calculated.

    Returns:
    --------

        - int: The tokenizer's token count for the text, or 3 when there is no
          tokenizer.
    """
    tokenizer = get_embedding_engine().tokenizer
    if not tokenizer:
        # No tokenizer available: fall back to the fixed per-pair estimate
        return 3
    return tokenizer.count_tokens(pair_text)
30
+
31
+
32
+ def chunk_by_row(
33
+ data: str,
34
+ max_chunk_size,
35
+ ) -> Iterator[Dict[str, Any]]:
36
+ """
37
+ Chunk row-formatted text into pieces no larger than max_chunk_size where possible.
38
+
39
+ The input is split into rows on blank lines (two consecutive newlines) and each row
40
+ into pairs on ", ". Pairs are accumulated and re-joined with ", "; when the next pair
41
+ would push the size past max_chunk_size the accumulated pairs are yielded as a "row_cut"
42
+ chunk, and the remaining pairs are yielded as a "row_end" chunk. Row separators are not
43
+ re-emitted. Each yielded dict has: text, chunk_size, chunk_id, chunk_index, cut_type.
44
+
45
+ Parameters:
46
+ -----------
47
+
48
+ - data (str): The input text to be chunked.
49
+ - max_chunk_size: The maximum allowed size for each chunk, in the units returned by
50
+ _get_pair_size.
51
+ """
52
+ current_chunk_list = []
53
+ chunk_index = 0
54
+ current_chunk_size = 0
55
+
56
+ lines = data.split("\n\n")
57
+ for line in lines:
58
+ pairs_text = line.split(", ")
59
+
60
+ for pair_text in pairs_text:
61
+ pair_size = _get_pair_size(pair_text)
62
+ if current_chunk_size > 0 and (current_chunk_size + pair_size > max_chunk_size):
63
+ # Yield the pairs accumulated so far; the current pair does not fit with them
64
+ current_chunk = ", ".join(current_chunk_list)
65
+ chunk_dict = {
66
+ "text": current_chunk,
67
+ "chunk_size": current_chunk_size,
68
+ "chunk_id": uuid5(NAMESPACE_OID, current_chunk),
69
+ "chunk_index": chunk_index,
70
+ "cut_type": "row_cut",
71
+ }
72
+
73
+ yield chunk_dict
74
+
75
+ # Reset the accumulators; the current pair is appended to the new chunk below
76
+ current_chunk_list = []
77
+ current_chunk_size = 0
78
+ chunk_index += 1
79
+
80
+ current_chunk_list.append(pair_text)
81
+ current_chunk_size += pair_size
82
+
83
+ # Yield row chunk. NOTE(review): nothing after this yield resets the accumulators or advances chunk_index - confirm rows cannot repeat across chunks.
84
+ current_chunk = ", ".join(current_chunk_list)
85
+ if current_chunk:
86
+ chunk_dict = {
87
+ "text": current_chunk,
88
+ "chunk_size": current_chunk_size,
89
+ "chunk_id": uuid5(NAMESPACE_OID, current_chunk),
90
+ "chunk_index": chunk_index,
91
+ "cut_type": "row_end",
92
+ }
93
+
94
+ yield chunk_dict
@@ -1,3 +1,2 @@
1
1
  from .classify_documents import classify_documents
2
2
  from .extract_chunks_from_documents import extract_chunks_from_documents
3
- from .check_permissions_on_dataset import check_permissions_on_dataset
@@ -7,6 +7,7 @@ from cognee.modules.data.processing.document_types import (
7
7
  ImageDocument,
8
8
  TextDocument,
9
9
  UnstructuredDocument,
10
+ CsvDocument,
10
11
  )
11
12
  from cognee.modules.engine.models.node_set import NodeSet
12
13
  from cognee.modules.engine.utils.generate_node_id import generate_node_id
@@ -15,6 +16,7 @@ from cognee.tasks.documents.exceptions import WrongDataDocumentInputError
15
16
  EXTENSION_TO_DOCUMENT_CLASS = {
16
17
  "pdf": PdfDocument, # Text documents
17
18
  "txt": TextDocument,
19
+ "csv": CsvDocument,
18
20
  "docx": UnstructuredDocument,
19
21
  "doc": UnstructuredDocument,
20
22
  "odt": UnstructuredDocument,
@@ -61,7 +61,7 @@ async def _generate_improved_answer_for_single_interaction(
61
61
  )
62
62
 
63
63
  retrieved_context = await retriever.get_context(query_text)
64
- completion = await retriever.get_structured_completion(
64
+ completion = await retriever.get_completion(
65
65
  query=query_text,
66
66
  context=retrieved_context,
67
67
  response_model=ImprovedAnswerResponse,
@@ -70,9 +70,9 @@ async def _generate_improved_answer_for_single_interaction(
70
70
  new_context_text = await retriever.resolve_edges_to_text(retrieved_context)
71
71
 
72
72
  if completion:
73
- enrichment.improved_answer = completion.answer
73
+ enrichment.improved_answer = completion[0].answer
74
74
  enrichment.new_context = new_context_text
75
- enrichment.explanation = completion.explanation
75
+ enrichment.explanation = completion[0].explanation
76
76
  return enrichment
77
77
  else:
78
78
  logger.warning(