cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  156. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  157. cognee/tasks/summarization/models.py +0 -2
  158. cognee/tasks/temporal_graph/__init__.py +0 -1
  159. cognee/tasks/translation/__init__.py +96 -0
  160. cognee/tasks/translation/config.py +110 -0
  161. cognee/tasks/translation/detect_language.py +190 -0
  162. cognee/tasks/translation/exceptions.py +62 -0
  163. cognee/tasks/translation/models.py +72 -0
  164. cognee/tasks/translation/providers/__init__.py +44 -0
  165. cognee/tasks/translation/providers/azure_provider.py +192 -0
  166. cognee/tasks/translation/providers/base.py +85 -0
  167. cognee/tasks/translation/providers/google_provider.py +158 -0
  168. cognee/tasks/translation/providers/llm_provider.py +143 -0
  169. cognee/tasks/translation/translate_content.py +282 -0
  170. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  171. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  172. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  173. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  174. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  175. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  176. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  177. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  178. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  179. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  180. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  181. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  182. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  183. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  184. cognee/tests/tasks/translation/README.md +147 -0
  185. cognee/tests/tasks/translation/__init__.py +1 -0
  186. cognee/tests/tasks/translation/config_test.py +93 -0
  187. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  188. cognee/tests/tasks/translation/providers_test.py +151 -0
  189. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  190. cognee/tests/test_chromadb.py +1 -1
  191. cognee/tests/test_cleanup_unused_data.py +165 -0
  192. cognee/tests/test_delete_by_id.py +6 -6
  193. cognee/tests/test_extract_usage_frequency.py +308 -0
  194. cognee/tests/test_kuzu.py +17 -7
  195. cognee/tests/test_lancedb.py +3 -1
  196. cognee/tests/test_library.py +1 -1
  197. cognee/tests/test_neo4j.py +17 -7
  198. cognee/tests/test_neptune_analytics_vector.py +3 -1
  199. cognee/tests/test_permissions.py +172 -187
  200. cognee/tests/test_pgvector.py +3 -1
  201. cognee/tests/test_relational_db_migration.py +15 -1
  202. cognee/tests/test_remote_kuzu.py +3 -1
  203. cognee/tests/test_s3_file_storage.py +1 -1
  204. cognee/tests/test_search_db.py +97 -110
  205. cognee/tests/test_usage_logger_e2e.py +268 -0
  206. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  207. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  208. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  209. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  210. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  211. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  212. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  213. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  214. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  215. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  216. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  217. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  218. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  219. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  220. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  221. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  222. cognee/tests/unit/modules/search/test_search.py +176 -0
  223. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  224. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  225. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  226. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  227. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
  228. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
  229. cognee/api/.env.example +0 -5
  230. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  231. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  232. cognee/modules/search/methods/no_access_control_search.py +0 -62
  233. cognee/modules/search/utils/prepare_search_result.py +0 -63
  234. cognee/tests/test_feedback_enrichment.py +0 -174
  235. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
  236. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
  237. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,236 @@
1
+ """permission_system_rework
2
+
3
+ Revision ID: ab7e313804ae
4
+ Revises: 1d0bb7fede17
5
+ Create Date: 2025-06-16 15:20:43.118246
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+ from alembic import op
11
+ import sqlalchemy as sa
12
+ from sqlalchemy import UUID
13
+ from datetime import datetime, timezone
14
+ from uuid import uuid4
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = "ab7e313804ae"
18
+ down_revision: Union[str, None] = "1d0bb7fede17"
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def _now():
    """Return the current moment as a timezone-aware ``datetime`` in UTC."""
    return datetime.now(tz=timezone.utc)
25
+
26
+
27
def _define_dataset_table() -> sa.Table:
    """Describe the ``datasets`` table locally, on a fresh ``MetaData``.

    The Cognee model classes are deliberately not used: they can change after
    this migration is written, so the migration carries its own definition.
    """
    columns = (
        sa.Column("id", UUID, primary_key=True, default=uuid4),
        sa.Column("name", sa.Text),
        sa.Column("created_at", sa.DateTime(timezone=True), default=_now),
        sa.Column("updated_at", sa.DateTime(timezone=True), onupdate=_now),
        sa.Column("owner_id", UUID, sa.ForeignKey("principals.id"), index=True),
    )
    return sa.Table("datasets", sa.MetaData(), *columns)
49
+
50
+
51
def _define_data_table() -> sa.Table:
    """Describe the ``data`` table locally, on a fresh ``MetaData``.

    The Cognee model classes are deliberately not used: they can change after
    this migration is written, so the migration carries its own definition.
    """
    columns = (
        sa.Column("id", UUID, primary_key=True, default=uuid4),
        sa.Column("name", sa.String),
        sa.Column("extension", sa.String),
        sa.Column("mime_type", sa.String),
        sa.Column("raw_data_location", sa.String),
        sa.Column("owner_id", UUID, index=True),
        sa.Column("content_hash", sa.String),
        sa.Column("external_metadata", sa.JSON),
        sa.Column("node_set", sa.JSON, nullable=True),  # list of strings
        sa.Column("token_count", sa.Integer),
        sa.Column("created_at", sa.DateTime(timezone=True), default=_now),
        sa.Column("updated_at", sa.DateTime(timezone=True), onupdate=_now),
    )
    return sa.Table("data", sa.MetaData(), *columns)
80
+
81
+
82
def _ensure_permission(conn, permission_name) -> str:
    """Return the id of the permission named ``permission_name``, inserting it if absent.

    Args:
        conn: Connection used to look the permission up.
        permission_name: Value matched against ``permissions.name``.

    Returns:
        ``row.id`` of the existing row, or the freshly generated id of the row
        that was just inserted.
        NOTE(review): the annotation says ``str``, but the insert path returns
        the object produced by ``uuid4()``.
    """
    permissions = sa.Table(
        "permissions",
        sa.MetaData(),
        sa.Column("id", UUID, primary_key=True, index=True, default=uuid4),
        sa.Column("created_at", sa.DateTime(timezone=True), default=_now),
        sa.Column("updated_at", sa.DateTime(timezone=True), onupdate=_now),
        sa.Column("name", sa.String, unique=True, nullable=False, index=True),
    )

    lookup = sa.select(permissions).filter(permissions.c.name == permission_name)
    existing = conn.execute(lookup).fetchone()
    if existing is not None:
        return existing.id

    # No such permission yet: create it through Alembic and hand back its new id.
    new_id = uuid4()
    new_row = {"id": new_id, "name": permission_name, "created_at": _now()}
    op.bulk_insert(permissions, [new_row])
    return new_id
120
+
121
+
122
def _build_acl_row(*, user_id, target_id, permission_id, target_col) -> dict:
    """Assemble one ACL row as a dict keyed by column name.

    ``target_col`` is the name of the column that receives ``target_id``
    (the callers in this file pass ``"dataset_id"`` or ``"data_id"``).
    Each row gets a new ``uuid4()`` id and the current UTC time as ``created_at``.
    """
    acl_row = {"id": uuid4(), "created_at": _now(), "principal_id": user_id}
    acl_row[target_col] = target_id
    acl_row["permission_id"] = permission_id
    return acl_row
131
+
132
+
133
def _create_dataset_permission(conn, user_id, dataset_id, permission_name):
    """Build the ACL row giving ``user_id`` the named permission on a dataset.

    The permission id is resolved (and the permission created if missing) via
    ``_ensure_permission``; the returned dict is not inserted here.
    """
    return _build_acl_row(
        user_id=user_id,
        target_id=dataset_id,
        permission_id=_ensure_permission(conn, permission_name),
        target_col="dataset_id",
    )
138
+
139
+
140
def _create_data_permission(conn, user_id, data_id, permission_name):
    """Build the ACL row giving ``user_id`` the named permission on a data item.

    The permission id is resolved (and the permission created if missing) via
    ``_ensure_permission``; the returned dict is not inserted here.
    """
    return _build_acl_row(
        user_id=user_id,
        target_id=data_id,
        permission_id=_ensure_permission(conn, permission_name),
        target_col="data_id",
    )
145
+
146
+
147
def _get_column(inspector, table, name, schema=None):
    """Return the first column description of ``table`` whose ``"name"`` is ``name``.

    Column descriptions come from ``inspector.get_columns(table, schema=schema)``.
    ``None`` is returned when no column matches.
    """
    matching = (
        column
        for column in inspector.get_columns(table, schema=schema)
        if column["name"] == name
    )
    return next(matching, None)
152
+
153
+
154
def upgrade() -> None:
    """Rebuild ``acls`` around datasets and grant every dataset owner default rights.

    Nothing happens when ``acls`` already has a ``dataset_id`` column. Otherwise
    the table is dropped (its rows are not carried over) and recreated with a
    ``dataset_id`` foreign key, and each dataset's owner receives the ``read``,
    ``write``, ``share`` and ``delete`` permissions on that dataset.
    """
    conn = op.get_bind()
    insp = sa.inspect(conn)

    if _get_column(insp, "acls", "dataset_id"):
        # Table is already in the dataset-based layout.
        return

    # Recreate ACLs table with default permissions set to datasets instead of documents
    op.drop_table("acls")
    acls_table = op.create_table(
        "acls",
        sa.Column("id", UUID, primary_key=True, default=uuid4),
        sa.Column(
            "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            onupdate=lambda: datetime.now(timezone.utc),
        ),
        sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")),
        sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")),
        sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")),
    )

    # Note: We can't use any Cognee model info to gather data (as it can change) in database
    # so we must use our own table definition or load what is in the database
    dataset_table = _define_dataset_table()
    datasets = conn.execute(sa.select(dataset_table)).fetchall()
    if not datasets:
        return

    # Resolve each permission once instead of once per dataset: the id for a given
    # name does not change between rows, and every lookup costs a SELECT.
    permission_ids = {
        permission_name: _ensure_permission(conn, permission_name)
        for permission_name in ("read", "write", "share", "delete")
    }

    acl_list = []
    for dataset in datasets:
        acl_list.extend(
            _build_acl_row(
                user_id=dataset.owner_id,
                target_id=dataset.id,
                permission_id=permission_id,
                target_col="dataset_id",
            )
            for permission_id in permission_ids.values()
        )

    op.bulk_insert(acls_table, acl_list)
199
+
200
+
201
def downgrade() -> None:
    """Rebuild ``acls`` around data items and grant every data owner default rights.

    The table is dropped unconditionally (its rows are not carried over) and
    recreated with a ``data_id`` foreign key. Each row of ``data`` then gets
    ``read`` and ``write`` ACL entries for its owner.
    """
    conn = op.get_bind()

    op.drop_table("acls")
    acls_table = op.create_table(
        "acls",
        sa.Column("id", UUID, primary_key=True, nullable=False, default=uuid4),
        sa.Column(
            "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
        ),
        sa.Column(
            "updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)
        ),
        sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")),
        sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")),
        sa.Column("data_id", UUID, sa.ForeignKey("data.id", ondelete="CASCADE")),
    )

    # Note: We can't use any Cognee model info to gather data (as it can change) in database
    # so we must use our own table definition or load what is in the database
    data_table = _define_data_table()
    data = conn.execute(sa.select(data_table)).fetchall()
    if not data:
        return

    # Resolve each permission once instead of once per data row: the id for a given
    # name does not change between rows, and every lookup costs a SELECT.
    permission_ids = {
        permission_name: _ensure_permission(conn, permission_name)
        for permission_name in ("read", "write")
    }

    acl_list = []
    for single_data in data:
        acl_list.extend(
            _build_acl_row(
                user_id=single_data.owner_id,
                target_id=single_data.id,
                permission_id=permission_id,
                target_col="data_id",
            )
            for permission_id in permission_ids.values()
        )

    op.bulk_insert(acls_table, acl_list)
@@ -0,0 +1,75 @@
1
+ """kuzu-11-migration
2
+
3
+ Revision ID: b9274c27a25a
4
+ Revises: e4ebee1091e7
5
+ Create Date: 2025-07-24 17:11:52.174737
6
+
7
+ """
8
+
9
+ import os
10
+ from typing import Sequence, Union
11
+
12
+ from cognee.infrastructure.databases.graph.kuzu.kuzu_migrate import (
13
+ kuzu_migration,
14
+ read_kuzu_storage_version,
15
+ )
16
+ import kuzu
17
+
18
+ # revision identifiers, used by Alembic.
19
+ revision: str = "b9274c27a25a"
20
+ down_revision: Union[str, None] = "e4ebee1091e7"
21
+ branch_labels: Union[str, Sequence[str], None] = None
22
+ depends_on: Union[str, Sequence[str], None] = None
23
+
24
+
25
def _migrate_kuzu_db_if_outdated(db_path) -> None:
    """Migrate the Kuzu database at ``db_path`` when it was written by 0.9.0 or 0.8.2.

    Nothing is done when the stored version is any other value, or when it
    already equals the installed ``kuzu.__version__``. The migrated copy is
    built at ``db_path + "_new"`` with ``overwrite=True``.
    """
    stored_version = read_kuzu_storage_version(db_path)
    if stored_version not in ("0.9.0", "0.8.2"):
        return
    if stored_version == kuzu.__version__:
        return

    # Try to migrate kuzu database to latest version
    kuzu_migration(
        new_db=db_path + "_new",
        old_db=db_path,
        new_version=kuzu.__version__,
        old_version=stored_version,
        overwrite=True,
    )


def upgrade() -> None:
    """Upgrade on-disk Kuzu graph databases to the installed Kuzu version.

    When ``ENABLE_BACKEND_ACCESS_CONTROL`` is ``"true"`` (case-insensitive),
    every path under ``<system_root_directory>/databases`` that ends in
    ``.pkl`` is checked. Otherwise only the configured graph file is checked,
    and only if the graph provider is ``kuzu`` and the file exists.

    Raises:
        FileNotFoundError: In access-control mode, when the databases directory
            does not exist.
    """
    # NOTE(review): SOURCE lost its indentation; the ``else`` branch is assumed to
    # pair with the ENABLE_BACKEND_ACCESS_CONTROL check — confirm against the repo.
    access_control = os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"

    if not access_control:
        from cognee.infrastructure.databases.graph import get_graph_config

        graph_config = get_graph_config()
        if graph_config.graph_database_provider.lower() != "kuzu":
            return
        if os.path.exists(graph_config.graph_file_path):
            _migrate_kuzu_db_if_outdated(graph_config.graph_file_path)
        return

    # This migration branch is only for multi-user Cognee mode
    from cognee.base_config import get_base_config

    base_config = get_base_config()

    databases_root = os.path.join(base_config.system_root_directory, "databases")
    if not os.path.isdir(databases_root):
        raise FileNotFoundError(f"Directory not found: {databases_root}")

    for current_path, _dirnames, _filenames in os.walk(databases_root):
        # A path ending in ".pkl" is treated as a kuzu graph database
        if current_path.endswith(".pkl"):
            _migrate_kuzu_db_if_outdated(current_path)
69
+
70
+
71
def downgrade() -> None:
    """Do nothing; this migration has no automated downgrade.

    To downgrade you will have to manually rename the backed-up old kuzu graph
    databases stored in the user folder to their previous names and remove the
    new kuzu graph database that replaced them.
    """
@@ -0,0 +1,137 @@
1
+ """Multi Tenant Support
2
+
3
+ Revision ID: c946955da633
4
+ Revises: 211ab850ef3d
5
+ Create Date: 2025-11-04 18:11:09.325158
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+ from datetime import datetime, timezone
11
+ from uuid import uuid4
12
+
13
+ from alembic import op
14
+ import sqlalchemy as sa
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = "c946955da633"
18
+ down_revision: Union[str, None] = "211ab850ef3d"
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def _now():
    """Give back the present time as an aware ``datetime`` whose zone is UTC."""
    utc = timezone.utc
    return datetime.now(utc)
25
+
26
+
27
def _define_user_table() -> sa.Table:
    """Describe the two ``users`` columns this migration reads: ``id`` and ``tenant_id``."""
    id_column = sa.Column(
        "id",
        sa.UUID,
        sa.ForeignKey("principals.id", ondelete="CASCADE"),
        primary_key=True,
        nullable=False,
    )
    tenant_column = sa.Column(
        "tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), index=True, nullable=True
    )
    return sa.Table("users", sa.MetaData(), id_column, tenant_column)
41
+
42
+
43
def _define_dataset_table() -> sa.Table:
    """Describe the ``datasets`` table, including ``tenant_id``, on a fresh ``MetaData``.

    The Cognee model classes are deliberately not used: they can change after
    this migration is written, so the migration carries its own definition.
    """
    columns = (
        sa.Column("id", sa.UUID, primary_key=True, default=uuid4),
        sa.Column("name", sa.Text),
        sa.Column("created_at", sa.DateTime(timezone=True), default=_now),
        sa.Column("updated_at", sa.DateTime(timezone=True), onupdate=_now),
        sa.Column("owner_id", sa.UUID(), sa.ForeignKey("principals.id"), index=True),
        sa.Column("tenant_id", sa.UUID(), sa.ForeignKey("tenants.id"), index=True, nullable=True),
    )
    return sa.Table("datasets", sa.MetaData(), *columns)
66
+
67
+
68
def _get_column(inspector, table, name, schema=None):
    """Look up a column description by name, or return ``None`` if there is none.

    The candidates are whatever ``inspector.get_columns(table, schema=schema)``
    yields; the first one whose ``"name"`` entry equals ``name`` wins.
    """
    described = inspector.get_columns(table, schema=schema)
    return next((entry for entry in described if entry["name"] == name), None)
73
+
74
+
75
def upgrade() -> None:
    """Add the ``user_tenants`` link table and a ``tenant_id`` column on ``datasets``.

    Both steps are skipped when their target already exists. ``user_tenants``
    is seeded from every user that has a non-null ``tenant_id``;
    ``datasets.tenant_id`` is filled with the ``tenant_id`` of each dataset's
    owner and then indexed.
    """
    # NOTE(review): SOURCE lost its indentation; seeding is assumed to sit inside the
    # "table missing" branch, and backfill + index creation inside the "column
    # missing" branch — confirm against the repository.
    conn = op.get_bind()
    insp = sa.inspect(conn)

    dataset = _define_dataset_table()
    user = _define_user_table()

    if "user_tenants" not in insp.get_table_names():
        # Define table with all necessary columns including primary key
        user_tenants = op.create_table(
            "user_tenants",
            sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True),
            sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True),
            sa.Column(
                "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
            ),
        )

        # Get all users with their tenant_id
        users_with_tenant = sa.select(user.c.id, user.c.tenant_id).where(
            user.c.tenant_id.isnot(None)
        )
        user_data = conn.execute(users_with_tenant).fetchall()

        # Insert into user_tenants table
        if user_data:
            memberships = []
            for user_id, tenant_id in user_data:
                memberships.append(
                    {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()}
                )
            op.bulk_insert(user_tenants, memberships)

    if _get_column(insp, "datasets", "tenant_id"):
        return

    op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True))

    # Build subquery, select users.tenant_id for each dataset.owner_id
    tenant_id_from_dataset_owner = (
        sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery()
    )
    backfill = dataset.update().values(tenant_id=tenant_id_from_dataset_owner)

    if op.get_context().dialect.name == "sqlite":
        # On SQLite the backfill is issued through a batch operation on "datasets"
        with op.batch_alter_table("datasets") as batch_op:
            batch_op.execute(backfill)
    else:
        op.get_bind().execute(backfill)

    op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"])
130
+
131
+
132
def downgrade() -> None:
    """Drop ``user_tenants``, then the ``ix_datasets_tenant_id`` index and ``datasets.tenant_id``."""
    # Commands were auto generated by Alembic - please adjust!
    op.drop_table("user_tenants")

    tenant_index = op.f("ix_datasets_tenant_id")
    op.drop_index(tenant_index, table_name="datasets")
    op.drop_column("datasets", "tenant_id")
@@ -0,0 +1,51 @@
1
+ """add_last_accessed_to_data
2
+
3
+ Revision ID: e1ec1dcb50b6
4
+ Revises: a1b2c3d4e5f6
5
+ Create Date: 2025-11-04 21:45:52.642322
6
+
7
+ """
8
+
9
+ import os
10
+ from typing import Sequence, Union
11
+
12
+ from alembic import op
13
+ import sqlalchemy as sa
14
+
15
+
16
+ # revision identifiers, used by Alembic.
17
+ revision: str = "e1ec1dcb50b6"
18
+ down_revision: Union[str, None] = "a1b2c3d4e5f6"
19
+ branch_labels: Union[str, Sequence[str], None] = None
20
+ depends_on: Union[str, Sequence[str], None] = None
21
+
22
+
23
def _get_column(inspector, table, name, schema=None):
    """Return the reflected column entry called *name* in *table*, or ``None``.

    The entries come from ``inspector.get_columns(table, schema=schema)``; the
    first one whose ``"name"`` key equals *name* is returned.
    """
    reflected = inspector.get_columns(table, schema=schema)
    return next((entry for entry in reflected if entry["name"] == name), None)
28
+
29
+
30
def upgrade() -> None:
    """Add a nullable ``last_accessed`` timestamp column to the ``data`` table.

    Nothing happens when the column is already present. When the column is
    created and the ``ENABLE_LAST_ACCESSED`` environment variable equals
    ``"true"`` (compared case-insensitively), every existing row is stamped
    with ``CURRENT_TIMESTAMP``.
    """
    inspector = sa.inspect(op.get_bind())
    if _get_column(inspector, "data", "last_accessed"):
        # Column already present: nothing to add or initialize.
        return

    # Always create the column for schema consistency.
    op.add_column("data", sa.Column("last_accessed", sa.DateTime(timezone=True), nullable=True))

    # Only initialize existing records if the feature is enabled.
    feature_flag = os.getenv("ENABLE_LAST_ACCESSED", "false")
    if feature_flag.lower() == "true":
        op.execute("UPDATE data SET last_accessed = CURRENT_TIMESTAMP")
43
+
44
+
45
def downgrade() -> None:
    """Drop ``data.last_accessed`` if the column is present; otherwise do nothing."""
    inspector = sa.inspect(op.get_bind())
    if not _get_column(inspector, "data", "last_accessed"):
        # Column is absent, so there is nothing to remove.
        return
    op.drop_column("data", "last_accessed")
@@ -0,0 +1,140 @@
1
+ """Expand data model info
2
+
3
+ Revision ID: e4ebee1091e7
4
+ Revises: ab7e313804ae
5
+ Create Date: 2025-07-24 13:21:30.738486
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+ from sqlalchemy.dialects import postgresql
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "e4ebee1091e7"
17
+ down_revision: Union[str, None] = "ab7e313804ae"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def _get_column(inspector, table, name, schema=None):
    """Look up a single reflected column of *table* by its name.

    Returns the first entry from ``inspector.get_columns(table, schema=schema)``
    whose ``"name"`` key equals *name*, or ``None`` when no entry matches.
    """
    candidates = (
        entry
        for entry in inspector.get_columns(table, schema=schema)
        if entry["name"] == name
    )
    return next(candidates, None)
27
+
28
+
29
def _index_exists(inspector, table, name, schema=None):
    """Return ``True`` when *table* has a reflected index called *name*.

    Index entries come from ``inspector.get_indexes(table, schema=schema)`` and
    are matched on their ``"name"`` key.
    """
    for index_info in inspector.get_indexes(table, schema=schema):
        if index_info["name"] == name:
            return True
    return False
31
+
32
+
33
def upgrade() -> None:
    """Drop obsolete tables and ensure ``data`` has nullable tenant/size columns.

    Steps, each skipped when the database is already in the target state:

    1. Drop ``file_metadata``, ``_dlt_loads``, ``_dlt_version`` and
       ``_dlt_pipeline_state`` if they exist.
    2. Add ``data.tenant_id`` (UUID) and ``data.data_size`` (Integer) as
       nullable columns, or relax an existing NOT NULL constraint on them.
    3. Create the non-unique ``ix_data_tenant_id`` index if it is missing.
    """
    inspector = sa.inspect(op.get_bind())

    # Only drop the obsolete tables that are actually present.
    present_tables = set(inspector.get_table_names())
    for obsolete in ("file_metadata", "_dlt_loads", "_dlt_version", "_dlt_pipeline_state"):
        if obsolete in present_tables:
            op.drop_table(obsolete)

    data_table = "data"
    tenant_column = "tenant_id"
    tenant_index = "ix_data_tenant_id"

    # Each entry pairs a column name with a factory for its type, so a fresh
    # type object is built for whichever operation ends up being issued.
    nullable_columns = (
        (tenant_column, lambda: postgresql.UUID(as_uuid=True)),
        ("data_size", lambda: sa.Integer()),
    )
    for column_name, make_type in nullable_columns:
        reflected = _get_column(inspector, data_table, column_name)
        if reflected is None:
            op.add_column(data_table, sa.Column(column_name, make_type(), nullable=True))
        elif reflected.get("nullable", True) is False:
            # Column exists but is NOT NULL: relax it to nullable.
            op.alter_column(
                data_table,
                column_name,
                existing_type=make_type(),
                nullable=True,
            )

    if not _index_exists(inspector, data_table, tenant_index):
        op.create_index(tenant_index, data_table, [tenant_column], unique=False)
88
+
89
+
90
def downgrade() -> None:
    """Reverse the upgrade.

    Drops the ``ix_data_tenant_id`` index and the ``data_size`` / ``tenant_id``
    columns from ``data``, then recreates the ``_dlt_pipeline_state``,
    ``_dlt_version``, ``_dlt_loads`` and ``file_metadata`` tables (structure
    only; no rows are restored).
    """

    def _col(name, col_type, nullable):
        # Every recreated column is declared with autoincrement disabled.
        return sa.Column(name, col_type, autoincrement=False, nullable=nullable)

    def _text(name, nullable):
        return _col(name, sa.TEXT(), nullable)

    def _bigint(name):
        return _col(name, sa.BIGINT(), False)

    def _timestamp(name):
        return _col(name, postgresql.TIMESTAMP(timezone=True), False)

    def _dlt_id():
        return _col("_dlt_id", sa.VARCHAR(length=128), False)

    # Undo the additions to the data table first.
    op.drop_index(op.f("ix_data_tenant_id"), table_name="data")
    for column_name in ("data_size", "tenant_id"):
        op.drop_column("data", column_name)

    # Insertion order of this mapping is the order the tables are created in.
    restored_tables = {
        "_dlt_pipeline_state": [
            _bigint("version"),
            _bigint("engine_version"),
            _text("pipeline_name", nullable=False),
            _text("state", nullable=False),
            _timestamp("created_at"),
            _text("version_hash", nullable=True),
            _text("_dlt_load_id", nullable=False),
            _dlt_id(),
        ],
        "_dlt_version": [
            _bigint("version"),
            _bigint("engine_version"),
            _timestamp("inserted_at"),
            _text("schema_name", nullable=False),
            _text("version_hash", nullable=False),
            _text("schema", nullable=False),
        ],
        "_dlt_loads": [
            _text("load_id", nullable=False),
            _text("schema_name", nullable=True),
            _bigint("status"),
            _timestamp("inserted_at"),
            _text("schema_version_hash", nullable=True),
        ],
        "file_metadata": [
            _text("id", nullable=False),
            _text("name", nullable=True),
            _text("file_path", nullable=True),
            _text("extension", nullable=True),
            _text("mime_type", nullable=True),
            _text("content_hash", nullable=True),
            _text("owner_id", nullable=True),
            _text("_dlt_load_id", nullable=False),
            _dlt_id(),
            _text("node_set", nullable=True),
        ],
    }
    for table_name, columns in restored_tables.items():
        op.create_table(table_name, *columns)