cognee 0.5.1.dev0__py3-none-any.whl → 0.5.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. cognee/__init__.py +2 -0
  2. cognee/alembic/README +1 -0
  3. cognee/alembic/env.py +107 -0
  4. cognee/alembic/script.py.mako +26 -0
  5. cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
  6. cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
  7. cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
  8. cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
  9. cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
  10. cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
  11. cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
  12. cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
  13. cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
  14. cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
  15. cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
  16. cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
  17. cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
  18. cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
  19. cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
  20. cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
  21. cognee/alembic.ini +117 -0
  22. cognee/api/v1/add/routers/get_add_router.py +2 -0
  23. cognee/api/v1/cognify/cognify.py +11 -6
  24. cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
  25. cognee/api/v1/config/config.py +60 -0
  26. cognee/api/v1/datasets/routers/get_datasets_router.py +45 -3
  27. cognee/api/v1/memify/routers/get_memify_router.py +2 -0
  28. cognee/api/v1/search/routers/get_search_router.py +21 -6
  29. cognee/api/v1/search/search.py +25 -5
  30. cognee/api/v1/sync/routers/get_sync_router.py +3 -3
  31. cognee/cli/commands/add_command.py +1 -1
  32. cognee/cli/commands/cognify_command.py +6 -0
  33. cognee/cli/commands/config_command.py +1 -1
  34. cognee/context_global_variables.py +5 -1
  35. cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
  36. cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
  37. cognee/infrastructure/databases/cache/config.py +6 -0
  38. cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
  39. cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
  40. cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
  41. cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
  42. cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
  43. cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
  44. cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
  45. cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +2 -1
  46. cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
  47. cognee/infrastructure/databases/vector/config.py +6 -0
  48. cognee/infrastructure/databases/vector/create_vector_engine.py +69 -22
  49. cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
  50. cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
  51. cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
  52. cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
  53. cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
  54. cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
  55. cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
  56. cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
  57. cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
  58. cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
  59. cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
  60. cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
  61. cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
  62. cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
  63. cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
  64. cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
  65. cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
  66. cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
  67. cognee/infrastructure/llm/prompts/test.txt +1 -1
  68. cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
  69. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +24 -0
  70. cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
  71. cognee/modules/chunking/models/DocumentChunk.py +0 -1
  72. cognee/modules/cognify/config.py +2 -0
  73. cognee/modules/data/models/Data.py +1 -0
  74. cognee/modules/engine/models/Entity.py +0 -1
  75. cognee/modules/engine/operations/setup.py +6 -0
  76. cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
  77. cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
  78. cognee/modules/graph/utils/__init__.py +1 -0
  79. cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
  80. cognee/modules/notebooks/methods/__init__.py +1 -0
  81. cognee/modules/notebooks/methods/create_notebook.py +0 -34
  82. cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
  83. cognee/modules/notebooks/methods/get_notebooks.py +12 -8
  84. cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
  85. cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
  86. cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
  87. cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
  88. cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
  89. cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
  90. cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
  91. cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
  92. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
  93. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
  94. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
  95. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
  96. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
  97. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
  98. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
  99. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
  100. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
  101. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
  102. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
  103. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
  104. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
  105. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
  106. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
  107. cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
  108. cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
  109. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
  110. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
  111. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
  112. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
  113. cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
  114. cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
  115. cognee/modules/retrieval/__init__.py +0 -1
  116. cognee/modules/retrieval/base_retriever.py +66 -10
  117. cognee/modules/retrieval/chunks_retriever.py +57 -49
  118. cognee/modules/retrieval/coding_rules_retriever.py +12 -5
  119. cognee/modules/retrieval/completion_retriever.py +29 -28
  120. cognee/modules/retrieval/cypher_search_retriever.py +25 -20
  121. cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
  122. cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
  123. cognee/modules/retrieval/graph_completion_retriever.py +78 -63
  124. cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
  125. cognee/modules/retrieval/lexical_retriever.py +34 -12
  126. cognee/modules/retrieval/natural_language_retriever.py +18 -15
  127. cognee/modules/retrieval/summaries_retriever.py +51 -34
  128. cognee/modules/retrieval/temporal_retriever.py +59 -49
  129. cognee/modules/retrieval/triplet_retriever.py +31 -32
  130. cognee/modules/retrieval/utils/access_tracking.py +88 -0
  131. cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -85
  132. cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
  133. cognee/modules/search/methods/__init__.py +1 -0
  134. cognee/modules/search/methods/get_retriever_output.py +53 -0
  135. cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
  136. cognee/modules/search/methods/search.py +90 -215
  137. cognee/modules/search/models/SearchResultPayload.py +67 -0
  138. cognee/modules/search/types/SearchResult.py +1 -8
  139. cognee/modules/search/types/SearchType.py +1 -2
  140. cognee/modules/search/types/__init__.py +1 -1
  141. cognee/modules/search/utils/__init__.py +1 -2
  142. cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
  143. cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
  144. cognee/modules/users/authentication/default/default_transport.py +11 -1
  145. cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
  146. cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
  147. cognee/modules/users/methods/create_user.py +0 -9
  148. cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
  149. cognee/modules/visualization/cognee_network_visualization.py +1 -1
  150. cognee/run_migrations.py +48 -0
  151. cognee/shared/exceptions/__init__.py +1 -3
  152. cognee/shared/exceptions/exceptions.py +11 -1
  153. cognee/shared/usage_logger.py +332 -0
  154. cognee/shared/utils.py +12 -5
  155. cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
  156. cognee/tasks/memify/extract_usage_frequency.py +613 -0
  157. cognee/tasks/summarization/models.py +0 -2
  158. cognee/tasks/temporal_graph/__init__.py +0 -1
  159. cognee/tasks/translation/__init__.py +96 -0
  160. cognee/tasks/translation/config.py +110 -0
  161. cognee/tasks/translation/detect_language.py +190 -0
  162. cognee/tasks/translation/exceptions.py +62 -0
  163. cognee/tasks/translation/models.py +72 -0
  164. cognee/tasks/translation/providers/__init__.py +44 -0
  165. cognee/tasks/translation/providers/azure_provider.py +192 -0
  166. cognee/tasks/translation/providers/base.py +85 -0
  167. cognee/tasks/translation/providers/google_provider.py +158 -0
  168. cognee/tasks/translation/providers/llm_provider.py +143 -0
  169. cognee/tasks/translation/translate_content.py +282 -0
  170. cognee/tasks/web_scraper/default_url_crawler.py +6 -2
  171. cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
  172. cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
  173. cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
  174. cognee/tests/integration/retrieval/test_chunks_retriever.py +115 -16
  175. cognee/tests/integration/retrieval/test_graph_completion_retriever.py +13 -5
  176. cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +22 -20
  177. cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +23 -24
  178. cognee/tests/integration/retrieval/test_rag_completion_retriever.py +70 -5
  179. cognee/tests/integration/retrieval/test_structured_output.py +62 -18
  180. cognee/tests/integration/retrieval/test_summaries_retriever.py +20 -9
  181. cognee/tests/integration/retrieval/test_temporal_retriever.py +38 -8
  182. cognee/tests/integration/retrieval/test_triplet_retriever.py +13 -4
  183. cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
  184. cognee/tests/tasks/translation/README.md +147 -0
  185. cognee/tests/tasks/translation/__init__.py +1 -0
  186. cognee/tests/tasks/translation/config_test.py +93 -0
  187. cognee/tests/tasks/translation/detect_language_test.py +118 -0
  188. cognee/tests/tasks/translation/providers_test.py +151 -0
  189. cognee/tests/tasks/translation/translate_content_test.py +213 -0
  190. cognee/tests/test_chromadb.py +1 -1
  191. cognee/tests/test_cleanup_unused_data.py +165 -0
  192. cognee/tests/test_delete_by_id.py +6 -6
  193. cognee/tests/test_extract_usage_frequency.py +308 -0
  194. cognee/tests/test_kuzu.py +17 -7
  195. cognee/tests/test_lancedb.py +3 -1
  196. cognee/tests/test_library.py +1 -1
  197. cognee/tests/test_neo4j.py +17 -7
  198. cognee/tests/test_neptune_analytics_vector.py +3 -1
  199. cognee/tests/test_permissions.py +172 -187
  200. cognee/tests/test_pgvector.py +3 -1
  201. cognee/tests/test_relational_db_migration.py +15 -1
  202. cognee/tests/test_remote_kuzu.py +3 -1
  203. cognee/tests/test_s3_file_storage.py +1 -1
  204. cognee/tests/test_search_db.py +97 -110
  205. cognee/tests/test_usage_logger_e2e.py +268 -0
  206. cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
  207. cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
  208. cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
  209. cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
  210. cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
  211. cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +31 -59
  212. cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +70 -33
  213. cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +72 -52
  214. cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +27 -33
  215. cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +28 -15
  216. cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +37 -42
  217. cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +48 -64
  218. cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +263 -24
  219. cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
  220. cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +30 -16
  221. cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
  222. cognee/tests/unit/modules/search/test_search.py +176 -0
  223. cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
  224. cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
  225. cognee/tests/unit/shared/test_usage_logger.py +241 -0
  226. cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
  227. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/METADATA +17 -10
  228. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/RECORD +232 -144
  229. cognee/api/.env.example +0 -5
  230. cognee/modules/retrieval/base_graph_retriever.py +0 -24
  231. cognee/modules/search/methods/get_search_type_tools.py +0 -223
  232. cognee/modules/search/methods/no_access_control_search.py +0 -62
  233. cognee/modules/search/utils/prepare_search_result.py +0 -63
  234. cognee/tests/test_feedback_enrichment.py +0 -174
  235. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/WHEEL +0 -0
  236. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/entry_points.txt +0 -0
  237. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/LICENSE +0 -0
  238. {cognee-0.5.1.dev0.dist-info → cognee-0.5.2.dev0.dist-info}/licenses/NOTICE.md +0 -0
@@ -0,0 +1,333 @@
1
+ """Expand dataset database with json connection field
2
+
3
+ Revision ID: 46a6ce2bd2b2
4
+ Revises: 76625596c5c3
5
+ Create Date: 2025-11-25 17:56:28.938931
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "46a6ce2bd2b2"
17
+ down_revision: Union[str, None] = "76625596c5c3"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+ graph_constraint_name = "dataset_database_graph_database_name_key"
22
+ vector_constraint_name = "dataset_database_vector_database_name_key"
23
+ TABLE_NAME = "dataset_database"
24
+
25
+
26
def _get_column(inspector, table, name, schema=None):
    """Return the reflected column entry called ``name`` from ``table``.

    Args:
        inspector: Object exposing ``get_columns(table, schema=...)`` that
            returns mappings carrying a ``"name"`` key.
        table: Name of the table whose columns are searched.
        name: Column name to look for.
        schema: Optional schema forwarded to ``get_columns``.

    Returns:
        The first column mapping whose ``"name"`` equals ``name``, or ``None``
        when the table has no such column.
    """
    reflected_columns = inspector.get_columns(table, schema=schema)
    return next((column for column in reflected_columns if column["name"] == name), None)
31
+
32
+
33
def _recreate_table_without_unique_constraint_sqlite(op, insp):
    """Rebuild the dataset database table on SQLite without unique name columns.

    SQLite cannot drop unique constraints on individual columns, so the table
    is replaced instead:

    1. Create ``{TABLE_NAME}_new`` whose ``vector_database_name`` and
       ``graph_database_name`` columns carry no unique constraint.
    2. Copy every row from the current table into it.
    3. Drop the current table.
    4. Rename the new table to ``TABLE_NAME``.

    Args:
        op: Alembic operations object used to issue the DDL statements.
        insp: Not used by this function.
    """
    connection = op.get_bind()
    staging_table = f"{TABLE_NAME}_new"

    # Column order matters: the INSERT ... SELECT below is positional.
    staging_columns = [
        sa.Column("owner_id", sa.UUID()),
        sa.Column("dataset_id", sa.UUID(), primary_key=True, nullable=False),
        sa.Column("vector_database_name", sa.String(), nullable=False),
        sa.Column("graph_database_name", sa.String(), nullable=False),
        sa.Column("vector_database_provider", sa.String(), nullable=False),
        sa.Column("graph_database_provider", sa.String(), nullable=False),
        sa.Column(
            "vector_dataset_database_handler",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="lancedb",
        ),
        sa.Column(
            "graph_dataset_database_handler",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="kuzu",
        ),
        sa.Column("vector_database_url", sa.String()),
        sa.Column("graph_database_url", sa.String()),
        sa.Column("vector_database_key", sa.String()),
        sa.Column("graph_database_key", sa.String()),
        sa.Column(
            "graph_database_connection_info",
            sa.JSON(),
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        sa.Column(
            "vector_database_connection_info",
            sa.JSON(),
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        sa.Column("created_at", sa.DateTime()),
        sa.Column("updated_at", sa.DateTime()),
    ]
    foreign_keys = [
        sa.ForeignKeyConstraint(["dataset_id"], ["datasets.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["owner_id"], ["principals.id"], ondelete="CASCADE"),
    ]
    op.create_table(staging_table, *staging_columns, *foreign_keys)

    # Copy the existing rows; NULL connection info is replaced by an empty JSON object.
    copy_rows = sa.text(f"""
        INSERT INTO {TABLE_NAME}_new
        SELECT
            owner_id,
            dataset_id,
            vector_database_name,
            graph_database_name,
            vector_database_provider,
            graph_database_provider,
            vector_dataset_database_handler,
            graph_dataset_database_handler,
            vector_database_url,
            graph_database_url,
            vector_database_key,
            graph_database_key,
            COALESCE(graph_database_connection_info, '{{}}'),
            COALESCE(vector_database_connection_info, '{{}}'),
            created_at,
            updated_at
        FROM {TABLE_NAME}
    """)
    connection.execute(copy_rows)

    # Swap the staging table in for the original one.
    op.drop_table(TABLE_NAME)
    op.rename_table(staging_table, TABLE_NAME)
118
+
119
+
120
def _recreate_table_with_unique_constraint_sqlite(op, insp):
    """Rebuild the dataset database table on SQLite with unique name columns.

    SQLite cannot add unique constraints to individual existing columns, so
    the table is replaced instead:

    1. Create ``{TABLE_NAME}_new`` whose ``vector_database_name`` and
       ``graph_database_name`` columns are declared ``unique=True``.
    2. Copy every row from the current table into it.
    3. Drop the current table.
    4. Rename the new table to ``TABLE_NAME``.

    Args:
        op: Alembic operations object used to issue the DDL statements.
        insp: Not used by this function.
    """
    connection = op.get_bind()
    staging_table = f"{TABLE_NAME}_new"

    # Column order matters: the INSERT ... SELECT below is positional.
    staging_columns = [
        sa.Column("owner_id", sa.UUID()),
        sa.Column("dataset_id", sa.UUID(), primary_key=True, nullable=False),
        # The two name columns get their unique constraints back.
        sa.Column("vector_database_name", sa.String(), nullable=False, unique=True),
        sa.Column("graph_database_name", sa.String(), nullable=False, unique=True),
        sa.Column("vector_database_provider", sa.String(), nullable=False),
        sa.Column("graph_database_provider", sa.String(), nullable=False),
        sa.Column(
            "vector_dataset_database_handler",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="lancedb",
        ),
        sa.Column(
            "graph_dataset_database_handler",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="kuzu",
        ),
        sa.Column("vector_database_url", sa.String()),
        sa.Column("graph_database_url", sa.String()),
        sa.Column("vector_database_key", sa.String()),
        sa.Column("graph_database_key", sa.String()),
        sa.Column(
            "graph_database_connection_info",
            sa.JSON(),
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        sa.Column(
            "vector_database_connection_info",
            sa.JSON(),
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        sa.Column("created_at", sa.DateTime()),
        sa.Column("updated_at", sa.DateTime()),
    ]
    foreign_keys = [
        sa.ForeignKeyConstraint(["dataset_id"], ["datasets.id"], ondelete="CASCADE"),
        sa.ForeignKeyConstraint(["owner_id"], ["principals.id"], ondelete="CASCADE"),
    ]
    op.create_table(staging_table, *staging_columns, *foreign_keys)

    # Copy the existing rows; NULL connection info is replaced by an empty JSON object.
    copy_rows = sa.text(f"""
        INSERT INTO {TABLE_NAME}_new
        SELECT
            owner_id,
            dataset_id,
            vector_database_name,
            graph_database_name,
            vector_database_provider,
            graph_database_provider,
            vector_dataset_database_handler,
            graph_dataset_database_handler,
            vector_database_url,
            graph_database_url,
            vector_database_key,
            graph_database_key,
            COALESCE(graph_database_connection_info, '{{}}'),
            COALESCE(vector_database_connection_info, '{{}}'),
            created_at,
            updated_at
        FROM {TABLE_NAME}
    """)
    connection.execute(copy_rows)

    # Swap the staging table in for the original one.
    op.drop_table(TABLE_NAME)
    op.rename_table(staging_table, TABLE_NAME)
205
+
206
+
207
def upgrade() -> None:
    """Add connection-info/handler columns and relax database-name uniqueness.

    Steps performed against ``TABLE_NAME``:

    1. Add ``vector_database_connection_info``,
       ``vector_dataset_database_handler``, ``graph_database_connection_info``
       and ``graph_dataset_database_handler`` when they are not present yet.
    2. On PostgreSQL, drop whichever of the named unique constraints on
       ``graph_database_name`` / ``vector_database_name`` still exist.
    3. On SQLite, rebuild the table without unique constraints when a unique
       auto-index on ``vector_database_name`` is found.
    """
    conn = op.get_bind()
    insp = sa.inspect(conn)
    dialect_name = op.get_context().dialect.name

    # Reflect the current state once, before any DDL is issued.
    unique_constraints = insp.get_unique_constraints(TABLE_NAME)
    existing_columns = {column["name"] for column in insp.get_columns(TABLE_NAME)}

    new_columns = (
        sa.Column(
            "vector_database_connection_info",
            sa.JSON(),
            unique=False,
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        # LanceDB is the default vector dataset database handler
        sa.Column(
            "vector_dataset_database_handler",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="lancedb",
        ),
        sa.Column(
            "graph_database_connection_info",
            sa.JSON(),
            unique=False,
            nullable=False,
            server_default=sa.text("'{}'"),
        ),
        # Kuzu is the default graph dataset database handler
        sa.Column(
            "graph_dataset_database_handler",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="kuzu",
        ),
    )
    for column in new_columns:
        if column.name not in existing_columns:
            op.add_column(TABLE_NAME, column)

    if dialect_name == "postgresql":
        # Constraints are matched by name; drop each one that is still present so a
        # partially migrated database also ends up with unique=False on both columns.
        present_names = {uc["name"] for uc in unique_constraints}
        constraints_to_drop = [
            constraint
            for constraint in (graph_constraint_name, vector_constraint_name)
            if constraint in present_names
        ]
        if constraints_to_drop:
            with op.batch_alter_table(TABLE_NAME, schema=None) as batch_op:
                for constraint in constraints_to_drop:
                    batch_op.drop_constraint(constraint, type_="unique")

    if dialect_name == "sqlite":
        # SQLite backs unique constraints with hidden auto-indexes that cannot be
        # dropped or altered directly, so the table has to be recreated instead.
        index_rows = conn.execute(sa.text(f"PRAGMA index_list('{TABLE_NAME}')")).fetchall()
        for index_row in index_rows:
            # Column 3 of index_list is the index origin; "u" marks a UNIQUE constraint.
            if index_row[3] != "u":
                continue
            index_columns = conn.execute(
                sa.text(f"PRAGMA index_info('{index_row[1]}')")
            ).fetchall()
            # Column 2 of index_info is the indexed column name; guard against an
            # empty result so an unknown index cannot raise IndexError.
            if index_columns and index_columns[0][2] == "vector_database_name":
                # A unique index on vector_database_name exists: rebuild the table,
                # which removes it together with the graph_database_name one.
                _recreate_table_without_unique_constraint_sqlite(op, insp)
                # The remaining index names refer to the table that was just
                # dropped, so stop iterating.
                break
313
+
314
+
315
def downgrade() -> None:
    """Restore unique database names and drop the columns added by this revision.

    On SQLite the table is rebuilt with unique name columns; on PostgreSQL the
    two named unique constraints are re-created. Afterwards the
    connection-info and handler columns are dropped from ``dataset_database``.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)
    dialect_name = op.get_context().dialect.name

    if dialect_name == "sqlite":
        _recreate_table_with_unique_constraint_sqlite(op, inspector)
    elif dialect_name == "postgresql":
        # Re-add the unique constraints to return to unique=True
        restored_constraints = (
            (graph_constraint_name, ["graph_database_name"]),
            (vector_constraint_name, ["vector_database_name"]),
        )
        for constraint, columns in restored_constraints:
            with op.batch_alter_table("dataset_database", schema=None) as batch_op:
                batch_op.create_unique_constraint(constraint, columns)

    columns_to_drop = (
        "vector_database_connection_info",
        "graph_database_connection_info",
        "vector_dataset_database_handler",
        "graph_dataset_database_handler",
    )
    for column_name in columns_to_drop:
        op.drop_column("dataset_database", column_name)
@@ -0,0 +1,30 @@
1
+ """Add default user
2
+
3
+ Revision ID: 482cd6517ce4
4
+ Revises: 8057ae7329c2
5
+ Create Date: 2024-10-16 22:17:18.634638
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from sqlalchemy.util import await_only
12
+
13
+ from cognee.modules.users.methods import create_default_user, delete_user
14
+
15
+ from fastapi_users.exceptions import UserAlreadyExists
16
+
17
+
18
+ # revision identifiers, used by Alembic.
19
+ revision: str = "482cd6517ce4"
20
+ down_revision: Union[str, None] = "8057ae7329c2"
21
+ branch_labels: Union[str, Sequence[str], None] = None
22
+ depends_on: Union[str, Sequence[str], None] = "8057ae7329c2"
23
+
24
+
25
def upgrade() -> None:
    """Upgrade step for revision 482cd6517ce4; performs no operations."""
    return None
27
+
28
+
29
def downgrade() -> None:
    """Downgrade step for revision 482cd6517ce4; performs no operations."""
    return None
@@ -0,0 +1,98 @@
1
+ """Expand dataset database for multi user
2
+
3
+ Revision ID: 76625596c5c3
4
+ Revises: c946955da633
5
+ Create Date: 2025-10-30 12:55:20.239562
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+
15
+ # revision identifiers, used by Alembic.
16
+ revision: str = "76625596c5c3"
17
+ down_revision: Union[str, None] = "c946955da633"
18
+ branch_labels: Union[str, Sequence[str], None] = None
19
+ depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
def _get_column(inspector, table, name, schema=None):
    """Look up a single reflected column of ``table`` by its name.

    Args:
        inspector: Object exposing ``get_columns(table, schema=...)`` that
            returns mappings carrying a ``"name"`` key.
        table: Name of the table to inspect.
        name: Name of the wanted column.
        schema: Optional schema forwarded to ``get_columns``.

    Returns:
        The matching column mapping, or ``None`` if the column does not exist.
    """
    matches = (
        column
        for column in inspector.get_columns(table, schema=schema)
        if column["name"] == name
    )
    return next(matches, None)
27
+
28
+
29
def upgrade() -> None:
    """Add provider, URL and key columns to ``dataset_database`` when missing.

    Each column is added only if reflection shows it is not already on the
    table. The provider columns are non-nullable with server defaults
    (``lancedb`` for vector, ``kuzu`` for graph); the URL and key columns are
    nullable strings.
    """
    table_name = "dataset_database"
    inspector = sa.inspect(op.get_bind())

    # Listed in the order in which they are checked and added.
    candidate_columns = [
        sa.Column(
            "vector_database_provider",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="lancedb",
        ),
        sa.Column(
            "graph_database_provider",
            sa.String(),
            unique=False,
            nullable=False,
            server_default="kuzu",
        ),
        sa.Column("vector_database_url", sa.String(), unique=False, nullable=True),
        sa.Column("graph_database_url", sa.String(), unique=False, nullable=True),
        sa.Column("vector_database_key", sa.String(), unique=False, nullable=True),
        sa.Column("graph_database_key", sa.String(), unique=False, nullable=True),
    ]

    for column in candidate_columns:
        if not _get_column(inspector, table_name, column.name):
            op.add_column(table_name, column)
90
+
91
+
92
def downgrade() -> None:
    """Drop the provider, URL and key columns from ``dataset_database``."""
    columns_to_drop = (
        "vector_database_provider",
        "graph_database_provider",
        "vector_database_url",
        "graph_database_url",
        "vector_database_key",
        "graph_database_key",
    )
    for column_name in columns_to_drop:
        op.drop_column("dataset_database", column_name)
@@ -0,0 +1,25 @@
1
+ """Initial migration
2
+
3
+ Revision ID: 8057ae7329c2
4
+ Revises:
5
+ Create Date: 2024-10-02 12:55:20.989372
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+ from sqlalchemy.util import await_only
11
+ from cognee.infrastructure.databases.relational import get_relational_engine
12
+
13
# revision identifiers, used by Alembic.
revision: str = "8057ae7329c2"
# No parent revision is set: this is the initial migration.
down_revision: Union[str, None] = None
# No branch labels or dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
18
+
19
+
20
def upgrade() -> None:
    """Do nothing: this revision applies no schema changes on upgrade."""
    return None
22
+
23
+
24
def downgrade() -> None:
    """Do nothing: this revision applies no schema changes on downgrade."""
    return None
@@ -0,0 +1,104 @@
1
+ """loader_separation
2
+
3
+ Revision ID: 9e7a3cb85175
4
+ Revises: 1daae0df1866
5
+ Create Date: 2025-08-14 19:18:11.406907
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
+
15
# revision identifiers, used by Alembic.
revision: str = "9e7a3cb85175"
# Parent revision that this migration is applied on top of.
down_revision: Union[str, None] = "1daae0df1866"
# No branch labels or dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
20
+
21
+
22
+ def _get_column(inspector, table, name, schema=None):
23
+ for col in inspector.get_columns(table, schema=schema):
24
+ if col["name"] == name:
25
+ return col
26
+ return None
27
+
28
+
29
def upgrade() -> None:
    """Add the loader-separation columns to ``data``, skipping existing ones.

    ``original_extension``, ``original_mime_type`` and
    ``original_data_location`` are created as nullable strings and, right
    after creation, filled from ``extension``, ``mime_type`` and
    ``raw_data_location`` respectively. ``loader_engine`` and
    ``raw_content_hash`` are created as nullable strings with no backfill.
    """
    inspector = sa.inspect(op.get_bind())

    # Define table with all necessary columns including primary key
    data = sa.table(
        "data",
        sa.Column("id", sa.UUID, primary_key=True),  # Critical for SQLite
        sa.Column("original_extension", sa.String()),
        sa.Column("original_mime_type", sa.String()),
        sa.Column("original_data_location", sa.String()),
        sa.Column("extension", sa.String()),
        sa.Column("mime_type", sa.String()),
        sa.Column("raw_data_location", sa.String()),
    )

    def _ensure_string_column(column_name, copy_from=None):
        # Create the nullable string column only when it is missing and,
        # if a source column is given, copy that column's values into it.
        if _get_column(inspector, "data", column_name):
            return

        op.add_column("data", sa.Column(column_name, sa.String(), nullable=True))
        if copy_from is None:
            return

        backfill = data.update().values({column_name: data.c[copy_from]})
        if op.get_context().dialect.name == "sqlite":
            # On SQLite the UPDATE is issued through a batch operation.
            with op.batch_alter_table("data") as batch_op:
                batch_op.execute(backfill)
        else:
            op.get_bind().execute(backfill)

    _ensure_string_column("original_extension", copy_from="extension")
    _ensure_string_column("original_mime_type", copy_from="mime_type")
    _ensure_string_column("loader_engine")
    _ensure_string_column("original_data_location", copy_from="raw_data_location")
    _ensure_string_column("raw_content_hash")
97
+
98
+
99
def downgrade() -> None:
    """Drop the columns added by this revision from ``data``.

    Every column is dropped unconditionally, in the order listed below.
    """
    for column_name in (
        "raw_content_hash",
        "original_data_location",
        "loader_engine",
        "original_mime_type",
        "original_extension",
    ):
        op.drop_column("data", column_name)
@@ -0,0 +1,38 @@
1
+ """Add label column to data table
2
+
3
+ Revision ID: a1b2c3d4e5f6
4
+ Revises: 46a6ce2bd2b2
5
+ Create Date: 2025-11-17 17:54:32.123456
6
+
7
+ """
8
+
9
+ from typing import Sequence, Union
10
+
11
+ from alembic import op
12
+ import sqlalchemy as sa
13
+
14
# revision identifiers, used by Alembic.
revision: str = "a1b2c3d4e5f6"
# Parent revision that this migration is applied on top of.
down_revision: Union[str, None] = "46a6ce2bd2b2"
# No branch labels or dependencies on other revisions are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
19
+
20
+
21
+ def _get_column(inspector, table, name, schema=None):
22
+ for col in inspector.get_columns(table, schema=schema):
23
+ if col["name"] == name:
24
+ return col
25
+ return None
26
+
27
+
28
def upgrade() -> None:
    """Add a nullable string ``label`` column to ``data`` if it is missing."""
    inspector = sa.inspect(op.get_bind())

    if _get_column(inspector, "data", "label"):
        # The column is already there; leave the table untouched.
        return

    op.add_column("data", sa.Column("label", sa.String(), nullable=True))
35
+
36
+
37
def downgrade() -> None:
    """Drop the ``label`` column from ``data`` unconditionally."""
    table_name, column_name = "data", "label"
    op.drop_column(table_name, column_name)