cognee 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/__init__.py +2 -0
- cognee/alembic/README +1 -0
- cognee/alembic/env.py +107 -0
- cognee/alembic/script.py.mako +26 -0
- cognee/alembic/versions/1a58b986e6e1_enable_delete_for_old_tutorial_notebooks.py +52 -0
- cognee/alembic/versions/1d0bb7fede17_add_pipeline_run_status.py +33 -0
- cognee/alembic/versions/1daae0df1866_incremental_loading.py +48 -0
- cognee/alembic/versions/211ab850ef3d_add_sync_operations_table.py +118 -0
- cognee/alembic/versions/45957f0a9849_add_notebook_table.py +46 -0
- cognee/alembic/versions/46a6ce2bd2b2_expand_dataset_database_with_json_.py +333 -0
- cognee/alembic/versions/482cd6517ce4_add_default_user.py +30 -0
- cognee/alembic/versions/76625596c5c3_expand_dataset_database_for_multi_user.py +98 -0
- cognee/alembic/versions/8057ae7329c2_initial_migration.py +25 -0
- cognee/alembic/versions/9e7a3cb85175_loader_separation.py +104 -0
- cognee/alembic/versions/a1b2c3d4e5f6_add_label_column_to_data.py +38 -0
- cognee/alembic/versions/ab7e313804ae_permission_system_rework.py +236 -0
- cognee/alembic/versions/b9274c27a25a_kuzu_11_migration.py +75 -0
- cognee/alembic/versions/c946955da633_multi_tenant_support.py +137 -0
- cognee/alembic/versions/e1ec1dcb50b6_add_last_accessed_to_data.py +51 -0
- cognee/alembic/versions/e4ebee1091e7_expand_data_model_info.py +140 -0
- cognee/alembic.ini +117 -0
- cognee/api/v1/add/add.py +2 -1
- cognee/api/v1/add/routers/get_add_router.py +2 -0
- cognee/api/v1/cognify/cognify.py +11 -6
- cognee/api/v1/cognify/routers/get_cognify_router.py +8 -0
- cognee/api/v1/config/config.py +60 -0
- cognee/api/v1/datasets/routers/get_datasets_router.py +46 -3
- cognee/api/v1/memify/routers/get_memify_router.py +3 -0
- cognee/api/v1/search/routers/get_search_router.py +21 -6
- cognee/api/v1/search/search.py +21 -5
- cognee/api/v1/sync/routers/get_sync_router.py +3 -3
- cognee/cli/commands/add_command.py +1 -1
- cognee/cli/commands/cognify_command.py +6 -0
- cognee/cli/commands/config_command.py +1 -1
- cognee/context_global_variables.py +5 -1
- cognee/eval_framework/answer_generation/answer_generation_executor.py +7 -8
- cognee/infrastructure/databases/cache/cache_db_interface.py +38 -1
- cognee/infrastructure/databases/cache/config.py +6 -0
- cognee/infrastructure/databases/cache/fscache/FsCacheAdapter.py +21 -0
- cognee/infrastructure/databases/cache/get_cache_engine.py +9 -3
- cognee/infrastructure/databases/cache/redis/RedisAdapter.py +60 -1
- cognee/infrastructure/databases/dataset_database_handler/supported_dataset_database_handlers.py +7 -0
- cognee/infrastructure/databases/graph/get_graph_engine.py +29 -1
- cognee/infrastructure/databases/graph/neo4j_driver/Neo4jAuraDevDatasetDatabaseHandler.py +62 -27
- cognee/infrastructure/databases/hybrid/neptune_analytics/NeptuneAnalyticsAdapter.py +17 -4
- cognee/infrastructure/databases/relational/config.py +16 -1
- cognee/infrastructure/databases/relational/create_relational_engine.py +13 -3
- cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +26 -3
- cognee/infrastructure/databases/vector/chromadb/ChromaDBAdapter.py +2 -0
- cognee/infrastructure/databases/vector/config.py +6 -0
- cognee/infrastructure/databases/vector/create_vector_engine.py +70 -16
- cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py +64 -9
- cognee/infrastructure/databases/vector/embeddings/OllamaEmbeddingEngine.py +13 -2
- cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py +16 -3
- cognee/infrastructure/databases/vector/models/ScoredResult.py +3 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +16 -3
- cognee/infrastructure/databases/vector/pgvector/PGVectorDatasetDatabaseHandler.py +86 -0
- cognee/infrastructure/databases/vector/pgvector/create_db_and_tables.py +81 -2
- cognee/infrastructure/databases/vector/vector_db_interface.py +8 -0
- cognee/infrastructure/files/utils/get_data_file_path.py +33 -27
- cognee/infrastructure/llm/LLMGateway.py +0 -13
- cognee/infrastructure/llm/prompts/extract_query_time.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_entity_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_event_graph_prompt.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_guided.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_oneshot.txt +2 -2
- cognee/infrastructure/llm/prompts/generate_graph_prompt_simple.txt +1 -1
- cognee/infrastructure/llm/prompts/generate_graph_prompt_strict.txt +1 -1
- cognee/infrastructure/llm/prompts/search_type_selector_prompt.txt +6 -6
- cognee/infrastructure/llm/prompts/test.txt +1 -1
- cognee/infrastructure/llm/prompts/translate_content.txt +19 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/anthropic/adapter.py +17 -12
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/gemini/adapter.py +31 -25
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/generic_llm_api/adapter.py +132 -7
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/get_llm_client.py +29 -5
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llama_cpp/adapter.py +191 -0
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/llm_interface.py +2 -6
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/mistral/adapter.py +58 -13
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/ollama/adapter.py +0 -1
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/openai/adapter.py +25 -131
- cognee/infrastructure/llm/structured_output_framework/litellm_instructor/llm/types.py +10 -0
- cognee/modules/chunking/models/DocumentChunk.py +0 -1
- cognee/modules/cognify/config.py +2 -0
- cognee/modules/data/models/Data.py +3 -1
- cognee/modules/engine/models/Entity.py +0 -1
- cognee/modules/engine/operations/setup.py +6 -0
- cognee/modules/graph/cognee_graph/CogneeGraph.py +150 -37
- cognee/modules/graph/cognee_graph/CogneeGraphElements.py +48 -2
- cognee/modules/graph/utils/__init__.py +1 -0
- cognee/modules/graph/utils/get_entity_nodes_from_triplets.py +12 -0
- cognee/modules/notebooks/methods/__init__.py +1 -0
- cognee/modules/notebooks/methods/create_notebook.py +0 -34
- cognee/modules/notebooks/methods/create_tutorial_notebooks.py +191 -0
- cognee/modules/notebooks/methods/get_notebooks.py +12 -8
- cognee/modules/notebooks/tutorials/cognee-basics/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-2.md +10 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-4.py +28 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-5.py +3 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-6.py +9 -0
- cognee/modules/notebooks/tutorials/cognee-basics/cell-7.py +17 -0
- cognee/modules/notebooks/tutorials/cognee-basics/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-1.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-10.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-11.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-12.py +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-13.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-14.py +6 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-15.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-16.py +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-2.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-3.md +7 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-4.md +9 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-5.md +5 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-6.py +13 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-7.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-8.md +3 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/cell-9.py +31 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/config.json +4 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/copilot_conversations.json +107 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/guido_contributions.json +976 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/my_developer_rules.md +79 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/pep_style_guide.md +74 -0
- cognee/modules/notebooks/tutorials/python-development-with-cognee/data/zen_principles.md +74 -0
- cognee/modules/retrieval/EntityCompletionRetriever.py +51 -38
- cognee/modules/retrieval/__init__.py +0 -1
- cognee/modules/retrieval/base_retriever.py +66 -10
- cognee/modules/retrieval/chunks_retriever.py +57 -49
- cognee/modules/retrieval/coding_rules_retriever.py +12 -5
- cognee/modules/retrieval/completion_retriever.py +29 -28
- cognee/modules/retrieval/cypher_search_retriever.py +25 -20
- cognee/modules/retrieval/graph_completion_context_extension_retriever.py +42 -46
- cognee/modules/retrieval/graph_completion_cot_retriever.py +68 -51
- cognee/modules/retrieval/graph_completion_retriever.py +78 -63
- cognee/modules/retrieval/graph_summary_completion_retriever.py +2 -0
- cognee/modules/retrieval/lexical_retriever.py +34 -12
- cognee/modules/retrieval/natural_language_retriever.py +18 -15
- cognee/modules/retrieval/summaries_retriever.py +51 -34
- cognee/modules/retrieval/temporal_retriever.py +59 -49
- cognee/modules/retrieval/triplet_retriever.py +32 -33
- cognee/modules/retrieval/utils/access_tracking.py +88 -0
- cognee/modules/retrieval/utils/brute_force_triplet_search.py +99 -103
- cognee/modules/retrieval/utils/node_edge_vector_search.py +174 -0
- cognee/modules/search/methods/__init__.py +1 -0
- cognee/modules/search/methods/get_retriever_output.py +53 -0
- cognee/modules/search/methods/get_search_type_retriever_instance.py +252 -0
- cognee/modules/search/methods/search.py +90 -222
- cognee/modules/search/models/SearchResultPayload.py +67 -0
- cognee/modules/search/types/SearchResult.py +1 -8
- cognee/modules/search/types/SearchType.py +1 -2
- cognee/modules/search/types/__init__.py +1 -1
- cognee/modules/search/utils/__init__.py +1 -2
- cognee/modules/search/utils/transform_insights_to_graph.py +2 -2
- cognee/modules/search/utils/{transform_context_to_graph.py → transform_triplets_to_graph.py} +2 -2
- cognee/modules/users/authentication/default/default_transport.py +11 -1
- cognee/modules/users/authentication/get_api_auth_backend.py +2 -1
- cognee/modules/users/authentication/get_client_auth_backend.py +2 -1
- cognee/modules/users/methods/create_user.py +0 -9
- cognee/modules/users/permissions/methods/has_user_management_permission.py +29 -0
- cognee/modules/visualization/cognee_network_visualization.py +1 -1
- cognee/run_migrations.py +48 -0
- cognee/shared/exceptions/__init__.py +1 -3
- cognee/shared/exceptions/exceptions.py +11 -1
- cognee/shared/usage_logger.py +332 -0
- cognee/shared/utils.py +12 -5
- cognee/tasks/chunks/__init__.py +9 -0
- cognee/tasks/cleanup/cleanup_unused_data.py +172 -0
- cognee/tasks/graph/__init__.py +7 -0
- cognee/tasks/ingestion/data_item.py +8 -0
- cognee/tasks/ingestion/ingest_data.py +12 -1
- cognee/tasks/ingestion/save_data_item_to_storage.py +5 -0
- cognee/tasks/memify/__init__.py +8 -0
- cognee/tasks/memify/extract_usage_frequency.py +613 -0
- cognee/tasks/summarization/models.py +0 -2
- cognee/tasks/temporal_graph/__init__.py +0 -1
- cognee/tasks/translation/__init__.py +96 -0
- cognee/tasks/translation/config.py +110 -0
- cognee/tasks/translation/detect_language.py +190 -0
- cognee/tasks/translation/exceptions.py +62 -0
- cognee/tasks/translation/models.py +72 -0
- cognee/tasks/translation/providers/__init__.py +44 -0
- cognee/tasks/translation/providers/azure_provider.py +192 -0
- cognee/tasks/translation/providers/base.py +85 -0
- cognee/tasks/translation/providers/google_provider.py +158 -0
- cognee/tasks/translation/providers/llm_provider.py +143 -0
- cognee/tasks/translation/translate_content.py +282 -0
- cognee/tasks/web_scraper/default_url_crawler.py +6 -2
- cognee/tests/cli_tests/cli_unit_tests/test_cli_commands.py +1 -0
- cognee/tests/cli_tests/cli_unit_tests/test_cli_edge_cases.py +3 -0
- cognee/tests/integration/retrieval/test_brute_force_triplet_search_with_cognify.py +62 -0
- cognee/tests/integration/retrieval/test_chunks_retriever.py +351 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever.py +276 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_context_extension.py +228 -0
- cognee/tests/integration/retrieval/test_graph_completion_retriever_cot.py +217 -0
- cognee/tests/integration/retrieval/test_rag_completion_retriever.py +319 -0
- cognee/tests/integration/retrieval/test_structured_output.py +258 -0
- cognee/tests/integration/retrieval/test_summaries_retriever.py +195 -0
- cognee/tests/integration/retrieval/test_temporal_retriever.py +336 -0
- cognee/tests/integration/retrieval/test_triplet_retriever.py +45 -1
- cognee/tests/integration/shared/test_usage_logger_integration.py +255 -0
- cognee/tests/tasks/translation/README.md +147 -0
- cognee/tests/tasks/translation/__init__.py +1 -0
- cognee/tests/tasks/translation/config_test.py +93 -0
- cognee/tests/tasks/translation/detect_language_test.py +118 -0
- cognee/tests/tasks/translation/providers_test.py +151 -0
- cognee/tests/tasks/translation/translate_content_test.py +213 -0
- cognee/tests/test_chromadb.py +1 -1
- cognee/tests/test_cleanup_unused_data.py +165 -0
- cognee/tests/test_custom_data_label.py +68 -0
- cognee/tests/test_delete_by_id.py +6 -6
- cognee/tests/test_extract_usage_frequency.py +308 -0
- cognee/tests/test_kuzu.py +17 -7
- cognee/tests/test_lancedb.py +3 -1
- cognee/tests/test_library.py +1 -1
- cognee/tests/test_neo4j.py +17 -7
- cognee/tests/test_neptune_analytics_vector.py +3 -1
- cognee/tests/test_permissions.py +172 -187
- cognee/tests/test_pgvector.py +3 -1
- cognee/tests/test_relational_db_migration.py +15 -1
- cognee/tests/test_remote_kuzu.py +3 -1
- cognee/tests/test_s3_file_storage.py +1 -1
- cognee/tests/test_search_db.py +345 -205
- cognee/tests/test_usage_logger_e2e.py +268 -0
- cognee/tests/unit/api/test_get_raw_data_endpoint.py +206 -0
- cognee/tests/unit/eval_framework/answer_generation_test.py +4 -3
- cognee/tests/unit/eval_framework/benchmark_adapters_test.py +25 -0
- cognee/tests/unit/eval_framework/corpus_builder_test.py +33 -4
- cognee/tests/unit/infrastructure/databases/cache/test_cache_config.py +2 -0
- cognee/tests/unit/infrastructure/databases/relational/test_RelationalConfig.py +69 -0
- cognee/tests/unit/modules/graph/cognee_graph_elements_test.py +42 -2
- cognee/tests/unit/modules/graph/cognee_graph_test.py +329 -31
- cognee/tests/unit/modules/retrieval/chunks_retriever_test.py +122 -168
- cognee/tests/unit/modules/retrieval/conversation_history_test.py +338 -0
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_context_extension_test.py +486 -157
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_cot_test.py +693 -155
- cognee/tests/unit/modules/retrieval/graph_completion_retriever_test.py +619 -200
- cognee/tests/unit/modules/retrieval/rag_completion_retriever_test.py +300 -171
- cognee/tests/unit/modules/retrieval/summaries_retriever_test.py +184 -155
- cognee/tests/unit/modules/retrieval/temporal_retriever_test.py +544 -79
- cognee/tests/unit/modules/retrieval/test_brute_force_triplet_search.py +476 -28
- cognee/tests/unit/modules/retrieval/test_completion.py +343 -0
- cognee/tests/unit/modules/retrieval/test_graph_summary_completion_retriever.py +157 -0
- cognee/tests/unit/modules/retrieval/test_node_edge_vector_search.py +273 -0
- cognee/tests/unit/modules/retrieval/test_user_qa_feedback.py +312 -0
- cognee/tests/unit/modules/retrieval/triplet_retriever_test.py +267 -7
- cognee/tests/unit/modules/search/test_get_search_type_retriever_instance.py +125 -0
- cognee/tests/unit/modules/search/test_search.py +96 -20
- cognee/tests/unit/modules/search/test_search_prepare_search_result_contract.py +190 -0
- cognee/tests/unit/modules/users/test_tutorial_notebook_creation.py +511 -297
- cognee/tests/unit/shared/test_usage_logger.py +241 -0
- cognee/tests/unit/users/permissions/test_has_user_management_permission.py +46 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/METADATA +22 -17
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/RECORD +258 -157
- cognee/api/.env.example +0 -5
- cognee/modules/retrieval/base_graph_retriever.py +0 -24
- cognee/modules/search/methods/get_search_type_tools.py +0 -223
- cognee/modules/search/methods/no_access_control_search.py +0 -62
- cognee/modules/search/utils/prepare_search_result.py +0 -63
- cognee/tests/test_feedback_enrichment.py +0 -174
- cognee/tests/unit/modules/retrieval/structured_output_test.py +0 -204
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/WHEEL +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/entry_points.txt +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.5.1.dist-info → cognee-0.5.2.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
"""permission_system_rework
|
|
2
|
+
|
|
3
|
+
Revision ID: ab7e313804ae
|
|
4
|
+
Revises: 1d0bb7fede17
|
|
5
|
+
Create Date: 2025-06-16 15:20:43.118246
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
from alembic import op
|
|
11
|
+
import sqlalchemy as sa
|
|
12
|
+
from sqlalchemy import UUID
|
|
13
|
+
from datetime import datetime, timezone
|
|
14
|
+
from uuid import uuid4
|
|
15
|
+
|
|
16
|
+
# revision identifiers, used by Alembic.
|
|
17
|
+
revision: str = "ab7e313804ae"
|
|
18
|
+
down_revision: Union[str, None] = "1d0bb7fede17"
|
|
19
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
20
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _now():
|
|
24
|
+
return datetime.now(timezone.utc)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _define_dataset_table() -> sa.Table:
|
|
28
|
+
# Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
|
|
29
|
+
# definition or load what is in the database
|
|
30
|
+
table = sa.Table(
|
|
31
|
+
"datasets",
|
|
32
|
+
sa.MetaData(),
|
|
33
|
+
sa.Column("id", UUID, primary_key=True, default=uuid4),
|
|
34
|
+
sa.Column("name", sa.Text),
|
|
35
|
+
sa.Column(
|
|
36
|
+
"created_at",
|
|
37
|
+
sa.DateTime(timezone=True),
|
|
38
|
+
default=lambda: datetime.now(timezone.utc),
|
|
39
|
+
),
|
|
40
|
+
sa.Column(
|
|
41
|
+
"updated_at",
|
|
42
|
+
sa.DateTime(timezone=True),
|
|
43
|
+
onupdate=lambda: datetime.now(timezone.utc),
|
|
44
|
+
),
|
|
45
|
+
sa.Column("owner_id", UUID, sa.ForeignKey("principals.id"), index=True),
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
return table
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _define_data_table() -> sa.Table:
|
|
52
|
+
# Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
|
|
53
|
+
# definition or load what is in the database
|
|
54
|
+
table = sa.Table(
|
|
55
|
+
"data",
|
|
56
|
+
sa.MetaData(),
|
|
57
|
+
sa.Column("id", UUID, primary_key=True, default=uuid4),
|
|
58
|
+
sa.Column("name", sa.String),
|
|
59
|
+
sa.Column("extension", sa.String),
|
|
60
|
+
sa.Column("mime_type", sa.String),
|
|
61
|
+
sa.Column("raw_data_location", sa.String),
|
|
62
|
+
sa.Column("owner_id", UUID, index=True),
|
|
63
|
+
sa.Column("content_hash", sa.String),
|
|
64
|
+
sa.Column("external_metadata", sa.JSON),
|
|
65
|
+
sa.Column("node_set", sa.JSON, nullable=True), # list of strings
|
|
66
|
+
sa.Column("token_count", sa.Integer),
|
|
67
|
+
sa.Column(
|
|
68
|
+
"created_at",
|
|
69
|
+
sa.DateTime(timezone=True),
|
|
70
|
+
default=lambda: datetime.now(timezone.utc),
|
|
71
|
+
),
|
|
72
|
+
sa.Column(
|
|
73
|
+
"updated_at",
|
|
74
|
+
sa.DateTime(timezone=True),
|
|
75
|
+
onupdate=lambda: datetime.now(timezone.utc),
|
|
76
|
+
),
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
return table
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _ensure_permission(conn, permission_name) -> str:
|
|
83
|
+
"""
|
|
84
|
+
Return the permission.id for the given name, creating the row if needed.
|
|
85
|
+
"""
|
|
86
|
+
permissions_table = sa.Table(
|
|
87
|
+
"permissions",
|
|
88
|
+
sa.MetaData(),
|
|
89
|
+
sa.Column("id", UUID, primary_key=True, index=True, default=uuid4),
|
|
90
|
+
sa.Column(
|
|
91
|
+
"created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
|
|
92
|
+
),
|
|
93
|
+
sa.Column(
|
|
94
|
+
"updated_at",
|
|
95
|
+
sa.DateTime(timezone=True),
|
|
96
|
+
onupdate=lambda: datetime.now(timezone.utc),
|
|
97
|
+
),
|
|
98
|
+
sa.Column("name", sa.String, unique=True, nullable=False, index=True),
|
|
99
|
+
)
|
|
100
|
+
row = conn.execute(
|
|
101
|
+
sa.select(permissions_table).filter(permissions_table.c.name == permission_name)
|
|
102
|
+
).fetchone()
|
|
103
|
+
|
|
104
|
+
if row is None:
|
|
105
|
+
permission_id = uuid4()
|
|
106
|
+
|
|
107
|
+
op.bulk_insert(
|
|
108
|
+
permissions_table,
|
|
109
|
+
[
|
|
110
|
+
{
|
|
111
|
+
"id": permission_id,
|
|
112
|
+
"name": permission_name,
|
|
113
|
+
"created_at": _now(),
|
|
114
|
+
}
|
|
115
|
+
],
|
|
116
|
+
)
|
|
117
|
+
return permission_id
|
|
118
|
+
|
|
119
|
+
return row.id
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _build_acl_row(*, user_id, target_id, permission_id, target_col) -> dict:
|
|
123
|
+
"""Create a dict with the correct column names for the ACL row."""
|
|
124
|
+
return {
|
|
125
|
+
"id": uuid4(),
|
|
126
|
+
"created_at": _now(),
|
|
127
|
+
"principal_id": user_id,
|
|
128
|
+
target_col: target_id,
|
|
129
|
+
"permission_id": permission_id,
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _create_dataset_permission(conn, user_id, dataset_id, permission_name):
|
|
134
|
+
perm_id = _ensure_permission(conn, permission_name)
|
|
135
|
+
return _build_acl_row(
|
|
136
|
+
user_id=user_id, target_id=dataset_id, permission_id=perm_id, target_col="dataset_id"
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _create_data_permission(conn, user_id, data_id, permission_name):
|
|
141
|
+
perm_id = _ensure_permission(conn, permission_name)
|
|
142
|
+
return _build_acl_row(
|
|
143
|
+
user_id=user_id, target_id=data_id, permission_id=perm_id, target_col="data_id"
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _get_column(inspector, table, name, schema=None):
|
|
148
|
+
for col in inspector.get_columns(table, schema=schema):
|
|
149
|
+
if col["name"] == name:
|
|
150
|
+
return col
|
|
151
|
+
return None
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def upgrade() -> None:
|
|
155
|
+
conn = op.get_bind()
|
|
156
|
+
insp = sa.inspect(conn)
|
|
157
|
+
|
|
158
|
+
dataset_id_column = _get_column(insp, "acls", "dataset_id")
|
|
159
|
+
if not dataset_id_column:
|
|
160
|
+
# Recreate ACLs table with default permissions set to datasets instead of documents
|
|
161
|
+
op.drop_table("acls")
|
|
162
|
+
|
|
163
|
+
acls_table = op.create_table(
|
|
164
|
+
"acls",
|
|
165
|
+
sa.Column("id", UUID, primary_key=True, default=uuid4),
|
|
166
|
+
sa.Column(
|
|
167
|
+
"created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
|
|
168
|
+
),
|
|
169
|
+
sa.Column(
|
|
170
|
+
"updated_at",
|
|
171
|
+
sa.DateTime(timezone=True),
|
|
172
|
+
onupdate=lambda: datetime.now(timezone.utc),
|
|
173
|
+
),
|
|
174
|
+
sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")),
|
|
175
|
+
sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")),
|
|
176
|
+
sa.Column("dataset_id", UUID, sa.ForeignKey("datasets.id", ondelete="CASCADE")),
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
|
|
180
|
+
# definition or load what is in the database
|
|
181
|
+
dataset_table = _define_dataset_table()
|
|
182
|
+
datasets = conn.execute(sa.select(dataset_table)).fetchall()
|
|
183
|
+
|
|
184
|
+
if not datasets:
|
|
185
|
+
return
|
|
186
|
+
|
|
187
|
+
acl_list = []
|
|
188
|
+
|
|
189
|
+
for dataset in datasets:
|
|
190
|
+
acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "read"))
|
|
191
|
+
acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "write"))
|
|
192
|
+
acl_list.append(_create_dataset_permission(conn, dataset.owner_id, dataset.id, "share"))
|
|
193
|
+
acl_list.append(
|
|
194
|
+
_create_dataset_permission(conn, dataset.owner_id, dataset.id, "delete")
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
if acl_list:
|
|
198
|
+
op.bulk_insert(acls_table, acl_list)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def downgrade() -> None:
|
|
202
|
+
conn = op.get_bind()
|
|
203
|
+
|
|
204
|
+
op.drop_table("acls")
|
|
205
|
+
|
|
206
|
+
acls_table = op.create_table(
|
|
207
|
+
"acls",
|
|
208
|
+
sa.Column("id", UUID, primary_key=True, nullable=False, default=uuid4),
|
|
209
|
+
sa.Column(
|
|
210
|
+
"created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
|
|
211
|
+
),
|
|
212
|
+
sa.Column(
|
|
213
|
+
"updated_at", sa.DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc)
|
|
214
|
+
),
|
|
215
|
+
sa.Column("principal_id", UUID, sa.ForeignKey("principals.id")),
|
|
216
|
+
sa.Column("permission_id", UUID, sa.ForeignKey("permissions.id")),
|
|
217
|
+
sa.Column("data_id", UUID, sa.ForeignKey("data.id", ondelete="CASCADE")),
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Note: We can't use any Cognee model info to gather data (as it can change) in database so we must use our own table
|
|
221
|
+
# definition or load what is in the database
|
|
222
|
+
data_table = _define_data_table()
|
|
223
|
+
data = conn.execute(sa.select(data_table)).fetchall()
|
|
224
|
+
|
|
225
|
+
if not data:
|
|
226
|
+
return
|
|
227
|
+
|
|
228
|
+
acl_list = []
|
|
229
|
+
for single_data in data:
|
|
230
|
+
acl_list.append(_create_data_permission(conn, single_data.owner_id, single_data.id, "read"))
|
|
231
|
+
acl_list.append(
|
|
232
|
+
_create_data_permission(conn, single_data.owner_id, single_data.id, "write")
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
if acl_list:
|
|
236
|
+
op.bulk_insert(acls_table, acl_list)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""kuzu-11-migration
|
|
2
|
+
|
|
3
|
+
Revision ID: b9274c27a25a
|
|
4
|
+
Revises: e4ebee1091e7
|
|
5
|
+
Create Date: 2025-07-24 17:11:52.174737
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Sequence, Union
|
|
11
|
+
|
|
12
|
+
from cognee.infrastructure.databases.graph.kuzu.kuzu_migrate import (
|
|
13
|
+
kuzu_migration,
|
|
14
|
+
read_kuzu_storage_version,
|
|
15
|
+
)
|
|
16
|
+
import kuzu
|
|
17
|
+
|
|
18
|
+
# revision identifiers, used by Alembic.
|
|
19
|
+
revision: str = "b9274c27a25a"
|
|
20
|
+
down_revision: Union[str, None] = "e4ebee1091e7"
|
|
21
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
22
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def upgrade() -> None:
|
|
26
|
+
# This migration is only for multi-user Cognee mode
|
|
27
|
+
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
|
|
28
|
+
from cognee.base_config import get_base_config
|
|
29
|
+
|
|
30
|
+
base_config = get_base_config()
|
|
31
|
+
|
|
32
|
+
databases_root = os.path.join(base_config.system_root_directory, "databases")
|
|
33
|
+
if not os.path.isdir(databases_root):
|
|
34
|
+
raise FileNotFoundError(f"Directory not found: {databases_root}")
|
|
35
|
+
|
|
36
|
+
for current_path, dirnames, _ in os.walk(databases_root):
|
|
37
|
+
# If file is kuzu graph database
|
|
38
|
+
if ".pkl" in current_path[-4:]:
|
|
39
|
+
kuzu_db_version = read_kuzu_storage_version(current_path)
|
|
40
|
+
if (
|
|
41
|
+
kuzu_db_version == "0.9.0" or kuzu_db_version == "0.8.2"
|
|
42
|
+
) and kuzu_db_version != kuzu.__version__:
|
|
43
|
+
# Try to migrate kuzu database to latest version
|
|
44
|
+
kuzu_migration(
|
|
45
|
+
new_db=current_path + "_new",
|
|
46
|
+
old_db=current_path,
|
|
47
|
+
new_version=kuzu.__version__,
|
|
48
|
+
old_version=kuzu_db_version,
|
|
49
|
+
overwrite=True,
|
|
50
|
+
)
|
|
51
|
+
else:
|
|
52
|
+
from cognee.infrastructure.databases.graph import get_graph_config
|
|
53
|
+
|
|
54
|
+
graph_config = get_graph_config()
|
|
55
|
+
if graph_config.graph_database_provider.lower() == "kuzu":
|
|
56
|
+
if os.path.exists(graph_config.graph_file_path):
|
|
57
|
+
kuzu_db_version = read_kuzu_storage_version(graph_config.graph_file_path)
|
|
58
|
+
if (
|
|
59
|
+
kuzu_db_version == "0.9.0" or kuzu_db_version == "0.8.2"
|
|
60
|
+
) and kuzu_db_version != kuzu.__version__:
|
|
61
|
+
# Try to migrate kuzu database to latest version
|
|
62
|
+
kuzu_migration(
|
|
63
|
+
new_db=graph_config.graph_file_path + "_new",
|
|
64
|
+
old_db=graph_config.graph_file_path,
|
|
65
|
+
new_version=kuzu.__version__,
|
|
66
|
+
old_version=kuzu_db_version,
|
|
67
|
+
overwrite=True,
|
|
68
|
+
)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def downgrade() -> None:
|
|
72
|
+
# To downgrade you will have to manually change the backup old kuzu graph databases
|
|
73
|
+
# stored in the user folder to its previous name and remove the new kuzu graph
|
|
74
|
+
# database that replaced it
|
|
75
|
+
pass
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Multi Tenant Support
|
|
2
|
+
|
|
3
|
+
Revision ID: c946955da633
|
|
4
|
+
Revises: 211ab850ef3d
|
|
5
|
+
Create Date: 2025-11-04 18:11:09.325158
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from uuid import uuid4
|
|
12
|
+
|
|
13
|
+
from alembic import op
|
|
14
|
+
import sqlalchemy as sa
|
|
15
|
+
|
|
16
|
+
# revision identifiers, used by Alembic.
|
|
17
|
+
revision: str = "c946955da633"
|
|
18
|
+
down_revision: Union[str, None] = "211ab850ef3d"
|
|
19
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
20
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _now():
|
|
24
|
+
return datetime.now(timezone.utc)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _define_user_table() -> sa.Table:
|
|
28
|
+
table = sa.Table(
|
|
29
|
+
"users",
|
|
30
|
+
sa.MetaData(),
|
|
31
|
+
sa.Column(
|
|
32
|
+
"id",
|
|
33
|
+
sa.UUID,
|
|
34
|
+
sa.ForeignKey("principals.id", ondelete="CASCADE"),
|
|
35
|
+
primary_key=True,
|
|
36
|
+
nullable=False,
|
|
37
|
+
),
|
|
38
|
+
sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), index=True, nullable=True),
|
|
39
|
+
)
|
|
40
|
+
return table
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _define_dataset_table() -> sa.Table:
    """Build a minimal, migration-local ``datasets`` table definition.

    We deliberately avoid importing Cognee model classes here (they can change
    over time), so the migration carries its own snapshot of the columns it
    needs, bound to a throwaway MetaData.
    """
    columns = [
        sa.Column("id", sa.UUID, primary_key=True, default=uuid4),
        sa.Column("name", sa.Text),
        sa.Column(
            "created_at",
            sa.DateTime(timezone=True),
            default=lambda: datetime.now(timezone.utc),
        ),
        sa.Column(
            "updated_at",
            sa.DateTime(timezone=True),
            onupdate=lambda: datetime.now(timezone.utc),
        ),
        sa.Column("owner_id", sa.UUID(), sa.ForeignKey("principals.id"), index=True),
        sa.Column("tenant_id", sa.UUID(), sa.ForeignKey("tenants.id"), index=True, nullable=True),
    ]
    return sa.Table("datasets", sa.MetaData(), *columns)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _get_column(inspector, table, name, schema=None):
|
|
69
|
+
for col in inspector.get_columns(table, schema=schema):
|
|
70
|
+
if col["name"] == name:
|
|
71
|
+
return col
|
|
72
|
+
return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def upgrade() -> None:
    """Introduce multi-tenant support.

    1. Create the ``user_tenants`` association table (if missing) and backfill
       it from every user's existing ``tenant_id``.
    2. Add a nullable ``datasets.tenant_id`` column (if missing) and backfill
       it with each dataset owner's tenant via a correlated subquery.
    3. Create an index on ``datasets.tenant_id``.
    """
    conn = op.get_bind()
    insp = sa.inspect(conn)

    # Lightweight migration-local table definitions (see helpers above).
    dataset = _define_dataset_table()
    user = _define_user_table()

    if "user_tenants" not in insp.get_table_names():
        # Define table with all necessary columns including primary key
        user_tenants = op.create_table(
            "user_tenants",
            sa.Column("user_id", sa.UUID, sa.ForeignKey("users.id"), primary_key=True),
            sa.Column("tenant_id", sa.UUID, sa.ForeignKey("tenants.id"), primary_key=True),
            sa.Column(
                "created_at", sa.DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
            ),
        )

        # Get all users with their tenant_id (users without a tenant are skipped)
        user_data = conn.execute(
            sa.select(user.c.id, user.c.tenant_id).where(user.c.tenant_id.isnot(None))
        ).fetchall()

        # Insert into user_tenants table
        if user_data:
            op.bulk_insert(
                user_tenants,
                [
                    {"user_id": user_id, "tenant_id": tenant_id, "created_at": _now()}
                    for user_id, tenant_id in user_data
                ],
            )

    tenant_id_column = _get_column(insp, "datasets", "tenant_id")
    if not tenant_id_column:
        op.add_column("datasets", sa.Column("tenant_id", sa.UUID(), nullable=True))

    # Build subquery, select users.tenant_id for each dataset.owner_id
    tenant_id_from_dataset_owner = (
        sa.select(user.c.tenant_id).where(user.c.id == dataset.c.owner_id).scalar_subquery()
    )

    if op.get_context().dialect.name == "sqlite":
        # SQLite cannot alter tables in place, so run the backfill UPDATE
        # inside a batch context (copy-and-swap strategy).
        with op.batch_alter_table("datasets") as batch_op:
            batch_op.execute(
                dataset.update().values(
                    tenant_id=tenant_id_from_dataset_owner,
                )
            )
    else:
        conn = op.get_bind()
        conn.execute(dataset.update().values(tenant_id=tenant_id_from_dataset_owner))

    # NOTE(review): this index creation is unconditional, unlike the guarded
    # column addition above — re-running against a database that already has
    # the index would fail; confirm whether an existence check is wanted.
    op.create_index(op.f("ix_datasets_tenant_id"), "datasets", ["tenant_id"])
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def downgrade() -> None:
    """Revert multi-tenant support.

    Drops the ``user_tenants`` association table, then the tenant index and
    column on ``datasets``.
    """
    op.drop_table("user_tenants")
    op.drop_index(op.f("ix_datasets_tenant_id"), table_name="datasets")
    op.drop_column("datasets", "tenant_id")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""add_last_accessed_to_data
|
|
2
|
+
|
|
3
|
+
Revision ID: e1ec1dcb50b6
|
|
4
|
+
Revises: 211ab850ef3d
|
|
5
|
+
Create Date: 2025-11-04 21:45:52.642322
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Sequence, Union
|
|
11
|
+
|
|
12
|
+
from alembic import op
|
|
13
|
+
import sqlalchemy as sa
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# revision identifiers, used by Alembic.
|
|
17
|
+
revision: str = "e1ec1dcb50b6"
|
|
18
|
+
down_revision: Union[str, None] = "a1b2c3d4e5f6"
|
|
19
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
20
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _get_column(inspector, table, name, schema=None):
|
|
24
|
+
for col in inspector.get_columns(table, schema=schema):
|
|
25
|
+
if col["name"] == name:
|
|
26
|
+
return col
|
|
27
|
+
return None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def upgrade() -> None:
    """Add a nullable ``last_accessed`` timestamp column to ``data``.

    The column is created whenever it is missing so every deployment shares
    the same schema; existing rows are stamped with the current time only when
    the ENABLE_LAST_ACCESSED feature flag is turned on.
    """
    bind = op.get_bind()
    inspector = sa.inspect(bind)

    if not _get_column(inspector, "data", "last_accessed"):
        # Always create the column for schema consistency.
        column = sa.Column("last_accessed", sa.DateTime(timezone=True), nullable=True)
        op.add_column("data", column)

    # Backfill existing records only when the feature is enabled.
    if os.getenv("ENABLE_LAST_ACCESSED", "false").lower() == "true":
        op.execute("UPDATE data SET last_accessed = CURRENT_TIMESTAMP")
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def downgrade() -> None:
    """Drop the ``last_accessed`` column from ``data`` when it exists."""
    inspector = sa.inspect(op.get_bind())
    if _get_column(inspector, "data", "last_accessed"):
        op.drop_column("data", "last_accessed")
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
"""Expand data model info
|
|
2
|
+
|
|
3
|
+
Revision ID: e4ebee1091e7
|
|
4
|
+
Revises: ab7e313804ae
|
|
5
|
+
Create Date: 2025-07-24 13:21:30.738486
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Sequence, Union
|
|
10
|
+
|
|
11
|
+
from alembic import op
|
|
12
|
+
import sqlalchemy as sa
|
|
13
|
+
from sqlalchemy.dialects import postgresql
|
|
14
|
+
|
|
15
|
+
# revision identifiers, used by Alembic.
|
|
16
|
+
revision: str = "e4ebee1091e7"
|
|
17
|
+
down_revision: Union[str, None] = "ab7e313804ae"
|
|
18
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
19
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _get_column(inspector, table, name, schema=None):
|
|
23
|
+
for col in inspector.get_columns(table, schema=schema):
|
|
24
|
+
if col["name"] == name:
|
|
25
|
+
return col
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _index_exists(inspector, table, name, schema=None):
|
|
30
|
+
return any(ix["name"] == name for ix in inspector.get_indexes(table, schema=schema))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def upgrade() -> None:
    """Drop legacy dlt bookkeeping tables and expand the ``data`` table.

    Adds nullable ``tenant_id`` (UUID) and ``data_size`` (Integer) columns to
    ``data`` when missing — relaxing nullability if they already exist as NOT
    NULL — and ensures an index on ``tenant_id``.
    """
    # Legacy tables left behind by the old dlt-based loader.
    TABLES_TO_DROP = [
        "file_metadata",
        "_dlt_loads",
        "_dlt_version",
        "_dlt_pipeline_state",
    ]

    conn = op.get_bind()
    insp = sa.inspect(conn)
    existing = set(insp.get_table_names())

    # Drop only the tables that are actually present, so the migration is
    # safe on databases that never used the dlt loader.
    for tbl in TABLES_TO_DROP:
        if tbl in existing:
            op.drop_table(tbl)

    DATA_TABLE = "data"
    DATA_TENANT_COL = "tenant_id"
    DATA_SIZE_COL = "data_size"
    DATA_TENANT_IDX = "ix_data_tenant_id"

    # --- tenant_id ---
    # NOTE(review): postgresql.UUID is used unconditionally here even though
    # other migrations in this package branch on the sqlite dialect — confirm
    # this type renders correctly on non-PostgreSQL backends.
    col = _get_column(insp, DATA_TABLE, DATA_TENANT_COL)
    if col is None:
        op.add_column(
            DATA_TABLE,
            sa.Column(DATA_TENANT_COL, postgresql.UUID(as_uuid=True), nullable=True),
        )
    else:
        # Column exists – fix nullability if needed
        if col.get("nullable", True) is False:
            op.alter_column(
                DATA_TABLE,
                DATA_TENANT_COL,
                existing_type=postgresql.UUID(as_uuid=True),
                nullable=True,
            )

    # --- data_size ---
    col = _get_column(insp, DATA_TABLE, DATA_SIZE_COL)
    if col is None:
        op.add_column(DATA_TABLE, sa.Column(DATA_SIZE_COL, sa.Integer(), nullable=True))
    else:
        # If you also need to change nullability for data_size, do it here
        if col.get("nullable", True) is False:
            op.alter_column(
                DATA_TABLE,
                DATA_SIZE_COL,
                existing_type=sa.Integer(),
                nullable=True,
            )

    # --- index on tenant_id ---
    if not _index_exists(insp, DATA_TABLE, DATA_TENANT_IDX):
        op.create_index(DATA_TENANT_IDX, DATA_TABLE, [DATA_TENANT_COL], unique=False)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def downgrade() -> None:
    """Reverse the data-model expansion.

    Removes the tenant index and the ``data_size``/``tenant_id`` columns from
    ``data``, then recreates the dropped dlt bookkeeping tables with their
    original schemas (data they contained is not restored).
    """
    op.drop_index(op.f("ix_data_tenant_id"), table_name="data")
    op.drop_column("data", "data_size")
    op.drop_column("data", "tenant_id")
    # Recreate the dlt pipeline-state table dropped by upgrade().
    op.create_table(
        "_dlt_pipeline_state",
        sa.Column("version", sa.BIGINT(), autoincrement=False, nullable=False),
        sa.Column("engine_version", sa.BIGINT(), autoincrement=False, nullable=False),
        sa.Column("pipeline_name", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("state", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column(
            "created_at", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False
        ),
        sa.Column("version_hash", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("_dlt_load_id", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("_dlt_id", sa.VARCHAR(length=128), autoincrement=False, nullable=False),
    )
    # Recreate the dlt schema-version table.
    op.create_table(
        "_dlt_version",
        sa.Column("version", sa.BIGINT(), autoincrement=False, nullable=False),
        sa.Column("engine_version", sa.BIGINT(), autoincrement=False, nullable=False),
        sa.Column(
            "inserted_at", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False
        ),
        sa.Column("schema_name", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("version_hash", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("schema", sa.TEXT(), autoincrement=False, nullable=False),
    )
    # Recreate the dlt load-tracking table.
    op.create_table(
        "_dlt_loads",
        sa.Column("load_id", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("schema_name", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("status", sa.BIGINT(), autoincrement=False, nullable=False),
        sa.Column(
            "inserted_at", postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False
        ),
        sa.Column("schema_version_hash", sa.TEXT(), autoincrement=False, nullable=True),
    )
    # Recreate the legacy file-metadata table.
    op.create_table(
        "file_metadata",
        sa.Column("id", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("name", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("file_path", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("extension", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("mime_type", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("content_hash", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("owner_id", sa.TEXT(), autoincrement=False, nullable=True),
        sa.Column("_dlt_load_id", sa.TEXT(), autoincrement=False, nullable=False),
        sa.Column("_dlt_id", sa.VARCHAR(length=128), autoincrement=False, nullable=False),
        sa.Column("node_set", sa.TEXT(), autoincrement=False, nullable=True),
    )
|