django-data-purger 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.1
2
+ Name: django-data-purger
3
+ Version: 0.2.3
4
+ Summary: Periodically remove data from your Django app.
5
+ License: MIT
6
+ Author: Eirik Martiniussen Sylliaas
7
+ Author-email: eirik@sylliaas.no
8
+ Requires-Python: >=3.11,<3.14
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: pydantic (>=2.11.6,<3.0.0)
14
+ Description-Content-Type: text/markdown
15
+
16
+ # django-data-purger
17
+
18
+ > Periodically remove data from your Django app.
19
+
20
+ ## Getting Started
21
+
22
+ 1. Install django-data-purger
23
+
24
+ Use Poetry to add the package
25
+
26
+ ```bash
27
+ $ poetry add django-data-purger
28
+ ```
29
+
30
+ 2. Add `django_data_purger` to `INSTALLED_APPS`
31
+
32
+ Update your `INSTALLED_APPS` setting:
33
+
34
+ ```python
35
+ INSTALLED_APPS = [
36
+ 'django...',
37
+ ...
38
+ 'django_data_purger',
39
+ ]
40
+ ```
41
+
42
+ 3. Create a data purger in the Django app you want to clean periodically
43
+
44
+ Example:
45
+
46
+ ```python
47
+ # data_purger.py
48
+ from django_data_purger.data_purger import DataPurger, PurgeResult
49
+ from app.models import DataModel
50
+ from datetime import datetime, timedelta
51
+
52
+ class PurgeDataModel(DataPurger):
53
+ expected_delete_models = ("app.DataModel",)
54
+
55
+ def run(self, *, now: datetime) -> list[PurgeResult]:
56
+ old_threshold = now - timedelta(weeks=6)
57
+
58
+ entries = DataModel.objects.filter(
59
+ created_time__lte=old_threshold,
60
+ )
61
+
62
+ return self._delete_queryset_in_batch(
63
+ entries, batch_size=DataPurger.BATCH_SIZE_LARGE
64
+ )
65
+ ```
66
+
67
+ 4. Register the data purger in the `DATA_PURGERS` setting
68
+
69
+ Add the purger to your settings:
70
+
71
+ ```python
72
+ DATA_PURGERS = [
73
+ "app.data_purger.PurgeDataModel",
74
+ ]
75
+ ```
76
+
77
+
78
+ 5. Run the management command to purge old data
79
+
80
+ Configure this command to run periodically using a scheduler like cron:
81
+
82
+ ```bash
83
+ $ python manage.py run_data_purgers --force
84
+ ```
85
+
86
+ ## Settings
87
+
88
+ | Setting name | Type | Default | Description |
89
+ | ----------------------- | ----------- | ------- | ---------------------------------------------------------- |
90
+ | `DATA_PURGERS` | `list[str]` | `[]` | Array with import strings to data purgers in your project. |
91
+
92
+ ## The DataPurger Class
93
+
94
+ The DataPurger class can be used to UPDATE or DELETE models. It runs within a transaction and ensures that updates or deletions are only applied to whitelisted models.
95
+
96
+ ### Update Model Instances
97
+
98
+ ```python
99
+ class PurgeDataModel(DataPurger):
100
+ expected_update_models = ("app.DataModel",)
101
+
102
+ def run(self, *, now: datetime) -> list[PurgeResult]:
103
+ old_threshold = now - timedelta(weeks=6)
104
+
105
+ entries = DataModel.objects.filter(
106
+ created_time__lte=old_threshold,
107
+ )
108
+
109
+ return self._update_queryset_in_batch(
110
+ entries, updates={"is_deleted": True}, batch_size=DataPurger.BATCH_SIZE_MEDIUM
111
+ )
112
+ ```
113
+
114
+ ### Delete Model Instances
115
+
116
+ ```python
117
+ class PurgeDataModel(DataPurger):
118
+ expected_delete_models = ("app.DataModel",)
119
+
120
+ def run(self, *, now: datetime) -> list[PurgeResult]:
121
+ old_threshold = now - timedelta(weeks=6)
122
+
123
+ entries = DataModel.objects.filter(
124
+ created_time__lte=old_threshold,
125
+ )
126
+
127
+ return self._delete_queryset_in_batch(
128
+ entries, batch_size=DataPurger.BATCH_SIZE_LARGE
129
+ )
130
+ ```
131
+
132
+ ## Planning for Model Instance Deletion
133
+
134
+ Models often depend on each other via `ForeignKey` or `ManyToManyField` relationships. It can be challenging to determine the correct order for deleting models without causing unexpected cascading deletions or errors from `on_delete=models.PROTECT`.
135
+
136
+ django-data-purger includes a tool to explore model dependencies. ✅ and 🛑 icons indicate whether a data purger for the model is already defined.
137
+
138
+ Example:
139
+
140
+ ```bash
141
+ $ poetry run python manage.py calculate_model_dependencies --model app.DataModel
142
+ The following models depend on app.DataModel:
143
+ - ...
144
+
145
+ The following models depend on ...:
146
+ - ...
147
+
148
+ ==============
149
+
150
+ 2 models depend on app.DataModel.
151
+
152
+ ==============
153
+
154
+ The models need to be deleted in the following order to safely delete app.DataModel:
155
+ (Models in the same batch can be deleted in any order.)
156
+
157
+ Batch 1:
158
+ - ✅ ...
159
+ - 🛑 ...
160
+
161
+ Batch 2:
162
+ - ✅ ...
163
+ ```
164
+
165
+ ## Listing Models with Enabled Data Purgers
166
+
167
+ To view all models with a configured data purger:
168
+
169
+ ```bash
170
+ $ python manage.py print_data_purging_enabled_tables --action delete
171
+
172
+ - app.DataModel
173
+ ```
174
+
@@ -0,0 +1,158 @@
1
+ # django-data-purger
2
+
3
+ > Periodically remove data from your Django app.
4
+
5
+ ## Getting Started
6
+
7
+ 1. Install django-data-purger
8
+
9
+ Use Poetry to add the package
10
+
11
+ ```bash
12
+ $ poetry add django-data-purger
13
+ ```
14
+
15
+ 2. Add `django_data_purger` to `INSTALLED_APPS`
16
+
17
+ Update your `INSTALLED_APPS` setting:
18
+
19
+ ```python
20
+ INSTALLED_APPS = [
21
+ 'django...',
22
+ ...
23
+ 'django_data_purger',
24
+ ]
25
+ ```
26
+
27
+ 3. Create a data purger in the Django app you want to clean periodically
28
+
29
+ Example:
30
+
31
+ ```python
32
+ # data_purger.py
33
+ from django_data_purger.data_purger import DataPurger, PurgeResult
34
+ from app.models import DataModel
35
+ from datetime import datetime, timedelta
36
+
37
+ class PurgeDataModel(DataPurger):
38
+ expected_delete_models = ("app.DataModel",)
39
+
40
+ def run(self, *, now: datetime) -> list[PurgeResult]:
41
+ old_threshold = now - timedelta(weeks=6)
42
+
43
+ entries = DataModel.objects.filter(
44
+ created_time__lte=old_threshold,
45
+ )
46
+
47
+ return self._delete_queryset_in_batch(
48
+ entries, batch_size=DataPurger.BATCH_SIZE_LARGE
49
+ )
50
+ ```
51
+
52
+ 4. Register the data purger in the `DATA_PURGERS` setting
53
+
54
+ Add the purger to your settings:
55
+
56
+ ```python
57
+ DATA_PURGERS = [
58
+ "app.data_purger.PurgeDataModel",
59
+ ]
60
+ ```
61
+
62
+
63
+ 5. Run the management command to purge old data
64
+
65
+ Configure this command to run periodically using a scheduler like cron:
66
+
67
+ ```bash
68
+ $ python manage.py run_data_purgers --force
69
+ ```
70
+
71
+ ## Settings
72
+
73
+ | Setting name | Type | Default | Description |
74
+ | ----------------------- | ----------- | ------- | ---------------------------------------------------------- |
75
+ | `DATA_PURGERS` | `list[str]` | `[]` | Array with import strings to data purgers in your project. |
76
+
77
+ ## The DataPurger Class
78
+
79
+ The DataPurger class can be used to UPDATE or DELETE models. It runs within a transaction and ensures that updates or deletions are only applied to whitelisted models.
80
+
81
+ ### Update Model Instances
82
+
83
+ ```python
84
+ class PurgeDataModel(DataPurger):
85
+ expected_update_models = ("app.DataModel",)
86
+
87
+ def run(self, *, now: datetime) -> list[PurgeResult]:
88
+ old_threshold = now - timedelta(weeks=6)
89
+
90
+ entries = DataModel.objects.filter(
91
+ created_time__lte=old_threshold,
92
+ )
93
+
94
+ return self._update_queryset_in_batch(
95
+ entries, updates={"is_deleted": True}, batch_size=DataPurger.BATCH_SIZE_MEDIUM
96
+ )
97
+ ```
98
+
99
+ ### Delete Model Instances
100
+
101
+ ```python
102
+ class PurgeDataModel(DataPurger):
103
+ expected_delete_models = ("app.DataModel",)
104
+
105
+ def run(self, *, now: datetime) -> list[PurgeResult]:
106
+ old_threshold = now - timedelta(weeks=6)
107
+
108
+ entries = DataModel.objects.filter(
109
+ created_time__lte=old_threshold,
110
+ )
111
+
112
+ return self._delete_queryset_in_batch(
113
+ entries, batch_size=DataPurger.BATCH_SIZE_LARGE
114
+ )
115
+ ```
116
+
117
+ ## Planning for Model Instance Deletion
118
+
119
+ Models often depend on each other via `ForeignKey` or `ManyToManyField` relationships. It can be challenging to determine the correct order for deleting models without causing unexpected cascading deletions or errors from `on_delete=models.PROTECT`.
120
+
121
+ django-data-purger includes a tool to explore model dependencies. ✅ and 🛑 icons indicate whether a data purger for the model is already defined.
122
+
123
+ Example:
124
+
125
+ ```bash
126
+ $ poetry run python manage.py calculate_model_dependencies --model app.DataModel
127
+ The following models depend on app.DataModel:
128
+ - ...
129
+
130
+ The following models depend on ...:
131
+ - ...
132
+
133
+ ==============
134
+
135
+ 2 models depend on app.DataModel.
136
+
137
+ ==============
138
+
139
+ The models need to be deleted in the following order to safely delete app.DataModel:
140
+ (Models in the same batch can be deleted in any order.)
141
+
142
+ Batch 1:
143
+ - ✅ ...
144
+ - 🛑 ...
145
+
146
+ Batch 2:
147
+ - ✅ ...
148
+ ```
149
+
150
+ ## Listing Models with Enabled Data Purgers
151
+
152
+ To view all models with a configured data purger:
153
+
154
+ ```bash
155
+ $ python manage.py print_data_purging_enabled_tables --action delete
156
+
157
+ - app.DataModel
158
+ ```
@@ -0,0 +1,131 @@
1
+ [tool.poetry]
2
+ name = "django-data-purger"
3
+ version = "0.2.3"
4
+ description = "Periodically remove data from your Django app."
5
+ authors = ["Eirik Martiniussen Sylliaas <eirik@sylliaas.no>"]
6
+ license = "MIT"
7
+ readme = "README.md"
8
+ packages = [{include = "django_data_purger", from = "src"}]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = ">= 3.11, < 3.14"
12
+ pydantic = "^2.11.6"
13
+
14
+ [tool.poetry.group.dev.dependencies]
15
+ ruff = "^0.11.13"
16
+ mypy = "^1.16.1"
17
+ pytest = "^8.4.0"
18
+ pytest-cov = "^6.2.1"
19
+ pytest-django = "^4.11.1"
20
+ pytest-socket = "^0.7.0"
21
+ django-stubs = "^5.2.0"
22
+ pytest-xdist = "^3.7.0"
23
+ django = "^5.2.3"
24
+ networkx = "^3.4.2"
25
+
26
+ [build-system]
27
+ requires = ["poetry-core"]
28
+ build-backend = "poetry.core.masonry.api"
29
+
30
+ [tool.ruff]
31
+ exclude = [
32
+ ".git",
33
+ ".venv",
34
+ "__pycache__",
35
+ "migrations",
36
+ ]
37
+
38
+ [tool.ruff.lint]
39
+ select = [
40
+ # Regular flake8 rules
41
+ "C", "E", "F", "W",
42
+ # flake8-bugbear rules
43
+ "B",
44
+ # Import sorting rules
45
+ "I",
46
+ # Django rules
47
+ "DJ",
48
+ # flake8-comprehensions
49
+ "C4",
50
+ # Pylint rules
51
+ "PLC", "PLE", "PLR", "PLW",
52
+ # Ruff
53
+ "RUF",
54
+ ]
55
+ ignore = [
56
+ # Disable magic value comparison. They're perfectly valid in tests and quite a few
57
+ # other places in the codebase. It would just be annoying to refactor.
58
+ "PLR2004",
59
+ # Too many return statements.
60
+ "PLR0911",
61
+ # Too many arguments to function call.
62
+ "PLR0913",
63
+ # DJ001 Avoid using null=True on string-based fields
64
+ "DJ001",
65
+ # DJ008 Model does not define __str__ method
66
+ "DJ008",
67
+ ]
68
+
69
+ [tool.ruff.lint.flake8-tidy-imports]
70
+ ban-relative-imports = "parents"
71
+
72
+ [tool.ruff.lint.isort]
73
+ combine-as-imports = true
74
+
75
+ [tool.mypy]
76
+ python_version = "3.12"
77
+ plugins = ["mypy_django_plugin.main", "pydantic.mypy"]
78
+ strict = true
79
+ ignore_missing_imports = true
80
+
81
+ [tool.django-stubs]
82
+ django_settings_module = "tests.settings"
83
+
84
+ [tool.pydantic-mypy]
85
+ init_forbid_extra = true
86
+ init_typed = true
87
+ warn_required_dynamic_aliases = true
88
+
89
+ [[tool.mypy.overrides]]
90
+ module = [
91
+ # Disable typing in migration files generated by Django
92
+ "django_data_purger.*.migrations.*",
93
+ ]
94
+ ignore_errors = true
95
+
96
+ [tool.pytest.ini_options]
97
+ pythonpath = [".", "src"]
98
+ testpaths = ["tests"]
99
+ # Reuse the database between tests
100
+ addopts = [
101
+ "--reuse-db",
102
+ "--allow-hosts=localhost,::1,127.0.0.1",
103
+ ]
104
+ # Include captured log messages in system-out in CI report file
105
+ junit_logging = "system-out"
106
+ markers = [ ]
107
+ # --- pytest-django settings
108
+ django_find_project = false
109
+ DJANGO_SETTINGS_MODULE = "tests.settings"
110
+
111
+ # Ignore select warnings from third party libraries.
112
+ filterwarnings = [
113
+ "error",
114
+ ]
115
+
116
+ [tool.coverage.run]
117
+ branch = true
118
+ source = [ "src/django_data_purger" ]
119
+ omit = [
120
+ "*/migrations/*",
121
+ ]
122
+
123
+ [tool.coverage.report]
124
+ fail_under = 30
125
+ exclude_lines = [
126
+ "pragma: no cover",
127
+ "if TYPE_CHECKING:",
128
+ ]
129
+
130
+ [tool.coverage.html]
131
+ directory = "coverage"
@@ -0,0 +1,30 @@
1
+ """Periodically remove data from your Django app."""
2
+
3
+ from importlib.metadata import PackageNotFoundError, version
4
+
5
+ from .data_purger import DataPurger, PurgeDeleteResult, PurgeResult, PurgeUpdateResult
6
+ from .enums import DataPurgerAction
7
+ from .exceptions import DataPurgerException
8
+ from .services import (
9
+ get_tables_with_data_purging_enabled,
10
+ run_data_purger,
11
+ run_data_purgers,
12
+ )
13
+
14
+ try:
15
+ __version__ = version(__name__)
16
+ except PackageNotFoundError: # pragma: no cover
17
+ __version__ = "unknown"
18
+
19
+
20
+ __all__ = [
21
+ "DataPurger",
22
+ "DataPurgerAction",
23
+ "DataPurgerException",
24
+ "PurgeDeleteResult",
25
+ "PurgeResult",
26
+ "PurgeUpdateResult",
27
+ "get_tables_with_data_purging_enabled",
28
+ "run_data_purger",
29
+ "run_data_purgers",
30
+ ]
@@ -0,0 +1,12 @@
1
+ from django.apps import AppConfig
2
+ from django.core import checks
3
+
4
+ from .checks import check_data_purgers
5
+
6
+
7
+ class DjangoDataPurgerConfig(AppConfig):
8
+ name = "django_data_purger"
9
+ verbose_name = "Django Data Purger"
10
+
11
+ def ready(self) -> None:
12
+ checks.register(check_data_purgers)
@@ -0,0 +1,44 @@
1
+ from typing import Any
2
+
3
+ from django.core.checks import Error, Warning
4
+
5
+ from .conf import settings
6
+ from .enums import DataPurgerAction
7
+ from .exceptions import DataPurgerImportException, DataPurgerInvalidConfiguration
8
+ from .services import import_data_purger
9
+
10
+
11
+ def check_data_purgers(app_configs: Any, **kwargs: Any) -> list[Warning | Error]:
12
+ errors: list[Warning | Error] = []
13
+
14
+ for data_purger_import in settings.DATA_PURGERS:
15
+ # Make sure the data purger can be imported.
16
+ try:
17
+ data_purger_cls = import_data_purger(data_purger_import)
18
+ except DataPurgerImportException:
19
+ errors.append(
20
+ Error(
21
+ f"django-data-purger is not able to import the data "
22
+ f"purger {data_purger_import}.",
23
+ hint=f"Make sure the {data_purger_import} entry in "
24
+ "settings.DATA_PURGERS can be imported.",
25
+ )
26
+ )
27
+ continue
28
+
29
+ # Make sure we are able to retrieve the expected affected models tuples.
30
+ for action in DataPurgerAction:
31
+ try:
32
+ data_purger_cls.expected_affected_models(action=action)
33
+ except DataPurgerInvalidConfiguration:
34
+ errors.append(
35
+ Error(
36
+ f"django-data-purger could not find the expected affected "
37
+ f"models when {action} operations is executed by "
38
+ f"{data_purger_import}.",
39
+ hint=f"Make sure the expected_{action.value}_models attr on "
40
+ f"the data purger {data_purger_import} is a list of strings.",
41
+ )
42
+ )
43
+
44
+ return errors
@@ -0,0 +1,14 @@
1
+ from django.conf import settings as django_settings
2
+
3
+
4
+ class Settings:
5
+ @property
6
+ def DATA_PURGERS(self) -> list[str]:
7
+ return getattr(
8
+ django_settings,
9
+ "DATA_PURGERS",
10
+ [],
11
+ )
12
+
13
+
14
+ settings = Settings()
@@ -0,0 +1,155 @@
1
+ from collections import defaultdict
2
+ from datetime import datetime
3
+ from typing import Any, ClassVar
4
+
5
+ from django.db.models import QuerySet
6
+ from pydantic import BaseModel, TypeAdapter, ValidationError
7
+
8
+ from .enums import DataPurgerAction
9
+ from .exceptions import DataPurgerInvalidConfiguration
10
+ from .utils import queryset_in_batches_non_slicing
11
+
12
+
13
+ class PurgeResult(BaseModel):
14
+ model: str
15
+ action: DataPurgerAction
16
+ affected_items: int
17
+
18
+
19
+ class PurgeUpdateResult(PurgeResult):
20
+ action: DataPurgerAction = DataPurgerAction.UPDATE
21
+
22
+
23
+ class PurgeDeleteResult(PurgeResult):
24
+ action: DataPurgerAction = DataPurgerAction.DELETE
25
+
26
+
27
+ class DataPurger:
28
+ BATCH_SIZE_LARGE = 500_000
29
+ BATCH_SIZE_MEDIUM = 10_000
30
+ BATCH_SIZE_SMALL = 500
31
+
32
+ DRY_RUN_OVERRIDE: ClassVar[bool] = False
33
+
34
+ expected_update_models: ClassVar[tuple[str, ...]] = ()
35
+ expected_delete_models: ClassVar[tuple[str, ...]] = ()
36
+
37
+ def run(self, *, now: datetime) -> list[PurgeResult]:
38
+ raise NotImplementedError("Subclasses must implement run")
39
+
40
+ def _update_queryset_in_batch(
41
+ self,
42
+ queryset: QuerySet[Any],
43
+ *,
44
+ batch_size: int = BATCH_SIZE_MEDIUM,
45
+ updates: dict[str, Any],
46
+ affected_rows_limit: int | None = None,
47
+ ) -> list[PurgeResult]:
48
+ """Update queryset in batches, return a list of PurgeResults."""
49
+ results: list[PurgeResult] = []
50
+
51
+ for batch in queryset_in_batches_non_slicing(queryset, chunk_size=batch_size):
52
+ results += self._update_queryset(batch, updates)
53
+
54
+ if affected_rows_limit and (
55
+ sum(result.affected_items for result in results) >= affected_rows_limit
56
+ ):
57
+ break
58
+
59
+ # Some models may have multiple purge results in the result list.
60
+ # Group them together by model name for better output.
61
+ queryset_result: dict[str, int] = defaultdict(int)
62
+
63
+ for result in results:
64
+ queryset_result[result.model] += result.affected_items
65
+
66
+ return [
67
+ PurgeUpdateResult(model=model, affected_items=affected_items)
68
+ for model, affected_items in queryset_result.items()
69
+ ]
70
+
71
+ def _delete_queryset_in_batch(
72
+ self,
73
+ queryset: QuerySet[Any],
74
+ *,
75
+ batch_size: int = BATCH_SIZE_MEDIUM,
76
+ affected_rows_limit: int | None = None,
77
+ ) -> list[PurgeResult]:
78
+ """Delete queryset in batches, return a list of PurgeResults."""
79
+ results: list[PurgeResult] = []
80
+
81
+ for batch in queryset_in_batches_non_slicing(queryset, chunk_size=batch_size):
82
+ results += self._delete_queryset(batch)
83
+
84
+ if affected_rows_limit and (
85
+ sum(result.affected_items for result in results) >= affected_rows_limit
86
+ ):
87
+ break
88
+
89
+ # Some models may have multiple purge results in the result list.
90
+ # Group them together by model name for better output.
91
+ queryset_result: dict[str, int] = defaultdict(int)
92
+
93
+ for result in results:
94
+ queryset_result[result.model] += result.affected_items
95
+
96
+ return [
97
+ PurgeDeleteResult(model=model, affected_items=affected_items)
98
+ for model, affected_items in queryset_result.items()
99
+ ]
100
+
101
+ def _update_queryset(
102
+ self, queryset: QuerySet[Any], updates: dict[str, Any]
103
+ ) -> list[PurgeResult]:
104
+ """Update items in queryset and return a list of PurgeResults."""
105
+ affected_models = queryset.update(**updates)
106
+
107
+ result: list[PurgeResult] = []
108
+
109
+ result.append(
110
+ PurgeUpdateResult(
111
+ model=queryset.model._meta.label,
112
+ affected_items=affected_models,
113
+ )
114
+ )
115
+
116
+ return result
117
+
118
+ def _delete_queryset(self, queryset: QuerySet[Any]) -> list[PurgeResult]:
119
+ """Delete items in queryset and return a list of PurgeResults."""
120
+ _, affected_models = queryset.delete()
121
+
122
+ result: list[PurgeResult] = []
123
+
124
+ for model, affected_items in affected_models.items():
125
+ result.append(PurgeDeleteResult(model=model, affected_items=affected_items))
126
+
127
+ return result
128
+
129
+ #
130
+ # Expected models
131
+ #
132
+
133
+ @classmethod
134
+ def expected_affected_models(cls, action: DataPurgerAction) -> set[str]:
135
+ """Return a set with the expected models affected by this data purger."""
136
+
137
+ attr = f"expected_{action.value}_models"
138
+
139
+ if not hasattr(cls, attr):
140
+ raise DataPurgerInvalidConfiguration(
141
+ f"Data purger {cls} does not have the {attr} configured."
142
+ )
143
+
144
+ expected_models = getattr(cls, attr)
145
+
146
+ try:
147
+ TypeAdapter(set[str] | list[str] | tuple[str]).validate_python(
148
+ expected_models
149
+ )
150
+ except ValidationError as exc:
151
+ raise DataPurgerInvalidConfiguration(
152
+ f"The {attr} attr on the data purger {cls} has to be a list of strings."
153
+ ) from exc
154
+
155
+ return set(getattr(cls, attr))
@@ -0,0 +1,8 @@
1
+ from django.db.models import TextChoices
2
+
3
+
4
+ class DataPurgerAction(TextChoices):
5
+ """Action to perform on the model by a data purger."""
6
+
7
+ UPDATE = "update"
8
+ DELETE = "delete"
@@ -0,0 +1,10 @@
1
+ class DataPurgerException(Exception):
2
+ """Base exception for all other exceptions raised by this library."""
3
+
4
+
5
+ class DataPurgerImportException(DataPurgerException):
6
+ """Raised when the framework is unable to import a data purger."""
7
+
8
+
9
+ class DataPurgerInvalidConfiguration(DataPurgerException):
10
+ """Raised when the data purger is configured incorrectly."""
@@ -0,0 +1,189 @@
1
+ from collections import defaultdict
2
+ from typing import Any, DefaultDict, cast
3
+
4
+ try:
5
+ import networkx as nx
6
+ except ImportError:
7
+ nx = None
8
+
9
+ from django.apps import apps
10
+ from django.core.management.base import BaseCommand, CommandParser
11
+ from django.db import models
12
+
13
+ from django_data_purger.enums import DataPurgerAction
14
+ from django_data_purger.services import get_tables_with_data_purging_enabled
15
+
16
+
17
+ class Collector:
18
+ def __init__(self, *, source_model: type[models.Model]) -> None:
19
+ if nx is None:
20
+ raise RuntimeError(
21
+ "Please install networkx before using the model dependency collector."
22
+ )
23
+
24
+ self.source_model = source_model
25
+
26
+ self.seen_models: set[type[models.Model]] = set()
27
+ self.dependencies: DefaultDict[type[models.Model], set[type[models.Model]]] = (
28
+ defaultdict(set)
29
+ )
30
+
31
+ def add_dependency(
32
+ self, *, model: type[models.Model], dependency: type[models.Model]
33
+ ) -> None:
34
+ self.dependencies[model].add(dependency)
35
+
36
+ def collect(self, *, model: type[models.Model]) -> None:
37
+ child_relations = (
38
+ rel
39
+ for rel in model._meta.get_fields(include_hidden=True)
40
+ if rel.auto_created
41
+ and not rel.concrete
42
+ and (rel.one_to_one or rel.one_to_many)
43
+ )
44
+
45
+ for rel in child_relations:
46
+ related_model = cast(type[models.Model], rel.related_model)
47
+
48
+ if model == related_model:
49
+ continue
50
+
51
+ if not related_model:
52
+ continue
53
+
54
+ self.add_dependency(model=model, dependency=related_model)
55
+
56
+ if related_model not in self.seen_models:
57
+ self.seen_models.add(related_model)
58
+ self.collect(model=related_model)
59
+
60
+ def calculate_affected_models(self) -> int:
61
+ affected_models: set[type[models.Model]] = set()
62
+
63
+ for dependencies in self.dependencies.values():
64
+ affected_models |= dependencies
65
+
66
+ return len(affected_models)
67
+
68
+ def calculate_dependency_ordering(self) -> list[list[type[models.Model]]]: # noqa
69
+ # Grab a copy of the dependencies, we remove items from it while
70
+ # calculating the dependency ordering.
71
+ dependencies = self.dependencies.copy()
72
+
73
+ delete_batches: list[list[type[models.Model]]] = []
74
+
75
+ while dependencies:
76
+ models_to_delete: set[type[models.Model]] = set()
77
+ current_batch: set[type[models.Model]] = set()
78
+
79
+ for parent, model_dependencies in dependencies.items():
80
+ all_models = {parent, *list(model_dependencies)}
81
+
82
+ for model in all_models:
83
+ # We can't delete the model if it exists as a key in
84
+ # the dependencies mapping.
85
+ if model in dependencies.keys():
86
+ continue
87
+
88
+ # It's safe to add the model to the current batch of models that
89
+ # can be deleted independently of each other.
90
+ current_batch.add(model)
91
+
92
+ # Remove the model from the dependency tree.
93
+ for deps in dependencies.values():
94
+ try:
95
+ deps.remove(model)
96
+ except KeyError:
97
+ pass
98
+
99
+ # Models without any dependencies left can be deleted from
100
+ # the dependency tree.
101
+ for model_to_delete, deps in dependencies.items():
102
+ if not deps:
103
+ models_to_delete.add(model_to_delete)
104
+
105
+ # Circular dependencies where only models forming the circle exist
106
+ # as dependencies have to be removed.
107
+ edges = [[k, v] for k, items in dependencies.items() for v in items]
108
+ graph = nx.DiGraph(edges)
109
+ cycles = nx.simple_cycles(graph)
110
+
111
+ for cycle in cycles:
112
+ cycle_set = set(cycle)
113
+ for _model, _dependencies in dependencies.items():
114
+ if _model in cycle and _dependencies.issubset(cycle_set):
115
+ models_to_delete.add(_model)
116
+
117
+ # Add the current batch of models to the result.
118
+ delete_batches.append(list(current_batch))
119
+
120
+ # Remove the models without any dependencies left from the dependency tree
121
+ # before calculating the next batch.
122
+ for model in models_to_delete:
123
+ del dependencies[model]
124
+
125
+ return delete_batches
126
+
127
+ def print_dependency_results(self) -> None:
128
+ def get_model_name(model: type[models.Model]) -> str:
129
+ return f"{model._meta.app_label}.{model._meta.object_name}"
130
+
131
+ for model, dependencies in self.dependencies.items():
132
+ print(f"The following models depend on {get_model_name(model)}:")
133
+ for dependency in dependencies:
134
+ print(f"- {get_model_name(dependency)}")
135
+ print()
136
+
137
+ print()
138
+ print("==============")
139
+ print()
140
+
141
+ print(
142
+ f"{self.calculate_affected_models()} models depend "
143
+ f"on {get_model_name(self.source_model)}."
144
+ )
145
+
146
+ print()
147
+ print("==============")
148
+ print()
149
+
150
+ print(
151
+ f"The models have to be deleted in the following order "
152
+ f"before you can delete {get_model_name(self.source_model)}:"
153
+ )
154
+ print("(Models from each batch can be deleted in an arbitrary order.)")
155
+ print()
156
+
157
+ batches = self.calculate_dependency_ordering()
158
+ tables_with_purging = get_tables_with_data_purging_enabled(
159
+ action=DataPurgerAction.DELETE
160
+ )
161
+
162
+ for i, batch in enumerate(batches):
163
+ print(f"Batch {i + 1}:")
164
+ for model in batch:
165
+ model_name = get_model_name(model)
166
+ print(
167
+ "- "
168
+ + ("✅" if model_name in tables_with_purging else "🛑")
169
+ + f" {model_name}"
170
+ )
171
+
172
+ print()
173
+
174
+
175
+ class Command(BaseCommand):
176
+ help = "List models depending on the input model"
177
+
178
+ def add_arguments(self, parser: CommandParser) -> None:
179
+ parser.add_argument("--model", required=True)
180
+
181
+ def handle(self, *args: Any, **options: Any) -> None:
182
+ model_full_name = options["model"]
183
+ app_label, model_name = model_full_name.split(".")
184
+
185
+ model = apps.get_model(app_label=app_label, model_name=model_name)
186
+
187
+ collector = Collector(source_model=model)
188
+ collector.collect(model=model)
189
+ collector.print_dependency_results()
@@ -0,0 +1,33 @@
1
+ from typing import Any
2
+
3
+ from django.core.management.base import BaseCommand, CommandParser
4
+
5
+ from django_data_purger.enums import DataPurgerAction
6
+ from django_data_purger.exceptions import DataPurgerException
7
+ from django_data_purger.services import get_tables_with_data_purging_enabled
8
+
9
+
10
+ class Command(BaseCommand):
11
+ help = "Print tables with data purging enabled."
12
+
13
+ def add_arguments(self, parser: CommandParser) -> None:
14
+ parser.add_argument("--action", required=True)
15
+
16
+ def handle(self, *args: Any, **options: Any) -> None:
17
+ action_value = options["action"]
18
+
19
+ try:
20
+ action = DataPurgerAction(action_value)
21
+ except ValueError as exc:
22
+ supported_actions = ", ".join(DataPurgerAction)
23
+ raise DataPurgerException(
24
+ f"Action {action_value} is not a valid action, use one "
25
+ f"of {supported_actions}."
26
+ ) from exc
27
+
28
+ tables = get_tables_with_data_purging_enabled(action=action)
29
+
30
+ print("Print tables with data purging enabled:")
31
+
32
+ for table in tables:
33
+ print(f"- {table}")
@@ -0,0 +1,18 @@
1
+ from typing import Any
2
+
3
+ from django.core.management import BaseCommand
4
+ from django.core.management.base import CommandParser
5
+
6
+ from django_data_purger.services.data_purger import run_data_purgers
7
+
8
+
9
+ class Command(BaseCommand):
10
+ help: str = "Removes stale database objects."
11
+
12
+ def add_arguments(self, parser: CommandParser) -> None:
13
+ parser.add_argument("--force", default=False, action="store_true")
14
+
15
+ def handle(self, *args: Any, **options: Any) -> None:
16
+ force = options["force"]
17
+
18
+ run_data_purgers(dry_run=not force)
@@ -0,0 +1,9 @@
1
+ from .data_purger import import_data_purger, run_data_purger, run_data_purgers
2
+ from .tables import get_tables_with_data_purging_enabled
3
+
4
+ __all__ = [
5
+ "get_tables_with_data_purging_enabled",
6
+ "import_data_purger",
7
+ "run_data_purger",
8
+ "run_data_purgers",
9
+ ]
@@ -0,0 +1,139 @@
1
+ import logging
2
+ import time
3
+ from datetime import datetime
4
+
5
+ from django.db import transaction
6
+ from django.utils import timezone
7
+ from django.utils.module_loading import import_string
8
+
9
+ from django_data_purger.conf import settings
10
+ from django_data_purger.data_purger import DataPurger, PurgeResult
11
+ from django_data_purger.enums import DataPurgerAction
12
+ from django_data_purger.exceptions import DataPurgerImportException
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class DryRunException(Exception):
    """
    Internal control-flow exception used to abort the purge transaction.

    Raising it inside ``transaction.atomic()`` forces Django to roll back
    every change made by the purger. It is an implementation detail of this
    module and must not be exposed outside of this file.
    """
+ """
24
+
25
+
26
def import_data_purger(data_purger_path: str) -> type[DataPurger]:
    """
    Import and return the data purger class at *data_purger_path*.

    Args:
        data_purger_path: dotted import path of a DataPurger subclass.

    Raises:
        DataPurgerImportException: if the path cannot be imported, or the
            imported object is not a DataPurger subclass.
    """
    try:
        data_purger_cls: type[DataPurger] = import_string(data_purger_path)
    except ImportError as exc:
        raise DataPurgerImportException(
            "Data purger could not be imported, check the import path."
        ) from exc

    # Explicit validation instead of `assert`: asserts are stripped when
    # Python runs with -O, which would silently disable this check. The
    # isinstance(..., type) guard also covers import_string returning a
    # non-class object, where issubclass would raise TypeError.
    if not (
        isinstance(data_purger_cls, type)
        and issubclass(data_purger_cls, DataPurger)
    ):
        raise DataPurgerImportException(
            "Imported object is not based on the DataPurger base class."
        )

    return data_purger_cls
42
+
43
+
44
def run_data_purgers(dry_run: bool = True) -> None:
    """Run data purgers defined in settings.DATA_PURGERS."""
    # A single timestamp is shared by every purger in this run so they all
    # agree on "now".
    now = timezone.now()

    results: list[PurgeResult] = []

    for purger_path in settings.DATA_PURGERS:
        try:
            purger_cls = import_data_purger(purger_path)
        except DataPurgerImportException:
            logger.warning(
                "Could not import data purger %s, skipping please fix your "
                "purger imports.",
                purger_path,
            )
            continue

        results.extend(
            run_data_purger(data_purger=purger_cls(), dry_run=dry_run, now=now)
        )

    # Tally affected items per action for the summary log line.
    totals = {DataPurgerAction.UPDATE: 0, DataPurgerAction.DELETE: 0}
    for result in results:
        if result.action in totals:
            totals[result.action] += result.affected_items

    updated_items = totals[DataPurgerAction.UPDATE]
    deleted_items = totals[DataPurgerAction.DELETE]

    logger.info(
        f"Data purgers updated {updated_items:,} and deleted {deleted_items:,} items"
    )
81
+
82
+
83
def run_data_purger(
    *, data_purger: DataPurger, dry_run: bool, now: datetime | None = None
) -> list[PurgeResult]:
    """
    Run a single data purger inside one transaction and log the result.

    The purger's reported changes are validated against its declared
    ``expected_affected_models``; any undeclared model aborts (and rolls
    back) the whole transaction via ``RuntimeError``.

    Args:
        data_purger: the purger instance to execute.
        dry_run: when True, roll back everything the purger did.
        now: reference timestamp passed to the purger; defaults to
            ``timezone.now()``.

    Returns:
        The list of ``PurgeResult`` objects produced by the purger.

    Raises:
        RuntimeError: if the purger touched a model it did not declare.
    """
    purger_name = data_purger.__class__.__name__

    logger.info(f"Running data purger {purger_name}")

    now = now or timezone.now()

    try:
        with transaction.atomic():
            start_time = time.monotonic()

            results = data_purger.run(now=now)

            done_time = time.monotonic()

            logger.info(
                f"Data purger {purger_name} done (in {(done_time - start_time):.1f}s)"
            )

            # Safety net: every reported change must be on a model the
            # purger declared up front; otherwise abort and roll back.
            for result in results:
                expected_affected_models = data_purger.expected_affected_models(
                    action=result.action
                )

                if result.model not in expected_affected_models:
                    raise RuntimeError(
                        f"Unexpected {result.action} on model {result.model} by "
                        f"{purger_name}, rolling back transaction"
                    )

            # Raising DryRunException inside the atomic block is what makes
            # Django roll the transaction back; it is caught just below.
            if dry_run:
                raise DryRunException()

            if data_purger.DRY_RUN_OVERRIDE:
                logger.info(
                    "Data purger %s has the DRY_RUN_OVERRIDE flag set to True, "
                    "changes are going to be rolled back.",
                    purger_name,
                )
                raise DryRunException()

    except DryRunException:
        # Expected control-flow path for dry runs. `results` is always bound
        # here because DryRunException is only raised after run() returned.
        pass

    if len(results) == 0:
        logger.info(f"No changes made by {purger_name}")

    for result in results:
        logger.info(
            f"Purge result from {purger_name}: "
            f"{result.model} {result.action} {result.affected_items:,} items"
        )

    return results
@@ -0,0 +1,41 @@
1
+ from logging import getLogger
2
+
3
+ from django_data_purger.conf import settings
4
+ from django_data_purger.enums import DataPurgerAction
5
+ from django_data_purger.exceptions import DataPurgerImportException
6
+
7
+ from .data_purger import import_data_purger
8
+
9
+ logger = getLogger(__name__)
10
+
11
+
12
def get_tables_with_data_purging_enabled(*, action: DataPurgerAction) -> list[str]:
    """Return a sorted list of tables with purging enabled for *action*."""
    tables: set[str] = set()

    for purger_path in settings.DATA_PURGERS:
        try:
            purger_cls = import_data_purger(purger_path)
        except DataPurgerImportException:
            logger.warning(
                "Could not import data purger %s, skipping please fix your "
                "purger imports.",
                purger_path,
            )
            continue

        # Some data purgers are configured to always run in DRY_RUN mode;
        # their changes are always rolled back, so their tables are not
        # really purged and are excluded from the report.
        if purger_cls.DRY_RUN_OVERRIDE:
            logger.info(
                "Data purger %s has the DRY_RUN_OVERRIDE flag set to True, "
                "skipping tables.",
                purger_path,
            )
            continue

        tables.update(purger_cls.expected_affected_models(action=action))

    return sorted(tables)
@@ -0,0 +1,37 @@
1
+ from typing import Generator, TypeVar
2
+
3
+ from django.db.models import Model, QuerySet
4
+
5
+ TModel = TypeVar("TModel", bound=Model)
6
+
7
+
8
def queryset_in_batches_non_slicing(
    queryset: QuerySet[TModel], chunk_size: int = 1000
) -> Generator[QuerySet[TModel], None, None]:
    """
    Yield successive primary-key windows of *queryset*.

    Does not slice the queryset; it filters naively on pk bounds
    (``pk__gt``/``pk__lte``), so each yielded queryset still supports bulk
    operations such as ``.update()`` and ``.delete()``.

    Note: windows are fixed ``chunk_size``-wide pk intervals, so with sparse
    primary keys a yielded queryset may hold fewer than ``chunk_size`` rows
    (possibly zero). Assumes integer primary keys — TODO confirm for callers.

    Args:
        queryset: source queryset; it is re-ordered by primary key.
        chunk_size: width of each pk window; must be positive.

    Raises:
        ValueError: if ``chunk_size`` is not positive (a non-positive value
            would previously loop forever).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    queryset = queryset.order_by("pk")

    first_element = queryset.first()
    if first_element is None:
        # Empty queryset: nothing to yield.
        return

    last_element = queryset.last()
    if last_element is None:
        # Only possible if every row vanished between the two queries; an
        # explicit guard instead of `assert`, which -O would strip.
        return

    # Start one below the first pk so the first window includes it.
    pk = max(first_element.pk - 1, 0)

    while pk < last_element.pk:
        lower_bound = pk
        pk += chunk_size
        yield queryset.filter(pk__gt=lower_bound, pk__lte=pk)