django-data-purger 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- django_data_purger-0.2.3/PKG-INFO +174 -0
- django_data_purger-0.2.3/README.md +158 -0
- django_data_purger-0.2.3/pyproject.toml +131 -0
- django_data_purger-0.2.3/src/django_data_purger/__init__.py +30 -0
- django_data_purger-0.2.3/src/django_data_purger/apps.py +12 -0
- django_data_purger-0.2.3/src/django_data_purger/checks.py +44 -0
- django_data_purger-0.2.3/src/django_data_purger/conf.py +14 -0
- django_data_purger-0.2.3/src/django_data_purger/data_purger.py +155 -0
- django_data_purger-0.2.3/src/django_data_purger/enums.py +8 -0
- django_data_purger-0.2.3/src/django_data_purger/exceptions.py +10 -0
- django_data_purger-0.2.3/src/django_data_purger/management/__init__.py +0 -0
- django_data_purger-0.2.3/src/django_data_purger/management/commands/__init__.py +0 -0
- django_data_purger-0.2.3/src/django_data_purger/management/commands/calculate_model_dependencies.py +189 -0
- django_data_purger-0.2.3/src/django_data_purger/management/commands/print_data_purging_enabled_tables.py +33 -0
- django_data_purger-0.2.3/src/django_data_purger/management/commands/run_data_purgers.py +18 -0
- django_data_purger-0.2.3/src/django_data_purger/py.typed +0 -0
- django_data_purger-0.2.3/src/django_data_purger/services/__init__.py +9 -0
- django_data_purger-0.2.3/src/django_data_purger/services/data_purger.py +139 -0
- django_data_purger-0.2.3/src/django_data_purger/services/tables.py +41 -0
- django_data_purger-0.2.3/src/django_data_purger/utils.py +37 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: django-data-purger
|
|
3
|
+
Version: 0.2.3
|
|
4
|
+
Summary: Periodically remove data from your Django app.
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Eirik Martiniussen Sylliaas
|
|
7
|
+
Author-email: eirik@sylliaas.no
|
|
8
|
+
Requires-Python: >=3.11,<3.14
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: pydantic (>=2.11.6,<3.0.0)
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# django-data-purger
|
|
17
|
+
|
|
18
|
+
> Periodically remove data from your Django app.
|
|
19
|
+
|
|
20
|
+
## Getting Started
|
|
21
|
+
|
|
22
|
+
1. Install django-data-purger
|
|
23
|
+
|
|
24
|
+
Use Poetry to add the package
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
$ poetry add django-data-purger
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
2. Add `django_data_purger` to `INSTALLED_APPS`
|
|
31
|
+
|
|
32
|
+
Update your `INSTALLED_APPS` setting:
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
INSTALLED_APPS = [
|
|
36
|
+
'django...',
|
|
37
|
+
...
|
|
38
|
+
'django_data_purger',
|
|
39
|
+
]
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
3. Create a data purger in the Django app you want to clean periodically
|
|
43
|
+
|
|
44
|
+
Example:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
# data_purger.py
|
|
48
|
+
from django_data_purger.data_purger import DataPurger, PurgeResult
|
|
49
|
+
from app.models import DataModel
|
|
50
|
+
from datetime import datetime, timedelta
|
|
51
|
+
|
|
52
|
+
class PurgeDataModel(DataPurger):
|
|
53
|
+
expected_delete_models = ("app.DataModel",)
|
|
54
|
+
|
|
55
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
56
|
+
old_threshold = now - timedelta(weeks=6)
|
|
57
|
+
|
|
58
|
+
entries = DataModel.objects.filter(
|
|
59
|
+
created_time__lte=old_threshold,
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
return self._delete_queryset_in_batch(
|
|
63
|
+
entries, batch_size=DataPurger.BATCH_SIZE_LARGE
|
|
64
|
+
)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
4. Register the data purger in the `DATA_PURGERS` setting
|
|
68
|
+
|
|
69
|
+
Add the purger to your settings:
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
DATA_PURGERS = [
|
|
73
|
+
"app.data_purger.PurgeDataModel",
|
|
74
|
+
]
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
5. Run the management command to purge old data
|
|
79
|
+
|
|
80
|
+
Configure this command to run periodically using a scheduler like cron:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
$ python manage.py run_data_purgers --force
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Settings
|
|
87
|
+
|
|
88
|
+
| Setting name | Type | Default | Description |
|
|
89
|
+
| ----------------------- | ----------- | ------- | ---------------------------------------------------------- |
|
|
90
|
+
| `DATA_PURGERS` | `list[str]` | `[]` | Array with import strings to data purgers in your project. |
|
|
91
|
+
|
|
92
|
+
## The DataPurger Class
|
|
93
|
+
|
|
94
|
+
The DataPurger class can be used to UPDATE or DELETE models. It runs within a transaction and ensures that updates or deletions are only applied to whitelisted models.
|
|
95
|
+
|
|
96
|
+
### Update Model Instances
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
class PurgeDataModel(DataPurger):
|
|
100
|
+
expected_update_models = ("app.DataModel",)
|
|
101
|
+
|
|
102
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
103
|
+
old_threshold = now - timedelta(weeks=6)
|
|
104
|
+
|
|
105
|
+
entries = DataModel.objects.filter(
|
|
106
|
+
created_time__lte=old_threshold,
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
return self._update_queryset_in_batch(
|
|
110
|
+
entries, updates={"is_deleted": True}, batch_size=DataPurger.BATCH_SIZE_MEDIUM
|
|
111
|
+
)
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Delete Model Instances
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
class PurgeDataModel(DataPurger):
|
|
118
|
+
expected_delete_models = ("app.DataModel",)
|
|
119
|
+
|
|
120
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
121
|
+
old_threshold = now - timedelta(weeks=6)
|
|
122
|
+
|
|
123
|
+
entries = DataModel.objects.filter(
|
|
124
|
+
created_time__lte=old_threshold,
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
return self._delete_queryset_in_batch(
|
|
128
|
+
entries, batch_size=DataPurger.BATCH_SIZE_LARGE
|
|
129
|
+
)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Planning for Model Instance Deletion
|
|
133
|
+
|
|
134
|
+
Models often depend on each other via `ForeignKey` or `ManyToManyField` relationships. It can be challenging to determine the correct order for deleting models without causing unexpected cascading deletions or errors from `on_delete=models.PROTECT`.
|
|
135
|
+
|
|
136
|
+
django-data-purger includes a tool to explore model dependencies. ✅ and 🛑 icons indicate whether a data purger for the model is already defined.
|
|
137
|
+
|
|
138
|
+
Example:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
$ poetry run python manage.py calculate_model_dependencies --model app.DataModel
|
|
142
|
+
The following models depend on app.DataModel:
|
|
143
|
+
- ...
|
|
144
|
+
|
|
145
|
+
The following models depend on ...:
|
|
146
|
+
- ...
|
|
147
|
+
|
|
148
|
+
==============
|
|
149
|
+
|
|
150
|
+
2 models depend on app.DataModel.
|
|
151
|
+
|
|
152
|
+
==============
|
|
153
|
+
|
|
154
|
+
The models have to be deleted in the following order before you can delete app.DataModel:
|
|
155
|
+
(Models from each batch can be deleted in an arbitrary order.)
|
|
156
|
+
|
|
157
|
+
Batch 1:
|
|
158
|
+
- ✅ ...
|
|
159
|
+
- 🛑 ...
|
|
160
|
+
|
|
161
|
+
Batch 2:
|
|
162
|
+
- ✅ ...
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Listing Models with Enabled Data Purgers
|
|
166
|
+
|
|
167
|
+
To view all models with a configured data purger:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
$ python manage.py print_data_purging_enabled_tables --action delete
|
|
171
|
+
|
|
172
|
+
- app.DataModel
|
|
173
|
+
```
|
|
174
|
+
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
# django-data-purger
|
|
2
|
+
|
|
3
|
+
> Periodically remove data from your Django app.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
1. Install django-data-purger
|
|
8
|
+
|
|
9
|
+
Use Poetry to add the package
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
$ poetry add django-data-purger
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
2. Add `django_data_purger` to `INSTALLED_APPS`
|
|
16
|
+
|
|
17
|
+
Update your `INSTALLED_APPS` setting:
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
INSTALLED_APPS = [
|
|
21
|
+
'django...',
|
|
22
|
+
...
|
|
23
|
+
'django_data_purger',
|
|
24
|
+
]
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
3. Create a data purger in the Django app you want to clean periodically
|
|
28
|
+
|
|
29
|
+
Example:
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
# data_purger.py
|
|
33
|
+
from django_data_purger.data_purger import DataPurger, PurgeResult
|
|
34
|
+
from app.models import DataModel
|
|
35
|
+
from datetime import datetime, timedelta
|
|
36
|
+
|
|
37
|
+
class PurgeDataModel(DataPurger):
|
|
38
|
+
expected_delete_models = ("app.DataModel",)
|
|
39
|
+
|
|
40
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
41
|
+
old_threshold = now - timedelta(weeks=6)
|
|
42
|
+
|
|
43
|
+
entries = DataModel.objects.filter(
|
|
44
|
+
created_time__lte=old_threshold,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
return self._delete_queryset_in_batch(
|
|
48
|
+
entries, batch_size=DataPurger.BATCH_SIZE_LARGE
|
|
49
|
+
)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
4. Register the data purger in the `DATA_PURGERS` setting
|
|
53
|
+
|
|
54
|
+
Add the purger to your settings:
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
DATA_PURGERS = [
|
|
58
|
+
"app.data_purger.PurgeDataModel",
|
|
59
|
+
]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
5. Run the management command to purge old data
|
|
64
|
+
|
|
65
|
+
Configure this command to run periodically using a scheduler like cron:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
$ python manage.py run_data_purgers --force
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Settings
|
|
72
|
+
|
|
73
|
+
| Setting name | Type | Default | Description |
|
|
74
|
+
| ----------------------- | ----------- | ------- | ---------------------------------------------------------- |
|
|
75
|
+
| `DATA_PURGERS` | `list[str]` | `[]` | Array with import strings to data purgers in your project. |
|
|
76
|
+
|
|
77
|
+
## The DataPurger Class
|
|
78
|
+
|
|
79
|
+
The DataPurger class can be used to UPDATE or DELETE models. It runs within a transaction and ensures that updates or deletions are only applied to whitelisted models.
|
|
80
|
+
|
|
81
|
+
### Update Model Instances
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
class PurgeDataModel(DataPurger):
|
|
85
|
+
expected_update_models = ("app.DataModel",)
|
|
86
|
+
|
|
87
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
88
|
+
old_threshold = now - timedelta(weeks=6)
|
|
89
|
+
|
|
90
|
+
entries = DataModel.objects.filter(
|
|
91
|
+
created_time__lte=old_threshold,
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
return self._update_queryset_in_batch(
|
|
95
|
+
entries, updates={"is_deleted": True}, batch_size=DataPurger.BATCH_SIZE_MEDIUM
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Delete Model Instances
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
class PurgeDataModel(DataPurger):
|
|
103
|
+
expected_delete_models = ("app.DataModel",)
|
|
104
|
+
|
|
105
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
106
|
+
old_threshold = now - timedelta(weeks=6)
|
|
107
|
+
|
|
108
|
+
entries = DataModel.objects.filter(
|
|
109
|
+
created_time__lte=old_threshold,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
return self._delete_queryset_in_batch(
|
|
113
|
+
entries, batch_size=DataPurger.BATCH_SIZE_LARGE
|
|
114
|
+
)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Planning for Model Instance Deletion
|
|
118
|
+
|
|
119
|
+
Models often depend on each other via `ForeignKey` or `ManyToManyField` relationships. It can be challenging to determine the correct order for deleting models without causing unexpected cascading deletions or errors from `on_delete=models.PROTECT`.
|
|
120
|
+
|
|
121
|
+
django-data-purger includes a tool to explore model dependencies. ✅ and 🛑 icons indicate whether a data purger for the model is already defined.
|
|
122
|
+
|
|
123
|
+
Example:
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
$ poetry run python manage.py calculate_model_dependencies --model app.DataModel
|
|
127
|
+
The following models depend on app.DataModel:
|
|
128
|
+
- ...
|
|
129
|
+
|
|
130
|
+
The following models depend on ...:
|
|
131
|
+
- ...
|
|
132
|
+
|
|
133
|
+
==============
|
|
134
|
+
|
|
135
|
+
2 models depend on app.DataModel.
|
|
136
|
+
|
|
137
|
+
==============
|
|
138
|
+
|
|
139
|
+
The models have to be deleted in the following order before you can delete app.DataModel:
|
|
140
|
+
(Models from each batch can be deleted in an arbitrary order.)
|
|
141
|
+
|
|
142
|
+
Batch 1:
|
|
143
|
+
- ✅ ...
|
|
144
|
+
- 🛑 ...
|
|
145
|
+
|
|
146
|
+
Batch 2:
|
|
147
|
+
- ✅ ...
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Listing Models with Enabled Data Purgers
|
|
151
|
+
|
|
152
|
+
To view all models with a configured data purger:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
$ python manage.py print_data_purging_enabled_tables --action delete
|
|
156
|
+
|
|
157
|
+
- app.DataModel
|
|
158
|
+
```
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "django-data-purger"
|
|
3
|
+
version = "0.2.3"
|
|
4
|
+
description = "Periodically remove data from your Django app."
|
|
5
|
+
authors = ["Eirik Martiniussen Sylliaas <eirik@sylliaas.no>"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
packages = [{include = "django_data_purger", from = "src"}]
|
|
9
|
+
|
|
10
|
+
[tool.poetry.dependencies]
|
|
11
|
+
python = ">= 3.11, < 3.14"
|
|
12
|
+
pydantic = "^2.11.6"
|
|
13
|
+
|
|
14
|
+
[tool.poetry.group.dev.dependencies]
|
|
15
|
+
ruff = "^0.11.13"
|
|
16
|
+
mypy = "^1.16.1"
|
|
17
|
+
pytest = "^8.4.0"
|
|
18
|
+
pytest-cov = "^6.2.1"
|
|
19
|
+
pytest-django = "^4.11.1"
|
|
20
|
+
pytest-socket = "^0.7.0"
|
|
21
|
+
django-stubs = "^5.2.0"
|
|
22
|
+
pytest-xdist = "^3.7.0"
|
|
23
|
+
django = "^5.2.3"
|
|
24
|
+
networkx = "^3.4.2"
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["poetry-core"]
|
|
28
|
+
build-backend = "poetry.core.masonry.api"
|
|
29
|
+
|
|
30
|
+
[tool.ruff]
|
|
31
|
+
exclude = [
|
|
32
|
+
".git",
|
|
33
|
+
".venv",
|
|
34
|
+
"__pycache__",
|
|
35
|
+
"migrations",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[tool.ruff.lint]
|
|
39
|
+
select = [
|
|
40
|
+
# Regular flake8 rules
|
|
41
|
+
"C", "E", "F", "W",
|
|
42
|
+
# flake8-bugbear rules
|
|
43
|
+
"B",
|
|
44
|
+
# Import sorting rules
|
|
45
|
+
"I",
|
|
46
|
+
# Django rules
|
|
47
|
+
"DJ",
|
|
48
|
+
# flake8-comprehensions
|
|
49
|
+
"C4",
|
|
50
|
+
# Pylint rules
|
|
51
|
+
"PLC", "PLE", "PLR", "PLW",
|
|
52
|
+
# Ruff
|
|
53
|
+
"RUF",
|
|
54
|
+
]
|
|
55
|
+
ignore = [
|
|
56
|
+
# Disable magic value comparison. They're perfectly valid in tests and quite a few
|
|
57
|
+
# other places in the codebase. It would just be annoying to refactor.
|
|
58
|
+
"PLR2004",
|
|
59
|
+
# Too many return statements.
|
|
60
|
+
"PLR0911",
|
|
61
|
+
# Too many arguments to function call.
|
|
62
|
+
"PLR0913",
|
|
63
|
+
# DJ001 Avoid using null=True on string-based fields
|
|
64
|
+
"DJ001",
|
|
65
|
+
# DJ008 Model does not define __str__ method
|
|
66
|
+
"DJ008",
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
[tool.ruff.lint.flake8-tidy-imports]
|
|
70
|
+
ban-relative-imports = "parents"
|
|
71
|
+
|
|
72
|
+
[tool.ruff.lint.isort]
|
|
73
|
+
combine-as-imports = true
|
|
74
|
+
|
|
75
|
+
[tool.mypy]
|
|
76
|
+
python_version = "3.12"
|
|
77
|
+
plugins = ["mypy_django_plugin.main", "pydantic.mypy"]
|
|
78
|
+
strict = true
|
|
79
|
+
ignore_missing_imports = true
|
|
80
|
+
|
|
81
|
+
[tool.django-stubs]
|
|
82
|
+
django_settings_module = "tests.settings"
|
|
83
|
+
|
|
84
|
+
[tool.pydantic-mypy]
|
|
85
|
+
init_forbid_extra = true
|
|
86
|
+
init_typed = true
|
|
87
|
+
warn_required_dynamic_aliases = true
|
|
88
|
+
|
|
89
|
+
[[tool.mypy.overrides]]
|
|
90
|
+
module = [
|
|
91
|
+
# Disable typing in migration files generated by Django
|
|
92
|
+
"django_data_purger.*.migrations.*",
|
|
93
|
+
]
|
|
94
|
+
ignore_errors = true
|
|
95
|
+
|
|
96
|
+
[tool.pytest.ini_options]
|
|
97
|
+
pythonpath = [".", "src"]
|
|
98
|
+
testpaths = ["tests"]
|
|
99
|
+
# Reuse the database between tests
|
|
100
|
+
addopts = [
|
|
101
|
+
"--reuse-db",
|
|
102
|
+
"--allow-hosts=localhost,::1,127.0.0.1",
|
|
103
|
+
]
|
|
104
|
+
# Include captured log messages in system-out in CI report file
|
|
105
|
+
junit_logging = "system-out"
|
|
106
|
+
markers = [ ]
|
|
107
|
+
# --- pytest-django settings
|
|
108
|
+
django_find_project = false
|
|
109
|
+
DJANGO_SETTINGS_MODULE = "tests.settings"
|
|
110
|
+
|
|
111
|
+
# Ignore select warnings from third party libraries.
|
|
112
|
+
filterwarnings = [
|
|
113
|
+
"error",
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
[tool.coverage.run]
|
|
117
|
+
branch = true
|
|
118
|
+
source = [ "src/django_data_purger" ]
|
|
119
|
+
omit = [
|
|
120
|
+
"*/migrations/*",
|
|
121
|
+
]
|
|
122
|
+
|
|
123
|
+
[tool.coverage.report]
|
|
124
|
+
fail_under = 30
|
|
125
|
+
exclude_lines = [
|
|
126
|
+
"pragma: no cover",
|
|
127
|
+
"if TYPE_CHECKING:",
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
[tool.coverage.html]
|
|
131
|
+
directory = "coverage"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Periodically remove data from your Django app."""
|
|
2
|
+
|
|
3
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
4
|
+
|
|
5
|
+
from .data_purger import DataPurger, PurgeDeleteResult, PurgeResult, PurgeUpdateResult
|
|
6
|
+
from .enums import DataPurgerAction
|
|
7
|
+
from .exceptions import DataPurgerException
|
|
8
|
+
from .services import (
|
|
9
|
+
get_tables_with_data_purging_enabled,
|
|
10
|
+
run_data_purger,
|
|
11
|
+
run_data_purgers,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
__version__ = version(__name__)
|
|
16
|
+
except PackageNotFoundError: # pragma: no cover
|
|
17
|
+
__version__ = "unknown"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"DataPurger",
|
|
22
|
+
"DataPurgerAction",
|
|
23
|
+
"DataPurgerException",
|
|
24
|
+
"PurgeDeleteResult",
|
|
25
|
+
"PurgeResult",
|
|
26
|
+
"PurgeUpdateResult",
|
|
27
|
+
"get_tables_with_data_purging_enabled",
|
|
28
|
+
"run_data_purger",
|
|
29
|
+
"run_data_purgers",
|
|
30
|
+
]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from django.apps import AppConfig
|
|
2
|
+
from django.core import checks
|
|
3
|
+
|
|
4
|
+
from .checks import check_data_purgers
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DjangoDataPurgerConfig(AppConfig):
|
|
8
|
+
name = "django_data_purger"
|
|
9
|
+
verbose_name = "Django Data Purger"
|
|
10
|
+
|
|
11
|
+
def ready(self) -> None:
|
|
12
|
+
checks.register(check_data_purgers)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from django.core.checks import Error, Warning
|
|
4
|
+
|
|
5
|
+
from .conf import settings
|
|
6
|
+
from .enums import DataPurgerAction
|
|
7
|
+
from .exceptions import DataPurgerImportException, DataPurgerInvalidConfiguration
|
|
8
|
+
from .services import import_data_purger
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def check_data_purgers(app_configs: Any, **kwargs: Any) -> list[Warning | Error]:
|
|
12
|
+
errors: list[Warning | Error] = []
|
|
13
|
+
|
|
14
|
+
for data_purger_import in settings.DATA_PURGERS:
|
|
15
|
+
# Make sure the data purger can be imported.
|
|
16
|
+
try:
|
|
17
|
+
data_purger_cls = import_data_purger(data_purger_import)
|
|
18
|
+
except DataPurgerImportException:
|
|
19
|
+
errors.append(
|
|
20
|
+
Error(
|
|
21
|
+
f"django-data-purger is not able to import the data "
|
|
22
|
+
f"purger {data_purger_import}.",
|
|
23
|
+
hint=f"Make sure the {data_purger_import} entry in "
|
|
24
|
+
"settings.DATA_PURGERS can be imported.",
|
|
25
|
+
)
|
|
26
|
+
)
|
|
27
|
+
continue
|
|
28
|
+
|
|
29
|
+
# Make sure we are able to retrieve the expected affected models tuples.
|
|
30
|
+
for action in DataPurgerAction:
|
|
31
|
+
try:
|
|
32
|
+
data_purger_cls.expected_affected_models(action=action)
|
|
33
|
+
except DataPurgerInvalidConfiguration:
|
|
34
|
+
errors.append(
|
|
35
|
+
Error(
|
|
36
|
+
f"django-data-purger could not find the expected affected "
|
|
37
|
+
f"models when {action} operations is executed by "
|
|
38
|
+
f"{data_purger_import}.",
|
|
39
|
+
hint=f"Make sure the expected_{action.value}_models attr on "
|
|
40
|
+
f"the data purger {data_purger_import} is a list of strings.",
|
|
41
|
+
)
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
return errors
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, ClassVar
|
|
4
|
+
|
|
5
|
+
from django.db.models import QuerySet
|
|
6
|
+
from pydantic import BaseModel, TypeAdapter, ValidationError
|
|
7
|
+
|
|
8
|
+
from .enums import DataPurgerAction
|
|
9
|
+
from .exceptions import DataPurgerInvalidConfiguration
|
|
10
|
+
from .utils import queryset_in_batches_non_slicing
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PurgeResult(BaseModel):
|
|
14
|
+
model: str
|
|
15
|
+
action: DataPurgerAction
|
|
16
|
+
affected_items: int
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PurgeUpdateResult(PurgeResult):
|
|
20
|
+
action: DataPurgerAction = DataPurgerAction.UPDATE
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PurgeDeleteResult(PurgeResult):
|
|
24
|
+
action: DataPurgerAction = DataPurgerAction.DELETE
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class DataPurger:
|
|
28
|
+
BATCH_SIZE_LARGE = 500_000
|
|
29
|
+
BATCH_SIZE_MEDIUM = 10_000
|
|
30
|
+
BATCH_SIZE_SMALL = 500
|
|
31
|
+
|
|
32
|
+
DRY_RUN_OVERRIDE: ClassVar[bool] = False
|
|
33
|
+
|
|
34
|
+
expected_update_models: ClassVar[tuple[str, ...]] = ()
|
|
35
|
+
expected_delete_models: ClassVar[tuple[str, ...]] = ()
|
|
36
|
+
|
|
37
|
+
def run(self, *, now: datetime) -> list[PurgeResult]:
|
|
38
|
+
raise NotImplementedError("Subclasses must implement run")
|
|
39
|
+
|
|
40
|
+
def _update_queryset_in_batch(
|
|
41
|
+
self,
|
|
42
|
+
queryset: QuerySet[Any],
|
|
43
|
+
*,
|
|
44
|
+
batch_size: int = BATCH_SIZE_MEDIUM,
|
|
45
|
+
updates: dict[str, Any],
|
|
46
|
+
affected_rows_limit: int | None = None,
|
|
47
|
+
) -> list[PurgeResult]:
|
|
48
|
+
"""Update queryset in batches, return a list of PurgeResults."""
|
|
49
|
+
results: list[PurgeResult] = []
|
|
50
|
+
|
|
51
|
+
for batch in queryset_in_batches_non_slicing(queryset, chunk_size=batch_size):
|
|
52
|
+
results += self._update_queryset(batch, updates)
|
|
53
|
+
|
|
54
|
+
if affected_rows_limit and (
|
|
55
|
+
sum(result.affected_items for result in results) >= affected_rows_limit
|
|
56
|
+
):
|
|
57
|
+
break
|
|
58
|
+
|
|
59
|
+
# Some models may have multiple purge results in the result list.
|
|
60
|
+
# Group them together by model name for better output.
|
|
61
|
+
queryset_result: dict[str, int] = defaultdict(int)
|
|
62
|
+
|
|
63
|
+
for result in results:
|
|
64
|
+
queryset_result[result.model] += result.affected_items
|
|
65
|
+
|
|
66
|
+
return [
|
|
67
|
+
PurgeUpdateResult(model=model, affected_items=affected_items)
|
|
68
|
+
for model, affected_items in queryset_result.items()
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
def _delete_queryset_in_batch(
|
|
72
|
+
self,
|
|
73
|
+
queryset: QuerySet[Any],
|
|
74
|
+
*,
|
|
75
|
+
batch_size: int = BATCH_SIZE_MEDIUM,
|
|
76
|
+
affected_rows_limit: int | None = None,
|
|
77
|
+
) -> list[PurgeResult]:
|
|
78
|
+
"""Delete queryset in batches, return a list of PurgeResults."""
|
|
79
|
+
results: list[PurgeResult] = []
|
|
80
|
+
|
|
81
|
+
for batch in queryset_in_batches_non_slicing(queryset, chunk_size=batch_size):
|
|
82
|
+
results += self._delete_queryset(batch)
|
|
83
|
+
|
|
84
|
+
if affected_rows_limit and (
|
|
85
|
+
sum(result.affected_items for result in results) >= affected_rows_limit
|
|
86
|
+
):
|
|
87
|
+
break
|
|
88
|
+
|
|
89
|
+
# Some models may have multiple purge results in the result list.
|
|
90
|
+
# Group them together by model name for better output.
|
|
91
|
+
queryset_result: dict[str, int] = defaultdict(int)
|
|
92
|
+
|
|
93
|
+
for result in results:
|
|
94
|
+
queryset_result[result.model] += result.affected_items
|
|
95
|
+
|
|
96
|
+
return [
|
|
97
|
+
PurgeDeleteResult(model=model, affected_items=affected_items)
|
|
98
|
+
for model, affected_items in queryset_result.items()
|
|
99
|
+
]
|
|
100
|
+
|
|
101
|
+
def _update_queryset(
|
|
102
|
+
self, queryset: QuerySet[Any], updates: dict[str, Any]
|
|
103
|
+
) -> list[PurgeResult]:
|
|
104
|
+
"""Update items in queryset and return a list of PurgeResults."""
|
|
105
|
+
affected_models = queryset.update(**updates)
|
|
106
|
+
|
|
107
|
+
result: list[PurgeResult] = []
|
|
108
|
+
|
|
109
|
+
result.append(
|
|
110
|
+
PurgeUpdateResult(
|
|
111
|
+
model=queryset.model._meta.label,
|
|
112
|
+
affected_items=affected_models,
|
|
113
|
+
)
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
return result
|
|
117
|
+
|
|
118
|
+
def _delete_queryset(self, queryset: QuerySet[Any]) -> list[PurgeResult]:
|
|
119
|
+
"""Delete items in queryset and return a list of PurgeResults."""
|
|
120
|
+
_, affected_models = queryset.delete()
|
|
121
|
+
|
|
122
|
+
result: list[PurgeResult] = []
|
|
123
|
+
|
|
124
|
+
for model, affected_items in affected_models.items():
|
|
125
|
+
result.append(PurgeDeleteResult(model=model, affected_items=affected_items))
|
|
126
|
+
|
|
127
|
+
return result
|
|
128
|
+
|
|
129
|
+
#
|
|
130
|
+
# Expected models
|
|
131
|
+
#
|
|
132
|
+
|
|
133
|
+
@classmethod
|
|
134
|
+
def expected_affected_models(cls, action: DataPurgerAction) -> set[str]:
|
|
135
|
+
"""Return a set with the expected models affected by this data purger."""
|
|
136
|
+
|
|
137
|
+
attr = f"expected_{action.value}_models"
|
|
138
|
+
|
|
139
|
+
if not hasattr(cls, attr):
|
|
140
|
+
raise DataPurgerInvalidConfiguration(
|
|
141
|
+
f"Data purger {cls} does not have the {attr} configured."
|
|
142
|
+
)
|
|
143
|
+
|
|
144
|
+
expected_models = getattr(cls, attr)
|
|
145
|
+
|
|
146
|
+
try:
|
|
147
|
+
TypeAdapter(set[str] | list[str] | tuple[str]).validate_python(
|
|
148
|
+
expected_models
|
|
149
|
+
)
|
|
150
|
+
except ValidationError as exc:
|
|
151
|
+
raise DataPurgerInvalidConfiguration(
|
|
152
|
+
f"The {attr} attr on the data purger {cls} has to be a list of strings."
|
|
153
|
+
) from exc
|
|
154
|
+
|
|
155
|
+
return set(getattr(cls, attr))
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
class DataPurgerException(Exception):
|
|
2
|
+
"""Base exception for all other exceptions raised by this library."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class DataPurgerImportException(DataPurgerException):
|
|
6
|
+
"""Raised when the framework is unable to import a data purger."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DataPurgerInvalidConfiguration(DataPurgerException):
|
|
10
|
+
"""Raised when the data purger is configured incorrectly."""
|
|
File without changes
|
|
File without changes
|
django_data_purger-0.2.3/src/django_data_purger/management/commands/calculate_model_dependencies.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
from collections import defaultdict
|
|
2
|
+
from typing import Any, DefaultDict, cast
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
import networkx as nx
|
|
6
|
+
except ImportError:
|
|
7
|
+
nx = None
|
|
8
|
+
|
|
9
|
+
from django.apps import apps
|
|
10
|
+
from django.core.management.base import BaseCommand, CommandParser
|
|
11
|
+
from django.db import models
|
|
12
|
+
|
|
13
|
+
from django_data_purger.enums import DataPurgerAction
|
|
14
|
+
from django_data_purger.services import get_tables_with_data_purging_enabled
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Collector:
|
|
18
|
+
def __init__(self, *, source_model: type[models.Model]) -> None:
|
|
19
|
+
if nx is None:
|
|
20
|
+
raise RuntimeError(
|
|
21
|
+
"Please install networkx before using the model dependency collector."
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
self.source_model = source_model
|
|
25
|
+
|
|
26
|
+
self.seen_models: set[type[models.Model]] = set()
|
|
27
|
+
self.dependencies: DefaultDict[type[models.Model], set[type[models.Model]]] = (
|
|
28
|
+
defaultdict(set)
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
def add_dependency(
|
|
32
|
+
self, *, model: type[models.Model], dependency: type[models.Model]
|
|
33
|
+
) -> None:
|
|
34
|
+
self.dependencies[model].add(dependency)
|
|
35
|
+
|
|
36
|
+
def collect(self, *, model: type[models.Model]) -> None:
|
|
37
|
+
child_relations = (
|
|
38
|
+
rel
|
|
39
|
+
for rel in model._meta.get_fields(include_hidden=True)
|
|
40
|
+
if rel.auto_created
|
|
41
|
+
and not rel.concrete
|
|
42
|
+
and (rel.one_to_one or rel.one_to_many)
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
for rel in child_relations:
|
|
46
|
+
related_model = cast(type[models.Model], rel.related_model)
|
|
47
|
+
|
|
48
|
+
if model == related_model:
|
|
49
|
+
continue
|
|
50
|
+
|
|
51
|
+
if not related_model:
|
|
52
|
+
continue
|
|
53
|
+
|
|
54
|
+
self.add_dependency(model=model, dependency=related_model)
|
|
55
|
+
|
|
56
|
+
if related_model not in self.seen_models:
|
|
57
|
+
self.seen_models.add(related_model)
|
|
58
|
+
self.collect(model=related_model)
|
|
59
|
+
|
|
60
|
+
def calculate_affected_models(self) -> int:
|
|
61
|
+
affected_models: set[type[models.Model]] = set()
|
|
62
|
+
|
|
63
|
+
for dependencies in self.dependencies.values():
|
|
64
|
+
affected_models |= dependencies
|
|
65
|
+
|
|
66
|
+
return len(affected_models)
|
|
67
|
+
|
|
68
|
+
def calculate_dependency_ordering(self) -> list[list[type[models.Model]]]: # noqa
|
|
69
|
+
# Grab a copy of the dependencies, we remove items from it while
|
|
70
|
+
# calculating the dependency ordering.
|
|
71
|
+
dependencies = self.dependencies.copy()
|
|
72
|
+
|
|
73
|
+
delete_batches: list[list[type[models.Model]]] = []
|
|
74
|
+
|
|
75
|
+
while dependencies:
|
|
76
|
+
models_to_delete: set[type[models.Model]] = set()
|
|
77
|
+
current_batch: set[type[models.Model]] = set()
|
|
78
|
+
|
|
79
|
+
for parent, model_dependencies in dependencies.items():
|
|
80
|
+
all_models = {parent, *list(model_dependencies)}
|
|
81
|
+
|
|
82
|
+
for model in all_models:
|
|
83
|
+
# We can't delete the model if it exists as a key in
|
|
84
|
+
# the dependencies mapping.
|
|
85
|
+
if model in dependencies.keys():
|
|
86
|
+
continue
|
|
87
|
+
|
|
88
|
+
# It's safe to add the model to the current batch of models that
|
|
89
|
+
# can be deleted independently of each other.
|
|
90
|
+
current_batch.add(model)
|
|
91
|
+
|
|
92
|
+
# Remove the model from the dependency tree.
|
|
93
|
+
for deps in dependencies.values():
|
|
94
|
+
try:
|
|
95
|
+
deps.remove(model)
|
|
96
|
+
except KeyError:
|
|
97
|
+
pass
|
|
98
|
+
|
|
99
|
+
# Models without any dependencies left can be deleted from
|
|
100
|
+
# the dependency tree.
|
|
101
|
+
for model_to_delete, deps in dependencies.items():
|
|
102
|
+
if not deps:
|
|
103
|
+
models_to_delete.add(model_to_delete)
|
|
104
|
+
|
|
105
|
+
# Break circular dependencies: a model that is part of a cycle and whose
|
|
106
|
+
# remaining dependencies all belong to that same cycle has to be removed.
|
|
107
|
+
edges = [[k, v] for k, items in dependencies.items() for v in items]
|
|
108
|
+
graph = nx.DiGraph(edges)
|
|
109
|
+
cycles = nx.simple_cycles(graph)
|
|
110
|
+
|
|
111
|
+
for cycle in cycles:
|
|
112
|
+
cycle_set = set(cycle)
|
|
113
|
+
for _model, _dependencies in dependencies.items():
|
|
114
|
+
if _model in cycle and _dependencies.issubset(cycle_set):
|
|
115
|
+
models_to_delete.add(_model)
|
|
116
|
+
|
|
117
|
+
# Add the current batch of models to the result.
|
|
118
|
+
delete_batches.append(list(current_batch))
|
|
119
|
+
|
|
120
|
+
# Remove the models without any dependencies left from the dependency tree
|
|
121
|
+
# before calculating the next batch.
|
|
122
|
+
for model in models_to_delete:
|
|
123
|
+
del dependencies[model]
|
|
124
|
+
|
|
125
|
+
return delete_batches
|
|
126
|
+
|
|
127
|
+
def print_dependency_results(self) -> None:
    """
    Print a dependency report for ``self.source_model`` to stdout.

    The report consists of three sections separated by ``==============``:

    1. For every model in ``self.dependencies``, the models that depend on it.
    2. The value returned by ``self.calculate_affected_models()``.
    3. The batches returned by ``self.calculate_dependency_ordering()``.
       A model is prefixed with ✅ when its name is among the tables that
       have data purging enabled for the DELETE action, otherwise with 🛑.
    """

    def get_model_name(model: type[models.Model]) -> str:
        # Label in the form "<app_label>.<object_name>".
        return f"{model._meta.app_label}.{model._meta.object_name}"

    # Section 1: direct dependants of each collected model.
    for model, dependencies in self.dependencies.items():
        print(f"The following models depend on {get_model_name(model)}:")
        for dependency in dependencies:
            print(f"- {get_model_name(dependency)}")
        print()

    print()
    print("==============")
    print()

    # Section 2: summary line for the source model.
    print(
        f"{self.calculate_affected_models()} models depend "
        f"on {get_model_name(self.source_model)}."
    )

    print()
    print("==============")
    print()

    # Section 3: deletion order, one batch at a time.
    print(
        f"The models have to be deleted in the following order "
        f"before you can delete {get_model_name(self.source_model)}:"
    )
    print("(Models from each batch can be deleted in an arbitrary order.)")
    print()

    batches = self.calculate_dependency_ordering()
    # Names returned here are compared against get_model_name() output below.
    tables_with_purging = get_tables_with_data_purging_enabled(
        action=DataPurgerAction.DELETE
    )

    for i, batch in enumerate(batches):
        print(f"Batch {i + 1}:")
        for model in batch:
            model_name = get_model_name(model)
            print(
                "- "
                + ("✅" if model_name in tables_with_purging else "🛑")
                + f" {model_name}"
            )

        print()
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
class Command(BaseCommand):
    """Management command that reports which models depend on a given model."""

    help = "List models depending on the input model"

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the required ``--model`` option."""
        parser.add_argument(
            "--model",
            required=True,
            help="Model to inspect, in the form <app_label>.<model_name>.",
        )

    def handle(self, *args: Any, **options: Any) -> None:
        """
        Resolve ``--model``, collect its dependencies and print the report.

        Raises ``CommandError`` when the value is not of the form
        ``<app_label>.<model_name>`` or when no such model is installed.
        """
        # Imported here so the command does not depend on the module-level
        # import list, which only needs BaseCommand and CommandParser.
        from django.core.management.base import CommandError

        model_full_name = options["model"]

        # A value without a dot (or with several) cannot be unpacked into
        # exactly two parts; report that as a usage error, not a traceback.
        try:
            app_label, model_name = model_full_name.split(".")
        except ValueError as exc:
            raise CommandError(
                f"Invalid model {model_full_name!r}, expected the format "
                "<app_label>.<model_name>."
            ) from exc

        # apps.get_model() raises LookupError for an unknown app or model.
        try:
            model = apps.get_model(app_label=app_label, model_name=model_name)
        except LookupError as exc:
            raise CommandError(
                f"Model {model_full_name!r} could not be found."
            ) from exc

        collector = Collector(source_model=model)
        collector.collect(model=model)
        collector.print_dependency_results()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from django.core.management.base import BaseCommand, CommandParser
|
|
4
|
+
|
|
5
|
+
from django_data_purger.enums import DataPurgerAction
|
|
6
|
+
from django_data_purger.exceptions import DataPurgerException
|
|
7
|
+
from django_data_purger.services import get_tables_with_data_purging_enabled
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Command(BaseCommand):
    """Management command listing the tables with data purging enabled."""

    help = "Print tables with data purging enabled."

    def add_arguments(self, parser: CommandParser) -> None:
        """Register the mandatory ``--action`` option."""
        parser.add_argument("--action", required=True)

    def handle(self, *args: Any, **options: Any) -> None:
        """
        Convert ``--action`` to a ``DataPurgerAction`` and print its tables.

        Raises ``DataPurgerException`` when the value is not a valid action.
        """
        raw_action = options["action"]

        try:
            action = DataPurgerAction(raw_action)
        except ValueError as exc:
            # List every supported action so the user can correct the input.
            choices = ", ".join(DataPurgerAction)
            message = (
                f"Action {raw_action} is not a valid action, use one "
                f"of {choices}."
            )
            raise DataPurgerException(message) from exc

        enabled_tables = get_tables_with_data_purging_enabled(action=action)

        print("Print tables with data purging enabled:")

        for table_name in enabled_tables:
            print(f"- {table_name}")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from django.core.management import BaseCommand
|
|
4
|
+
from django.core.management.base import CommandParser
|
|
5
|
+
|
|
6
|
+
from django_data_purger.services.data_purger import run_data_purgers
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Command(BaseCommand):
    """Management command that runs the data purgers."""

    help: str = "Removes stale database objects."

    def add_arguments(self, parser: CommandParser) -> None:
        """Add the ``--force`` flag, which is off unless passed explicitly."""
        parser.add_argument("--force", action="store_true", default=False)

    def handle(self, *args: Any, **options: Any) -> None:
        """Run the data purgers, as a dry run unless ``--force`` was given."""
        is_dry_run = not options["force"]

        run_data_purgers(dry_run=is_dry_run)
|
|
File without changes
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
from .data_purger import import_data_purger, run_data_purger, run_data_purgers
|
|
2
|
+
from .tables import get_tables_with_data_purging_enabled
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"get_tables_with_data_purging_enabled",
|
|
6
|
+
"import_data_purger",
|
|
7
|
+
"run_data_purger",
|
|
8
|
+
"run_data_purgers",
|
|
9
|
+
]
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import time
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
from django.db import transaction
|
|
6
|
+
from django.utils import timezone
|
|
7
|
+
from django.utils.module_loading import import_string
|
|
8
|
+
|
|
9
|
+
from django_data_purger.conf import settings
|
|
10
|
+
from django_data_purger.data_purger import DataPurger, PurgeResult
|
|
11
|
+
from django_data_purger.enums import DataPurgerAction
|
|
12
|
+
from django_data_purger.exceptions import DataPurgerImportException
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DryRunException(Exception):
    """
    Exception raised to roll back the transaction.

    This exception is only used to control the rollback,
    and it should not be exposed outside of this file.
    """
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def import_data_purger(data_purger_path: str) -> type[DataPurger]:
    """
    Import a data purger class from its dotted path.

    ``data_purger_path`` is passed to ``import_string``. The imported object
    is returned only if it is a class derived from ``DataPurger``.

    Raises ``DataPurgerImportException`` if the import fails, or if the
    imported object is not a class based on ``DataPurger``.
    """

    try:
        data_purger_cls = import_string(data_purger_path)
    except ImportError as exc:
        raise DataPurgerImportException(
            "Data purger could not be imported, check the import path."
        ) from exc

    # Validate explicitly rather than with ``assert``: assertions are removed
    # when Python runs with -O, and ``issubclass`` raises TypeError when its
    # first argument is not a class (e.g. a function or an instance).
    is_data_purger = isinstance(data_purger_cls, type) and issubclass(
        data_purger_cls, DataPurger
    )
    if not is_data_purger:
        raise DataPurgerImportException(
            "Imported object is not based on the DataPurger base class."
        )

    return data_purger_cls
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def run_data_purgers(dry_run: bool = True) -> None:
    """
    Run every data purger listed in settings.DATA_PURGERS.

    Purgers that cannot be imported are logged and skipped. All purgers share
    the same ``now`` timestamp, and the combined number of updated and deleted
    items is logged once they have all run.
    """
    import_paths = settings.DATA_PURGERS

    reference_time = timezone.now()

    collected: list[PurgeResult] = []

    for import_path in import_paths:
        try:
            purger_cls = import_data_purger(import_path)
        except DataPurgerImportException:
            logger.warning(
                "Could not import data purger %s, skipping please fix your "
                "purger imports.",
                import_path,
            )
            continue

        collected += run_data_purger(
            data_purger=purger_cls(), dry_run=dry_run, now=reference_time
        )

    def affected_by(action: DataPurgerAction) -> int:
        # Total affected items across all results with the given action.
        return sum(
            entry.affected_items for entry in collected if entry.action == action
        )

    updated_items = affected_by(DataPurgerAction.UPDATE)
    deleted_items = affected_by(DataPurgerAction.DELETE)

    logger.info(
        f"Data purgers updated {updated_items:,} and deleted {deleted_items:,} items"
    )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def run_data_purger(
    *, data_purger: DataPurger, dry_run: bool, now: datetime | None = None
) -> list[PurgeResult]:
    """
    Execute one data purger inside a transaction and log what it did.

    The purger runs within ``transaction.atomic()``. A ``RuntimeError`` is
    raised from inside the atomic block if a result refers to a model that is
    not among the purger's expected affected models for that action. When
    ``dry_run`` is true, or the purger has ``DRY_RUN_OVERRIDE`` set, a
    ``DryRunException`` is raised inside the atomic block and swallowed here.
    The purge results are returned either way.
    """
    name = data_purger.__class__.__name__

    logger.info(f"Running data purger {name}")

    if not now:
        now = timezone.now()

    try:
        with transaction.atomic():
            started = time.monotonic()

            results = data_purger.run(now=now)

            elapsed = time.monotonic() - started

            logger.info(f"Data purger {name} done (in {elapsed:.1f}s)")

            # Every reported model must be one the purger declared up front.
            for entry in results:
                allowed_models = data_purger.expected_affected_models(
                    action=entry.action
                )

                if entry.model in allowed_models:
                    continue

                raise RuntimeError(
                    f"Unexpected {entry.action} on model {entry.model} by "
                    f"{name}, rolling back transaction"
                )

            if dry_run:
                raise DryRunException()

            if data_purger.DRY_RUN_OVERRIDE:
                logger.info(
                    "Data purger %s has the DRY_RUN_OVERRIDE flag set to True, "
                    "changes are going to be rolled back.",
                    name,
                )
                raise DryRunException()

    except DryRunException:
        # Raised only to leave the atomic block with an exception.
        pass

    if not results:
        logger.info(f"No changes made by {name}")

    for entry in results:
        logger.info(
            f"Purge result from {name}: "
            f"{entry.model} {entry.action} {entry.affected_items:,} items"
        )

    return results
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from logging import getLogger
|
|
2
|
+
|
|
3
|
+
from django_data_purger.conf import settings
|
|
4
|
+
from django_data_purger.enums import DataPurgerAction
|
|
5
|
+
from django_data_purger.exceptions import DataPurgerImportException
|
|
6
|
+
|
|
7
|
+
from .data_purger import import_data_purger
|
|
8
|
+
|
|
9
|
+
logger = getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def get_tables_with_data_purging_enabled(*, action: DataPurgerAction) -> list[str]:
    """
    Return the sorted tables that the configured data purgers affect.

    Purgers from settings.DATA_PURGERS that cannot be imported, or that have
    DRY_RUN_OVERRIDE set, are logged and contribute no tables.
    """
    enabled_tables: set[str] = set()

    for import_path in settings.DATA_PURGERS:
        try:
            purger_cls = import_data_purger(import_path)
        except DataPurgerImportException:
            logger.warning(
                "Could not import data purger %s, skipping please fix your "
                "purger imports.",
                import_path,
            )
            continue

        if purger_cls.DRY_RUN_OVERRIDE:
            # Such a purger always has its changes rolled back, so its
            # tables are not counted as having purging enabled.
            logger.info(
                "Data purger %s has the DRY_RUN_OVERRIDE flag set to True, "
                "skipping tables.",
                import_path,
            )
        else:
            enabled_tables |= purger_cls.expected_affected_models(action=action)

    return sorted(enabled_tables)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from typing import Generator, TypeVar
|
|
2
|
+
|
|
3
|
+
from django.db.models import Model, QuerySet
|
|
4
|
+
|
|
5
|
+
TModel = TypeVar("TModel", bound=Model)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def queryset_in_batches_non_slicing(
    queryset: QuerySet[TModel], chunk_size: int = 1000
) -> Generator[QuerySet[TModel], None, None]:
    """
    Yield sub-querysets covering consecutive primary key ranges.

    The queryset is ordered by primary key and split into ranges of
    ``chunk_size`` consecutive pk values, from the first pk to the last one.
    It is filtered on pk bounds instead of being sliced, which allows
    queryset operations such as `.update()` and `.delete()` on each batch.
    A batch may match fewer than ``chunk_size`` rows, or none at all, when
    there are gaps in the pk sequence.

    The bounds are computed with integer arithmetic on ``pk``, so the model
    must have a numeric primary key.

    Raises ``ValueError`` on first iteration if ``chunk_size`` is less than 1.
    """

    # A zero chunk size would never advance the bounds, and a negative one
    # would produce inverted ranges.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer.")

    queryset = queryset.order_by("pk")

    first_element = queryset.first()

    # Empty queryset
    if first_element is None:
        return

    last_element = queryset.last()

    # The rows may have disappeared between the two queries above.
    if last_element is None:
        return

    # Exclusive lower bound of the first range. It is not clamped to zero,
    # so that rows with a primary key of 0 or below are covered as well.
    lower_pk = first_element.pk - 1
    last_pk = last_element.pk

    while lower_pk < last_pk:
        upper_pk = lower_pk + chunk_size
        yield queryset.filter(pk__gt=lower_pk, pk__lte=upper_pk)
        lower_pk = upper_pk
|