edu-rdm-integration 3.21.0__py3-none-any.whl → 3.23.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edu_rdm_integration/core/consts.py +3 -0
- edu_rdm_integration/pipelines/cleanup_outdated_data/management/commands/rdm_cleanup_outdated_data.py +32 -1
- edu_rdm_integration/pipelines/transfer/tasks.py +5 -0
- edu_rdm_integration/rdm_entities/models.py +5 -0
- edu_rdm_integration/rdm_models/models.py +29 -11
- edu_rdm_integration/stages/collect_data/functions/base/runners.py +7 -0
- edu_rdm_integration/stages/collect_data/migrations/0005_alter_rdmcollectingdatasubstage_previous.py +22 -0
- edu_rdm_integration/stages/collect_data/migrations/0006_fix_fk_constraints.py +59 -0
- edu_rdm_integration/stages/collect_data/models.py +5 -0
- edu_rdm_integration/stages/collect_data/registry/actions.py +4 -4
- edu_rdm_integration/stages/collect_data/registry/templates/ui-js/collect-command-window.js +8 -19
- edu_rdm_integration/stages/collect_data/registry/templates/ui-js/validators.js +0 -15
- edu_rdm_integration/stages/export_data/functions/base/runners.py +11 -0
- edu_rdm_integration/stages/export_data/migrations/0003_alter_rdmexportingdatasubstageattachment_exporting_data_sub_stage.py +22 -0
- edu_rdm_integration/stages/export_data/migrations/0004_fix_fk_constraints.py +76 -0
- edu_rdm_integration/stages/export_data/models.py +12 -1
- edu_rdm_integration/stages/export_data/registry/actions.py +5 -4
- edu_rdm_integration/stages/export_data/registry/templates/ui-js/create-export-command-win.js +15 -31
- edu_rdm_integration/stages/service/service_outdated_data/cleaners/__init__.py +0 -0
- edu_rdm_integration/stages/service/service_outdated_data/cleaners/base.py +305 -0
- edu_rdm_integration/stages/service/service_outdated_data/cleaners/collect_data.py +119 -0
- edu_rdm_integration/stages/service/service_outdated_data/cleaners/consts.py +1 -0
- edu_rdm_integration/stages/service/service_outdated_data/cleaners/export_data.py +174 -0
- edu_rdm_integration/stages/service/service_outdated_data/cleaners/upload_data.py +64 -0
- edu_rdm_integration/stages/service/service_outdated_data/managers.py +80 -0
- edu_rdm_integration/stages/upload_data/enums.py +2 -0
- edu_rdm_integration/stages/upload_data/export_managers.py +3 -1
- edu_rdm_integration/stages/upload_data/management/commands/custom_check_upload_status.py +59 -0
- edu_rdm_integration/stages/upload_data/management/commands/custom_upload_files.py +45 -0
- edu_rdm_integration/stages/upload_data/migrations/0003_auto_20251006_1417.py +28 -0
- edu_rdm_integration/stages/upload_data/migrations/0004_fix_fk_constraints.py +79 -0
- edu_rdm_integration/stages/upload_data/models.py +5 -2
- edu_rdm_integration/stages/upload_data/queues.py +50 -2
- edu_rdm_integration/stages/upload_data/tasks.py +2 -2
- edu_rdm_integration/stages/utils.py +61 -0
- edu_rdm_integration/templates/ui-js/collect-and-export-validators.js +54 -0
- {edu_rdm_integration-3.21.0.dist-info → edu_rdm_integration-3.23.0.dist-info}/METADATA +86 -59
- {edu_rdm_integration-3.21.0.dist-info → edu_rdm_integration-3.23.0.dist-info}/RECORD +41 -24
- {edu_rdm_integration-3.21.0.dist-info → edu_rdm_integration-3.23.0.dist-info}/WHEEL +0 -0
- {edu_rdm_integration-3.21.0.dist-info → edu_rdm_integration-3.23.0.dist-info}/licenses/LICENSE +0 -0
- {edu_rdm_integration-3.21.0.dist-info → edu_rdm_integration-3.23.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from abc import (
|
|
3
|
+
ABCMeta,
|
|
4
|
+
abstractmethod,
|
|
5
|
+
)
|
|
6
|
+
from pathlib import (
|
|
7
|
+
Path,
|
|
8
|
+
)
|
|
9
|
+
from typing import (
|
|
10
|
+
TYPE_CHECKING,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
import asyncpg
|
|
14
|
+
from django.conf import (
|
|
15
|
+
settings,
|
|
16
|
+
)
|
|
17
|
+
from django.db import (
|
|
18
|
+
connection,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from educommon import (
|
|
22
|
+
logger,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from asyncpg import (
|
|
28
|
+
Pool,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class BaseServiceOutdatedDataCleaner(metaclass=ABCMeta):
    """Base class for cleaners of outdated service data.

    A concrete cleaner sets ``model`` and implements ``get_merged_conditions``.
    ``run`` then splits the model's table into id ranges, builds one DELETE
    statement per range and executes the statements concurrently through an
    asyncpg connection pool.
    """

    # Django model whose table is cleaned; must be set by subclasses.
    model = None

    # PL/pgSQL block that walks ``{table_name}`` in id order, ``{chunk_size}``
    # rows at a time, and stores the smallest and largest id of every chunk in
    # the temporary table ``rdm_chunk_bounds``; the trailing SELECT returns
    # rows of (chunk_number, first_id, last_id).
    SELECT_RDM_CHUNK_BOUNDED_SQL = """
        DO $$
        DECLARE
            chunk_size INT := {chunk_size};
            last_id INT := 0;
            first_id INT;
            last_chunk_id INT;
        BEGIN
            DROP TABLE IF EXISTS rdm_chunk_bounds;
            CREATE TEMP TABLE rdm_chunk_bounds (
                chunk_number INT,
                first_id INT,
                last_id INT
            );

            DROP TABLE IF EXISTS tmp_chunk;
            CREATE TEMP TABLE tmp_chunk (id INT) ON COMMIT DROP;

            WHILE TRUE LOOP
                TRUNCATE tmp_chunk;

                INSERT INTO tmp_chunk (id)
                SELECT id
                FROM {table_name}
                WHERE id > last_id
                ORDER BY id
                LIMIT chunk_size;

                IF NOT FOUND THEN
                    EXIT;
                END IF;

                SELECT MIN(id), MAX(id)
                INTO first_id, last_chunk_id
                FROM tmp_chunk;

                INSERT INTO rdm_chunk_bounds (chunk_number, first_id, last_id)
                VALUES (
                    (SELECT COUNT(*) FROM rdm_chunk_bounds) + 1,
                    first_id,
                    last_chunk_id
                );

                last_id := last_chunk_id;
            END LOOP;
        END $$;

        SELECT * FROM rdm_chunk_bounds ORDER BY chunk_number;
    """

    # DELETE for one id range. Rows of the range are exposed to
    # ``{conditions}`` under the alias ``tbl``; the statement returns a single
    # row with the number of deleted records in ``deleted_count``.
    REMOVE_OUTDATED_DATA_SQL = """
        WITH deleted_rows AS (
            DELETE FROM {table_name}
            WHERE id IN (
                WITH tbl AS (
                    SELECT *
                    FROM {table_name}
                    WHERE id >= {first_id}
                        AND id <= {last_id}
                )
                SELECT tbl.id
                FROM tbl
                WHERE {conditions}
            )
            RETURNING id
        )
        SELECT COUNT(*) AS deleted_count FROM deleted_rows;
    """

    def __init__(
        self,
        *args,
        safe: bool = False,
        log_sql: bool = False,
        **kwargs
    ):
        """Initialise the cleaner.

        Args:
            safe: When true, DELETE statements are only reported, not executed.
            log_sql: When true, the generated SQL is written to the log.
        """
        self._safe = safe
        self._log_sql = log_sql
        # Running total of records removed by this cleaner instance.
        self._deleted_count = 0

        super().__init__(*args, **kwargs)

    @abstractmethod
    def get_merged_conditions(self) -> str:
        """Return the SQL condition that selects the outdated rows to delete.

        The text is substituted into the WHERE clause of
        ``REMOVE_OUTDATED_DATA_SQL``, where the candidate rows are aliased
        as ``tbl``.
        """

    @classmethod
    def get_table_name(cls) -> str:
        """Return the database table name of ``model``.

        Raises:
            NotImplementedError: If the ``model`` attribute has not been set.
        """
        if cls.model is None:
            raise NotImplementedError('Необходимо задать атрибут "model"')

        return cls.model._meta.db_table

    async def file_deletion_process(self, file_paths: list[str]):
        """Delete the files that belonged to removed outdated records.

        The base implementation does nothing; ``execute_query`` awaits it
        with the ``file_path`` values returned by a DELETE statement.
        """

    def get_orphan_reference_condition(
        self,
        reference_table: str,
        reference_field: str,
        local_field: str = 'id'
    ) -> str:
        """Build a condition that holds when no row of another table matches.

        The result is ``NOT EXISTS`` over ``reference_table`` rows whose
        ``reference_field`` equals ``tbl.<local_field>``. The arguments are
        interpolated into the SQL text as-is.
        """
        return f"""
            NOT EXISTS (
                SELECT 1
                FROM {reference_table} ref
                WHERE ref.{reference_field} = tbl.{local_field}
            )
        """

    def get_status_condition(
        self,
        related_table: str,
        related_field: str,
        status_value: str,
        days: int,
        local_field: str = 'id'
    ) -> str:
        """Build a condition on the status and age of a related row.

        The result is ``EXISTS`` over ``related_table`` rows that are joined
        through ``related_field = tbl.<local_field>``, have ``status_id``
        equal to ``status_value`` and an ``ended_at`` at least ``days`` days
        in the past. The arguments are interpolated into the SQL text as-is.
        """
        return f"""
            EXISTS (
                SELECT 1
                FROM {related_table} sub
                WHERE sub.{related_field} = tbl.{local_field}
                    AND sub.status_id = '{status_value}'
                    AND sub.ended_at <= NOW() - INTERVAL '{days} days'
            )
        """

    def get_chunk_bounded(self):
        """Return the chunk boundaries of the current table.

        Runs ``SELECT_RDM_CHUNK_BOUNDED_SQL`` on the Django connection with the
        chunk size taken from
        ``settings.RDM_CLEANUP_MODELS_OUTDATED_DATA_CHUNK_SIZE`` and returns
        the fetched rows.
        """
        table_name = self.get_table_name()
        get_chunk_bounded_sql = self.SELECT_RDM_CHUNK_BOUNDED_SQL.format(
            table_name=table_name,
            chunk_size=settings.RDM_CLEANUP_MODELS_OUTDATED_DATA_CHUNK_SIZE,
        )

        if self._log_sql:
            # sqlparse is optional: it is used only to pretty-print the query.
            try:
                import sqlparse
            except ImportError:
                sqlparse = None

            if sqlparse:
                get_chunk_bounded_sql = sqlparse.format(
                    sql=get_chunk_bounded_sql,
                    reindent=True,
                    strip_comments=True,
                )
            logger.info(
                f'Запрос для получения границ чанков модели {table_name}: \n{get_chunk_bounded_sql}\n'
            )

        with connection.cursor() as cursor:
            cursor.execute(get_chunk_bounded_sql)
            result = cursor.fetchall()

        return result

    async def execute_query(self, pool: 'Pool', query: str):
        """Execute one DELETE statement on a pooled connection.

        In safe mode the statement is not executed, only reported. Otherwise
        the returned rows are inspected: a ``deleted_count`` column is added to
        the running total; any other result is treated as one row per deleted
        record, and its non-empty ``file_path`` values are passed to
        ``file_deletion_process``. Errors are logged and not re-raised, so one
        failing chunk does not stop the remaining ones.
        """
        async with pool.acquire() as conn:
            try:
                if self._safe:
                    logger.info('Запрос не будет выполнен, включен безопасный режим!\n')

                    if self._log_sql:
                        logger.info(f'{query}\n')
                else:
                    result = await conn.fetch(query)
                    if not result:
                        return

                    if self._log_sql:
                        logger.info(f'При помощи запроса:\n{query}\n')

                    # The shape of the first record tells which kind of
                    # statement was executed.
                    if 'deleted_count' in result[0]:
                        deleted_count = result[0]['deleted_count']
                        self._deleted_count += deleted_count
                        logger.info(f'Было удалено записей: {deleted_count}')
                    else:
                        file_paths = [record['file_path'] for record in result if record.get('file_path')]
                        if file_paths:
                            await self.file_deletion_process(file_paths)
                        deleted_count = len(result)
                        self._deleted_count += deleted_count
                        logger.info(f'Было удалено записей с файлами: {deleted_count}')

            except Exception as e:
                logger.error(f'Ошибка при выполнении {query}\n{e}')

    def prepare_queries(self, chunk_bounded: list[tuple[int, int, int]]) -> list[str]:
        """Build the DELETE statements for the given chunk boundaries.

        Args:
            chunk_bounded: Rows of (chunk_number, first_id, last_id).

        Returns:
            One statement per chunk, or an empty list when the cleaner
            produced no deletion condition.
        """
        table_name = self.get_table_name()
        conditions = self.get_merged_conditions()

        # An empty condition would render "WHERE" with nothing after it, so
        # every chunk would fail with a syntax error; skip the clean-up.
        if not conditions or not conditions.strip():
            logger.warning(f'Не заданы условия удаления для таблицы {table_name}, очистка пропущена')

            return []

        return [
            self.REMOVE_OUTDATED_DATA_SQL.format(
                table_name=table_name,
                first_id=first_id,
                last_id=last_id,
                conditions=conditions,
            )
            for _chunk_number, first_id, last_id in chunk_bounded
        ]

    async def execute_queries(self, queries: list[str]) -> None:
        """Execute the statements concurrently through an asyncpg pool.

        The pool is opened with the credentials of the ``default`` Django
        database and a fixed size of
        ``settings.RDM_CLEANUP_MODELS_OUTDATED_DATA_POOL_SIZE``; it is closed
        once all statements have finished.
        """
        db_settings = settings.DATABASES['default']
        pool_size = settings.RDM_CLEANUP_MODELS_OUTDATED_DATA_POOL_SIZE

        # "async with" closes the pool, so its connections are released even
        # when gathering is interrupted.
        async with asyncpg.create_pool(
            max_size=pool_size,
            min_size=pool_size,
            host=db_settings['HOST'],
            port=db_settings['PORT'],
            user=db_settings['USER'],
            password=db_settings['PASSWORD'],
            database=db_settings['NAME'],
        ) as pool:
            await asyncio.gather(*(self.execute_query(pool, query) for query in queries))

    def run(self):
        """Run the clean-up of outdated data and log the number of removed records."""
        chunk_bounded = self.get_chunk_bounded()

        queries = self.prepare_queries(chunk_bounded=chunk_bounded)

        if queries:
            # A dedicated loop is created for the run and closed afterwards.
            event_loop = asyncio.new_event_loop()
            try:
                event_loop.run_until_complete(self.execute_queries(queries=queries))
            finally:
                event_loop.close()

        logger.info(f'Удалено записей модели {self.model.__name__}: {self._deleted_count}')
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class ServiceFileCleaner:
    """Asynchronous helper for safely deleting files located under MEDIA_ROOT."""

    @staticmethod
    async def file_deletion_process(file_paths: list[str]) -> None:
        """Delete the given files, treating the paths as relative to MEDIA_ROOT.

        Each path is resolved against ``settings.MEDIA_ROOT``. A path that
        resolves outside that directory (an absolute path or one containing
        ``..``) is skipped with a warning. Missing paths and non-files are
        ignored. A failure to delete one file is logged and does not affect
        the others.

        Args:
            file_paths: Paths of the files to delete.
        """
        media_root = Path(settings.MEDIA_ROOT).resolve()

        def remove_inside_media_root(path: Path) -> None:
            """Unlink ``path`` when it is a regular file inside MEDIA_ROOT."""
            if not path.is_relative_to(media_root):
                logger.warning(f'Файл {path} находится вне MEDIA_ROOT, удаление пропущено')

                return

            # is_file() is already false for a missing path, so no separate
            # existence check is needed.
            if path.is_file():
                path.unlink()

        async def delete_file(path_str: str):
            path = (media_root / path_str).resolve()
            try:
                # Blocking file-system calls run in a worker thread.
                await asyncio.to_thread(remove_inside_media_root, path)
            except Exception as e:
                logger.warning(f"Не удалось удалить {path}: {e}")

        await asyncio.gather(*(delete_file(path) for path in file_paths))
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from typing import (
|
|
2
|
+
Optional,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
from django.core.exceptions import (
|
|
6
|
+
FieldDoesNotExist,
|
|
7
|
+
)
|
|
8
|
+
from django.db.models import (
|
|
9
|
+
Subquery,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from edu_rdm_integration.rdm_models.models import (
|
|
13
|
+
RDMModelEnum,
|
|
14
|
+
)
|
|
15
|
+
from edu_rdm_integration.stages.collect_data.models import (
|
|
16
|
+
RDMCollectingDataCommandProgress,
|
|
17
|
+
RDMCollectingDataStage,
|
|
18
|
+
RDMCollectingDataSubStage,
|
|
19
|
+
)
|
|
20
|
+
from educommon.utils.seqtools import (
|
|
21
|
+
make_chunks,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
from .base import (
|
|
25
|
+
BaseServiceOutdatedDataCleaner,
|
|
26
|
+
)
|
|
27
|
+
from .consts import (
|
|
28
|
+
UNION_CHUNK_SIZE,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class CollectingDataSubStageCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for data-collecting sub-stages that no RDM model refers to.

    Approach:
    - Walk over all models registered in RDMModelEnum and take the values of
      their ``collecting_sub_stage_id`` field (when a model has that field).
    - Combine the per-model queries with UNION to get the set of ids in use.
    - Delete the sub-stages whose id is missing from that set.
    """

    model = RDMCollectingDataSubStage

    def _get_valid_substage_ids_subquery(self) -> Optional[Subquery]:
        """Return a subquery with every collecting_sub_stage_id used by the RDMModelEnum models.

        ``None`` is returned when no registered model has the field.
        """

        def has_sub_stage_field(model_cls) -> bool:
            try:
                model_cls._meta.get_field('collecting_sub_stage_id')
            except FieldDoesNotExist:
                return False

            return True

        rdm_models = [enum_value.model for enum_value in RDMModelEnum.get_model_enum_values()]
        unions = []

        # Querysets are united in groups of UNION_CHUNK_SIZE first.
        for models_chunk in make_chunks(rdm_models, UNION_CHUNK_SIZE, is_list=True):
            querysets = [
                model_cls.objects.values('collecting_sub_stage_id')
                for model_cls in models_chunk
                if has_sub_stage_field(model_cls)
            ]
            if not querysets:
                continue

            head, *tail = querysets
            unions.append(head.union(*tail))

        if not unions:
            return None

        # Merge the per-chunk unions into one overall UNION.
        first_union, *other_unions = unions

        return Subquery(first_union.union(*other_unions))

    def get_merged_conditions(self) -> str:
        """Build the deletion condition for outdated sub-stages.

        A sub-stage is deleted when its id is absent from the united set of
        collecting_sub_stage_id values, i.e. when no data model uses it. An
        empty string is returned when that set cannot be built.
        """
        subquery = self._get_valid_substage_ids_subquery()
        if not subquery:
            return ''

        valid_ids_sql = str(subquery.query)

        return f"""
            NOT EXISTS (
                SELECT collecting_sub_stage_id
                FROM ({valid_ids_sql}) AS valid
                WHERE valid.collecting_sub_stage_id = tbl.id
            )
        """
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class CollectingDataStageCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for data-collecting stages that have no related sub-stages."""

    model = RDMCollectingDataStage

    def get_merged_conditions(self) -> str:
        """Return the condition matching stages that no sub-stage row references through ``stage_id``."""
        return self.get_orphan_reference_condition(
            CollectingDataSubStageCleaner.get_table_name(),
            'stage_id',
        )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class CollectingDataCommandProgressCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for outdated stored data-collecting tasks."""

    model = RDMCollectingDataCommandProgress

    def get_merged_conditions(self) -> str:
        """Return the deletion condition for stored collecting tasks.

        A task matches when it has no stage, when its stage is FINISHED and
        ended at least 7 days ago, when its stage is FAILED and ended at least
        30 days ago, or when the referenced stage row does not exist.
        """
        stages = CollectingDataStageCleaner.get_table_name()

        finished = self.get_status_condition(stages, "id", "FINISHED", 7, "stage_id")
        failed = self.get_status_condition(stages, "id", "FAILED", 30, "stage_id")
        orphan = self.get_orphan_reference_condition(stages, "id", "stage_id")

        return " OR ".join(('stage_id IS NULL', f'({finished})', f'({failed})', f'({orphan})'))
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Chunk size passed to make_chunks by the sub-stage cleaners when they group
# the RDM models whose querysets are combined into one UNION.
UNION_CHUNK_SIZE = 5
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
from typing import (
|
|
2
|
+
Optional,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
from django.core.exceptions import (
|
|
6
|
+
FieldDoesNotExist,
|
|
7
|
+
)
|
|
8
|
+
from django.db.models import (
|
|
9
|
+
Subquery,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from edu_rdm_integration.rdm_models.models import (
|
|
13
|
+
RDMModelEnum,
|
|
14
|
+
)
|
|
15
|
+
from edu_rdm_integration.stages.export_data.models import (
|
|
16
|
+
RDMExportingDataCommandProgress,
|
|
17
|
+
RDMExportingDataStage,
|
|
18
|
+
RDMExportingDataSubStage,
|
|
19
|
+
RDMExportingDataSubStageAttachment,
|
|
20
|
+
RDMExportingDataSubStageEntity,
|
|
21
|
+
)
|
|
22
|
+
from educommon.utils.seqtools import (
|
|
23
|
+
make_chunks,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
from .base import (
|
|
27
|
+
BaseServiceOutdatedDataCleaner,
|
|
28
|
+
ServiceFileCleaner,
|
|
29
|
+
)
|
|
30
|
+
from .consts import (
|
|
31
|
+
UNION_CHUNK_SIZE,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class ExportingDataSubStageCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for data-exporting sub-stages that no RDM model refers to.

    Approach:
    - Walk over all models registered in RDMModelEnum and take the values of
      their ``exporting_sub_stage_id`` field (when a model has that field).
    - Combine the per-model queries with UNION to get the set of ids in use.
    - Delete the sub-stages whose id is missing from that set.
    """

    model = RDMExportingDataSubStage

    def _get_valid_substage_ids_subquery(self) -> Optional[Subquery]:
        """Return a subquery with every exporting_sub_stage_id used by the RDMModelEnum models.

        ``None`` is returned when no registered model has the field.
        """

        def has_sub_stage_field(model_cls) -> bool:
            try:
                model_cls._meta.get_field('exporting_sub_stage_id')
            except FieldDoesNotExist:
                return False

            return True

        rdm_models = [enum_value.model for enum_value in RDMModelEnum.get_model_enum_values()]
        unions = []

        # Querysets are united in groups of UNION_CHUNK_SIZE first.
        for models_chunk in make_chunks(rdm_models, UNION_CHUNK_SIZE, is_list=True):
            querysets = [
                model_cls.objects.values('exporting_sub_stage_id')
                for model_cls in models_chunk
                if has_sub_stage_field(model_cls)
            ]
            if not querysets:
                continue

            head, *tail = querysets
            unions.append(head.union(*tail))

        if not unions:
            return None

        # Merge the per-chunk unions into one overall UNION.
        first_union, *other_unions = unions
        full_union = first_union.union(*other_unions)

        return Subquery(full_union.values('exporting_sub_stage_id'))

    def get_merged_conditions(self) -> str:
        """Build the deletion condition for outdated sub-stages.

        A sub-stage is deleted when its id is absent from the united set of
        exporting_sub_stage_id values, i.e. when no data model uses it. An
        empty string is returned when that set cannot be built.
        """
        subquery = self._get_valid_substage_ids_subquery()
        if not subquery:
            return ''

        valid_ids_sql = str(subquery.query)

        return f"""
            NOT EXISTS (
                SELECT exporting_sub_stage_id
                FROM ({valid_ids_sql}) AS valid
                WHERE valid.exporting_sub_stage_id = tbl.id
            )
        """
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ExportingDataStageCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for data-exporting stages that have no sub-stages."""

    model = RDMExportingDataStage

    def get_merged_conditions(self) -> str:
        """Return the condition matching stages that no sub-stage row references through ``stage_id``."""
        return self.get_orphan_reference_condition(
            ExportingDataSubStageCleaner.get_table_name(),
            'stage_id',
        )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class ExportingDataSubStageAttachmentCleaner(ServiceFileCleaner, BaseServiceOutdatedDataCleaner):
    """Cleaner for attachments of data-exporting sub-stages."""

    model = RDMExportingDataSubStageAttachment

    # Unlike the base statement, this one returns one row per deleted record
    # with the value of its ``attachment`` column exposed as ``file_path``.
    REMOVE_OUTDATED_DATA_SQL = """
        WITH deleted_rows AS (
            DELETE FROM {table_name}
            WHERE id IN (
                WITH tbl AS (
                    SELECT *
                    FROM {table_name}
                    WHERE id >= {first_id}
                        AND id <= {last_id}
                )
                SELECT tbl.id
                FROM tbl
                WHERE {conditions}
            )
            RETURNING attachment AS file_path
        )
        SELECT file_path FROM deleted_rows;
    """

    def get_merged_conditions(self) -> str:
        """Return the deletion condition for sub-stage attachments.

        An attachment matches when it has no sub-stage, when its sub-stage is
        FINISHED and ended at least 7 days ago, when its sub-stage is FAILED
        and ended at least 30 days ago, or when the referenced sub-stage row
        does not exist.
        """
        sub_stages = ExportingDataSubStageCleaner.get_table_name()
        local_field = "exporting_data_sub_stage_id"

        finished = self.get_status_condition(sub_stages, "id", "FINISHED", 7, local_field)
        failed = self.get_status_condition(sub_stages, "id", "FAILED", 30, local_field)
        orphan = self.get_orphan_reference_condition(sub_stages, "id", local_field)

        return ' OR '.join((
            'exporting_data_sub_stage_id IS NULL',
            f'({finished})',
            f'({failed})',
            f'({orphan})',
        ))
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class ExportingDataSubStageEntityCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for links between entities and data-exporting sub-stages."""

    model = RDMExportingDataSubStageEntity

    def get_merged_conditions(self) -> str:
        """Return the deletion condition for entity/sub-stage links.

        A link matches when its sub-stage is FINISHED and ended at least
        7 days ago, when its sub-stage is FAILED and ended at least 30 days
        ago, or when the referenced sub-stage row does not exist.
        """
        sub_stages = ExportingDataSubStageCleaner.get_table_name()
        local_field = "exporting_data_sub_stage_id"

        finished = self.get_status_condition(sub_stages, "id", "FINISHED", 7, local_field)
        failed = self.get_status_condition(sub_stages, "id", "FAILED", 30, local_field)
        orphan = self.get_orphan_reference_condition(sub_stages, "id", local_field)

        return ' OR '.join((f'({finished})', f'({failed})', f'({orphan})'))
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class ExportingDataCommandProgressCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for outdated stored data-exporting tasks."""

    model = RDMExportingDataCommandProgress

    def get_merged_conditions(self) -> str:
        """Return the deletion condition for stored exporting tasks.

        A task matches when it has no stage, when its stage is FINISHED and
        ended at least 7 days ago, when its stage is FAILED and ended at least
        30 days ago, or when the referenced stage row does not exist.
        """
        stages = ExportingDataStageCleaner.get_table_name()

        finished = self.get_status_condition(stages, "id", "FINISHED", 7, "stage_id")
        failed = self.get_status_condition(stages, "id", "FAILED", 30, "stage_id")
        orphan = self.get_orphan_reference_condition(stages, "id", "stage_id")

        return ' OR '.join(('stage_id IS NULL', f'({finished})', f'({failed})', f'({orphan})'))
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from edu_rdm_integration.stages.upload_data.models import (
|
|
2
|
+
RDMExportingDataSubStageUploaderClientLog,
|
|
3
|
+
RDMUploadStatusRequestLog,
|
|
4
|
+
)
|
|
5
|
+
from uploader_client.models import (
|
|
6
|
+
Entry,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
from .base import (
|
|
10
|
+
BaseServiceOutdatedDataCleaner,
|
|
11
|
+
)
|
|
12
|
+
from .export_data import (
|
|
13
|
+
ExportingDataSubStageAttachmentCleaner,
|
|
14
|
+
ExportingDataSubStageCleaner,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ExportingDataSubStageUploaderClientLogCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for uploader logs of exporting sub-stages that lost their sub-stage or file."""

    model = RDMExportingDataSubStageUploaderClientLog

    def get_merged_conditions(self) -> str:
        """Return the deletion condition for uploader client logs.

        A log matches when its sub-stage is FINISHED and ended at least 7 days
        ago, when its sub-stage is FAILED and ended at least 30 days ago, when
        the referenced sub-stage row does not exist, or when the referenced
        attachment row does not exist.
        """
        sub_stages = ExportingDataSubStageCleaner.get_table_name()
        attachments = ExportingDataSubStageAttachmentCleaner.get_table_name()

        finished = self.get_status_condition(sub_stages, "id", "FINISHED", 7, "sub_stage_id")
        failed = self.get_status_condition(sub_stages, "id", "FAILED", 30, "sub_stage_id")
        no_sub_stage = self.get_orphan_reference_condition(sub_stages, "id", local_field="sub_stage_id")
        no_attachment = self.get_orphan_reference_condition(attachments, "id", local_field="attachment_id")

        return ' OR '.join((
            f'({finished})',
            f'({failed})',
            f'({no_sub_stage})',
            f'({no_attachment})',
        ))
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class UploadStatusRequestLogCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for upload-status request logs that have no related upload."""

    model = RDMUploadStatusRequestLog

    def get_merged_conditions(self) -> str:
        """Return the condition matching logs whose ``upload_id`` has no uploader client log row."""
        return self.get_orphan_reference_condition(
            ExportingDataSubStageUploaderClientLogCleaner.get_table_name(),
            'id',
            local_field='upload_id',
        )
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class EntryCleaner(BaseServiceOutdatedDataCleaner):
    """Cleaner for journal entries linked neither to an upload nor to a status log."""

    model = Entry

    def get_merged_conditions(self) -> str:
        """Return the deletion condition for journal entries.

        An entry matches only when both tables, the uploader client logs and
        the upload-status request logs, have no row whose ``entry_id`` points
        to it.
        """
        reference_tables = (
            ExportingDataSubStageUploaderClientLogCleaner.get_table_name(),
            UploadStatusRequestLogCleaner.get_table_name(),
        )

        return ' AND '.join(
            f'({self.get_orphan_reference_condition(table, "entry_id")})'
            for table in reference_tables
        )
|