geovisio 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geovisio/__init__.py +38 -8
- geovisio/admin_cli/__init__.py +2 -2
- geovisio/admin_cli/db.py +8 -0
- geovisio/config_app.py +64 -0
- geovisio/db_migrations.py +24 -3
- geovisio/templates/main.html +14 -14
- geovisio/templates/viewer.html +3 -3
- geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/de/LC_MESSAGES/messages.po +667 -0
- geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/en/LC_MESSAGES/messages.po +730 -0
- geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
- geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
- geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
- geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
- geovisio/translations/messages.pot +686 -0
- geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/nl/LC_MESSAGES/messages.po +594 -0
- geovisio/utils/__init__.py +1 -1
- geovisio/utils/auth.py +50 -11
- geovisio/utils/db.py +65 -0
- geovisio/utils/excluded_areas.py +83 -0
- geovisio/utils/extent.py +30 -0
- geovisio/utils/fields.py +1 -1
- geovisio/utils/filesystems.py +0 -1
- geovisio/utils/link.py +14 -0
- geovisio/utils/params.py +20 -0
- geovisio/utils/pictures.py +94 -69
- geovisio/utils/reports.py +171 -0
- geovisio/utils/sequences.py +288 -126
- geovisio/utils/tokens.py +37 -42
- geovisio/utils/upload_set.py +654 -0
- geovisio/web/auth.py +50 -37
- geovisio/web/collections.py +305 -319
- geovisio/web/configuration.py +14 -0
- geovisio/web/docs.py +288 -12
- geovisio/web/excluded_areas.py +377 -0
- geovisio/web/items.py +203 -151
- geovisio/web/map.py +322 -106
- geovisio/web/params.py +69 -26
- geovisio/web/pictures.py +14 -31
- geovisio/web/reports.py +399 -0
- geovisio/web/rss.py +13 -7
- geovisio/web/stac.py +129 -121
- geovisio/web/tokens.py +105 -112
- geovisio/web/upload_set.py +768 -0
- geovisio/web/users.py +100 -73
- geovisio/web/utils.py +38 -9
- geovisio/workers/runner_pictures.py +278 -183
- geovisio-2.7.0.dist-info/METADATA +95 -0
- geovisio-2.7.0.dist-info/RECORD +66 -0
- geovisio-2.5.0.dist-info/METADATA +0 -115
- geovisio-2.5.0.dist-info/RECORD +0 -41
- {geovisio-2.5.0.dist-info → geovisio-2.7.0.dist-info}/LICENSE +0 -0
- {geovisio-2.5.0.dist-info → geovisio-2.7.0.dist-info}/WHEEL +0 -0
geovisio/workers/runner_pictures.py

@@ -1,21 +1,23 @@
-from fs import open_fs
 from fs.path import dirname
 from PIL import Image, ImageOps
 from flask import current_app
+from geovisio import utils
+from geovisio.utils import db, sequences, upload_set
 import psycopg
+from psycopg.rows import dict_row
+from psycopg.sql import SQL
 import sentry_sdk
-from geovisio import utils
 from geovisio import errors
 from dataclasses import dataclass
 import logging
 from contextlib import contextmanager
 from enum import Enum
-from typing import Any
+from typing import Any, Optional
 import threading
 from uuid import UUID
-
+from croniter import croniter
+from datetime import datetime, timezone
 import geovisio.utils.filesystems
-from geovisio.utils.sequences import update_headings
 
 log = logging.getLogger("geovisio.runner_pictures")
 
@@ -23,7 +25,7 @@ PICTURE_PROCESS_MAX_RETRY = 10 # Number of times a job will be retryed if there
 
 
 class PictureBackgroundProcessor(object):
-    def
+    def __init__(self, app):
         nb_threads = app.config["EXECUTOR_MAX_WORKERS"]
         self.enabled = nb_threads != 0
 
@@ -34,7 +36,7 @@ class PictureBackgroundProcessor(object):
         else:
             import sys
 
-            if "run" in sys.argv or "waitress" in sys.argv: # hack not to display a frightening warning uselessly
+            if "run" in sys.argv or "waitress" in sys.argv or "gunicorn" in sys.argv: # hack not to display a frightening warning uselessly
                 log.warning("No picture background processor run, no picture will be processed unless another separate worker is run")
                 log.warning("A separate process can be run with:")
                 log.warning("flask picture-worker")
@@ -44,34 +46,63 @@ class PictureBackgroundProcessor(object):
         Ask for a background picture process that will run until not pictures need to be processed
         """
         if self.enabled:
-            worker = PictureProcessor(
-            return self.executor.submit(worker.
+            worker = PictureProcessor(app=current_app)
+            return self.executor.submit(worker.process_jobs)
 
 
-background_processor = PictureBackgroundProcessor()
+# background_processor = PictureBackgroundProcessor()
 
 
 class ProcessTask(str, Enum):
     prepare = "prepare"
     delete = "delete"
+    dispatch = "dispatch"
+    finalize = "finalize"
 
 
 @dataclass
 class DbPicture:
-    id:
+    id: UUID
     metadata: dict
 
     def blurred_by_author(self):
         return self.metadata.get("blurredByAuthor", False)
 
 
+@dataclass
+class DbSequence:
+    id: UUID
+
+
+@dataclass
+class DbUploadSet:
+    id: UUID
+
+
 @dataclass
 class DbJob:
     reporting_conn: psycopg.Connection
-
-
+    job_history_id: UUID  # ID of the job in the job_history
+    job_queue_id: UUID  # ID in the job_queue
+    pic: Optional[DbPicture]
+    upload_set: Optional[DbUploadSet]
+    seq: Optional[DbSequence]
+
     task: ProcessTask
 
+    def label(self):
+        impacted_object = ""
+        if self.pic:
+            impacted_object = f"picture {self.pic.id}"
+        elif self.seq:
+            impacted_object = f"sequence {self.seq.id}"
+        elif self.upload_set:
+            impacted_object = f"upload set {self.upload_set.id}"
+        else:
+            impacted_object = "unknown object"
+
+        return f"{self.task} for {impacted_object}"
+
 
 def processPictureFiles(pic: DbPicture, config):
     """Generates the files associated with a sequence picture.
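
Note on the hunk above: `ProcessTask` subclasses both `str` and `Enum`, so a raw task value read from the `job_queue` table maps back to a member via `ProcessTask(...)`, and members compare equal to plain strings, which a later hunk relies on (`job.task != "prepare"` in `_finalize_sequence`). A minimal self-contained restatement of the enum:

    from enum import Enum

    class ProcessTask(str, Enum):
        prepare = "prepare"
        delete = "delete"
        dispatch = "dispatch"
        finalize = "finalize"

    # A raw task string from the database maps back to the enum member,
    # and str subclassing makes members compare equal to plain strings.
    assert ProcessTask("dispatch") is ProcessTask.dispatch
    assert ProcessTask.prepare == "prepare"
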
@@ -87,7 +118,7 @@ def processPictureFiles(pic: DbPicture, config):
     config : dict
         Flask app.config (passed as param to allow using ThreadPoolExecutor)
     """
-    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL")
+    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") is None
     fses = config["FILESYSTEMS"]
    fs = fses.permanent if skipBlur else fses.tmp
    picHdPath = utils.pictures.getHDPicturePath(pic.id)
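
The one-line `skipBlur` change above is a behavior fix rather than a refactor: `config.get("API_BLUR_URL")` is truthy exactly when a blur service is configured, so the old expression marked blurring as skippable whenever a blur URL was set; adding `is None` makes blurring skipped only when the picture is already blurred by its author or no blur service is configured at all. A minimal sketch of the corrected predicate, with hypothetical config values:

    def should_skip_blur(blurred_by_author: bool, config: dict) -> bool:
        # Skip blurring when the author already blurred the picture,
        # or when no blur service is configured at all.
        return blurred_by_author or config.get("API_BLUR_URL") is None

    assert should_skip_blur(False, {})  # no blur service configured
    assert not should_skip_blur(False, {"API_BLUR_URL": "https://blur.example"})
    assert should_skip_blur(True, {"API_BLUR_URL": "https://blur.example"})
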
@@ -152,29 +183,48 @@ class RecoverableProcessException(Exception):
         super().__init__(msg)
 
 
+class RetryLaterProcessException(Exception):
+    """Exception raised when we want to retry later, even if it's not an error"""
+
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 class PictureProcessor:
     stop: bool
     config: dict[Any, Any]
+    waiting_time: float
 
-    def __init__(self,
-        self.
+    def __init__(self, app, stop=True) -> None:
+        self.app = app
         self.stop = stop
         if threading.current_thread() is threading.main_thread():
             # if worker is in daemon mode, register signals to gracefully stop it
             self._register_signals()
+        self.next_periodic_task_dt = None
+        self.cron = croniter(self.app.config["PICTURE_PROCESS_REFRESH_CRON"])
+
+        # Note: in tests, we don't want to wait between each picture processing
+        waiting_time = 0 if app.config.get("TESTING") is True else 1
+        self.waiting_time = waiting_time
 
-    def
+    def process_jobs(self):
         try:
-
-
-
-
+            with self.app.app_context():
+                while True:
+                    if self.app.pool.closed and self.stop:
+                        # in some tests, the pool is closed before the worker is stopped, we check this here
                        return
-
-
-
+                    self.check_periodic_tasks()
+                    r = process_next_job(self.app)
+                    if not r:
+                        if self.stop:
+                            return
+                        # no more picture to process
+                        # wait a bit until there are some
+                        import time
 
-
+                        time.sleep(self.waiting_time)
 
         except:
             log.exception("Exiting thread")
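
The rewritten `process_jobs` above is a plain poll-and-sleep worker: process queued jobs until none is left, then either return (when `stop` is set, as in tests and one-shot runs) or sleep `waiting_time` seconds and poll again. A condensed sketch of that control flow, with a hypothetical `process_one` callable standing in for `process_next_job`:

    import time

    def run_worker(process_one, stop_when_empty: bool = False, idle_wait: float = 1.0):
        # Condensed shape of PictureProcessor.process_jobs: drain the queue,
        # then either stop or sleep briefly before polling again.
        while True:
            if not process_one():
                if stop_when_empty:
                    return
                time.sleep(idle_wait)
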
@@ -189,22 +239,77 @@ class PictureProcessor:
         log.info("Stoping worker, waiting for last picture processing to finish...")
         self.stop = True
 
-
-
+    def check_periodic_tasks(self):
+        """
+        Check if a periodic task needs to be done, and do it if necessary
+        This method ensure only one picture worker will do the needed periodic task
+        """
+        if self.next_periodic_task_dt is None:
+            with db.conn(self.app) as conn:
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+
+        if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+            with db.conn(self.app) as conn:
+                # since the next_periodic_task_dt can have been changed by another process, we check again that the task needs to be done
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+                if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+                    if not self.refresh_database():
+                        # another refresh is in progress, we'll check again later and ask for the next refresh date considering it's in progress
+                        self.next_periodic_task_dt = self.cron.get_next(datetime, datetime.now(timezone.utc))
+                        logging.getLogger("geovisio.periodic_task").info(
+                            f"Refresh in progress, checking after = {self.next_periodic_task_dt}"
+                        )
+
+    def get_next_periodic_task_dt(self, conn) -> datetime:
+        r = conn.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
+        assert r  # the table always has exactly one row
+
+        refreshed_at, db_time = r
+        current_time = datetime.now(timezone.utc)
+        if refreshed_at is None:
+            # if the db has never been updated, we need to update it now
+            return current_time
+        next_schedule_date = self.cron.get_next(datetime, refreshed_at)
+
+        # if the db time and the app time is not the same, we need to apply an offset on the scheduled time
+        next_schedule_date += db_time - current_time
+        logging.getLogger("geovisio.periodic_task").info(f"Next database refresh = {next_schedule_date}")
+        return next_schedule_date
+
+    def refresh_database(self):
+        with sentry_sdk.start_transaction(op="task", name="refresh_database"):
+            # Note: there is a mechanism in `sequences.update_pictures_grid` to ensure that only one refresh can be done at one time, and it will update the `refreshed_at` value
+            return utils.sequences.update_pictures_grid()
+
+
+def process_next_job(app):
     with sentry_sdk.start_transaction(op="task", name="process_next_picture"):
-        with
+        with _get_next_job(app) as job:
             if job is None:
                 return False
-            if job.task == ProcessTask.prepare:
+            if job.task == ProcessTask.prepare and job.pic:
                 with sentry_sdk.start_span(description="Processing picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Processing picture {job.pic.id}"):
-
-
+                        # open another connection for reporting and queries
+                        processPictureFiles(job.pic, app.config)
+            elif job.task == ProcessTask.delete and job.pic:
                 with sentry_sdk.start_span(description="Deleting picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Deleting picture {job.pic.id}"):
-                        _delete_picture(job)
+                        _delete_picture(job.pic)
+            elif job.task == ProcessTask.delete and job.upload_set:
+                with sentry_sdk.start_span(description="Deleting upload set") as span:
+                    span.set_data("us_id", job.upload_set.id)
+                    with utils.time.log_elapsed(f"Deleting upload set {job.upload_set.id}"):
+                        _delete_upload_set(job.upload_set)
+            elif job.task == ProcessTask.dispatch and job.upload_set:
+                with utils.time.log_elapsed(f"Dispatching upload set {job.upload_set.id}"):
+                    upload_set.dispatch(job.upload_set.id)
+            elif job.task == ProcessTask.finalize and job.seq:
+                with utils.time.log_elapsed(f"Finalizing sequence {job.seq.id}"):
+                    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+                        sequences.finalize(cursor, job.seq.id)
             else:
                 raise RecoverableProcessException(f"Unhandled process task: {job.task}")
 
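
The periodic-task machinery above schedules the database refresh from a cron expression via croniter: the next run is derived from the last `refreshed_at` stored in the database, then shifted by the clock offset between the database and the app (`next_schedule_date += db_time - current_time`). The croniter call in isolation, with a hypothetical hourly expression standing in for `PICTURE_PROCESS_REFRESH_CRON`:

    from datetime import datetime, timezone
    from croniter import croniter

    # Same call shape as get_next_periodic_task_dt, with "minute 0 of every
    # hour" standing in for the configured cron expression.
    cron = croniter("0 * * * *")
    last_refresh = datetime(2024, 1, 1, 10, 30, tzinfo=timezone.utc)
    next_refresh = cron.get_next(datetime, last_refresh)
    assert next_refresh == datetime(2024, 1, 1, 11, 0, tzinfo=timezone.utc)
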
@@ -212,58 +317,67 @@ def process_next_picture(config):
 
 
 @contextmanager
-def
+def _get_next_job(app):
     """
-    Open a new connection and return the next
-    Note: the
+    Open a new connection and return the next job to process
+    Note: the job should be used as a context manager to close the connection when we stop using the returned job.
 
-    The new connection is needed because we lock the `
+    The new connection is needed because we lock the `job_queue` for the whole transaction for another worker not to process the same job
     """
     error = None
-    with
-        with locking_transaction.transaction():
-            r =
-                """
-
-
-                JOIN pictures p ON p.id = pictures_to_process.picture_id
+    with app.pool.connection() as locking_transaction:
+        with locking_transaction.transaction(), locking_transaction.cursor(row_factory=dict_row) as cursor:
+            r = cursor.execute(
+                """SELECT j.id, j.picture_id, j.upload_set_id, j.sequence_id, j.task, p.metadata
+                FROM job_queue j
+                LEFT JOIN pictures p ON p.id = j.picture_id
                 ORDER by
-
-
-                FOR UPDATE of
-                LIMIT 1
-                """
+                    j.nb_errors,
+                    j.ts
+                FOR UPDATE of j SKIP LOCKED
+                LIMIT 1"""
             ).fetchone()
             if r is None:
                 # Nothing to process
                 yield None
             else:
-                log.debug(f"Processing {r[
-
-                db_pic = DbPicture(id=
-
-
-
+                log.debug(f"Processing {r['id']}")
+
+                db_pic = DbPicture(id=r["picture_id"], metadata=r["metadata"]) if r["picture_id"] is not None else None
+                db_seq = DbSequence(id=r["sequence_id"]) if r["sequence_id"] is not None else None
+                db_upload_set = DbUploadSet(id=r["upload_set_id"]) if r["upload_set_id"] is not None else None
+
+                with app.pool.connection() as reporting_conn:
+                    job = _initialize_job(
+                        reporting_conn,
+                        job_queue_id=r["id"],
+                        db_pic=db_pic,
+                        db_seq=db_seq,
+                        db_upload_set=db_upload_set,
+                        task=ProcessTask(r["task"]),
+                    )
                     try:
                         yield job
 
                         # Finalize the picture process, set the picture status and remove the picture from the queue process
-
-                        log.debug(f"
+                        _finalize_job(locking_transaction, job)
+                        log.debug(f"Job {job.label()} processed")
                     except RecoverableProcessException as e:
-                        _mark_process_as_error(locking_transaction, job, e,
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True)
+                    except RetryLaterProcessException as e:
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True, mark_as_error=False)
                     except InterruptedError as interruption:
-                        log.error(f"Interruption received, stoping job {job.
+                        log.error(f"Interruption received, stoping job {job.label()}")
                         # starts a new connection, since the current one can be corrupted by the exception
-                        with
-                        _mark_process_as_error(t, job, interruption,
+                        with app.pool.connection() as t:
+                            _mark_process_as_error(t, job, interruption, recoverable=True)
                         error = interruption
                     except Exception as e:
-                        log.exception(f"Impossible to finish job {job.
-                        _mark_process_as_error(locking_transaction, job, e,
+                        log.exception(f"Impossible to finish job {job.label()}")
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=False)
 
                         # try to finalize the sequence anyway
-
+                        _finalize_sequence(job)
                         error = e
 
     # we raise an error after the transaction has been comited to be sure to have the state persisted in the database
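
The locking query above is the standard Postgres work-queue pattern: `FOR UPDATE ... SKIP LOCKED` locks the selected `job_queue` row for the lifetime of the transaction, and concurrent workers skip already-locked rows instead of blocking on them, so no two workers pick the same job. Stripped of the geovisio specifics, a sketch of the pattern with psycopg 3 (hypothetical DSN and column list):

    import psycopg

    with psycopg.connect("dbname=geovisio") as conn:
        with conn.transaction():
            row = conn.execute(
                """SELECT id, task FROM job_queue
                   ORDER BY nb_errors, ts
                   FOR UPDATE SKIP LOCKED
                   LIMIT 1"""
            ).fetchone()
            if row is not None:
                # The row stays locked until the transaction ends; other
                # workers skip it and pick a different job.
                ...  # process the job, then DELETE it from job_queue
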
@@ -271,163 +385,144 @@ def _get_next_picture_to_process(config):
     raise error
 
 
-def
-
-
-            SELECT sp.seq_id AS id FROM sequences_pictures AS sp
-            WHERE sp.pic_id = %(id)s
-        """,
-        {"id": job.pic.id},
-    ).fetchone()
-    if not r:
-        raise Exception(f"impossible to find sequence associated to picture {job.pic.id}")
-
-    seqId = r[0]
-
-    is_sequence_finalized = _is_sequence_finalized(job.reporting_conn, seqId)
-    if not is_sequence_finalized:
-        log.debug("sequence not finalized")
+def _finalize_sequence(job: DbJob):
+    # on picture preparation finalization, we add a sequence/upload_set finalization job
+    if job.task != "prepare" or not job.pic:
         return
 
-    with
-
-
-
-
-        # Complete missing headings in pictures
-        update_headings(job.reporting_conn, seqId)
-
-        # Change sequence database status in DB
-        # Also generates data in computed columns
-        job.reporting_conn.execute(
-            """WITH
-                aggregated_pictures AS (
-                    SELECT
-                        sp.seq_id,
-                        MIN(p.ts::DATE) AS day,
-                        ARRAY_AGG(DISTINCT TRIM(
-                            CONCAT(p.metadata->>'make', ' ', p.metadata->>'model')
-                        )) AS models,
-                        ARRAY_AGG(DISTINCT p.metadata->>'type') AS types
-                    FROM sequences_pictures sp
-                    JOIN pictures p ON sp.pic_id = p.id
-                    WHERE sp.seq_id = %(seq)s
-                    GROUP BY sp.seq_id
-                )
-                UPDATE sequences
-                SET
-                    status = 'ready',
-                    geom = compute_sequence_geom(id),
-                    bbox = compute_sequence_bbox(id),
-                    computed_type = CASE WHEN array_length(types, 1) = 1 THEN types[1] ELSE NULL END,
-                    computed_model = CASE WHEN array_length(models, 1) = 1 THEN models[1] ELSE NULL END,
-                    computed_capture_date = day
-                FROM aggregated_pictures
-                WHERE id = %(seq)s
-            """,
-            {"seq": seqId},
-        )
+    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+        r = cursor.execute(
+            "SELECT upload_set_id, seq_id FROM pictures p LEFT JOIN sequences_pictures sp on sp.pic_id = p.id WHERE p.id = %(pic_id)s",
+            {"pic_id": job.pic.id},
+        ).fetchone()
 
-
+        if not r or not r["seq_id"]:
+            # if the associated upload set has not yet been dispatch, the picture might not be associated to a sequence
+            return
 
+        if r["upload_set_id"]:
+            # if the picture is part of the upload set, the sequence finalization will be done when the upload set is dispatched
+            return
 
-
-
-    We consider a sequence as ready, if all pictures have been processed and there is at least one correctly processed picture
-    Eg. we don't want pictures with preparing_status = 'not-processed' and at least one 'prepared'
-    """
-    statuses = db.execute(
-        """SELECT DISTINCT(preparing_status) FROM pictures p
-        JOIN sequences_pictures sp ON sp.pic_id = p.id
-        WHERE
-            sp.seq_id = %(id)s
-            AND p.preparing_status <> 'broken'
-        ;
-        """,
-        {"id": seq_id},
-    ).fetchall()
+        # Add a task to finalize the sequence/upload_set
+        sequences.add_finalization_job(cursor, r["seq_id"])
 
-    return [("prepared",)] == statuses
 
-
-def _finalize_picture_process(db, job: DbJob):
+def _finalize_job(conn, job: DbJob):
     job.reporting_conn.execute(
         "UPDATE job_history SET finished_at = CURRENT_TIMESTAMP WHERE id = %(id)s",
-        {"id": job.
+        {"id": job.job_history_id},
     )
-    if job.task == ProcessTask.prepare:
+    if job.task == ProcessTask.prepare and job.pic:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
         job.reporting_conn.execute(
             "UPDATE pictures SET status = 'ready', preparing_status = 'prepared' WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )
 
-        #
-
-    elif job.task == ProcessTask.delete:
-
-        db.execute(
+        # Add a task to finalize the sequence
+        _finalize_sequence(job)
+    elif job.task == ProcessTask.delete and job.pic:
+        conn.execute(
             "DELETE FROM pictures WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )
-
-        "DELETE FROM
-
-        )
+    elif job.task == ProcessTask.delete and job.upload_set:
+        conn.execute(SQL("DELETE FROM upload_sets WHERE id = %s"), [job.upload_set.id])
+
+    conn.execute("DELETE FROM job_queue WHERE id = %(job_id)s", {"job_id": job.job_queue_id})
 
 
-def
+def _initialize_job(
+    reporting_conn: psycopg.Connection,
+    job_queue_id: UUID,
+    db_pic: Optional[DbPicture],
+    db_seq: Optional[DbSequence],
+    db_upload_set: Optional[DbUploadSet],
+    task: ProcessTask,
+) -> DbJob:
     r = reporting_conn.execute(
-        """INSERT INTO job_history(picture_id,
-        VALUES (%(
-        RETURNING id
-
-
+        """INSERT INTO job_history(job_id, picture_id, sequence_id, upload_set_id, job_task)
+        VALUES (%(job_id)s, %(pic_id)s, %(seq_id)s, %(us_id)s, %(task)s)
+        RETURNING id""",
+        {
+            "job_id": job_queue_id,
+            "pic_id": db_pic.id if db_pic else None,
+            "seq_id": db_seq.id if db_seq else None,
+            "us_id": db_upload_set.id if db_upload_set else None,
+            "task": task.value,
+        },
     ).fetchone()
 
     if not r:
         raise Exception("impossible to insert task in database")
-
+
+    return DbJob(
+        reporting_conn=reporting_conn,
+        job_queue_id=job_queue_id,
+        pic=db_pic,
+        seq=db_seq,
+        upload_set=db_upload_set,
+        task=task,
+        job_history_id=r[0],
+    )
 
 
-def _mark_process_as_error(
+def _mark_process_as_error(conn, job: DbJob, e: Exception, recoverable: bool = False, mark_as_error: bool = True):
     job.reporting_conn.execute(
         """UPDATE job_history SET
             error = %(err)s, finished_at = CURRENT_TIMESTAMP
         WHERE id = %(id)s""",
-        {"err": str(e), "id": job.
+        {"err": str(e), "id": job.job_history_id},
     )
     if recoverable:
-
-
-
-
-
-
-
-
-
-
-
+        if mark_as_error:
+            nb_error = conn.execute(
+                """
+                UPDATE job_queue SET
+                    nb_errors = nb_errors + 1
+                WHERE id = %(id)s
+                RETURNING nb_errors""",
+                {"err": str(e), "id": job.job_queue_id},
+            ).fetchone()
+            if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
+                logging.info(f"Job {job.label()} has failed {nb_error} times, we stop trying to process it.")
+                recoverable = False
+        else:
+            # it's not a real error, we just want to retry later
+            conn.execute(
+                SQL("UPDATE job_queue SET ts = NOW() WHERE id = %(id)s"),
+                {"err": str(e), "id": job.job_queue_id},
+            )
 
     if not recoverable:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
-        job.
-
-
-
-
-
-
-
-
-            DELETE FROM pictures_to_process
-            WHERE picture_id = %(id)s""",
-            {"id": job.pic.id},
-        )
+        if job.task == "prepare" and job.pic:
+            job.reporting_conn.execute(
+                """UPDATE pictures SET
+                    preparing_status = 'broken', status = 'broken'
+                WHERE id = %(id)s""",
+                {"id": job.pic.id},
+            )
+        # on unrecoverable error, we remove the job from the queue
+        conn.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job.job_queue_id})
 
 
-def _delete_picture(
+def _delete_picture(pic: DbPicture):
     """Delete a picture from the filesystem"""
-    log.debug(f"Deleting picture files {
-    utils.pictures.removeAllFiles(
+    log.debug(f"Deleting picture files {pic.id}")
+    utils.pictures.removeAllFiles(pic.id)
+
+
+def _delete_upload_set(upload_set: DbUploadSet):
+    """Delete an upload set
+    We do this in the job queue since we want to wait for all its pictures to be deleted
+    """
+    with db.conn(current_app) as conn:
+        with conn.transaction(), conn.cursor() as cursor:
+            # we want to wait for all pictures to be deleted
+            has_more_pictures = cursor.execute("SELECT 1 FROM pictures WHERE upload_set_id = %s LIMIT 1", [upload_set.id]).fetchone()
+            if has_more_pictures and has_more_pictures[0]:
+                logging.info(f"More pictures to be deleted, upload_set {upload_set.id} will be deleted later")
+                raise RetryLaterProcessException("More pictures to be deleted, upload_set will be deleted later")
+            # Note: the real deletion will be done on job completion so the lock is released