geovisio-2.6.0-py3-none-any.whl → geovisio-2.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geovisio/__init__.py +36 -7
- geovisio/admin_cli/db.py +1 -4
- geovisio/config_app.py +40 -1
- geovisio/db_migrations.py +24 -3
- geovisio/templates/main.html +13 -13
- geovisio/templates/viewer.html +3 -3
- geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/de/LC_MESSAGES/messages.po +667 -0
- geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/en/LC_MESSAGES/messages.po +730 -0
- geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
- geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
- geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
- geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
- geovisio/translations/messages.pot +686 -0
- geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/nl/LC_MESSAGES/messages.po +594 -0
- geovisio/utils/__init__.py +1 -1
- geovisio/utils/auth.py +50 -11
- geovisio/utils/db.py +65 -0
- geovisio/utils/excluded_areas.py +83 -0
- geovisio/utils/extent.py +30 -0
- geovisio/utils/fields.py +1 -1
- geovisio/utils/filesystems.py +0 -1
- geovisio/utils/link.py +14 -0
- geovisio/utils/params.py +20 -0
- geovisio/utils/pictures.py +92 -68
- geovisio/utils/reports.py +171 -0
- geovisio/utils/sequences.py +264 -126
- geovisio/utils/tokens.py +37 -42
- geovisio/utils/upload_set.py +654 -0
- geovisio/web/auth.py +37 -37
- geovisio/web/collections.py +286 -302
- geovisio/web/configuration.py +14 -0
- geovisio/web/docs.py +241 -14
- geovisio/web/excluded_areas.py +377 -0
- geovisio/web/items.py +156 -108
- geovisio/web/map.py +20 -20
- geovisio/web/params.py +69 -26
- geovisio/web/pictures.py +14 -31
- geovisio/web/reports.py +399 -0
- geovisio/web/rss.py +13 -7
- geovisio/web/stac.py +129 -134
- geovisio/web/tokens.py +98 -109
- geovisio/web/upload_set.py +768 -0
- geovisio/web/users.py +100 -73
- geovisio/web/utils.py +28 -9
- geovisio/workers/runner_pictures.py +252 -204
- {geovisio-2.6.0.dist-info → geovisio-2.7.0.dist-info}/METADATA +16 -13
- geovisio-2.7.0.dist-info/RECORD +66 -0
- geovisio-2.6.0.dist-info/RECORD +0 -41
- {geovisio-2.6.0.dist-info → geovisio-2.7.0.dist-info}/LICENSE +0 -0
- {geovisio-2.6.0.dist-info → geovisio-2.7.0.dist-info}/WHEEL +0 -0

--- a/geovisio/workers/runner_pictures.py
+++ b/geovisio/workers/runner_pictures.py
@@ -1,25 +1,23 @@
-from fs import open_fs
 from fs.path import dirname
 from PIL import Image, ImageOps
 from flask import current_app
+from geovisio import utils
+from geovisio.utils import db, sequences, upload_set
 import psycopg
+from psycopg.rows import dict_row
 from psycopg.sql import SQL
 import sentry_sdk
-from geovisio import utils
 from geovisio import errors
 from dataclasses import dataclass
 import logging
 from contextlib import contextmanager
 from enum import Enum
-from typing import Any
+from typing import Any, Optional
 import threading
 from uuid import UUID
 from croniter import croniter
-from typing import Optional
 from datetime import datetime, timezone
-
 import geovisio.utils.filesystems
-from geovisio.utils.sequences import update_headings
 
 log = logging.getLogger("geovisio.runner_pictures")
 
@@ -27,7 +25,7 @@ PICTURE_PROCESS_MAX_RETRY = 10 # Number of times a job will be retryed if there
 
 
 class PictureBackgroundProcessor(object):
-    def
+    def __init__(self, app):
         nb_threads = app.config["EXECUTOR_MAX_WORKERS"]
         self.enabled = nb_threads != 0
 
@@ -38,7 +36,7 @@ class PictureBackgroundProcessor(object):
         else:
             import sys
 
-            if "run" in sys.argv or "waitress" in sys.argv:  # hack not to display a frightening warning uselessly
+            if "run" in sys.argv or "waitress" in sys.argv or "gunicorn" in sys.argv:  # hack not to display a frightening warning uselessly
                 log.warning("No picture background processor run, no picture will be processed unless another separate worker is run")
                 log.warning("A separate process can be run with:")
                 log.warning("flask picture-worker")
@@ -48,34 +46,63 @@ class PictureBackgroundProcessor(object):
         Ask for a background picture process that will run until not pictures need to be processed
         """
         if self.enabled:
-            worker = PictureProcessor(
-            return self.executor.submit(worker.
+            worker = PictureProcessor(app=current_app)
+            return self.executor.submit(worker.process_jobs)
 
 
-background_processor = PictureBackgroundProcessor()
+# background_processor = PictureBackgroundProcessor()
 
 
 class ProcessTask(str, Enum):
     prepare = "prepare"
     delete = "delete"
+    dispatch = "dispatch"
+    finalize = "finalize"
 
 
 @dataclass
 class DbPicture:
-    id:
+    id: UUID
     metadata: dict
 
     def blurred_by_author(self):
         return self.metadata.get("blurredByAuthor", False)
 
 
+@dataclass
+class DbSequence:
+    id: UUID
+
+
+@dataclass
+class DbUploadSet:
+    id: UUID
+
+
 @dataclass
 class DbJob:
     reporting_conn: psycopg.Connection
-
-
+    job_history_id: UUID  # ID of the job in the job_history
+    job_queue_id: UUID  # ID in the job_queue
+    pic: Optional[DbPicture]
+    upload_set: Optional[DbUploadSet]
+    seq: Optional[DbSequence]
+
     task: ProcessTask
 
+    def label(self):
+        impacted_object = ""
+        if self.pic:
+            impacted_object = f"picture {self.pic.id}"
+        elif self.seq:
+            impacted_object = f"sequence {self.seq.id}"
+        elif self.upload_set:
+            impacted_object = f"upload set {self.upload_set.id}"
+        else:
+            impacted_object = "unknown object"
+
+        return f"{self.task} for {impacted_object}"
+
 
 def processPictureFiles(pic: DbPicture, config):
     """Generates the files associated with a sequence picture.
@@ -91,7 +118,7 @@ def processPictureFiles(pic: DbPicture, config):
     config : dict
         Flask app.config (passed as param to allow using ThreadPoolExecutor)
     """
-    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL")
+    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") is None
     fses = config["FILESYSTEMS"]
     fs = fses.permanent if skipBlur else fses.tmp
     picHdPath = utils.pictures.getHDPicturePath(pic.id)
@@ -156,31 +183,48 @@ class RecoverableProcessException(Exception):
         super().__init__(msg)
 
 
+class RetryLaterProcessException(Exception):
+    """Exception raised when we want to retry later, even if it's not an error"""
+
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 class PictureProcessor:
     stop: bool
     config: dict[Any, Any]
+    waiting_time: float
 
-    def __init__(self,
-        self.
+    def __init__(self, app, stop=True) -> None:
+        self.app = app
         self.stop = stop
         if threading.current_thread() is threading.main_thread():
             # if worker is in daemon mode, register signals to gracefully stop it
             self._register_signals()
         self.next_periodic_task_dt = None
+        self.cron = croniter(self.app.config["PICTURE_PROCESS_REFRESH_CRON"])
 
-
+        # Note: in tests, we don't want to wait between each picture processing
+        waiting_time = 0 if app.config.get("TESTING") is True else 1
+        self.waiting_time = waiting_time
+
+    def process_jobs(self):
         try:
-
-
-
-
-            if self.stop:
+            with self.app.app_context():
+                while True:
+                    if self.app.pool.closed and self.stop:
+                        # in some tests, the pool is closed before the worker is stopped, we check this here
                         return
-
-
-
+                    self.check_periodic_tasks()
+                    r = process_next_job(self.app)
+                    if not r:
+                        if self.stop:
+                            return
+                        # no more picture to process
+                        # wait a bit until there are some
+                        import time
 
-
+                        time.sleep(self.waiting_time)
 
         except:
             log.exception("Exiting thread")
@@ -200,15 +244,24 @@ class PictureProcessor:
         Check if a periodic task needs to be done, and do it if necessary
         This method ensure only one picture worker will do the needed periodic task
         """
-
-
-        self.next_periodic_task_dt = self.get_next_periodic_task_dt(
-
-
-
-
-
-
+        if self.next_periodic_task_dt is None:
+            with db.conn(self.app) as conn:
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+
+        if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+            with db.conn(self.app) as conn:
+                # since the next_periodic_task_dt can have been changed by another process, we check again that the task needs to be done
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+                if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+                    if not self.refresh_database():
+                        # another refresh is in progress, we'll check again later and ask for the next refresh date considering it's in progress
+                        self.next_periodic_task_dt = self.cron.get_next(datetime, datetime.now(timezone.utc))
+                        logging.getLogger("geovisio.periodic_task").info(
+                            f"Refresh in progress, checking after = {self.next_periodic_task_dt}"
+                        )
+
+    def get_next_periodic_task_dt(self, conn) -> datetime:
+        r = conn.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
         assert r  # the table always has exactly one row
 
         refreshed_at, db_time = r
@@ -216,42 +269,47 @@
        if refreshed_at is None:
             # if the db has never been updated, we need to update it now
             return current_time
-
-        cron = croniter(self.config["PICTURE_PROCESS_REFRESH_CRON"])
-
-        next_schedule_date = cron.get_next(datetime, refreshed_at)
+        next_schedule_date = self.cron.get_next(datetime, refreshed_at)
 
         # if the db time and the app time is not the same, we need to apply an offset on the scheduled time
         next_schedule_date += db_time - current_time
         logging.getLogger("geovisio.periodic_task").info(f"Next database refresh = {next_schedule_date}")
         return next_schedule_date
 
-    def refresh_database(self
+    def refresh_database(self):
         with sentry_sdk.start_transaction(op="task", name="refresh_database"):
             # Note: there is a mechanism in `sequences.update_pictures_grid` to ensure that only one refresh can be done at one time, and it will update the `refreshed_at` value
-
-            if updated:
-                self.next_periodic_task_dt = self.get_next_periodic_task_dt(db)
-            else:
-                # no update could be done because another process was doing it, check next time the scheduled time
-                self.next_periodic_task_dt = None
+            return utils.sequences.update_pictures_grid()
 
 
-def
+def process_next_job(app):
     with sentry_sdk.start_transaction(op="task", name="process_next_picture"):
-        with
+        with _get_next_job(app) as job:
             if job is None:
                 return False
-            if job.task == ProcessTask.prepare:
+            if job.task == ProcessTask.prepare and job.pic:
                 with sentry_sdk.start_span(description="Processing picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Processing picture {job.pic.id}"):
-
-
+                        # open another connection for reporting and queries
+                        processPictureFiles(job.pic, app.config)
+            elif job.task == ProcessTask.delete and job.pic:
                 with sentry_sdk.start_span(description="Deleting picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Deleting picture {job.pic.id}"):
-                        _delete_picture(job)
+                        _delete_picture(job.pic)
+            elif job.task == ProcessTask.delete and job.upload_set:
+                with sentry_sdk.start_span(description="Deleting upload set") as span:
+                    span.set_data("us_id", job.upload_set.id)
+                    with utils.time.log_elapsed(f"Deleting upload set {job.upload_set.id}"):
+                        _delete_upload_set(job.upload_set)
+            elif job.task == ProcessTask.dispatch and job.upload_set:
+                with utils.time.log_elapsed(f"Dispatching upload set {job.upload_set.id}"):
+                    upload_set.dispatch(job.upload_set.id)
+            elif job.task == ProcessTask.finalize and job.seq:
+                with utils.time.log_elapsed(f"Finalizing sequence {job.seq.id}"):
+                    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+                        sequences.finalize(cursor, job.seq.id)
             else:
                 raise RecoverableProcessException(f"Unhandled process task: {job.task}")
 
@@ -259,58 +317,67 @@ def process_next_picture(config):
 
 
 @contextmanager
-def
+def _get_next_job(app):
     """
-    Open a new connection and return the next
-    Note: the
+    Open a new connection and return the next job to process
+    Note: the job should be used as a context manager to close the connection when we stop using the returned job.
 
-    The new connection is needed because we lock the `
+    The new connection is needed because we lock the `job_queue` for the whole transaction for another worker not to process the same job
     """
     error = None
-    with
-        with locking_transaction.transaction():
-            r =
-                """
-
-
-                JOIN pictures p ON p.id = pictures_to_process.picture_id
+    with app.pool.connection() as locking_transaction:
+        with locking_transaction.transaction(), locking_transaction.cursor(row_factory=dict_row) as cursor:
+            r = cursor.execute(
+                """SELECT j.id, j.picture_id, j.upload_set_id, j.sequence_id, j.task, p.metadata
+                FROM job_queue j
+                LEFT JOIN pictures p ON p.id = j.picture_id
                 ORDER by
-
-
-                FOR UPDATE of
-                LIMIT 1
-                """
+                    j.nb_errors,
+                    j.ts
+                FOR UPDATE of j SKIP LOCKED
+                LIMIT 1"""
             ).fetchone()
             if r is None:
                 # Nothing to process
                 yield None
             else:
-                log.debug(f"Processing {r[
-
-                db_pic = DbPicture(id=
-
-
-
+                log.debug(f"Processing {r['id']}")
+
+                db_pic = DbPicture(id=r["picture_id"], metadata=r["metadata"]) if r["picture_id"] is not None else None
+                db_seq = DbSequence(id=r["sequence_id"]) if r["sequence_id"] is not None else None
+                db_upload_set = DbUploadSet(id=r["upload_set_id"]) if r["upload_set_id"] is not None else None
+
+                with app.pool.connection() as reporting_conn:
+                    job = _initialize_job(
+                        reporting_conn,
+                        job_queue_id=r["id"],
+                        db_pic=db_pic,
+                        db_seq=db_seq,
+                        db_upload_set=db_upload_set,
+                        task=ProcessTask(r["task"]),
+                    )
                    try:
                        yield job

                        # Finalize the picture process, set the picture status and remove the picture from the queue process
-
-                        log.debug(f"
+                        _finalize_job(locking_transaction, job)
+                        log.debug(f"Job {job.label()} processed")
                    except RecoverableProcessException as e:
-                        _mark_process_as_error(locking_transaction, job, e,
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True)
+                    except RetryLaterProcessException as e:
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True, mark_as_error=False)
                    except InterruptedError as interruption:
-                        log.error(f"Interruption received, stoping job {job.
+                        log.error(f"Interruption received, stoping job {job.label()}")
                        # starts a new connection, since the current one can be corrupted by the exception
-                        with
-                            _mark_process_as_error(t, job, interruption,
+                        with app.pool.connection() as t:
+                            _mark_process_as_error(t, job, interruption, recoverable=True)
                        error = interruption
                    except Exception as e:
-                        log.exception(f"Impossible to finish job {job.
-                        _mark_process_as_error(locking_transaction, job, e,
+                        log.exception(f"Impossible to finish job {job.label()}")
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=False)

                        # try to finalize the sequence anyway
-
+                        _finalize_sequence(job)
                        error = e

        # we raise an error after the transaction has been comited to be sure to have the state persisted in the database
@@ -318,163 +385,144 @@ def _get_next_picture_to_process(config):
            raise error
 
 
-def
-
-
-        SELECT sp.seq_id AS id FROM sequences_pictures AS sp
-        WHERE sp.pic_id = %(id)s
-        """,
-        {"id": job.pic.id},
-    ).fetchone()
-    if not r:
-        raise Exception(f"impossible to find sequence associated to picture {job.pic.id}")
-
-    seqId = r[0]
-
-    is_sequence_finalized = _is_sequence_finalized(job.reporting_conn, seqId)
-    if not is_sequence_finalized:
-        log.debug("sequence not finalized")
+def _finalize_sequence(job: DbJob):
+    # on picture preparation finalization, we add a sequence/upload_set finalization job
+    if job.task != "prepare" or not job.pic:
         return
 
-    with
-
-
-
-
-        # Complete missing headings in pictures
-        update_headings(job.reporting_conn, seqId)
-
-        # Change sequence database status in DB
-        # Also generates data in computed columns
-        job.reporting_conn.execute(
-            """WITH
-            aggregated_pictures AS (
-                SELECT
-                    sp.seq_id,
-                    MIN(p.ts::DATE) AS day,
-                    ARRAY_AGG(DISTINCT TRIM(
-                        CONCAT(p.metadata->>'make', ' ', p.metadata->>'model')
-                    )) AS models,
-                    ARRAY_AGG(DISTINCT p.metadata->>'type') AS types
-                FROM sequences_pictures sp
-                JOIN pictures p ON sp.pic_id = p.id
-                WHERE sp.seq_id = %(seq)s
-                GROUP BY sp.seq_id
-            )
-            UPDATE sequences
-            SET
-                status = 'ready',
-                geom = compute_sequence_geom(id),
-                bbox = compute_sequence_bbox(id),
-                computed_type = CASE WHEN array_length(types, 1) = 1 THEN types[1] ELSE NULL END,
-                computed_model = CASE WHEN array_length(models, 1) = 1 THEN models[1] ELSE NULL END,
-                computed_capture_date = day
-            FROM aggregated_pictures
-            WHERE id = %(seq)s
-            """,
-            {"seq": seqId},
-        )
-
-        log.info(f"Sequence {seqId} is ready")
+    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+        r = cursor.execute(
+            "SELECT upload_set_id, seq_id FROM pictures p LEFT JOIN sequences_pictures sp on sp.pic_id = p.id WHERE p.id = %(pic_id)s",
+            {"pic_id": job.pic.id},
+        ).fetchone()
 
+        if not r or not r["seq_id"]:
+            # if the associated upload set has not yet been dispatch, the picture might not be associated to a sequence
+            return
 
-
-
-
-        Eg. we don't want pictures with preparing_status = 'not-processed' and at least one 'prepared'
-        """
-        statuses = db.execute(
-            """SELECT DISTINCT(preparing_status) FROM pictures p
-            JOIN sequences_pictures sp ON sp.pic_id = p.id
-            WHERE
-                sp.seq_id = %(id)s
-                AND p.preparing_status <> 'broken'
-            ;
-            """,
-            {"id": seq_id},
-        ).fetchall()
+        if r["upload_set_id"]:
+            # if the picture is part of the upload set, the sequence finalization will be done when the upload set is dispatched
+            return
 
-
+        # Add a task to finalize the sequence/upload_set
+        sequences.add_finalization_job(cursor, r["seq_id"])
 
 
-def
+def _finalize_job(conn, job: DbJob):
    job.reporting_conn.execute(
        "UPDATE job_history SET finished_at = CURRENT_TIMESTAMP WHERE id = %(id)s",
-        {"id": job.
+        {"id": job.job_history_id},
    )
-    if job.task == ProcessTask.prepare:
+    if job.task == ProcessTask.prepare and job.pic:
        # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
        job.reporting_conn.execute(
            "UPDATE pictures SET status = 'ready', preparing_status = 'prepared' WHERE id = %(pic_id)s",
            {"pic_id": job.pic.id},
        )
 
-        #
-
-    elif job.task == ProcessTask.delete:
-
-        db.execute(
+        # Add a task to finalize the sequence
+        _finalize_sequence(job)
+    elif job.task == ProcessTask.delete and job.pic:
+        conn.execute(
            "DELETE FROM pictures WHERE id = %(pic_id)s",
            {"pic_id": job.pic.id},
        )
-
-        "DELETE FROM
-
-    )
+    elif job.task == ProcessTask.delete and job.upload_set:
+        conn.execute(SQL("DELETE FROM upload_sets WHERE id = %s"), [job.upload_set.id])
+
+    conn.execute("DELETE FROM job_queue WHERE id = %(job_id)s", {"job_id": job.job_queue_id})
 
 
-def
+def _initialize_job(
+    reporting_conn: psycopg.Connection,
+    job_queue_id: UUID,
+    db_pic: Optional[DbPicture],
+    db_seq: Optional[DbSequence],
+    db_upload_set: Optional[DbUploadSet],
+    task: ProcessTask,
+) -> DbJob:
    r = reporting_conn.execute(
-        """INSERT INTO job_history(picture_id,
-        VALUES (%(
-        RETURNING id
-
-
+        """INSERT INTO job_history(job_id, picture_id, sequence_id, upload_set_id, job_task)
+        VALUES (%(job_id)s, %(pic_id)s, %(seq_id)s, %(us_id)s, %(task)s)
+        RETURNING id""",
+        {
+            "job_id": job_queue_id,
+            "pic_id": db_pic.id if db_pic else None,
+            "seq_id": db_seq.id if db_seq else None,
+            "us_id": db_upload_set.id if db_upload_set else None,
+            "task": task.value,
+        },
    ).fetchone()
 
    if not r:
        raise Exception("impossible to insert task in database")
-
+
+    return DbJob(
+        reporting_conn=reporting_conn,
+        job_queue_id=job_queue_id,
+        pic=db_pic,
+        seq=db_seq,
+        upload_set=db_upload_set,
+        task=task,
+        job_history_id=r[0],
+    )
 
 
-def _mark_process_as_error(
+def _mark_process_as_error(conn, job: DbJob, e: Exception, recoverable: bool = False, mark_as_error: bool = True):
    job.reporting_conn.execute(
        """UPDATE job_history SET
            error = %(err)s, finished_at = CURRENT_TIMESTAMP
        WHERE id = %(id)s""",
-        {"err": str(e), "id": job.
+        {"err": str(e), "id": job.job_history_id},
    )
    if recoverable:
-
-
-
-
-
-
-
-
-
-
-
+        if mark_as_error:
+            nb_error = conn.execute(
+                """
+                UPDATE job_queue SET
+                    nb_errors = nb_errors + 1
+                WHERE id = %(id)s
+                RETURNING nb_errors""",
+                {"err": str(e), "id": job.job_queue_id},
+            ).fetchone()
+            if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
+                logging.info(f"Job {job.label()} has failed {nb_error} times, we stop trying to process it.")
+                recoverable = False
+        else:
+            # it's not a real error, we just want to retry later
+            conn.execute(
+                SQL("UPDATE job_queue SET ts = NOW() WHERE id = %(id)s"),
+                {"err": str(e), "id": job.job_queue_id},
+            )
 
    if not recoverable:
        # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
-        job.
-
-
-
-
-
-
-
-
-            DELETE FROM pictures_to_process
-            WHERE picture_id = %(id)s""",
-            {"id": job.pic.id},
-        )
+        if job.task == "prepare" and job.pic:
+            job.reporting_conn.execute(
+                """UPDATE pictures SET
+                    preparing_status = 'broken', status = 'broken'
+                WHERE id = %(id)s""",
+                {"id": job.pic.id},
+            )
+        # on unrecoverable error, we remove the job from the queue
+        conn.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job.job_queue_id})
 
 
-def _delete_picture(
+def _delete_picture(pic: DbPicture):
    """Delete a picture from the filesystem"""
-    log.debug(f"Deleting picture files {
-    utils.pictures.removeAllFiles(
+    log.debug(f"Deleting picture files {pic.id}")
+    utils.pictures.removeAllFiles(pic.id)
+
+
+def _delete_upload_set(upload_set: DbUploadSet):
+    """Delete an upload set
+    We do this in the job queue since we want to wait for all its pictures to be deleted
+    """
+    with db.conn(current_app) as conn:
+        with conn.transaction(), conn.cursor() as cursor:
+            # we want to wait for all pictures to be deleted
+            has_more_pictures = cursor.execute("SELECT 1 FROM pictures WHERE upload_set_id = %s LIMIT 1", [upload_set.id]).fetchone()
+            if has_more_pictures and has_more_pictures[0]:
+                logging.info(f"More pictures to be deleted, upload_set {upload_set.id} will be deleted later")
+                raise RetryLaterProcessException("More pictures to be deleted, upload_set will be deleted later")
+            # Note: the real deletion will be done on job completion so the lock is released