geovisio-2.6.0-py3-none-any.whl → geovisio-2.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geovisio/__init__.py +36 -7
- geovisio/admin_cli/cleanup.py +2 -2
- geovisio/admin_cli/db.py +1 -4
- geovisio/config_app.py +40 -1
- geovisio/db_migrations.py +24 -3
- geovisio/templates/main.html +13 -13
- geovisio/templates/viewer.html +3 -3
- geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/de/LC_MESSAGES/messages.po +804 -0
- geovisio/translations/el/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/el/LC_MESSAGES/messages.po +685 -0
- geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/en/LC_MESSAGES/messages.po +738 -0
- geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
- geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
- geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
- geovisio/translations/hu/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/hu/LC_MESSAGES/messages.po +773 -0
- geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
- geovisio/translations/messages.pot +694 -0
- geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
- geovisio/translations/nl/LC_MESSAGES/messages.po +602 -0
- geovisio/utils/__init__.py +1 -1
- geovisio/utils/auth.py +50 -11
- geovisio/utils/db.py +65 -0
- geovisio/utils/excluded_areas.py +83 -0
- geovisio/utils/extent.py +30 -0
- geovisio/utils/fields.py +1 -1
- geovisio/utils/filesystems.py +0 -1
- geovisio/utils/link.py +14 -0
- geovisio/utils/params.py +20 -0
- geovisio/utils/pictures.py +110 -88
- geovisio/utils/reports.py +171 -0
- geovisio/utils/sequences.py +262 -126
- geovisio/utils/tokens.py +37 -42
- geovisio/utils/upload_set.py +642 -0
- geovisio/web/auth.py +37 -37
- geovisio/web/collections.py +304 -304
- geovisio/web/configuration.py +14 -0
- geovisio/web/docs.py +276 -15
- geovisio/web/excluded_areas.py +377 -0
- geovisio/web/items.py +169 -112
- geovisio/web/map.py +104 -36
- geovisio/web/params.py +69 -26
- geovisio/web/pictures.py +14 -31
- geovisio/web/reports.py +399 -0
- geovisio/web/rss.py +13 -7
- geovisio/web/stac.py +129 -134
- geovisio/web/tokens.py +98 -109
- geovisio/web/upload_set.py +771 -0
- geovisio/web/users.py +100 -73
- geovisio/web/utils.py +28 -9
- geovisio/workers/runner_pictures.py +241 -207
- {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/METADATA +17 -14
- geovisio-2.7.1.dist-info/RECORD +70 -0
- {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/WHEEL +1 -1
- geovisio-2.6.0.dist-info/RECORD +0 -41
- {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/LICENSE +0 -0
--- geovisio/workers/runner_pictures.py (2.6.0)
+++ geovisio/workers/runner_pictures.py (2.7.1)
@@ -1,25 +1,23 @@
-from fs import open_fs
 from fs.path import dirname
 from PIL import Image, ImageOps
 from flask import current_app
+from geovisio import utils
+from geovisio.utils import db, sequences, upload_set
 import psycopg
+from psycopg.rows import dict_row
 from psycopg.sql import SQL
 import sentry_sdk
-from geovisio import utils
 from geovisio import errors
 from dataclasses import dataclass
 import logging
 from contextlib import contextmanager
 from enum import Enum
-from typing import Any
+from typing import Any, Optional
 import threading
 from uuid import UUID
 from croniter import croniter
-from typing import Optional
 from datetime import datetime, timezone
-
 import geovisio.utils.filesystems
-from geovisio.utils.sequences import update_headings
 
 log = logging.getLogger("geovisio.runner_pictures")
 
@@ -27,7 +25,7 @@ PICTURE_PROCESS_MAX_RETRY = 10  # Number of times a job will be retryed if there
 
 
 class PictureBackgroundProcessor(object):
-    def
+    def __init__(self, app):
         nb_threads = app.config["EXECUTOR_MAX_WORKERS"]
         self.enabled = nb_threads != 0
 
@@ -38,7 +36,7 @@ class PictureBackgroundProcessor(object):
         else:
             import sys
 
-            if "run" in sys.argv or "waitress" in sys.argv:  # hack not to display a frightening warning uselessly
+            if "run" in sys.argv or "waitress" in sys.argv or "gunicorn" in sys.argv:  # hack not to display a frightening warning uselessly
                 log.warning("No picture background processor run, no picture will be processed unless another separate worker is run")
                 log.warning("A separate process can be run with:")
                 log.warning("flask picture-worker")
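The `EXECUTOR_MAX_WORKERS` check above is what lets operators disable in-process workers entirely (set it to 0) and run `flask picture-worker` as a separate process instead. A minimal sketch of this disable-able thread-pool pattern, using only the standard library (class and method names here are illustrative, not GeoVisio's API):

```python
from concurrent.futures import ThreadPoolExecutor


class BackgroundProcessor:
    """Sketch: a worker launcher that can be disabled with 0 threads."""

    def __init__(self, nb_threads: int):
        self.enabled = nb_threads != 0  # 0 workers means "run a separate worker process"
        self.executor = ThreadPoolExecutor(max_workers=nb_threads) if self.enabled else None

    def process_pictures(self, worker_fn):
        # submit() returns a Future; callers may ignore it or wait on it in tests
        if self.enabled:
            return self.executor.submit(worker_fn)


# usage: returns a Future when enabled, None when disabled
proc = BackgroundProcessor(nb_threads=2)
future = proc.process_pictures(lambda: "processed")
if future:
    print(future.result())
```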
@@ -48,34 +46,63 @@ class PictureBackgroundProcessor(object):
         Ask for a background picture process that will run until not pictures need to be processed
         """
         if self.enabled:
-            worker = PictureProcessor(
-            return self.executor.submit(worker.
+            worker = PictureProcessor(app=current_app)
+            return self.executor.submit(worker.process_jobs)
 
 
-background_processor = PictureBackgroundProcessor()
+# background_processor = PictureBackgroundProcessor()
 
 
 class ProcessTask(str, Enum):
     prepare = "prepare"
     delete = "delete"
+    dispatch = "dispatch"
+    finalize = "finalize"
 
 
 @dataclass
 class DbPicture:
-    id:
+    id: UUID
     metadata: dict
 
     def blurred_by_author(self):
         return self.metadata.get("blurredByAuthor", False)
 
 
+@dataclass
+class DbSequence:
+    id: UUID
+
+
+@dataclass
+class DbUploadSet:
+    id: UUID
+
+
 @dataclass
 class DbJob:
     reporting_conn: psycopg.Connection
-
-
+    job_history_id: UUID  # ID of the job in the job_history
+    job_queue_id: UUID  # ID in the job_queue
+    pic: Optional[DbPicture]
+    upload_set: Optional[DbUploadSet]
+    seq: Optional[DbSequence]
+
     task: ProcessTask
 
+    def label(self):
+        impacted_object = ""
+        if self.pic:
+            impacted_object = f"picture {self.pic.id}"
+        elif self.seq:
+            impacted_object = f"sequence {self.seq.id}"
+        elif self.upload_set:
+            impacted_object = f"upload set {self.upload_set.id}"
+        else:
+            impacted_object = "unknown object"
+
+        return f"{self.task} for {impacted_object}"
+
 
 def processPictureFiles(pic: DbPicture, config):
     """Generates the files associated with a sequence picture.
@@ -91,7 +118,7 @@ def processPictureFiles(pic: DbPicture, config):
     config : dict
         Flask app.config (passed as param to allow using ThreadPoolExecutor)
     """
-    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL")
+    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") is None
     fses = config["FILESYSTEMS"]
     fs = fses.permanent if skipBlur else fses.tmp
     picHdPath = utils.pictures.getHDPicturePath(pic.id)
@@ -156,31 +183,48 @@ class RecoverableProcessException(Exception):
         super().__init__(msg)
 
 
+class RetryLaterProcessException(Exception):
+    """Exception raised when we want to retry later, even if it's not an error"""
+
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 class PictureProcessor:
     stop: bool
     config: dict[Any, Any]
+    waiting_time: float
 
-    def __init__(self,
-        self.
+    def __init__(self, app, stop=True) -> None:
+        self.app = app
         self.stop = stop
         if threading.current_thread() is threading.main_thread():
            # if worker is in daemon mode, register signals to gracefully stop it
            self._register_signals()
         self.next_periodic_task_dt = None
+        self.cron = croniter(self.app.config["PICTURE_PROCESS_REFRESH_CRON"])
 
-
+        # Note: in tests, we don't want to wait between each picture processing
+        waiting_time = 0 if app.config.get("TESTING") is True else 1
+        self.waiting_time = waiting_time
+
+    def process_jobs(self):
         try:
-
-
-
-
-            if self.stop:
+            with self.app.app_context():
+                while True:
+                    if self.app.pool.closed and self.stop:
+                        # in some tests, the pool is closed before the worker is stopped, we check this here
                         return
-
-
-
+                    self.check_periodic_tasks()
+                    r = process_next_job(self.app)
+                    if not r:
+                        if self.stop:
+                            return
+                        # no more picture to process
+                        # wait a bit until there are some
+                        import time
 
-
+                        time.sleep(self.waiting_time)
 
         except:
             log.exception("Exiting thread")
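`PictureProcessor` only registers its stop handlers when running on the main thread, since Python restricts `signal.signal` to the main thread. The diff does not show `_register_signals` itself, so the following is a hedged sketch of what such graceful-stop wiring typically looks like, with illustrative names:

```python
import signal
import threading
import time


class Worker:
    """Sketch: poll for work until a stop signal arrives."""

    def __init__(self):
        self.stop_requested = False
        # signal handlers can only be installed from the main thread
        if threading.current_thread() is threading.main_thread():
            signal.signal(signal.SIGTERM, self._on_signal)
            signal.signal(signal.SIGINT, self._on_signal)

    def _on_signal(self, signum, frame):
        # just flag the loop; it exits at the next safe point instead of mid-job
        self.stop_requested = True

    def run(self, process_one):
        while not self.stop_requested:
            if not process_one():  # process_one returns False when the queue is empty
                time.sleep(1)
```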
@@ -200,15 +244,24 @@ class PictureProcessor:
         Check if a periodic task needs to be done, and do it if necessary
         This method ensure only one picture worker will do the needed periodic task
         """
-
-
-        self.next_periodic_task_dt = self.get_next_periodic_task_dt(
-
-
-
-
-
-
+        if self.next_periodic_task_dt is None:
+            with db.conn(self.app) as conn:
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+
+        if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+            with db.conn(self.app) as conn:
+                # since the next_periodic_task_dt can have been changed by another process, we check again that the task needs to be done
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+                if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+                    if not self.refresh_database():
+                        # another refresh is in progress, we'll check again later and ask for the next refresh date considering it's in progress
+                        self.next_periodic_task_dt = self.cron.get_next(datetime, datetime.now(timezone.utc))
+                        logging.getLogger("geovisio.periodic_task").info(
+                            f"Refresh in progress, checking after = {self.next_periodic_task_dt}"
+                        )
+
+    def get_next_periodic_task_dt(self, conn) -> datetime:
+        r = conn.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
         assert r  # the table always has exactly one row
 
         refreshed_at, db_time = r
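The periodic-refresh logic above derives the next run by feeding the last `refreshed_at` into the cron iterator, then correcting for any skew between the database clock and the application clock. A small self-contained example of that computation (the cron expression is illustrative; GeoVisio reads it from `PICTURE_PROCESS_REFRESH_CRON`):

```python
from datetime import datetime, timedelta, timezone
from croniter import croniter

refreshed_at = datetime(2024, 1, 1, 3, 30, tzinfo=timezone.utc)
cron = croniter("0 4 * * *")  # illustrative schedule: refresh daily at 04:00 UTC

# next scheduled refresh strictly after the last one
next_run = cron.get_next(datetime, refreshed_at)
assert next_run == datetime(2024, 1, 1, 4, 0, tzinfo=timezone.utc)

# if the database clock runs ahead of the app clock, shift the schedule by the skew
now = datetime.now(timezone.utc)
db_time = now + timedelta(seconds=5)  # pretend NOW() in Postgres is 5s ahead
next_run += db_time - now
```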
@@ -216,42 +269,42 @@ class PictureProcessor:
         if refreshed_at is None:
             # if the db has never been updated, we need to update it now
             return current_time
-
-        cron = croniter(self.config["PICTURE_PROCESS_REFRESH_CRON"])
-
-        next_schedule_date = cron.get_next(datetime, refreshed_at)
+        next_schedule_date = self.cron.get_next(datetime, refreshed_at)
 
         # if the db time and the app time is not the same, we need to apply an offset on the scheduled time
         next_schedule_date += db_time - current_time
         logging.getLogger("geovisio.periodic_task").info(f"Next database refresh = {next_schedule_date}")
         return next_schedule_date
 
-    def refresh_database(self
+    def refresh_database(self):
         with sentry_sdk.start_transaction(op="task", name="refresh_database"):
             # Note: there is a mechanism in `sequences.update_pictures_grid` to ensure that only one refresh can be done at one time, and it will update the `refreshed_at` value
-
-            if updated:
-                self.next_periodic_task_dt = self.get_next_periodic_task_dt(db)
-            else:
-                # no update could be done because another process was doing it, check next time the scheduled time
-                self.next_periodic_task_dt = None
+            return utils.sequences.update_pictures_grid()
 
 
-def
+def process_next_job(app):
     with sentry_sdk.start_transaction(op="task", name="process_next_picture"):
-        with
+        with _get_next_job(app) as job:
             if job is None:
                 return False
-            if job.task == ProcessTask.prepare:
+            if job.task == ProcessTask.prepare and job.pic:
                 with sentry_sdk.start_span(description="Processing picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Processing picture {job.pic.id}"):
-
-
+                        # open another connection for reporting and queries
+                        processPictureFiles(job.pic, app.config)
+            elif job.task == ProcessTask.delete and job.pic:
                 with sentry_sdk.start_span(description="Deleting picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Deleting picture {job.pic.id}"):
-                        _delete_picture(job)
+                        _delete_picture(job.pic)
+            elif job.task == ProcessTask.dispatch and job.upload_set:
+                with utils.time.log_elapsed(f"Dispatching upload set {job.upload_set.id}"):
+                    upload_set.dispatch(job.upload_set.id)
+            elif job.task == ProcessTask.finalize and job.seq:
+                with utils.time.log_elapsed(f"Finalizing sequence {job.seq.id}"):
+                    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+                        sequences.finalize(cursor, job.seq.id)
             else:
                 raise RecoverableProcessException(f"Unhandled process task: {job.task}")
 
@@ -259,58 +312,70 @@ def process_next_picture(config):
 
 
 @contextmanager
-def
+def _get_next_job(app):
     """
-    Open a new connection and return the next
-    Note: the
+    Open a new connection and return the next job to process
+    Note: the job should be used as a context manager to close the connection when we stop using the returned job.
 
-    The new connection is needed because we lock the `
+    The new connection is needed because we lock the `job_queue` for the whole transaction for another worker not to process the same job
     """
     error = None
-    with
-        with locking_transaction.transaction():
-            r =
-                """
-
-
-                JOIN pictures p ON p.id = pictures_to_process.picture_id
+    with app.pool.connection() as locking_transaction:
+        with locking_transaction.transaction(), locking_transaction.cursor(row_factory=dict_row) as cursor:
+            r = cursor.execute(
+                """SELECT j.id, j.picture_id, j.upload_set_id, j.sequence_id, j.task, j.picture_to_delete_id, p.metadata
+                FROM job_queue j
+                LEFT JOIN pictures p ON p.id = j.picture_id
                 ORDER by
-
-
-                FOR UPDATE of
-                LIMIT 1
-                """
+                    j.nb_errors,
+                    j.ts
+                FOR UPDATE of j SKIP LOCKED
+                LIMIT 1"""
             ).fetchone()
             if r is None:
                 # Nothing to process
                 yield None
             else:
-                log.debug(f"Processing {r[
-
-
-
-
-
+                log.debug(f"Processing {r['id']}")
+
+                # picture id can either be in `picture_id` (and it will be a foreign key to picture) or in `picture_to_delete_id`
+                # (and it will not a foreign key since the picture's row will already have been deleted from the db)
+                pic_id = r["picture_id"] or r["picture_to_delete_id"]
+                db_pic = DbPicture(id=pic_id, metadata=r["metadata"]) if pic_id is not None else None
+                db_seq = DbSequence(id=r["sequence_id"]) if r["sequence_id"] is not None else None
+                db_upload_set = DbUploadSet(id=r["upload_set_id"]) if r["upload_set_id"] is not None else None
+
+                with app.pool.connection() as reporting_conn:
+                    job = _initialize_job(
+                        reporting_conn,
+                        job_queue_id=r["id"],
+                        db_pic=db_pic,
+                        db_seq=db_seq,
+                        db_upload_set=db_upload_set,
+                        task=ProcessTask(r["task"]),
+                    )
                     try:
                         yield job
 
                         # Finalize the picture process, set the picture status and remove the picture from the queue process
-
-                        log.debug(f"
+                        _finalize_job(locking_transaction, job)
+                        log.debug(f"Job {job.label()} processed")
                     except RecoverableProcessException as e:
-                        _mark_process_as_error(locking_transaction, job, e,
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True)
+                    except RetryLaterProcessException as e:
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True, mark_as_error=False)
                     except InterruptedError as interruption:
-                        log.error(f"Interruption received, stoping job {job.
+                        log.error(f"Interruption received, stoping job {job.label()}")
                         # starts a new connection, since the current one can be corrupted by the exception
-                        with
-                            _mark_process_as_error(t, job, interruption,
+                        with app.pool.connection() as t:
+                            _mark_process_as_error(t, job, interruption, recoverable=True)
                         error = interruption
                     except Exception as e:
-                        log.exception(f"Impossible to finish job {job.
-                        _mark_process_as_error(locking_transaction, job, e,
+                        log.exception(f"Impossible to finish job {job.label()}")
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=False)
 
                         # try to finalize the sequence anyway
-
+                        _finalize_sequence(job)
                         error = e
 
     # we raise an error after the transaction has been comited to be sure to have the state persisted in the database
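The `FOR UPDATE of j SKIP LOCKED ... LIMIT 1` query added above is the usual Postgres idiom for a multi-worker job queue: each worker locks the row it claims for the length of its transaction, and competing workers skip locked rows instead of blocking on them. A standalone sketch of the idiom with psycopg 3 (the connection string, table, and processing step are placeholders, not the package's schema):

```python
import psycopg
from psycopg.rows import dict_row


def claim_next_job(conninfo: str):
    """Claim one queued job; hold the row lock for the whole transaction."""
    with psycopg.connect(conninfo) as conn:
        with conn.transaction(), conn.cursor(row_factory=dict_row) as cur:
            job = cur.execute(
                """SELECT id, task FROM job_queue
                   ORDER BY ts
                   FOR UPDATE SKIP LOCKED
                   LIMIT 1"""
            ).fetchone()
            if job is None:
                return None  # queue empty, or every row is claimed by another worker
            # ... process the job here, then delete it while still holding the lock
            cur.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job["id"]})
            return job
```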
@@ -318,163 +383,132 @@ def _get_next_picture_to_process(config):
     raise error
 
 
-def
-
-
-        SELECT sp.seq_id AS id FROM sequences_pictures AS sp
-        WHERE sp.pic_id = %(id)s
-        """,
-        {"id": job.pic.id},
-    ).fetchone()
-    if not r:
-        raise Exception(f"impossible to find sequence associated to picture {job.pic.id}")
-
-    seqId = r[0]
-
-    is_sequence_finalized = _is_sequence_finalized(job.reporting_conn, seqId)
-    if not is_sequence_finalized:
-        log.debug("sequence not finalized")
+def _finalize_sequence(job: DbJob):
+    # on picture preparation finalization, we add a sequence/upload_set finalization job
+    if job.task != "prepare" or not job.pic:
         return
 
-    with
-
-
-
-
-        # Complete missing headings in pictures
-        update_headings(job.reporting_conn, seqId)
-
-        # Change sequence database status in DB
-        # Also generates data in computed columns
-        job.reporting_conn.execute(
-            """WITH
-                aggregated_pictures AS (
-                    SELECT
-                        sp.seq_id,
-                        MIN(p.ts::DATE) AS day,
-                        ARRAY_AGG(DISTINCT TRIM(
-                            CONCAT(p.metadata->>'make', ' ', p.metadata->>'model')
-                        )) AS models,
-                        ARRAY_AGG(DISTINCT p.metadata->>'type') AS types
-                    FROM sequences_pictures sp
-                    JOIN pictures p ON sp.pic_id = p.id
-                    WHERE sp.seq_id = %(seq)s
-                    GROUP BY sp.seq_id
-                )
-                UPDATE sequences
-                SET
-                    status = 'ready',
-                    geom = compute_sequence_geom(id),
-                    bbox = compute_sequence_bbox(id),
-                    computed_type = CASE WHEN array_length(types, 1) = 1 THEN types[1] ELSE NULL END,
-                    computed_model = CASE WHEN array_length(models, 1) = 1 THEN models[1] ELSE NULL END,
-                    computed_capture_date = day
-                FROM aggregated_pictures
-                WHERE id = %(seq)s
-            """,
-            {"seq": seqId},
-        )
+    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+        r = cursor.execute(
+            "SELECT upload_set_id, seq_id FROM pictures p LEFT JOIN sequences_pictures sp on sp.pic_id = p.id WHERE p.id = %(pic_id)s",
+            {"pic_id": job.pic.id},
+        ).fetchone()
 
-
+        if not r or not r["seq_id"]:
+            # if the associated upload set has not yet been dispatch, the picture might not be associated to a sequence
+            return
 
+        if r["upload_set_id"]:
+            # if the picture is part of the upload set, the sequence finalization will be done when the upload set is dispatched
+            return
 
-
-
-    We consider a sequence as ready, if all pictures have been processed and there is at least one correctly processed picture
-    Eg. we don't want pictures with preparing_status = 'not-processed' and at least one 'prepared'
-    """
-    statuses = db.execute(
-        """SELECT DISTINCT(preparing_status) FROM pictures p
-        JOIN sequences_pictures sp ON sp.pic_id = p.id
-        WHERE
-            sp.seq_id = %(id)s
-            AND p.preparing_status <> 'broken'
-        ;
-        """,
-        {"id": seq_id},
-    ).fetchall()
+        # Add a task to finalize the sequence/upload_set
+        sequences.add_finalization_job(cursor, r["seq_id"])
 
-    return [("prepared",)] == statuses
 
-
-
+
+def _finalize_job(conn, job: DbJob):
+    try:
+        # we try to see if our job_history row is still here.
+        # It can have been removed if the object this job was preparing has been deleted during the process (since the job_history table store foreign keys)
+        job.reporting_conn.execute("SELECT id FROM job_history WHERE id = %(id)s FOR UPDATE NOWAIT", {"id": job.job_history_id})
+    except psycopg.errors.LockNotAvailable:
+        logging.info(
+            f"The job {job.job_history_id} ({job.label()}) has likely been deleted during the process (it can happen if the picture/upload_set/sequence has been deleted by another process), we don't need to finalize it"
+        )
+        return
     job.reporting_conn.execute(
         "UPDATE job_history SET finished_at = CURRENT_TIMESTAMP WHERE id = %(id)s",
-        {"id": job.
+        {"id": job.job_history_id},
     )
-    if job.task == ProcessTask.prepare:
+    if job.task == ProcessTask.prepare and job.pic:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
         job.reporting_conn.execute(
             "UPDATE pictures SET status = 'ready', preparing_status = 'prepared' WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )
 
-        #
-
-
-
-        db.execute(
-            "DELETE FROM pictures WHERE id = %(pic_id)s",
-            {"pic_id": job.pic.id},
-        )
-        db.execute(
-            "DELETE FROM pictures_to_process WHERE picture_id = %(pic_id)s",
-            {"pic_id": job.pic.id},
-        )
+        # Add a task to finalize the sequence
+        _finalize_sequence(job)
+
+    conn.execute("DELETE FROM job_queue WHERE id = %(job_id)s", {"job_id": job.job_queue_id})
 
 
-def
+def _initialize_job(
+    reporting_conn: psycopg.Connection,
+    job_queue_id: UUID,
+    db_pic: Optional[DbPicture],
+    db_seq: Optional[DbSequence],
+    db_upload_set: Optional[DbUploadSet],
+    task: ProcessTask,
+) -> DbJob:
     r = reporting_conn.execute(
-        """INSERT INTO job_history(picture_id,
-        VALUES (%(
-        RETURNING id
-
-
+        """INSERT INTO job_history(job_id, picture_id, sequence_id, upload_set_id, picture_to_delete_id, job_task)
+        VALUES (%(job_id)s, %(pic_id)s, %(seq_id)s, %(us_id)s, %(pic_to_delete)s, %(task)s)
+        RETURNING id""",
+        {
+            "job_id": job_queue_id,
+            "pic_id": db_pic.id if db_pic and task != ProcessTask.delete else None,
+            "seq_id": db_seq.id if db_seq else None,
+            "pic_to_delete": db_pic.id if db_pic and task == ProcessTask.delete else None,
+            "us_id": db_upload_set.id if db_upload_set else None,
+            "task": task.value,
+        },
     ).fetchone()
 
     if not r:
         raise Exception("impossible to insert task in database")
-
+
+    return DbJob(
+        reporting_conn=reporting_conn,
+        job_queue_id=job_queue_id,
+        pic=db_pic,
+        seq=db_seq,
+        upload_set=db_upload_set,
+        task=task,
+        job_history_id=r[0],
+    )
 
 
-def _mark_process_as_error(
+def _mark_process_as_error(conn, job: DbJob, e: Exception, recoverable: bool = False, mark_as_error: bool = True):
     job.reporting_conn.execute(
         """UPDATE job_history SET
            error = %(err)s, finished_at = CURRENT_TIMESTAMP
            WHERE id = %(id)s""",
-        {"err": str(e), "id": job.
+        {"err": str(e), "id": job.job_history_id},
     )
     if recoverable:
-
-
-
-
-
-
-
-
-
-
-
+        if mark_as_error:
+            nb_error = conn.execute(
+                """UPDATE job_queue SET
+                   nb_errors = nb_errors + 1
+                   WHERE id = %(id)s
+                   RETURNING nb_errors""",
+                {"err": str(e), "id": job.job_queue_id},
+            ).fetchone()
+            if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
+                logging.info(f"Job {job.label()} has failed {nb_error} times, we stop trying to process it.")
+                recoverable = False
+        else:
+            # it's not a real error, we just want to retry later
+            conn.execute(
+                SQL("UPDATE job_queue SET ts = NOW() WHERE id = %(id)s"),
+                {"err": str(e), "id": job.job_queue_id},
+            )
 
     if not recoverable:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
-        job.
-
-
-
-
-
-
-
-
-        DELETE FROM pictures_to_process
-        WHERE picture_id = %(id)s""",
-        {"id": job.pic.id},
-        )
+        if job.task == "prepare" and job.pic:
+            job.reporting_conn.execute(
+                """UPDATE pictures SET
+                   preparing_status = 'broken', status = 'broken'
+                   WHERE id = %(id)s""",
+                {"id": job.pic.id},
+            )
+        # on unrecoverable error, we remove the job from the queue
+        conn.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job.job_queue_id})
 
 
-def _delete_picture(
+def _delete_picture(pic: DbPicture):
     """Delete a picture from the filesystem"""
-    log.debug(f"Deleting picture files {
-    utils.pictures.removeAllFiles(
+    log.debug(f"Deleting picture files {pic.id}")
+    utils.pictures.removeAllFiles(pic.id)