geovisio 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. geovisio/__init__.py +36 -7
  2. geovisio/admin_cli/cleanup.py +2 -2
  3. geovisio/admin_cli/db.py +1 -4
  4. geovisio/config_app.py +40 -1
  5. geovisio/db_migrations.py +24 -3
  6. geovisio/templates/main.html +13 -13
  7. geovisio/templates/viewer.html +3 -3
  8. geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
  9. geovisio/translations/de/LC_MESSAGES/messages.po +804 -0
  10. geovisio/translations/el/LC_MESSAGES/messages.mo +0 -0
  11. geovisio/translations/el/LC_MESSAGES/messages.po +685 -0
  12. geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
  13. geovisio/translations/en/LC_MESSAGES/messages.po +738 -0
  14. geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
  15. geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
  16. geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
  17. geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
  18. geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
  19. geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
  20. geovisio/translations/hu/LC_MESSAGES/messages.mo +0 -0
  21. geovisio/translations/hu/LC_MESSAGES/messages.po +773 -0
  22. geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
  23. geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
  24. geovisio/translations/messages.pot +694 -0
  25. geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
  26. geovisio/translations/nl/LC_MESSAGES/messages.po +602 -0
  27. geovisio/utils/__init__.py +1 -1
  28. geovisio/utils/auth.py +50 -11
  29. geovisio/utils/db.py +65 -0
  30. geovisio/utils/excluded_areas.py +83 -0
  31. geovisio/utils/extent.py +30 -0
  32. geovisio/utils/fields.py +1 -1
  33. geovisio/utils/filesystems.py +0 -1
  34. geovisio/utils/link.py +14 -0
  35. geovisio/utils/params.py +20 -0
  36. geovisio/utils/pictures.py +110 -88
  37. geovisio/utils/reports.py +171 -0
  38. geovisio/utils/sequences.py +262 -126
  39. geovisio/utils/tokens.py +37 -42
  40. geovisio/utils/upload_set.py +642 -0
  41. geovisio/web/auth.py +37 -37
  42. geovisio/web/collections.py +304 -304
  43. geovisio/web/configuration.py +14 -0
  44. geovisio/web/docs.py +276 -15
  45. geovisio/web/excluded_areas.py +377 -0
  46. geovisio/web/items.py +169 -112
  47. geovisio/web/map.py +104 -36
  48. geovisio/web/params.py +69 -26
  49. geovisio/web/pictures.py +14 -31
  50. geovisio/web/reports.py +399 -0
  51. geovisio/web/rss.py +13 -7
  52. geovisio/web/stac.py +129 -134
  53. geovisio/web/tokens.py +98 -109
  54. geovisio/web/upload_set.py +771 -0
  55. geovisio/web/users.py +100 -73
  56. geovisio/web/utils.py +28 -9
  57. geovisio/workers/runner_pictures.py +241 -207
  58. {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/METADATA +17 -14
  59. geovisio-2.7.1.dist-info/RECORD +70 -0
  60. {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/WHEEL +1 -1
  61. geovisio-2.6.0.dist-info/RECORD +0 -41
  62. {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/LICENSE +0 -0
geovisio/workers/runner_pictures.py
@@ -1,25 +1,23 @@
- from fs import open_fs
  from fs.path import dirname
  from PIL import Image, ImageOps
  from flask import current_app
+ from geovisio import utils
+ from geovisio.utils import db, sequences, upload_set
  import psycopg
+ from psycopg.rows import dict_row
  from psycopg.sql import SQL
  import sentry_sdk
- from geovisio import utils
  from geovisio import errors
  from dataclasses import dataclass
  import logging
  from contextlib import contextmanager
  from enum import Enum
- from typing import Any
+ from typing import Any, Optional
  import threading
  from uuid import UUID
  from croniter import croniter
- from typing import Optional
  from datetime import datetime, timezone
-
  import geovisio.utils.filesystems
- from geovisio.utils.sequences import update_headings

  log = logging.getLogger("geovisio.runner_pictures")

@@ -27,7 +25,7 @@ PICTURE_PROCESS_MAX_RETRY = 10 # Number of times a job will be retryed if there


  class PictureBackgroundProcessor(object):
-     def init_app(self, app):
+     def __init__(self, app):
          nb_threads = app.config["EXECUTOR_MAX_WORKERS"]
          self.enabled = nb_threads != 0

@@ -38,7 +36,7 @@ class PictureBackgroundProcessor(object):
          else:
              import sys

-             if "run" in sys.argv or "waitress" in sys.argv: # hack not to display a frightening warning uselessly
+             if "run" in sys.argv or "waitress" in sys.argv or "gunicorn" in sys.argv: # hack not to display a frightening warning uselessly
                  log.warning("No picture background processor run, no picture will be processed unless another separate worker is run")
                  log.warning("A separate process can be run with:")
                  log.warning("flask picture-worker")
@@ -48,34 +46,63 @@ class PictureBackgroundProcessor(object):
          Ask for a background picture process that will run until not pictures need to be processed
          """
          if self.enabled:
-             worker = PictureProcessor(config=current_app.config)
-             return self.executor.submit(worker.process_next_pictures)
+             worker = PictureProcessor(app=current_app)
+             return self.executor.submit(worker.process_jobs)


- background_processor = PictureBackgroundProcessor()
+ # background_processor = PictureBackgroundProcessor()


  class ProcessTask(str, Enum):
      prepare = "prepare"
      delete = "delete"
+     dispatch = "dispatch"
+     finalize = "finalize"


  @dataclass
  class DbPicture:
-     id: str
+     id: UUID
      metadata: dict

      def blurred_by_author(self):
          return self.metadata.get("blurredByAuthor", False)


+ @dataclass
+ class DbSequence:
+     id: UUID
+
+
+ @dataclass
+ class DbUploadSet:
+     id: UUID
+
+
  @dataclass
  class DbJob:
      reporting_conn: psycopg.Connection
-     id: UUID
-     pic: DbPicture
+     job_history_id: UUID # ID of the job in the job_history
+     job_queue_id: UUID # ID in the job_queue
+     pic: Optional[DbPicture]
+     upload_set: Optional[DbUploadSet]
+     seq: Optional[DbSequence]
+
      task: ProcessTask

+     def label(self):
+         impacted_object = ""
+         if self.pic:
+             impacted_object = f"picture {self.pic.id}"
+         elif self.seq:
+             impacted_object = f"sequence {self.seq.id}"
+         elif self.upload_set:
+             impacted_object = f"upload set {self.upload_set.id}"
+         else:
+             impacted_object = "unknown object"
+
+         return f"{self.task} for {impacted_object}"
+

  def processPictureFiles(pic: DbPicture, config):
      """Generates the files associated with a sequence picture.
@@ -91,7 +118,7 @@ def processPictureFiles(pic: DbPicture, config):
      config : dict
          Flask app.config (passed as param to allow using ThreadPoolExecutor)
      """
-     skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") == None
+     skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") is None
      fses = config["FILESYSTEMS"]
      fs = fses.permanent if skipBlur else fses.tmp
      picHdPath = utils.pictures.getHDPicturePath(pic.id)
@@ -156,31 +183,48 @@ class RecoverableProcessException(Exception):
          super().__init__(msg)


+ class RetryLaterProcessException(Exception):
+     """Exception raised when we want to retry later, even if it's not an error"""
+
+     def __init__(self, msg):
+         super().__init__(msg)
+
+
  class PictureProcessor:
      stop: bool
      config: dict[Any, Any]
+     waiting_time: float

-     def __init__(self, config, stop=True) -> None:
-         self.config = config
+     def __init__(self, app, stop=True) -> None:
+         self.app = app
          self.stop = stop
          if threading.current_thread() is threading.main_thread():
              # if worker is in daemon mode, register signals to gracefully stop it
              self._register_signals()
          self.next_periodic_task_dt = None
+         self.cron = croniter(self.app.config["PICTURE_PROCESS_REFRESH_CRON"])

-     def process_next_pictures(self):
+         # Note: in tests, we don't want to wait between each picture processing
+         waiting_time = 0 if app.config.get("TESTING") is True else 1
+         self.waiting_time = waiting_time
+
+     def process_jobs(self):
          try:
-             while True:
-                 self.check_periodic_tasks()
-                 r = process_next_picture(self.config)
-                 if not r:
-                     if self.stop:
+             with self.app.app_context():
+                 while True:
+                     if self.app.pool.closed and self.stop:
+                         # in some tests, the pool is closed before the worker is stopped, we check this here
                          return
-                     # no more picture to process
-                     # wait a bit until there are some
-                     import time
+                     self.check_periodic_tasks()
+                     r = process_next_job(self.app)
+                     if not r:
+                         if self.stop:
+                             return
+                         # no more picture to process
+                         # wait a bit until there are some
+                         import time

-                     time.sleep(1)
+                         time.sleep(self.waiting_time)

          except:
              log.exception("Exiting thread")
@@ -200,15 +244,24 @@ class PictureProcessor:
          Check if a periodic task needs to be done, and do it if necessary
          This method ensure only one picture worker will do the needed periodic task
          """
-         with psycopg.connect(self.config["DB_URL"], autocommit=True) as db:
-             if self.next_periodic_task_dt is None:
-                 self.next_periodic_task_dt = self.get_next_periodic_task_dt(db)
-
-             if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
-                 self.refresh_database(db)
-
-     def get_next_periodic_task_dt(self, db) -> datetime:
-         r = db.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
+         if self.next_periodic_task_dt is None:
+             with db.conn(self.app) as conn:
+                 self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+
+         if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+             with db.conn(self.app) as conn:
+                 # since the next_periodic_task_dt can have been changed by another process, we check again that the task needs to be done
+                 self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+                 if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+                     if not self.refresh_database():
+                         # another refresh is in progress, we'll check again later and ask for the next refresh date considering it's in progress
+                         self.next_periodic_task_dt = self.cron.get_next(datetime, datetime.now(timezone.utc))
+                         logging.getLogger("geovisio.periodic_task").info(
+                             f"Refresh in progress, checking after = {self.next_periodic_task_dt}"
+                         )
+
+     def get_next_periodic_task_dt(self, conn) -> datetime:
+         r = conn.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
          assert r # the table always has exactly one row

          refreshed_at, db_time = r
@@ -216,42 +269,42 @@
          if refreshed_at is None:
              # if the db has never been updated, we need to update it now
              return current_time
-
-         cron = croniter(self.config["PICTURE_PROCESS_REFRESH_CRON"])
-
-         next_schedule_date = cron.get_next(datetime, refreshed_at)
+         next_schedule_date = self.cron.get_next(datetime, refreshed_at)

          # if the db time and the app time is not the same, we need to apply an offset on the scheduled time
          next_schedule_date += db_time - current_time
          logging.getLogger("geovisio.periodic_task").info(f"Next database refresh = {next_schedule_date}")
          return next_schedule_date

-     def refresh_database(self, db):
+     def refresh_database(self):
          with sentry_sdk.start_transaction(op="task", name="refresh_database"):
              # Note: there is a mechanism in `sequences.update_pictures_grid` to ensure that only one refresh can be done at one time, and it will update the `refreshed_at` value
-             updated = utils.sequences.update_pictures_grid(db)
-             if updated:
-                 self.next_periodic_task_dt = self.get_next_periodic_task_dt(db)
-             else:
-                 # no update could be done because another process was doing it, check next time the scheduled time
-                 self.next_periodic_task_dt = None
+             return utils.sequences.update_pictures_grid()


- def process_next_picture(config):
+ def process_next_job(app):
      with sentry_sdk.start_transaction(op="task", name="process_next_picture"):
-         with _get_next_picture_to_process(config) as job:
+         with _get_next_job(app) as job:
              if job is None:
                  return False
-             if job.task == ProcessTask.prepare:
+             if job.task == ProcessTask.prepare and job.pic:
                  with sentry_sdk.start_span(description="Processing picture") as span:
                      span.set_data("pic_id", job.pic.id)
                      with utils.time.log_elapsed(f"Processing picture {job.pic.id}"):
-                         processPictureFiles(job.pic, config)
-             elif job.task == ProcessTask.delete:
+                         # open another connection for reporting and queries
+                         processPictureFiles(job.pic, app.config)
+             elif job.task == ProcessTask.delete and job.pic:
                  with sentry_sdk.start_span(description="Deleting picture") as span:
                      span.set_data("pic_id", job.pic.id)
                      with utils.time.log_elapsed(f"Deleting picture {job.pic.id}"):
-                         _delete_picture(job)
+                         _delete_picture(job.pic)
+             elif job.task == ProcessTask.dispatch and job.upload_set:
+                 with utils.time.log_elapsed(f"Dispatching upload set {job.upload_set.id}"):
+                     upload_set.dispatch(job.upload_set.id)
+             elif job.task == ProcessTask.finalize and job.seq:
+                 with utils.time.log_elapsed(f"Finalizing sequence {job.seq.id}"):
+                     with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+                         sequences.finalize(cursor, job.seq.id)
              else:
                  raise RecoverableProcessException(f"Unhandled process task: {job.task}")

@@ -259,58 +312,70 @@ def process_next_picture(config):


  @contextmanager
- def _get_next_picture_to_process(config):
+ def _get_next_job(app):
      """
-     Open a new connection and return the next picture to process
-     Note: the picture should be used as a context manager to close the connection when we stop using the returned picture.
+     Open a new connection and return the next job to process
+     Note: the job should be used as a context manager to close the connection when we stop using the returned job.

-     The new connection is needed because we lock the `pictures_to_process` for the whole transaction for another worker not to process the same picture
+     The new connection is needed because we lock the `job_queue` for the whole transaction for another worker not to process the same job
      """
      error = None
-     with psycopg.connect(config["DB_URL"], autocommit=True) as locking_transaction:
-         with locking_transaction.transaction():
-             r = locking_transaction.execute(
-                 """
-                 SELECT p.id, pictures_to_process.task, p.metadata
-                 FROM pictures_to_process
-                 JOIN pictures p ON p.id = pictures_to_process.picture_id
+     with app.pool.connection() as locking_transaction:
+         with locking_transaction.transaction(), locking_transaction.cursor(row_factory=dict_row) as cursor:
+             r = cursor.execute(
+                 """SELECT j.id, j.picture_id, j.upload_set_id, j.sequence_id, j.task, j.picture_to_delete_id, p.metadata
+                 FROM job_queue j
+                 LEFT JOIN pictures p ON p.id = j.picture_id
                  ORDER by
-                     pictures_to_process.nb_errors,
-                     pictures_to_process.ts
-                 FOR UPDATE of pictures_to_process SKIP LOCKED
-                 LIMIT 1
-                 """
+                     j.nb_errors,
+                     j.ts
+                 FOR UPDATE of j SKIP LOCKED
+                 LIMIT 1"""
              ).fetchone()
              if r is None:
                  # Nothing to process
                  yield None
              else:
-                 log.debug(f"Processing {r[0]}")
-
-                 db_pic = DbPicture(id=str(r[0]), metadata=r[2])
-
-                 with psycopg.connect(config["DB_URL"], autocommit=True) as reporting_conn:
-                     job = _initialize_picture_process(reporting_conn, pic=db_pic, task=ProcessTask(r[1]))
+                 log.debug(f"Processing {r['id']}")
+
+                 # picture id can either be in `picture_id` (and it will be a foreign key to picture) or in `picture_to_delete_id`
+                 # (and it will not a foreign key since the picture's row will already have been deleted from the db)
+                 pic_id = r["picture_id"] or r["picture_to_delete_id"]
+                 db_pic = DbPicture(id=pic_id, metadata=r["metadata"]) if pic_id is not None else None
+                 db_seq = DbSequence(id=r["sequence_id"]) if r["sequence_id"] is not None else None
+                 db_upload_set = DbUploadSet(id=r["upload_set_id"]) if r["upload_set_id"] is not None else None
+
+                 with app.pool.connection() as reporting_conn:
+                     job = _initialize_job(
+                         reporting_conn,
+                         job_queue_id=r["id"],
+                         db_pic=db_pic,
+                         db_seq=db_seq,
+                         db_upload_set=db_upload_set,
+                         task=ProcessTask(r["task"]),
+                     )
                      try:
                          yield job

                          # Finalize the picture process, set the picture status and remove the picture from the queue process
-                         _finalize_picture_process(locking_transaction, job)
-                         log.debug(f"Picture {db_pic.id} processed")
+                         _finalize_job(locking_transaction, job)
+                         log.debug(f"Job {job.label()} processed")
                      except RecoverableProcessException as e:
-                         _mark_process_as_error(locking_transaction, job, e, config, recoverable=True)
+                         _mark_process_as_error(locking_transaction, job, e, recoverable=True)
+                     except RetryLaterProcessException as e:
+                         _mark_process_as_error(locking_transaction, job, e, recoverable=True, mark_as_error=False)
                      except InterruptedError as interruption:
-                         log.error(f"Interruption received, stoping job {job.id} for picture {db_pic.id}")
+                         log.error(f"Interruption received, stoping job {job.label()}")
                          # starts a new connection, since the current one can be corrupted by the exception
-                         with psycopg.connect(config["DB_URL"], autocommit=True) as t:
-                             _mark_process_as_error(t, job, interruption, config, recoverable=True)
+                         with app.pool.connection() as t:
+                             _mark_process_as_error(t, job, interruption, recoverable=True)
                          error = interruption
                      except Exception as e:
-                         log.exception(f"Impossible to finish job {job.id} for picture {db_pic.id}")
-                         _mark_process_as_error(locking_transaction, job, e, config, recoverable=False)
+                         log.exception(f"Impossible to finish job {job.label()}")
+                         _mark_process_as_error(locking_transaction, job, e, recoverable=False)

                          # try to finalize the sequence anyway
-                         _finalize_sequence_if_last_picture(job)
+                         _finalize_sequence(job)
                          error = e

      # we raise an error after the transaction has been comited to be sure to have the state persisted in the database
@@ -318,163 +383,132 @@ def _get_next_picture_to_process(config):
          raise error


- def _finalize_sequence_if_last_picture(job: DbJob):
-     r = job.reporting_conn.execute(
-         """
-         SELECT sp.seq_id AS id FROM sequences_pictures AS sp
-         WHERE sp.pic_id = %(id)s
-         """,
-         {"id": job.pic.id},
-     ).fetchone()
-     if not r:
-         raise Exception(f"impossible to find sequence associated to picture {job.pic.id}")
-
-     seqId = r[0]
-
-     is_sequence_finalized = _is_sequence_finalized(job.reporting_conn, seqId)
-     if not is_sequence_finalized:
-         log.debug("sequence not finalized")
+ def _finalize_sequence(job: DbJob):
+     # on picture preparation finalization, we add a sequence/upload_set finalization job
+     if job.task != "prepare" or not job.pic:
          return

-     with sentry_sdk.start_span(description="Finalizing sequence") as span:
-         span.set_data("sequence_id", seqId)
-         log.debug(f"Finalizing sequence {seqId}")
-
-         with utils.time.log_elapsed(f"Finalizing sequence {seqId}"):
-             # Complete missing headings in pictures
-             update_headings(job.reporting_conn, seqId)
-
-             # Change sequence database status in DB
-             # Also generates data in computed columns
-             job.reporting_conn.execute(
-                 """WITH
-                 aggregated_pictures AS (
-                     SELECT
-                         sp.seq_id,
-                         MIN(p.ts::DATE) AS day,
-                         ARRAY_AGG(DISTINCT TRIM(
-                             CONCAT(p.metadata->>'make', ' ', p.metadata->>'model')
-                         )) AS models,
-                         ARRAY_AGG(DISTINCT p.metadata->>'type') AS types
-                     FROM sequences_pictures sp
-                     JOIN pictures p ON sp.pic_id = p.id
-                     WHERE sp.seq_id = %(seq)s
-                     GROUP BY sp.seq_id
-                 )
-                 UPDATE sequences
-                 SET
-                     status = 'ready',
-                     geom = compute_sequence_geom(id),
-                     bbox = compute_sequence_bbox(id),
-                     computed_type = CASE WHEN array_length(types, 1) = 1 THEN types[1] ELSE NULL END,
-                     computed_model = CASE WHEN array_length(models, 1) = 1 THEN models[1] ELSE NULL END,
-                     computed_capture_date = day
-                 FROM aggregated_pictures
-                 WHERE id = %(seq)s
-                 """,
-                 {"seq": seqId},
-             )
+     with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+         r = cursor.execute(
+             "SELECT upload_set_id, seq_id FROM pictures p LEFT JOIN sequences_pictures sp on sp.pic_id = p.id WHERE p.id = %(pic_id)s",
+             {"pic_id": job.pic.id},
+         ).fetchone()

-         log.info(f"Sequence {seqId} is ready")
+         if not r or not r["seq_id"]:
+             # if the associated upload set has not yet been dispatch, the picture might not be associated to a sequence
+             return

+         if r["upload_set_id"]:
+             # if the picture is part of the upload set, the sequence finalization will be done when the upload set is dispatched
+             return

- def _is_sequence_finalized(db, seq_id: str):
-     """
-     We consider a sequence as ready, if all pictures have been processed and there is at least one correctly processed picture
-     Eg. we don't want pictures with preparing_status = 'not-processed' and at least one 'prepared'
-     """
-     statuses = db.execute(
-         """SELECT DISTINCT(preparing_status) FROM pictures p
-         JOIN sequences_pictures sp ON sp.pic_id = p.id
-         WHERE
-             sp.seq_id = %(id)s
-             AND p.preparing_status <> 'broken'
-         ;
-         """,
-         {"id": seq_id},
-     ).fetchall()
+         # Add a task to finalize the sequence/upload_set
+         sequences.add_finalization_job(cursor, r["seq_id"])

-     return [("prepared",)] == statuses

-
- def _finalize_picture_process(db, job: DbJob):
+ def _finalize_job(conn, job: DbJob):
+     try:
+         # we try to see if our job_history row is still here.
+         # It can have been removed if the object this job was preparing has been deleted during the process (since the job_history table store foreign keys)
+         job.reporting_conn.execute("SELECT id FROM job_history WHERE id = %(id)s FOR UPDATE NOWAIT", {"id": job.job_history_id})
+     except psycopg.errors.LockNotAvailable:
+         logging.info(
+             f"The job {job.job_history_id} ({job.label()}) has likely been deleted during the process (it can happen if the picture/upload_set/sequence has been deleted by another process), we don't need to finalize it"
+         )
+         return
      job.reporting_conn.execute(
          "UPDATE job_history SET finished_at = CURRENT_TIMESTAMP WHERE id = %(id)s",
-         {"id": job.id},
+         {"id": job.job_history_id},
      )
-     if job.task == ProcessTask.prepare:
+     if job.task == ProcessTask.prepare and job.pic:
          # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
          job.reporting_conn.execute(
              "UPDATE pictures SET status = 'ready', preparing_status = 'prepared' WHERE id = %(pic_id)s",
              {"pic_id": job.pic.id},
          )

-         # Check if we need to finalize the sequence
-         _finalize_sequence_if_last_picture(job)
-     elif job.task == ProcessTask.delete:
-         # TODO set the status to 'deleted' instead of removing it
-         db.execute(
-             "DELETE FROM pictures WHERE id = %(pic_id)s",
-             {"pic_id": job.pic.id},
-         )
-         db.execute(
-             "DELETE FROM pictures_to_process WHERE picture_id = %(pic_id)s",
-             {"pic_id": job.pic.id},
-         )
+         # Add a task to finalize the sequence
+         _finalize_sequence(job)
+
+     conn.execute("DELETE FROM job_queue WHERE id = %(job_id)s", {"job_id": job.job_queue_id})


- def _initialize_picture_process(reporting_conn: psycopg.Connection, pic: DbPicture, task: ProcessTask) -> DbJob:
+ def _initialize_job(
+     reporting_conn: psycopg.Connection,
+     job_queue_id: UUID,
+     db_pic: Optional[DbPicture],
+     db_seq: Optional[DbSequence],
+     db_upload_set: Optional[DbUploadSet],
+     task: ProcessTask,
+ ) -> DbJob:
      r = reporting_conn.execute(
-         """INSERT INTO job_history(picture_id, task)
-         VALUES (%(id)s, %(task)s)
-         RETURNING id
-         """,
-         {"id": pic.id, "task": task.value},
+         """INSERT INTO job_history(job_id, picture_id, sequence_id, upload_set_id, picture_to_delete_id, job_task)
+         VALUES (%(job_id)s, %(pic_id)s, %(seq_id)s, %(us_id)s, %(pic_to_delete)s, %(task)s)
+         RETURNING id""",
+         {
+             "job_id": job_queue_id,
+             "pic_id": db_pic.id if db_pic and task != ProcessTask.delete else None,
+             "seq_id": db_seq.id if db_seq else None,
+             "pic_to_delete": db_pic.id if db_pic and task == ProcessTask.delete else None,
+             "us_id": db_upload_set.id if db_upload_set else None,
+             "task": task.value,
+         },
      ).fetchone()

      if not r:
          raise Exception("impossible to insert task in database")
-     return DbJob(reporting_conn=reporting_conn, pic=pic, id=r[0], task=task)
+
+     return DbJob(
+         reporting_conn=reporting_conn,
+         job_queue_id=job_queue_id,
+         pic=db_pic,
+         seq=db_seq,
+         upload_set=db_upload_set,
+         task=task,
+         job_history_id=r[0],
+     )


- def _mark_process_as_error(db, job: DbJob, e: Exception, config, recoverable: bool = False):
+ def _mark_process_as_error(conn, job: DbJob, e: Exception, recoverable: bool = False, mark_as_error: bool = True):
      job.reporting_conn.execute(
          """UPDATE job_history SET
          error = %(err)s, finished_at = CURRENT_TIMESTAMP
          WHERE id = %(id)s""",
-         {"err": str(e), "id": job.id},
+         {"err": str(e), "id": job.job_history_id},
      )
      if recoverable:
-         nb_error = db.execute(
-             """
-             UPDATE pictures_to_process SET
-                 nb_errors = nb_errors + 1
-             WHERE picture_id = %(id)s
-             RETURNING nb_errors""",
-             {"err": str(e), "id": job.pic.id},
-         ).fetchone()
-         if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
-             logging.info(f"Job to process picture {job.pic.id} has failed {nb_error} times, we stop trying to process it.")
-             recoverable = False
+         if mark_as_error:
+             nb_error = conn.execute(
+                 """UPDATE job_queue SET
+                 nb_errors = nb_errors + 1
+                 WHERE id = %(id)s
+                 RETURNING nb_errors""",
+                 {"err": str(e), "id": job.job_queue_id},
+             ).fetchone()
+             if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
+                 logging.info(f"Job {job.label()} has failed {nb_error} times, we stop trying to process it.")
+                 recoverable = False
+         else:
+             # it's not a real error, we just want to retry later
+             conn.execute(
+                 SQL("UPDATE job_queue SET ts = NOW() WHERE id = %(id)s"),
+                 {"err": str(e), "id": job.job_queue_id},
+             )

      if not recoverable:
          # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
-         job.reporting_conn.execute(
-             """UPDATE pictures SET
-             preparing_status = 'broken', status = 'broken'
-             WHERE id = %(id)s""",
-             {"id": job.pic.id},
-         )
-         # on unrecoverable error, we remove the picture from the queue to process
-         db.execute(
-             """
-             DELETE FROM pictures_to_process
-             WHERE picture_id = %(id)s""",
-             {"id": job.pic.id},
-         )
+         if job.task == "prepare" and job.pic:
+             job.reporting_conn.execute(
+                 """UPDATE pictures SET
+                 preparing_status = 'broken', status = 'broken'
+                 WHERE id = %(id)s""",
+                 {"id": job.pic.id},
+             )
+         # on unrecoverable error, we remove the job from the queue
+         conn.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job.job_queue_id})


- def _delete_picture(job: DbJob):
+ def _delete_picture(pic: DbPicture):
      """Delete a picture from the filesystem"""
-     log.debug(f"Deleting picture files {job.pic.id}")
-     utils.pictures.removeAllFiles(job.pic.id)
+     log.debug(f"Deleting picture files {pic.id}")
+     utils.pictures.removeAllFiles(pic.id)
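
Note: the new _get_next_job above builds on PostgreSQL's SELECT ... FOR UPDATE SKIP LOCKED, which lets several picture workers poll the same job_queue table without ever claiming the same row. A minimal standalone sketch of the pattern (table and column names taken from the diff; the process callback is hypothetical, not part of geovisio):

# Minimal sketch of the SKIP LOCKED worker pattern used by _get_next_job.
# Assumes a job_queue(id, task, nb_errors, ts) table as in the diff above;
# `process` is a hypothetical callback, not a geovisio function.
import psycopg

def claim_and_run_one_job(db_url: str, process) -> bool:
    """Returns False when the queue is empty (or every pending row is locked)."""
    with psycopg.connect(db_url, autocommit=True) as conn:
        # The explicit transaction keeps the row lock for the whole job,
        # so concurrent workers skip this row instead of blocking on it.
        with conn.transaction():
            row = conn.execute(
                """SELECT id, task FROM job_queue
                ORDER BY nb_errors, ts
                FOR UPDATE SKIP LOCKED
                LIMIT 1"""
            ).fetchone()
            if row is None:
                return False
            job_id, task = row
            process(job_id, task)  # on exception, the transaction rolls back
            # success: remove the row while still holding its lock
            conn.execute("DELETE FROM job_queue WHERE id = %s", (job_id,))
            return True

The FOR UPDATE NOWAIT probe in _finalize_job is the complementary trade-off: instead of skipping a locked row it raises psycopg.errors.LockNotAvailable immediately, which the worker takes as a sign that the job's row is being deleted by a concurrent transaction.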
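Likewise, the periodic-refresh scheduling in get_next_periodic_task_dt reduces to croniter plus a clock-offset correction. A sketch under assumed values (the hourly cron expression and the 2-second offset are illustrative, not taken from the package):

# Sketch of the cron scheduling done in get_next_periodic_task_dt above.
from datetime import datetime, timedelta, timezone
from croniter import croniter

cron = croniter("0 * * * *")  # stands in for PICTURE_PROCESS_REFRESH_CRON
refreshed_at = datetime(2024, 1, 1, 10, 30, tzinfo=timezone.utc)
current_time = datetime(2024, 1, 1, 10, 45, tzinfo=timezone.utc)
db_time = current_time + timedelta(seconds=2)  # database clock slightly ahead

# first cron slot strictly after the last refresh: 11:00 UTC
next_schedule_date = cron.get_next(datetime, refreshed_at)
# shift by the db/app clock difference, as the worker does
next_schedule_date += db_time - current_time
print(next_schedule_date)  # 2024-01-01 11:00:02+00:00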