geovisio 2.6.0-py3-none-any.whl → 2.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. geovisio/__init__.py +36 -7
  2. geovisio/admin_cli/db.py +1 -4
  3. geovisio/config_app.py +40 -1
  4. geovisio/db_migrations.py +24 -3
  5. geovisio/templates/main.html +13 -13
  6. geovisio/templates/viewer.html +3 -3
  7. geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
  8. geovisio/translations/de/LC_MESSAGES/messages.po +667 -0
  9. geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
  10. geovisio/translations/en/LC_MESSAGES/messages.po +730 -0
  11. geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
  12. geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
  13. geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
  14. geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
  15. geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
  16. geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
  17. geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
  18. geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
  19. geovisio/translations/messages.pot +686 -0
  20. geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
  21. geovisio/translations/nl/LC_MESSAGES/messages.po +594 -0
  22. geovisio/utils/__init__.py +1 -1
  23. geovisio/utils/auth.py +50 -11
  24. geovisio/utils/db.py +65 -0
  25. geovisio/utils/excluded_areas.py +83 -0
  26. geovisio/utils/extent.py +30 -0
  27. geovisio/utils/fields.py +1 -1
  28. geovisio/utils/filesystems.py +0 -1
  29. geovisio/utils/link.py +14 -0
  30. geovisio/utils/params.py +20 -0
  31. geovisio/utils/pictures.py +92 -68
  32. geovisio/utils/reports.py +171 -0
  33. geovisio/utils/sequences.py +264 -126
  34. geovisio/utils/tokens.py +37 -42
  35. geovisio/utils/upload_set.py +654 -0
  36. geovisio/web/auth.py +37 -37
  37. geovisio/web/collections.py +286 -302
  38. geovisio/web/configuration.py +14 -0
  39. geovisio/web/docs.py +241 -14
  40. geovisio/web/excluded_areas.py +377 -0
  41. geovisio/web/items.py +156 -108
  42. geovisio/web/map.py +20 -20
  43. geovisio/web/params.py +69 -26
  44. geovisio/web/pictures.py +14 -31
  45. geovisio/web/reports.py +399 -0
  46. geovisio/web/rss.py +13 -7
  47. geovisio/web/stac.py +129 -134
  48. geovisio/web/tokens.py +98 -109
  49. geovisio/web/upload_set.py +768 -0
  50. geovisio/web/users.py +100 -73
  51. geovisio/web/utils.py +28 -9
  52. geovisio/workers/runner_pictures.py +252 -204
  53. {geovisio-2.6.0.dist-info → geovisio-2.7.0.dist-info}/METADATA +16 -13
  54. geovisio-2.7.0.dist-info/RECORD +66 -0
  55. geovisio-2.6.0.dist-info/RECORD +0 -41
  56. {geovisio-2.6.0.dist-info → geovisio-2.7.0.dist-info}/LICENSE +0 -0
  57. {geovisio-2.6.0.dist-info → geovisio-2.7.0.dist-info}/WHEEL +0 -0
@@ -1,25 +1,23 @@
-from fs import open_fs
 from fs.path import dirname
 from PIL import Image, ImageOps
 from flask import current_app
+from geovisio import utils
+from geovisio.utils import db, sequences, upload_set
 import psycopg
+from psycopg.rows import dict_row
 from psycopg.sql import SQL
 import sentry_sdk
-from geovisio import utils
 from geovisio import errors
 from dataclasses import dataclass
 import logging
 from contextlib import contextmanager
 from enum import Enum
-from typing import Any
+from typing import Any, Optional
 import threading
 from uuid import UUID
 from croniter import croniter
-from typing import Optional
 from datetime import datetime, timezone
-
 import geovisio.utils.filesystems
-from geovisio.utils.sequences import update_headings

 log = logging.getLogger("geovisio.runner_pictures")

@@ -27,7 +25,7 @@ PICTURE_PROCESS_MAX_RETRY = 10 # Number of times a job will be retryed if there


 class PictureBackgroundProcessor(object):
-    def init_app(self, app):
+    def __init__(self, app):
         nb_threads = app.config["EXECUTOR_MAX_WORKERS"]
         self.enabled = nb_threads != 0

@@ -38,7 +36,7 @@ class PictureBackgroundProcessor(object):
         else:
             import sys

-            if "run" in sys.argv or "waitress" in sys.argv: # hack not to display a frightening warning uselessly
+            if "run" in sys.argv or "waitress" in sys.argv or "gunicorn" in sys.argv: # hack not to display a frightening warning uselessly
                 log.warning("No picture background processor run, no picture will be processed unless another separate worker is run")
                 log.warning("A separate process can be run with:")
                 log.warning("flask picture-worker")
@@ -48,34 +46,63 @@ class PictureBackgroundProcessor(object):
         Ask for a background picture process that will run until not pictures need to be processed
         """
         if self.enabled:
-            worker = PictureProcessor(config=current_app.config)
-            return self.executor.submit(worker.process_next_pictures)
+            worker = PictureProcessor(app=current_app)
+            return self.executor.submit(worker.process_jobs)


-background_processor = PictureBackgroundProcessor()
+# background_processor = PictureBackgroundProcessor()


 class ProcessTask(str, Enum):
     prepare = "prepare"
     delete = "delete"
+    dispatch = "dispatch"
+    finalize = "finalize"


 @dataclass
 class DbPicture:
-    id: str
+    id: UUID
     metadata: dict

     def blurred_by_author(self):
         return self.metadata.get("blurredByAuthor", False)


+@dataclass
+class DbSequence:
+    id: UUID
+
+
+@dataclass
+class DbUploadSet:
+    id: UUID
+
+
 @dataclass
 class DbJob:
     reporting_conn: psycopg.Connection
-    id: UUID
-    pic: DbPicture
+    job_history_id: UUID # ID of the job in the job_history
+    job_queue_id: UUID # ID in the job_queue
+    pic: Optional[DbPicture]
+    upload_set: Optional[DbUploadSet]
+    seq: Optional[DbSequence]
+
     task: ProcessTask

+    def label(self):
+        impacted_object = ""
+        if self.pic:
+            impacted_object = f"picture {self.pic.id}"
+        elif self.seq:
+            impacted_object = f"sequence {self.seq.id}"
+        elif self.upload_set:
+            impacted_object = f"upload set {self.upload_set.id}"
+        else:
+            impacted_object = "unknown object"
+
+        return f"{self.task} for {impacted_object}"
+

 def processPictureFiles(pic: DbPicture, config):
     """Generates the files associated with a sequence picture.
@@ -91,7 +118,7 @@ def processPictureFiles(pic: DbPicture, config):
     config : dict
         Flask app.config (passed as param to allow using ThreadPoolExecutor)
     """
-    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") == None
+    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") is None
     fses = config["FILESYSTEMS"]
     fs = fses.permanent if skipBlur else fses.tmp
     picHdPath = utils.pictures.getHDPicturePath(pic.id)
@@ -156,31 +183,48 @@ class RecoverableProcessException(Exception):
         super().__init__(msg)


+class RetryLaterProcessException(Exception):
+    """Exception raised when we want to retry later, even if it's not an error"""
+
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 class PictureProcessor:
     stop: bool
     config: dict[Any, Any]
+    waiting_time: float

-    def __init__(self, config, stop=True) -> None:
-        self.config = config
+    def __init__(self, app, stop=True) -> None:
+        self.app = app
         self.stop = stop
         if threading.current_thread() is threading.main_thread():
             # if worker is in daemon mode, register signals to gracefully stop it
             self._register_signals()
         self.next_periodic_task_dt = None
+        self.cron = croniter(self.app.config["PICTURE_PROCESS_REFRESH_CRON"])

-    def process_next_pictures(self):
+        # Note: in tests, we don't want to wait between each picture processing
+        waiting_time = 0 if app.config.get("TESTING") is True else 1
+        self.waiting_time = waiting_time
+
+    def process_jobs(self):
         try:
-            while True:
-                self.check_periodic_tasks()
-                r = process_next_picture(self.config)
-                if not r:
-                    if self.stop:
+            with self.app.app_context():
+                while True:
+                    if self.app.pool.closed and self.stop:
+                        # in some tests, the pool is closed before the worker is stopped, we check this here
                         return
-                    # no more picture to process
-                    # wait a bit until there are some
-                    import time
+                    self.check_periodic_tasks()
+                    r = process_next_job(self.app)
+                    if not r:
+                        if self.stop:
+                            return
+                        # no more picture to process
+                        # wait a bit until there are some
+                        import time

-                    time.sleep(1)
+                        time.sleep(self.waiting_time)

         except:
             log.exception("Exiting thread")
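
The rewritten loop above follows a simple poll-and-sleep pattern: claim a job, process it, and when the queue is empty either exit (one-shot mode, `stop=True`) or sleep briefly before polling again, with the wait set to zero under test configuration. A minimal sketch of that control flow, using hypothetical `fetch_job`/`handle_job` callables rather than geovisio's actual functions:

    import time

    def worker_loop(fetch_job, handle_job, stop_when_empty: bool, idle_wait: float) -> None:
        # Sketch of the poll/sleep cycle; fetch_job returns None when the queue is empty.
        while True:
            job = fetch_job()
            if job is None:
                if stop_when_empty:
                    return  # one-shot mode: exit once the backlog is drained
                time.sleep(idle_wait)  # daemon mode: wait a bit, then poll again
            else:
                handle_job(job)

    # e.g. drain the queue once without waiting between polls, as the tests do:
    # worker_loop(fetch_job, handle_job, stop_when_empty=True, idle_wait=0)
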
@@ -200,15 +244,24 @@ class PictureProcessor:
         Check if a periodic task needs to be done, and do it if necessary
         This method ensure only one picture worker will do the needed periodic task
         """
-        with psycopg.connect(self.config["DB_URL"], autocommit=True) as db:
-            if self.next_periodic_task_dt is None:
-                self.next_periodic_task_dt = self.get_next_periodic_task_dt(db)
-
-            if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
-                self.refresh_database(db)
-
-    def get_next_periodic_task_dt(self, db) -> datetime:
-        r = db.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
+        if self.next_periodic_task_dt is None:
+            with db.conn(self.app) as conn:
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+
+        if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+            with db.conn(self.app) as conn:
+                # since the next_periodic_task_dt can have been changed by another process, we check again that the task needs to be done
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+                if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+                    if not self.refresh_database():
+                        # another refresh is in progress, we'll check again later and ask for the next refresh date considering it's in progress
+                        self.next_periodic_task_dt = self.cron.get_next(datetime, datetime.now(timezone.utc))
+                        logging.getLogger("geovisio.periodic_task").info(
+                            f"Refresh in progress, checking after = {self.next_periodic_task_dt}"
+                        )
+
+    def get_next_periodic_task_dt(self, conn) -> datetime:
+        r = conn.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
         assert r # the table always has exactly one row

         refreshed_at, db_time = r
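
Scheduling now goes through a single croniter instance created in `__init__`, and `get_next` is called with an explicit reference datetime (the last refresh, offset-corrected for any clock skew between the database and the app). A self-contained example of that croniter call pattern; the cron expression here is purely illustrative, not geovisio's default:

    from datetime import datetime, timezone
    from croniter import croniter

    # Next scheduled run strictly after a reference time, the same call
    # pattern as self.cron.get_next(datetime, refreshed_at) above.
    cron = croniter("*/15 * * * *")  # every 15 minutes (illustrative)
    last_refresh = datetime(2024, 1, 1, 10, 7, tzinfo=timezone.utc)
    print(cron.get_next(datetime, last_refresh))  # 2024-01-01 10:15:00+00:00
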
@@ -216,42 +269,47 @@
         if refreshed_at is None:
             # if the db has never been updated, we need to update it now
             return current_time
-
-        cron = croniter(self.config["PICTURE_PROCESS_REFRESH_CRON"])
-
-        next_schedule_date = cron.get_next(datetime, refreshed_at)
+        next_schedule_date = self.cron.get_next(datetime, refreshed_at)

         # if the db time and the app time is not the same, we need to apply an offset on the scheduled time
         next_schedule_date += db_time - current_time
         logging.getLogger("geovisio.periodic_task").info(f"Next database refresh = {next_schedule_date}")
         return next_schedule_date

-    def refresh_database(self, db):
+    def refresh_database(self):
         with sentry_sdk.start_transaction(op="task", name="refresh_database"):
             # Note: there is a mechanism in `sequences.update_pictures_grid` to ensure that only one refresh can be done at one time, and it will update the `refreshed_at` value
-            updated = utils.sequences.update_pictures_grid(db)
-            if updated:
-                self.next_periodic_task_dt = self.get_next_periodic_task_dt(db)
-            else:
-                # no update could be done because another process was doing it, check next time the scheduled time
-                self.next_periodic_task_dt = None
+            return utils.sequences.update_pictures_grid()


-def process_next_picture(config):
+def process_next_job(app):
     with sentry_sdk.start_transaction(op="task", name="process_next_picture"):
-        with _get_next_picture_to_process(config) as job:
+        with _get_next_job(app) as job:
             if job is None:
                 return False
-            if job.task == ProcessTask.prepare:
+            if job.task == ProcessTask.prepare and job.pic:
                 with sentry_sdk.start_span(description="Processing picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Processing picture {job.pic.id}"):
-                        processPictureFiles(job.pic, config)
-            elif job.task == ProcessTask.delete:
+                        # open another connection for reporting and queries
+                        processPictureFiles(job.pic, app.config)
+            elif job.task == ProcessTask.delete and job.pic:
                 with sentry_sdk.start_span(description="Deleting picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Deleting picture {job.pic.id}"):
-                        _delete_picture(job)
+                        _delete_picture(job.pic)
+            elif job.task == ProcessTask.delete and job.upload_set:
+                with sentry_sdk.start_span(description="Deleting upload set") as span:
+                    span.set_data("us_id", job.upload_set.id)
+                    with utils.time.log_elapsed(f"Deleting upload set {job.upload_set.id}"):
+                        _delete_upload_set(job.upload_set)
+            elif job.task == ProcessTask.dispatch and job.upload_set:
+                with utils.time.log_elapsed(f"Dispatching upload set {job.upload_set.id}"):
+                    upload_set.dispatch(job.upload_set.id)
+            elif job.task == ProcessTask.finalize and job.seq:
+                with utils.time.log_elapsed(f"Finalizing sequence {job.seq.id}"):
+                    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+                        sequences.finalize(cursor, job.seq.id)
             else:
                 raise RecoverableProcessException(f"Unhandled process task: {job.task}")
@@ -259,58 +317,67 @@ def process_next_picture(config):


 @contextmanager
-def _get_next_picture_to_process(config):
+def _get_next_job(app):
     """
-    Open a new connection and return the next picture to process
-    Note: the picture should be used as a context manager to close the connection when we stop using the returned picture.
+    Open a new connection and return the next job to process
+    Note: the job should be used as a context manager to close the connection when we stop using the returned job.

-    The new connection is needed because we lock the `pictures_to_process` for the whole transaction for another worker not to process the same picture
+    The new connection is needed because we lock the `job_queue` for the whole transaction for another worker not to process the same job
     """
     error = None
-    with psycopg.connect(config["DB_URL"], autocommit=True) as locking_transaction:
-        with locking_transaction.transaction():
-            r = locking_transaction.execute(
-                """
-                SELECT p.id, pictures_to_process.task, p.metadata
-                FROM pictures_to_process
-                JOIN pictures p ON p.id = pictures_to_process.picture_id
+    with app.pool.connection() as locking_transaction:
+        with locking_transaction.transaction(), locking_transaction.cursor(row_factory=dict_row) as cursor:
+            r = cursor.execute(
+                """SELECT j.id, j.picture_id, j.upload_set_id, j.sequence_id, j.task, p.metadata
+                FROM job_queue j
+                LEFT JOIN pictures p ON p.id = j.picture_id
                 ORDER by
-                    pictures_to_process.nb_errors,
-                    pictures_to_process.ts
-                FOR UPDATE of pictures_to_process SKIP LOCKED
-                LIMIT 1
-                """
+                    j.nb_errors,
+                    j.ts
+                FOR UPDATE of j SKIP LOCKED
+                LIMIT 1"""
             ).fetchone()
             if r is None:
                 # Nothing to process
                 yield None
             else:
-                log.debug(f"Processing {r[0]}")
-
-                db_pic = DbPicture(id=str(r[0]), metadata=r[2])
-
-                with psycopg.connect(config["DB_URL"], autocommit=True) as reporting_conn:
-                    job = _initialize_picture_process(reporting_conn, pic=db_pic, task=ProcessTask(r[1]))
+                log.debug(f"Processing {r['id']}")
+
+                db_pic = DbPicture(id=r["picture_id"], metadata=r["metadata"]) if r["picture_id"] is not None else None
+                db_seq = DbSequence(id=r["sequence_id"]) if r["sequence_id"] is not None else None
+                db_upload_set = DbUploadSet(id=r["upload_set_id"]) if r["upload_set_id"] is not None else None
+
+                with app.pool.connection() as reporting_conn:
+                    job = _initialize_job(
+                        reporting_conn,
+                        job_queue_id=r["id"],
+                        db_pic=db_pic,
+                        db_seq=db_seq,
+                        db_upload_set=db_upload_set,
+                        task=ProcessTask(r["task"]),
+                    )
                     try:
                         yield job

                         # Finalize the picture process, set the picture status and remove the picture from the queue process
-                        _finalize_picture_process(locking_transaction, job)
-                        log.debug(f"Picture {db_pic.id} processed")
+                        _finalize_job(locking_transaction, job)
+                        log.debug(f"Job {job.label()} processed")
                     except RecoverableProcessException as e:
-                        _mark_process_as_error(locking_transaction, job, e, config, recoverable=True)
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True)
+                    except RetryLaterProcessException as e:
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True, mark_as_error=False)
                     except InterruptedError as interruption:
-                        log.error(f"Interruption received, stoping job {job.id} for picture {db_pic.id}")
+                        log.error(f"Interruption received, stoping job {job.label()}")
                         # starts a new connection, since the current one can be corrupted by the exception
-                        with psycopg.connect(config["DB_URL"], autocommit=True) as t:
-                            _mark_process_as_error(t, job, interruption, config, recoverable=True)
+                        with app.pool.connection() as t:
+                            _mark_process_as_error(t, job, interruption, recoverable=True)
                         error = interruption
                     except Exception as e:
-                        log.exception(f"Impossible to finish job {job.id} for picture {db_pic.id}")
-                        _mark_process_as_error(locking_transaction, job, e, config, recoverable=False)
+                        log.exception(f"Impossible to finish job {job.label()}")
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=False)

                         # try to finalize the sequence anyway
-                        _finalize_sequence_if_last_picture(job)
+                        _finalize_sequence(job)
                         error = e

     # we raise an error after the transaction has been comited to be sure to have the state persisted in the database
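
The claim-and-lock logic above is the standard Postgres `SELECT ... FOR UPDATE SKIP LOCKED` work-queue pattern: the chosen `job_queue` row stays locked for the lifetime of the transaction, and concurrent workers skip locked rows instead of blocking on them. A minimal standalone sketch of the same pattern with psycopg, against a generic `jobs` table rather than geovisio's schema:

    import psycopg
    from psycopg.rows import dict_row

    def claim_next_job(conn: psycopg.Connection):
        # The claimed row stays locked until the transaction block exits.
        with conn.transaction(), conn.cursor(row_factory=dict_row) as cur:
            row = cur.execute(
                """SELECT id, payload FROM jobs
                ORDER BY nb_errors, ts
                FOR UPDATE SKIP LOCKED
                LIMIT 1"""
            ).fetchone()
            if row is None:
                return None  # queue empty, or every pending row is claimed elsewhere
            # ... process the job, then dequeue it within the same transaction
            cur.execute("DELETE FROM jobs WHERE id = %s", [row["id"]])
            return row

    # with psycopg.connect("postgresql://...", autocommit=True) as conn:
    #     claim_next_job(conn)
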
@@ -318,163 +385,144 @@ def _get_next_picture_to_process(config):
         raise error


-def _finalize_sequence_if_last_picture(job: DbJob):
-    r = job.reporting_conn.execute(
-        """
-        SELECT sp.seq_id AS id FROM sequences_pictures AS sp
-        WHERE sp.pic_id = %(id)s
-        """,
-        {"id": job.pic.id},
-    ).fetchone()
-    if not r:
-        raise Exception(f"impossible to find sequence associated to picture {job.pic.id}")
-
-    seqId = r[0]
-
-    is_sequence_finalized = _is_sequence_finalized(job.reporting_conn, seqId)
-    if not is_sequence_finalized:
-        log.debug("sequence not finalized")
+def _finalize_sequence(job: DbJob):
+    # on picture preparation finalization, we add a sequence/upload_set finalization job
+    if job.task != "prepare" or not job.pic:
         return

-    with sentry_sdk.start_span(description="Finalizing sequence") as span:
-        span.set_data("sequence_id", seqId)
-        log.debug(f"Finalizing sequence {seqId}")
-
-        with utils.time.log_elapsed(f"Finalizing sequence {seqId}"):
-            # Complete missing headings in pictures
-            update_headings(job.reporting_conn, seqId)
-
-            # Change sequence database status in DB
-            # Also generates data in computed columns
-            job.reporting_conn.execute(
-                """WITH
-                aggregated_pictures AS (
-                    SELECT
-                        sp.seq_id,
-                        MIN(p.ts::DATE) AS day,
-                        ARRAY_AGG(DISTINCT TRIM(
-                            CONCAT(p.metadata->>'make', ' ', p.metadata->>'model')
-                        )) AS models,
-                        ARRAY_AGG(DISTINCT p.metadata->>'type') AS types
-                    FROM sequences_pictures sp
-                    JOIN pictures p ON sp.pic_id = p.id
-                    WHERE sp.seq_id = %(seq)s
-                    GROUP BY sp.seq_id
-                )
-                UPDATE sequences
-                SET
-                    status = 'ready',
-                    geom = compute_sequence_geom(id),
-                    bbox = compute_sequence_bbox(id),
-                    computed_type = CASE WHEN array_length(types, 1) = 1 THEN types[1] ELSE NULL END,
-                    computed_model = CASE WHEN array_length(models, 1) = 1 THEN models[1] ELSE NULL END,
-                    computed_capture_date = day
-                FROM aggregated_pictures
-                WHERE id = %(seq)s
-                """,
-                {"seq": seqId},
-            )
-
-            log.info(f"Sequence {seqId} is ready")
+    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+        r = cursor.execute(
+            "SELECT upload_set_id, seq_id FROM pictures p LEFT JOIN sequences_pictures sp on sp.pic_id = p.id WHERE p.id = %(pic_id)s",
+            {"pic_id": job.pic.id},
+        ).fetchone()

+        if not r or not r["seq_id"]:
+            # if the associated upload set has not yet been dispatch, the picture might not be associated to a sequence
+            return

-def _is_sequence_finalized(db, seq_id: str):
-    """
-    We consider a sequence as ready, if all pictures have been processed and there is at least one correctly processed picture
-    Eg. we don't want pictures with preparing_status = 'not-processed' and at least one 'prepared'
-    """
-    statuses = db.execute(
-        """SELECT DISTINCT(preparing_status) FROM pictures p
-        JOIN sequences_pictures sp ON sp.pic_id = p.id
-        WHERE
-            sp.seq_id = %(id)s
-            AND p.preparing_status <> 'broken'
-        ;
-        """,
-        {"id": seq_id},
-    ).fetchall()
+        if r["upload_set_id"]:
+            # if the picture is part of the upload set, the sequence finalization will be done when the upload set is dispatched
+            return

-    return [("prepared",)] == statuses
+        # Add a task to finalize the sequence/upload_set
+        sequences.add_finalization_job(cursor, r["seq_id"])


-def _finalize_picture_process(db, job: DbJob):
+def _finalize_job(conn, job: DbJob):
     job.reporting_conn.execute(
         "UPDATE job_history SET finished_at = CURRENT_TIMESTAMP WHERE id = %(id)s",
-        {"id": job.id},
+        {"id": job.job_history_id},
     )
-    if job.task == ProcessTask.prepare:
+    if job.task == ProcessTask.prepare and job.pic:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
         job.reporting_conn.execute(
             "UPDATE pictures SET status = 'ready', preparing_status = 'prepared' WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )

-        # Check if we need to finalize the sequence
-        _finalize_sequence_if_last_picture(job)
-    elif job.task == ProcessTask.delete:
-        # TODO set the status to 'deleted' instead of removing it
-        db.execute(
+        # Add a task to finalize the sequence
+        _finalize_sequence(job)
+    elif job.task == ProcessTask.delete and job.pic:
+        conn.execute(
             "DELETE FROM pictures WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )
-        db.execute(
-            "DELETE FROM pictures_to_process WHERE picture_id = %(pic_id)s",
-            {"pic_id": job.pic.id},
-        )
+    elif job.task == ProcessTask.delete and job.upload_set:
+        conn.execute(SQL("DELETE FROM upload_sets WHERE id = %s"), [job.upload_set.id])
+
+    conn.execute("DELETE FROM job_queue WHERE id = %(job_id)s", {"job_id": job.job_queue_id})


-def _initialize_picture_process(reporting_conn: psycopg.Connection, pic: DbPicture, task: ProcessTask) -> DbJob:
+def _initialize_job(
+    reporting_conn: psycopg.Connection,
+    job_queue_id: UUID,
+    db_pic: Optional[DbPicture],
+    db_seq: Optional[DbSequence],
+    db_upload_set: Optional[DbUploadSet],
+    task: ProcessTask,
+) -> DbJob:
     r = reporting_conn.execute(
-        """INSERT INTO job_history(picture_id, task)
-        VALUES (%(id)s, %(task)s)
-        RETURNING id
-        """,
-        {"id": pic.id, "task": task.value},
+        """INSERT INTO job_history(job_id, picture_id, sequence_id, upload_set_id, job_task)
+        VALUES (%(job_id)s, %(pic_id)s, %(seq_id)s, %(us_id)s, %(task)s)
+        RETURNING id""",
+        {
+            "job_id": job_queue_id,
+            "pic_id": db_pic.id if db_pic else None,
+            "seq_id": db_seq.id if db_seq else None,
+            "us_id": db_upload_set.id if db_upload_set else None,
+            "task": task.value,
+        },
     ).fetchone()

     if not r:
         raise Exception("impossible to insert task in database")
-    return DbJob(reporting_conn=reporting_conn, pic=pic, id=r[0], task=task)
+
+    return DbJob(
+        reporting_conn=reporting_conn,
+        job_queue_id=job_queue_id,
+        pic=db_pic,
+        seq=db_seq,
+        upload_set=db_upload_set,
+        task=task,
+        job_history_id=r[0],
+    )


-def _mark_process_as_error(db, job: DbJob, e: Exception, config, recoverable: bool = False):
+def _mark_process_as_error(conn, job: DbJob, e: Exception, recoverable: bool = False, mark_as_error: bool = True):
     job.reporting_conn.execute(
         """UPDATE job_history SET
         error = %(err)s, finished_at = CURRENT_TIMESTAMP
         WHERE id = %(id)s""",
-        {"err": str(e), "id": job.id},
+        {"err": str(e), "id": job.job_history_id},
     )
     if recoverable:
-        nb_error = db.execute(
-            """
-            UPDATE pictures_to_process SET
-                nb_errors = nb_errors + 1
-            WHERE picture_id = %(id)s
-            RETURNING nb_errors""",
-            {"err": str(e), "id": job.pic.id},
-        ).fetchone()
-        if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
-            logging.info(f"Job to process picture {job.pic.id} has failed {nb_error} times, we stop trying to process it.")
-            recoverable = False
+        if mark_as_error:
+            nb_error = conn.execute(
+                """
+                UPDATE job_queue SET
+                    nb_errors = nb_errors + 1
+                WHERE id = %(id)s
+                RETURNING nb_errors""",
+                {"err": str(e), "id": job.job_queue_id},
+            ).fetchone()
+            if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
+                logging.info(f"Job {job.label()} has failed {nb_error} times, we stop trying to process it.")
+                recoverable = False
+        else:
+            # it's not a real error, we just want to retry later
+            conn.execute(
+                SQL("UPDATE job_queue SET ts = NOW() WHERE id = %(id)s"),
+                {"err": str(e), "id": job.job_queue_id},
+            )

     if not recoverable:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
-        job.reporting_conn.execute(
-            """UPDATE pictures SET
-                preparing_status = 'broken', status = 'broken'
-            WHERE id = %(id)s""",
-            {"id": job.pic.id},
-        )
-        # on unrecoverable error, we remove the picture from the queue to process
-        db.execute(
-            """
-            DELETE FROM pictures_to_process
-            WHERE picture_id = %(id)s""",
-            {"id": job.pic.id},
-        )
+        if job.task == "prepare" and job.pic:
+            job.reporting_conn.execute(
+                """UPDATE pictures SET
+                    preparing_status = 'broken', status = 'broken'
+                WHERE id = %(id)s""",
+                {"id": job.pic.id},
+            )
+        # on unrecoverable error, we remove the job from the queue
+        conn.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job.job_queue_id})


-def _delete_picture(job: DbJob):
+def _delete_picture(pic: DbPicture):
     """Delete a picture from the filesystem"""
-    log.debug(f"Deleting picture files {job.pic.id}")
-    utils.pictures.removeAllFiles(job.pic.id)
+    log.debug(f"Deleting picture files {pic.id}")
+    utils.pictures.removeAllFiles(pic.id)
+
+
+def _delete_upload_set(upload_set: DbUploadSet):
+    """Delete an upload set
+    We do this in the job queue since we want to wait for all its pictures to be deleted
+    """
+    with db.conn(current_app) as conn:
+        with conn.transaction(), conn.cursor() as cursor:
+            # we want to wait for all pictures to be deleted
+            has_more_pictures = cursor.execute("SELECT 1 FROM pictures WHERE upload_set_id = %s LIMIT 1", [upload_set.id]).fetchone()
+            if has_more_pictures and has_more_pictures[0]:
+                logging.info(f"More pictures to be deleted, upload_set {upload_set.id} will be deleted later")
+                raise RetryLaterProcessException("More pictures to be deleted, upload_set will be deleted later")
+            # Note: the real deletion will be done on job completion so the lock is released
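
The retry mechanism at the end is worth noting: `_delete_upload_set` raises `RetryLaterProcessException` while sibling picture-deletion jobs are still pending, and `_mark_process_as_error` handles that exception by resetting the job's `ts` (sending it to the back of the queue) without touching `nb_errors`, so a deliberate "try again later" never counts toward `PICTURE_PROCESS_MAX_RETRY`. A condensed sketch of that control flow, with simplified signatures rather than the package's exact ones:

    class RetryLater(Exception):
        """Not a failure: the job should simply be retried later."""

    def run_job(conn, job) -> None:
        try:
            job.execute()  # may raise RetryLater, e.g. while child jobs remain
            conn.execute("DELETE FROM job_queue WHERE id = %s", [job.id])
        except RetryLater:
            # re-queue without incrementing nb_errors
            conn.execute("UPDATE job_queue SET ts = NOW() WHERE id = %s", [job.id])
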