geovisio 2.5.0__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. geovisio/__init__.py +38 -8
  2. geovisio/admin_cli/__init__.py +2 -2
  3. geovisio/admin_cli/db.py +8 -0
  4. geovisio/config_app.py +64 -0
  5. geovisio/db_migrations.py +24 -3
  6. geovisio/templates/main.html +14 -14
  7. geovisio/templates/viewer.html +3 -3
  8. geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
  9. geovisio/translations/de/LC_MESSAGES/messages.po +667 -0
  10. geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
  11. geovisio/translations/en/LC_MESSAGES/messages.po +730 -0
  12. geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
  13. geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
  14. geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
  15. geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
  16. geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
  17. geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
  18. geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
  19. geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
  20. geovisio/translations/messages.pot +686 -0
  21. geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
  22. geovisio/translations/nl/LC_MESSAGES/messages.po +594 -0
  23. geovisio/utils/__init__.py +1 -1
  24. geovisio/utils/auth.py +50 -11
  25. geovisio/utils/db.py +65 -0
  26. geovisio/utils/excluded_areas.py +83 -0
  27. geovisio/utils/extent.py +30 -0
  28. geovisio/utils/fields.py +1 -1
  29. geovisio/utils/filesystems.py +0 -1
  30. geovisio/utils/link.py +14 -0
  31. geovisio/utils/params.py +20 -0
  32. geovisio/utils/pictures.py +94 -69
  33. geovisio/utils/reports.py +171 -0
  34. geovisio/utils/sequences.py +288 -126
  35. geovisio/utils/tokens.py +37 -42
  36. geovisio/utils/upload_set.py +654 -0
  37. geovisio/web/auth.py +50 -37
  38. geovisio/web/collections.py +305 -319
  39. geovisio/web/configuration.py +14 -0
  40. geovisio/web/docs.py +288 -12
  41. geovisio/web/excluded_areas.py +377 -0
  42. geovisio/web/items.py +203 -151
  43. geovisio/web/map.py +322 -106
  44. geovisio/web/params.py +69 -26
  45. geovisio/web/pictures.py +14 -31
  46. geovisio/web/reports.py +399 -0
  47. geovisio/web/rss.py +13 -7
  48. geovisio/web/stac.py +129 -121
  49. geovisio/web/tokens.py +105 -112
  50. geovisio/web/upload_set.py +768 -0
  51. geovisio/web/users.py +100 -73
  52. geovisio/web/utils.py +38 -9
  53. geovisio/workers/runner_pictures.py +278 -183
  54. geovisio-2.7.0.dist-info/METADATA +95 -0
  55. geovisio-2.7.0.dist-info/RECORD +66 -0
  56. geovisio-2.5.0.dist-info/METADATA +0 -115
  57. geovisio-2.5.0.dist-info/RECORD +0 -41
  58. {geovisio-2.5.0.dist-info → geovisio-2.7.0.dist-info}/LICENSE +0 -0
  59. {geovisio-2.5.0.dist-info → geovisio-2.7.0.dist-info}/WHEEL +0 -0
geovisio/workers/runner_pictures.py
@@ -1,21 +1,23 @@
-from fs import open_fs
 from fs.path import dirname
 from PIL import Image, ImageOps
 from flask import current_app
+from geovisio import utils
+from geovisio.utils import db, sequences, upload_set
 import psycopg
+from psycopg.rows import dict_row
+from psycopg.sql import SQL
 import sentry_sdk
-from geovisio import utils
 from geovisio import errors
 from dataclasses import dataclass
 import logging
 from contextlib import contextmanager
 from enum import Enum
-from typing import Any
+from typing import Any, Optional
 import threading
 from uuid import UUID
-
+from croniter import croniter
+from datetime import datetime, timezone
 import geovisio.utils.filesystems
-from geovisio.utils.sequences import update_headings
 
 log = logging.getLogger("geovisio.runner_pictures")
 
@@ -23,7 +25,7 @@ PICTURE_PROCESS_MAX_RETRY = 10 # Number of times a job will be retryed if there
 
 
 class PictureBackgroundProcessor(object):
-    def init_app(self, app):
+    def __init__(self, app):
        nb_threads = app.config["EXECUTOR_MAX_WORKERS"]
        self.enabled = nb_threads != 0
 
@@ -34,7 +36,7 @@ class PictureBackgroundProcessor(object):
         else:
             import sys
 
-            if "run" in sys.argv or "waitress" in sys.argv: # hack not to display a frightening warning uselessly
+            if "run" in sys.argv or "waitress" in sys.argv or "gunicorn" in sys.argv: # hack not to display a frightening warning uselessly
                 log.warning("No picture background processor run, no picture will be processed unless another separate worker is run")
                 log.warning("A separate process can be run with:")
                 log.warning("flask picture-worker")
@@ -44,34 +46,63 @@ class PictureBackgroundProcessor(object):
         Ask for a background picture process that will run until not pictures need to be processed
         """
         if self.enabled:
-            worker = PictureProcessor(config=current_app.config)
-            return self.executor.submit(worker.process_next_pictures)
+            worker = PictureProcessor(app=current_app)
+            return self.executor.submit(worker.process_jobs)
 
 
-background_processor = PictureBackgroundProcessor()
+# background_processor = PictureBackgroundProcessor()
 
 
 class ProcessTask(str, Enum):
     prepare = "prepare"
     delete = "delete"
+    dispatch = "dispatch"
+    finalize = "finalize"
 
 
 @dataclass
 class DbPicture:
-    id: str
+    id: UUID
     metadata: dict
 
     def blurred_by_author(self):
         return self.metadata.get("blurredByAuthor", False)
 
 
+@dataclass
+class DbSequence:
+    id: UUID
+
+
+@dataclass
+class DbUploadSet:
+    id: UUID
+
+
 @dataclass
 class DbJob:
     reporting_conn: psycopg.Connection
-    id: UUID
-    pic: DbPicture
+    job_history_id: UUID  # ID of the job in the job_history
+    job_queue_id: UUID  # ID in the job_queue
+    pic: Optional[DbPicture]
+    upload_set: Optional[DbUploadSet]
+    seq: Optional[DbSequence]
+
     task: ProcessTask
 
+    def label(self):
+        impacted_object = ""
+        if self.pic:
+            impacted_object = f"picture {self.pic.id}"
+        elif self.seq:
+            impacted_object = f"sequence {self.seq.id}"
+        elif self.upload_set:
+            impacted_object = f"upload set {self.upload_set.id}"
+        else:
+            impacted_object = "unknown object"
+
+        return f"{self.task} for {impacted_object}"
+
 
 def processPictureFiles(pic: DbPicture, config):
     """Generates the files associated with a sequence picture.
@@ -87,7 +118,7 @@ def processPictureFiles(pic: DbPicture, config):
     config : dict
         Flask app.config (passed as param to allow using ThreadPoolExecutor)
     """
-    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") == None
+    skipBlur = pic.blurred_by_author() or config.get("API_BLUR_URL") is None
     fses = config["FILESYSTEMS"]
     fs = fses.permanent if skipBlur else fses.tmp
     picHdPath = utils.pictures.getHDPicturePath(pic.id)
@@ -152,29 +183,48 @@ class RecoverableProcessException(Exception):
         super().__init__(msg)
 
 
+class RetryLaterProcessException(Exception):
+    """Exception raised when we want to retry later, even if it's not an error"""
+
+    def __init__(self, msg):
+        super().__init__(msg)
+
+
 class PictureProcessor:
     stop: bool
     config: dict[Any, Any]
+    waiting_time: float
 
-    def __init__(self, config, stop=True) -> None:
-        self.config = config
+    def __init__(self, app, stop=True) -> None:
+        self.app = app
         self.stop = stop
         if threading.current_thread() is threading.main_thread():
             # if worker is in daemon mode, register signals to gracefully stop it
             self._register_signals()
+        self.next_periodic_task_dt = None
+        self.cron = croniter(self.app.config["PICTURE_PROCESS_REFRESH_CRON"])
+
+        # Note: in tests, we don't want to wait between each picture processing
+        waiting_time = 0 if app.config.get("TESTING") is True else 1
+        self.waiting_time = waiting_time
 
-    def process_next_pictures(self):
+    def process_jobs(self):
         try:
-            while True:
-                r = process_next_picture(self.config)
-                if not r:
-                    if self.stop:
+            with self.app.app_context():
+                while True:
+                    if self.app.pool.closed and self.stop:
+                        # in some tests, the pool is closed before the worker is stopped, we check this here
                         return
-                    # no more picture to process
-                    # wait a bit until there are some
-                    import time
+                    self.check_periodic_tasks()
+                    r = process_next_job(self.app)
+                    if not r:
+                        if self.stop:
+                            return
+                        # no more picture to process
+                        # wait a bit until there are some
+                        import time
 
-                    time.sleep(1)
+                        time.sleep(self.waiting_time)
 
         except:
             log.exception("Exiting thread")
@@ -189,22 +239,77 @@ class PictureProcessor:
         log.info("Stoping worker, waiting for last picture processing to finish...")
         self.stop = True
 
-
-def process_next_picture(config):
+    def check_periodic_tasks(self):
+        """
+        Check if a periodic task needs to be done, and do it if necessary
+        This method ensure only one picture worker will do the needed periodic task
+        """
+        if self.next_periodic_task_dt is None:
+            with db.conn(self.app) as conn:
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+
+        if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+            with db.conn(self.app) as conn:
+                # since the next_periodic_task_dt can have been changed by another process, we check again that the task needs to be done
+                self.next_periodic_task_dt = self.get_next_periodic_task_dt(conn)
+                if datetime.now(timezone.utc) >= self.next_periodic_task_dt:
+                    if not self.refresh_database():
+                        # another refresh is in progress, we'll check again later and ask for the next refresh date considering it's in progress
+                        self.next_periodic_task_dt = self.cron.get_next(datetime, datetime.now(timezone.utc))
+                        logging.getLogger("geovisio.periodic_task").info(
+                            f"Refresh in progress, checking after = {self.next_periodic_task_dt}"
+                        )
+
+    def get_next_periodic_task_dt(self, conn) -> datetime:
+        r = conn.execute("SELECT refreshed_at, NOW() FROM refresh_database").fetchone()
+        assert r  # the table always has exactly one row
+
+        refreshed_at, db_time = r
+        current_time = datetime.now(timezone.utc)
+        if refreshed_at is None:
+            # if the db has never been updated, we need to update it now
+            return current_time
+        next_schedule_date = self.cron.get_next(datetime, refreshed_at)
+
+        # if the db time and the app time is not the same, we need to apply an offset on the scheduled time
+        next_schedule_date += db_time - current_time
+        logging.getLogger("geovisio.periodic_task").info(f"Next database refresh = {next_schedule_date}")
+        return next_schedule_date
+
+    def refresh_database(self):
+        with sentry_sdk.start_transaction(op="task", name="refresh_database"):
+            # Note: there is a mechanism in `sequences.update_pictures_grid` to ensure that only one refresh can be done at one time, and it will update the `refreshed_at` value
+            return utils.sequences.update_pictures_grid()
+
+
+def process_next_job(app):
     with sentry_sdk.start_transaction(op="task", name="process_next_picture"):
-        with _get_next_picture_to_process(config) as job:
+        with _get_next_job(app) as job:
             if job is None:
                 return False
-            if job.task == ProcessTask.prepare:
+            if job.task == ProcessTask.prepare and job.pic:
                 with sentry_sdk.start_span(description="Processing picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Processing picture {job.pic.id}"):
-                        processPictureFiles(job.pic, config)
-            elif job.task == ProcessTask.delete:
+                        # open another connection for reporting and queries
+                        processPictureFiles(job.pic, app.config)
+            elif job.task == ProcessTask.delete and job.pic:
                 with sentry_sdk.start_span(description="Deleting picture") as span:
                     span.set_data("pic_id", job.pic.id)
                     with utils.time.log_elapsed(f"Deleting picture {job.pic.id}"):
-                        _delete_picture(job)
+                        _delete_picture(job.pic)
+            elif job.task == ProcessTask.delete and job.upload_set:
+                with sentry_sdk.start_span(description="Deleting upload set") as span:
+                    span.set_data("us_id", job.upload_set.id)
+                    with utils.time.log_elapsed(f"Deleting upload set {job.upload_set.id}"):
+                        _delete_upload_set(job.upload_set)
+            elif job.task == ProcessTask.dispatch and job.upload_set:
+                with utils.time.log_elapsed(f"Dispatching upload set {job.upload_set.id}"):
+                    upload_set.dispatch(job.upload_set.id)
+            elif job.task == ProcessTask.finalize and job.seq:
+                with utils.time.log_elapsed(f"Finalizing sequence {job.seq.id}"):
+                    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+                        sequences.finalize(cursor, job.seq.id)
             else:
                 raise RecoverableProcessException(f"Unhandled process task: {job.task}")
 
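The periodic-refresh scheduling above is driven by croniter. Below is a minimal sketch, outside the package, of the arithmetic get_next_periodic_task_dt performs; the cron expression and the 2-second clock skew are made-up values standing in for PICTURE_PROCESS_REFRESH_CRON and the real DB/app clock offset.

from datetime import datetime, timedelta, timezone

from croniter import croniter

cron = croniter("0 3 * * *")  # hypothetical stand-in for PICTURE_PROCESS_REFRESH_CRON

last_refresh = datetime(2024, 1, 1, 3, 0, tzinfo=timezone.utc)  # refreshed_at read from refresh_database
app_time = datetime.now(timezone.utc)
db_time = app_time + timedelta(seconds=2)  # pretend the database NOW() runs 2s ahead of the app

next_run = cron.get_next(datetime, last_refresh)  # first cron tick after the last refresh
next_run += db_time - app_time  # shift by the DB/app clock offset, as the worker does

print(f"next database refresh at {next_run}")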
@@ -212,58 +317,67 @@ def process_next_picture(config):
 
 
 @contextmanager
-def _get_next_picture_to_process(config):
+def _get_next_job(app):
     """
-    Open a new connection and return the next picture to process
-    Note: the picture should be used as a context manager to close the connection when we stop using the returned picture.
+    Open a new connection and return the next job to process
+    Note: the job should be used as a context manager to close the connection when we stop using the returned job.
 
-    The new connection is needed because we lock the `pictures_to_process` for the whole transaction for another worker not to process the same picture
+    The new connection is needed because we lock the `job_queue` for the whole transaction for another worker not to process the same job
     """
     error = None
-    with psycopg.connect(config["DB_URL"], autocommit=True) as locking_transaction:
-        with locking_transaction.transaction():
-            r = locking_transaction.execute(
-                """
-                SELECT p.id, pictures_to_process.task, p.metadata
-                FROM pictures_to_process
-                JOIN pictures p ON p.id = pictures_to_process.picture_id
+    with app.pool.connection() as locking_transaction:
+        with locking_transaction.transaction(), locking_transaction.cursor(row_factory=dict_row) as cursor:
+            r = cursor.execute(
+                """SELECT j.id, j.picture_id, j.upload_set_id, j.sequence_id, j.task, p.metadata
+                FROM job_queue j
+                LEFT JOIN pictures p ON p.id = j.picture_id
                 ORDER by
-                    pictures_to_process.nb_errors,
-                    pictures_to_process.ts
-                FOR UPDATE of pictures_to_process SKIP LOCKED
-                LIMIT 1
-                """
+                    j.nb_errors,
+                    j.ts
+                FOR UPDATE of j SKIP LOCKED
+                LIMIT 1"""
             ).fetchone()
             if r is None:
                 # Nothing to process
                 yield None
             else:
-                log.debug(f"Processing {r[0]}")
-
-                db_pic = DbPicture(id=str(r[0]), metadata=r[2])
-
-                with psycopg.connect(config["DB_URL"], autocommit=True) as reporting_conn:
-                    job = _initialize_picture_process(reporting_conn, pic=db_pic, task=ProcessTask(r[1]))
+                log.debug(f"Processing {r['id']}")
+
+                db_pic = DbPicture(id=r["picture_id"], metadata=r["metadata"]) if r["picture_id"] is not None else None
+                db_seq = DbSequence(id=r["sequence_id"]) if r["sequence_id"] is not None else None
+                db_upload_set = DbUploadSet(id=r["upload_set_id"]) if r["upload_set_id"] is not None else None
+
+                with app.pool.connection() as reporting_conn:
+                    job = _initialize_job(
+                        reporting_conn,
+                        job_queue_id=r["id"],
+                        db_pic=db_pic,
+                        db_seq=db_seq,
+                        db_upload_set=db_upload_set,
+                        task=ProcessTask(r["task"]),
+                    )
                     try:
                         yield job
 
                         # Finalize the picture process, set the picture status and remove the picture from the queue process
-                        _finalize_picture_process(locking_transaction, job)
-                        log.debug(f"Picture {db_pic.id} processed")
+                        _finalize_job(locking_transaction, job)
+                        log.debug(f"Job {job.label()} processed")
                     except RecoverableProcessException as e:
-                        _mark_process_as_error(locking_transaction, job, e, config, recoverable=True)
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True)
+                    except RetryLaterProcessException as e:
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=True, mark_as_error=False)
                     except InterruptedError as interruption:
-                        log.error(f"Interruption received, stoping job {job.id} for picture {db_pic.id}")
+                        log.error(f"Interruption received, stoping job {job.label()}")
                         # starts a new connection, since the current one can be corrupted by the exception
-                        with psycopg.connect(config["DB_URL"], autocommit=True) as t:
-                            _mark_process_as_error(t, job, interruption, config, recoverable=True)
+                        with app.pool.connection() as t:
+                            _mark_process_as_error(t, job, interruption, recoverable=True)
                         error = interruption
                     except Exception as e:
-                        log.exception(f"Impossible to finish job {job.id} for picture {db_pic.id}")
-                        _mark_process_as_error(locking_transaction, job, e, config, recoverable=False)
+                        log.exception(f"Impossible to finish job {job.label()}")
+                        _mark_process_as_error(locking_transaction, job, e, recoverable=False)
 
                         # try to finalize the sequence anyway
-                        _finalize_sequence_if_last_picture(job)
+                        _finalize_sequence(job)
                        error = e
 
     # we raise an error after the transaction has been comited to be sure to have the state persisted in the database
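The query in _get_next_job is a FOR UPDATE ... SKIP LOCKED work queue: each worker locks at most one job_queue row for the lifetime of its transaction, concurrent workers skip locked rows instead of blocking on them, and a crashed worker releases its row when its connection dies. A standalone sketch of the pattern follows; the connection string is a placeholder, and the real code takes its connections from app.pool rather than connecting directly.

import psycopg

DB_URL = "postgresql:///geovisio"  # placeholder connection string

with psycopg.connect(DB_URL, autocommit=True) as conn:
    with conn.transaction():  # the row lock lasts as long as this transaction
        row = conn.execute(
            """SELECT id, task FROM job_queue
            ORDER BY nb_errors, ts
            FOR UPDATE OF job_queue SKIP LOCKED
            LIMIT 1"""
        ).fetchone()
        if row is None:
            print("nothing to do: queue empty, or every job locked by another worker")
        else:
            job_id, task = row
            print(f"processing job {job_id} ({task})")  # the real work happens here
            conn.execute("DELETE FROM job_queue WHERE id = %s", [job_id])  # success: drop the job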
@@ -271,163 +385,144 @@ def _get_next_picture_to_process(config):
     raise error
 
 
-def _finalize_sequence_if_last_picture(job: DbJob):
-    r = job.reporting_conn.execute(
-        """
-        SELECT sp.seq_id AS id FROM sequences_pictures AS sp
-        WHERE sp.pic_id = %(id)s
-        """,
-        {"id": job.pic.id},
-    ).fetchone()
-    if not r:
-        raise Exception(f"impossible to find sequence associated to picture {job.pic.id}")
-
-    seqId = r[0]
-
-    is_sequence_finalized = _is_sequence_finalized(job.reporting_conn, seqId)
-    if not is_sequence_finalized:
-        log.debug("sequence not finalized")
+def _finalize_sequence(job: DbJob):
+    # on picture preparation finalization, we add a sequence/upload_set finalization job
+    if job.task != "prepare" or not job.pic:
         return
 
-    with sentry_sdk.start_span(description="Finalizing sequence") as span:
-        span.set_data("sequence_id", seqId)
-        log.debug(f"Finalizing sequence {seqId}")
-
-        with utils.time.log_elapsed(f"Finalizing sequence {seqId}"):
-            # Complete missing headings in pictures
-            update_headings(job.reporting_conn, seqId)
-
-            # Change sequence database status in DB
-            # Also generates data in computed columns
-            job.reporting_conn.execute(
-                """WITH
-                aggregated_pictures AS (
-                    SELECT
-                        sp.seq_id,
-                        MIN(p.ts::DATE) AS day,
-                        ARRAY_AGG(DISTINCT TRIM(
-                            CONCAT(p.metadata->>'make', ' ', p.metadata->>'model')
-                        )) AS models,
-                        ARRAY_AGG(DISTINCT p.metadata->>'type') AS types
-                    FROM sequences_pictures sp
-                    JOIN pictures p ON sp.pic_id = p.id
-                    WHERE sp.seq_id = %(seq)s
-                    GROUP BY sp.seq_id
-                )
-                UPDATE sequences
-                SET
-                    status = 'ready',
-                    geom = compute_sequence_geom(id),
-                    bbox = compute_sequence_bbox(id),
-                    computed_type = CASE WHEN array_length(types, 1) = 1 THEN types[1] ELSE NULL END,
-                    computed_model = CASE WHEN array_length(models, 1) = 1 THEN models[1] ELSE NULL END,
-                    computed_capture_date = day
-                FROM aggregated_pictures
-                WHERE id = %(seq)s
-                """,
-                {"seq": seqId},
-            )
+    with job.reporting_conn.cursor(row_factory=dict_row) as cursor:
+        r = cursor.execute(
+            "SELECT upload_set_id, seq_id FROM pictures p LEFT JOIN sequences_pictures sp on sp.pic_id = p.id WHERE p.id = %(pic_id)s",
+            {"pic_id": job.pic.id},
+        ).fetchone()
 
-        log.info(f"Sequence {seqId} is ready")
+        if not r or not r["seq_id"]:
+            # if the associated upload set has not yet been dispatch, the picture might not be associated to a sequence
+            return
 
+        if r["upload_set_id"]:
+            # if the picture is part of the upload set, the sequence finalization will be done when the upload set is dispatched
+            return
 
-def _is_sequence_finalized(db, seq_id: str):
-    """
-    We consider a sequence as ready, if all pictures have been processed and there is at least one correctly processed picture
-    Eg. we don't want pictures with preparing_status = 'not-processed' and at least one 'prepared'
-    """
-    statuses = db.execute(
-        """SELECT DISTINCT(preparing_status) FROM pictures p
-        JOIN sequences_pictures sp ON sp.pic_id = p.id
-        WHERE
-            sp.seq_id = %(id)s
-            AND p.preparing_status <> 'broken'
-        ;
-        """,
-        {"id": seq_id},
-    ).fetchall()
+        # Add a task to finalize the sequence/upload_set
+        sequences.add_finalization_job(cursor, r["seq_id"])
 
-    return [("prepared",)] == statuses
 
-
-def _finalize_picture_process(db, job: DbJob):
+def _finalize_job(conn, job: DbJob):
     job.reporting_conn.execute(
         "UPDATE job_history SET finished_at = CURRENT_TIMESTAMP WHERE id = %(id)s",
-        {"id": job.id},
+        {"id": job.job_history_id},
     )
-    if job.task == ProcessTask.prepare:
+    if job.task == ProcessTask.prepare and job.pic:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
         job.reporting_conn.execute(
             "UPDATE pictures SET status = 'ready', preparing_status = 'prepared' WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )
 
-        # Check if we need to finalize the sequence
-        _finalize_sequence_if_last_picture(job)
-    elif job.task == ProcessTask.delete:
-        # TODO set the status to 'deleted' instead of removing it
-        db.execute(
+        # Add a task to finalize the sequence
+        _finalize_sequence(job)
+    elif job.task == ProcessTask.delete and job.pic:
+        conn.execute(
             "DELETE FROM pictures WHERE id = %(pic_id)s",
             {"pic_id": job.pic.id},
         )
-        db.execute(
-            "DELETE FROM pictures_to_process WHERE picture_id = %(pic_id)s",
-            {"pic_id": job.pic.id},
-        )
+    elif job.task == ProcessTask.delete and job.upload_set:
+        conn.execute(SQL("DELETE FROM upload_sets WHERE id = %s"), [job.upload_set.id])
+
+    conn.execute("DELETE FROM job_queue WHERE id = %(job_id)s", {"job_id": job.job_queue_id})
 
 
-def _initialize_picture_process(reporting_conn: psycopg.Connection, pic: DbPicture, task: ProcessTask) -> DbJob:
+def _initialize_job(
+    reporting_conn: psycopg.Connection,
+    job_queue_id: UUID,
+    db_pic: Optional[DbPicture],
+    db_seq: Optional[DbSequence],
+    db_upload_set: Optional[DbUploadSet],
+    task: ProcessTask,
+) -> DbJob:
     r = reporting_conn.execute(
-        """INSERT INTO job_history(picture_id, task)
-        VALUES (%(id)s, %(task)s)
-        RETURNING id
-        """,
-        {"id": pic.id, "task": task.value},
+        """INSERT INTO job_history(job_id, picture_id, sequence_id, upload_set_id, job_task)
+        VALUES (%(job_id)s, %(pic_id)s, %(seq_id)s, %(us_id)s, %(task)s)
+        RETURNING id""",
+        {
+            "job_id": job_queue_id,
+            "pic_id": db_pic.id if db_pic else None,
+            "seq_id": db_seq.id if db_seq else None,
+            "us_id": db_upload_set.id if db_upload_set else None,
+            "task": task.value,
+        },
     ).fetchone()
 
     if not r:
         raise Exception("impossible to insert task in database")
-    return DbJob(reporting_conn=reporting_conn, pic=pic, id=r[0], task=task)
+
+    return DbJob(
+        reporting_conn=reporting_conn,
+        job_queue_id=job_queue_id,
+        pic=db_pic,
+        seq=db_seq,
+        upload_set=db_upload_set,
+        task=task,
+        job_history_id=r[0],
+    )
 
 
-def _mark_process_as_error(db, job: DbJob, e: Exception, config, recoverable: bool = False):
+def _mark_process_as_error(conn, job: DbJob, e: Exception, recoverable: bool = False, mark_as_error: bool = True):
     job.reporting_conn.execute(
         """UPDATE job_history SET
            error = %(err)s, finished_at = CURRENT_TIMESTAMP
            WHERE id = %(id)s""",
-        {"err": str(e), "id": job.id},
+        {"err": str(e), "id": job.job_history_id},
     )
     if recoverable:
-        nb_error = db.execute(
-            """
-            UPDATE pictures_to_process SET
-                nb_errors = nb_errors + 1
-            WHERE picture_id = %(id)s
-            RETURNING nb_errors""",
-            {"err": str(e), "id": job.pic.id},
-        ).fetchone()
-        if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
-            logging.info(f"Job to process picture {job.pic.id} has failed {nb_error} times, we stop trying to process it.")
-            recoverable = False
+        if mark_as_error:
+            nb_error = conn.execute(
+                """
+                UPDATE job_queue SET
+                    nb_errors = nb_errors + 1
+                WHERE id = %(id)s
+                RETURNING nb_errors""",
+                {"err": str(e), "id": job.job_queue_id},
+            ).fetchone()
+            if nb_error and nb_error[0] > PICTURE_PROCESS_MAX_RETRY:
+                logging.info(f"Job {job.label()} has failed {nb_error} times, we stop trying to process it.")
+                recoverable = False
+        else:
+            # it's not a real error, we just want to retry later
+            conn.execute(
+                SQL("UPDATE job_queue SET ts = NOW() WHERE id = %(id)s"),
+                {"err": str(e), "id": job.job_queue_id},
+            )
 
     if not recoverable:
         # Note: the status is slowly been deprecated by replacing it with more precise status, and in the end it will be removed
-        job.reporting_conn.execute(
-            """UPDATE pictures SET
-                    preparing_status = 'broken', status = 'broken'
-            WHERE id = %(id)s""",
-            {"id": job.pic.id},
-        )
-        # on unrecoverable error, we remove the picture from the queue to process
-        db.execute(
-            """
-            DELETE FROM pictures_to_process
-            WHERE picture_id = %(id)s""",
-            {"id": job.pic.id},
-        )
+        if job.task == "prepare" and job.pic:
+            job.reporting_conn.execute(
+                """UPDATE pictures SET
+                        preparing_status = 'broken', status = 'broken'
+                WHERE id = %(id)s""",
+                {"id": job.pic.id},
+            )
+        # on unrecoverable error, we remove the job from the queue
+        conn.execute("DELETE FROM job_queue WHERE id = %(id)s", {"id": job.job_queue_id})
 
 
-def _delete_picture(job: DbJob):
+def _delete_picture(pic: DbPicture):
     """Delete a picture from the filesystem"""
-    log.debug(f"Deleting picture files {job.pic.id}")
-    utils.pictures.removeAllFiles(job.pic.id)
+    log.debug(f"Deleting picture files {pic.id}")
+    utils.pictures.removeAllFiles(pic.id)
+
+
+def _delete_upload_set(upload_set: DbUploadSet):
+    """Delete an upload set
+    We do this in the job queue since we want to wait for all its pictures to be deleted
+    """
+    with db.conn(current_app) as conn:
+        with conn.transaction(), conn.cursor() as cursor:
+            # we want to wait for all pictures to be deleted
+            has_more_pictures = cursor.execute("SELECT 1 FROM pictures WHERE upload_set_id = %s LIMIT 1", [upload_set.id]).fetchone()
+            if has_more_pictures and has_more_pictures[0]:
+                logging.info(f"More pictures to be deleted, upload_set {upload_set.id} will be deleted later")
+                raise RetryLaterProcessException("More pictures to be deleted, upload_set will be deleted later")
+            # Note: the real deletion will be done on job completion so the lock is released
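Taken together, _mark_process_as_error and RetryLaterProcessException implement two different re-queueing behaviours: a recoverable failure increments nb_errors, which both pushes the job behind healthier ones in the ORDER BY nb_errors, ts and counts against the PICTURE_PROCESS_MAX_RETRY budget, while a retry-later outcome (an upload set still waiting for its pictures to be deleted, for instance) only refreshes ts, sending the job to the back of the line without consuming that budget. A toy in-memory model of the policy; the Job class below is illustrative, not GeoVisio code.

from dataclasses import dataclass, field
from itertools import count

PICTURE_PROCESS_MAX_RETRY = 10  # same cap as the worker
_clock = count()  # monotonically increasing stand-in for the ts column


@dataclass
class Job:
    name: str
    nb_errors: int = 0
    ts: int = field(default_factory=lambda: next(_clock))


queue = [Job("delete upload set"), Job("prepare picture")]


def on_recoverable_error(job: Job) -> None:
    job.nb_errors += 1  # counts against the retry budget
    if job.nb_errors > PICTURE_PROCESS_MAX_RETRY:
        queue.remove(job)  # give up, like the final DELETE FROM job_queue


def on_retry_later(job: Job) -> None:
    job.ts = next(_clock)  # back of the line, but no error recorded


on_retry_later(queue[0])  # e.g. the upload set still has pictures pending deletion
next_job = min(queue, key=lambda j: (j.nb_errors, j.ts))  # ORDER BY nb_errors, ts
print(next_job.name)  # -> "prepare picture"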