geovisio 2.9.0__py3-none-any.whl → 2.11.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
Files changed (82)
  1. geovisio/__init__.py +8 -1
  2. geovisio/admin_cli/user.py +7 -2
  3. geovisio/config_app.py +26 -12
  4. geovisio/translations/ar/LC_MESSAGES/messages.mo +0 -0
  5. geovisio/translations/ar/LC_MESSAGES/messages.po +818 -0
  6. geovisio/translations/be/LC_MESSAGES/messages.mo +0 -0
  7. geovisio/translations/be/LC_MESSAGES/messages.po +886 -0
  8. geovisio/translations/br/LC_MESSAGES/messages.po +1 -1
  9. geovisio/translations/da/LC_MESSAGES/messages.mo +0 -0
  10. geovisio/translations/da/LC_MESSAGES/messages.po +96 -4
  11. geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
  12. geovisio/translations/de/LC_MESSAGES/messages.po +214 -122
  13. geovisio/translations/el/LC_MESSAGES/messages.po +1 -1
  14. geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
  15. geovisio/translations/en/LC_MESSAGES/messages.po +234 -157
  16. geovisio/translations/eo/LC_MESSAGES/messages.mo +0 -0
  17. geovisio/translations/eo/LC_MESSAGES/messages.po +55 -5
  18. geovisio/translations/es/LC_MESSAGES/messages.po +1 -1
  19. geovisio/translations/fi/LC_MESSAGES/messages.po +1 -1
  20. geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
  21. geovisio/translations/fr/LC_MESSAGES/messages.po +92 -3
  22. geovisio/translations/hu/LC_MESSAGES/messages.po +1 -1
  23. geovisio/translations/it/LC_MESSAGES/messages.mo +0 -0
  24. geovisio/translations/it/LC_MESSAGES/messages.po +63 -3
  25. geovisio/translations/ja/LC_MESSAGES/messages.po +1 -1
  26. geovisio/translations/ko/LC_MESSAGES/messages.po +1 -1
  27. geovisio/translations/messages.pot +216 -139
  28. geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
  29. geovisio/translations/nl/LC_MESSAGES/messages.po +333 -62
  30. geovisio/translations/oc/LC_MESSAGES/messages.mo +0 -0
  31. geovisio/translations/oc/LC_MESSAGES/messages.po +821 -0
  32. geovisio/translations/pl/LC_MESSAGES/messages.po +1 -1
  33. geovisio/translations/pt/LC_MESSAGES/messages.mo +0 -0
  34. geovisio/translations/pt/LC_MESSAGES/messages.po +944 -0
  35. geovisio/translations/pt_BR/LC_MESSAGES/messages.mo +0 -0
  36. geovisio/translations/pt_BR/LC_MESSAGES/messages.po +942 -0
  37. geovisio/translations/sv/LC_MESSAGES/messages.mo +0 -0
  38. geovisio/translations/sv/LC_MESSAGES/messages.po +4 -3
  39. geovisio/translations/ti/LC_MESSAGES/messages.mo +0 -0
  40. geovisio/translations/ti/LC_MESSAGES/messages.po +762 -0
  41. geovisio/translations/tr/LC_MESSAGES/messages.mo +0 -0
  42. geovisio/translations/tr/LC_MESSAGES/messages.po +927 -0
  43. geovisio/translations/uk/LC_MESSAGES/messages.mo +0 -0
  44. geovisio/translations/uk/LC_MESSAGES/messages.po +920 -0
  45. geovisio/translations/zh_Hant/LC_MESSAGES/messages.po +1 -1
  46. geovisio/utils/annotations.py +21 -21
  47. geovisio/utils/auth.py +47 -13
  48. geovisio/utils/cql2.py +22 -5
  49. geovisio/utils/fields.py +14 -2
  50. geovisio/utils/items.py +44 -0
  51. geovisio/utils/model_query.py +2 -2
  52. geovisio/utils/pic_shape.py +1 -1
  53. geovisio/utils/pictures.py +127 -36
  54. geovisio/utils/semantics.py +32 -3
  55. geovisio/utils/sentry.py +1 -1
  56. geovisio/utils/sequences.py +155 -109
  57. geovisio/utils/upload_set.py +303 -206
  58. geovisio/utils/users.py +18 -0
  59. geovisio/utils/website.py +1 -1
  60. geovisio/web/annotations.py +303 -69
  61. geovisio/web/auth.py +1 -1
  62. geovisio/web/collections.py +194 -97
  63. geovisio/web/configuration.py +36 -4
  64. geovisio/web/docs.py +109 -13
  65. geovisio/web/items.py +319 -186
  66. geovisio/web/map.py +92 -54
  67. geovisio/web/pages.py +48 -4
  68. geovisio/web/params.py +100 -42
  69. geovisio/web/pictures.py +37 -3
  70. geovisio/web/prepare.py +4 -2
  71. geovisio/web/queryables.py +57 -0
  72. geovisio/web/stac.py +8 -2
  73. geovisio/web/tokens.py +49 -1
  74. geovisio/web/upload_set.py +226 -51
  75. geovisio/web/users.py +89 -8
  76. geovisio/web/utils.py +26 -8
  77. geovisio/workers/runner_pictures.py +128 -23
  78. {geovisio-2.9.0.dist-info → geovisio-2.11.0.dist-info}/METADATA +15 -14
  79. geovisio-2.11.0.dist-info/RECORD +117 -0
  80. geovisio-2.9.0.dist-info/RECORD +0 -98
  81. {geovisio-2.9.0.dist-info → geovisio-2.11.0.dist-info}/WHEEL +0 -0
  82. {geovisio-2.9.0.dist-info → geovisio-2.11.0.dist-info}/licenses/LICENSE +0 -0
@@ -6,6 +6,7 @@ from geovisio.utils.extent import TemporalExtent
 from uuid import UUID
 from typing import Optional, List, Dict, Any
 from datetime import datetime, timedelta
+from dataclasses import dataclass
 from geovisio.utils import cql2, db, sequences
 from geovisio import errors
 from geovisio.utils.link import make_link, Link
@@ -16,6 +17,8 @@ from psycopg.rows import class_row, dict_row
 from flask import current_app
 from flask_babel import gettext as _
 from geopic_tag_reader import sequence as geopic_sequence, reader
+from geovisio.utils.tags import SemanticTag
+from geovisio.web.params import Visibility
 
 from geovisio.utils.loggers import getLoggerWithExtra
 
@@ -73,15 +76,27 @@ class UploadSet(BaseModel):
     title: str
     estimated_nb_files: Optional[int] = None
     sort_method: geopic_sequence.SortMethod
-    split_distance: int
-    split_time: timedelta
-    duplicate_distance: float
-    duplicate_rotation: int
+    no_split: Optional[bool] = None
+    split_distance: Optional[int] = None
+    split_time: Optional[timedelta] = None
+    no_deduplication: Optional[bool] = None
+    duplicate_distance: Optional[float] = None
+    duplicate_rotation: Optional[int] = None
     metadata: Optional[Dict[str, Any]]
     user_agent: Optional[str] = Field(exclude=True)
     associated_collections: List[AssociatedCollection] = []
     nb_items: int = 0
     items_status: Optional[AggregatedStatus] = None
+    semantics: List[SemanticTag] = Field(default_factory=list)
+    """Semantic tags associated with the upload_set"""
+    relative_heading: Optional[int] = None
+    """The relative heading (in degrees), offset based on the movement path (0° = looking forward, -90° = looking left, 90° = looking right; for single-picture upload_sets, 0° is heading north). Applied to all associated collections if set."""
+    visibility: Optional[Visibility] = None
+    """Visibility of the upload set. Can be set to:
+    * `anyone`: the upload is visible to anyone
+    * `owner-only`: the upload is visible to the owner and administrators only
+    * `logged-only`: the upload is visible to logged-in users only
+    """
 
     @computed_field
     @property
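The six split/deduplication knobs are now all optional, with `no_split`/`no_deduplication` acting as explicit off switches, and the visibility docstring above lists the three access levels. A minimal sketch of a creation payload exercising them (the endpoint URL and the use of `requests` are illustrative assumptions, not confirmed by this diff):

    # Sketch only: field names come from the UploadSet model above; the
    # endpoint URL is an assumption for illustration.
    import requests

    payload = {
        "title": "Ride through town",
        "split_distance": 100,       # metres; unset knobs fall back to instance defaults
        "no_deduplication": False,   # keep near-duplicate removal enabled
        "visibility": "owner-only",  # one of: anyone, owner-only, logged-only
    }
    r = requests.post("https://panoramax.example.com/api/upload_sets", json=payload, timeout=30)
    r.raise_for_status()

Unset values are resolved at dispatch time against the instance-wide defaults, as the `configurations` lookup later in this diff shows.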
@@ -128,19 +143,13 @@ class FileRejectionStatus(Enum):
     """other_error means there was an error that is not related to the picture itself"""
 
 
-class FileRejectionDetails(BaseModel):
-
-    missing_fields: List[str]
-    """Mandatory metadata missing from the file. Metadata can be `datetime` or `location`."""
-
-
 class FileRejection(BaseModel):
     """Details about a file rejection"""
 
     reason: str
     severity: FileRejectionStatusSeverity
     message: Optional[str]
-    details: Optional[FileRejectionDetails]
+    details: Optional[Dict[str, Any]]
 
     model_config = ConfigDict(use_enum_values=True, use_attribute_docstrings=True)
 
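Replacing the typed `FileRejectionDetails` model with a free-form `Dict[str, Any]` lets each rejection kind carry its own payload. Two example shapes, one mirroring the removed `missing_fields` model and one matching the keys that `dispatch()` writes for capture duplicates later in this diff (values are illustrative):

    # Illustrative payloads for FileRejection.details
    missing_metadata = {"missing_fields": ["datetime", "location"]}
    capture_duplicate = {
        "duplicate_of": "<picture uuid>",  # id of the picture it duplicates
        "distance": 0.4,                   # metres between the two shots
        "angle": 3.0,                      # heading difference in degrees
    }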
@@ -224,32 +233,32 @@ def get_simple_upload_set(id: UUID) -> Optional[UploadSet]:
     return u
 
 
-def get_upload_set(id: UUID) -> Optional[UploadSet]:
+def get_upload_set(id: UUID, account_to_query: Optional[UUID] = None) -> Optional[UploadSet]:
     """Get the UploadSet corresponding to the ID"""
     db_upload_set = db.fetchone(
         current_app,
         SQL(
             """WITH picture_last_job AS (
     SELECT p.id as picture_id,
-        -- Note: to know if a picture is beeing processed, check the latest job_history entry for this picture
-        -- If there is no finished_at, the picture is still beeing processed
+        -- Note: to know if a picture is being processed, check the latest job_history entry for this picture
+        -- If there is no finished_at, the picture is still being processed
         (MAX(ARRAY [started_at, finished_at])) AS last_job,
         p.preparing_status,
         p.status,
         p.upload_set_id
     FROM pictures p
     LEFT JOIN job_history ON p.id = job_history.picture_id
-    WHERE p.upload_set_id = %(id)s
+    WHERE p.upload_set_id = %(id)s AND is_picture_visible_by_user(p, %(account_to_query)s)
     GROUP BY p.id
 ),
 picture_statuses AS (
-    SELECT 
+    SELECT
         *,
         (last_job[1] IS NOT NULL AND last_job[2] IS NULL) AS is_job_running
     FROM picture_last_job psj
 ),
 associated_collections AS (
-    SELECT 
+    SELECT
         ps.upload_set_id,
         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'broken') AS nb_broken,
         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'prepared') AS nb_prepared,
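The `MAX(ARRAY[started_at, finished_at])` aggregate keeps the greatest (started_at, finished_at) pair per picture, i.e. its most recent job, and `is_job_running` then means "started but not finished". A sketch of the same decision in Python (illustrative only; the `or datetime.max` mimics Postgres sorting NULL array elements after non-NULL ones):

    from datetime import datetime

    def is_job_running(jobs: list[dict]) -> bool:
        """Mirror of the SQL above: pick the job with the greatest
        (started_at, finished_at) pair; it is running if it started
        but has no finished_at yet. Illustrative sketch."""
        if not jobs:
            return False
        last = max(jobs, key=lambda j: (j["started_at"], j["finished_at"] or datetime.max))
        return last["started_at"] is not None and last["finished_at"] is None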
@@ -264,10 +273,19 @@ associated_collections AS (
     FROM picture_statuses ps
     JOIN sequences_pictures sp ON sp.pic_id = ps.picture_id
     JOIN sequences s ON s.id = sp.seq_id
-    WHERE ps.upload_set_id = %(id)s AND s.status != 'deleted'
+    WHERE ps.upload_set_id = %(id)s AND s.status != 'deleted' AND is_sequence_visible_by_user(s, %(account_to_query)s)
     GROUP BY ps.upload_set_id,
         s.id
 ),
+semantics AS (
+    SELECT upload_set_id, json_agg(json_strip_nulls(json_build_object(
+        'key', key,
+        'value', value
+    )) ORDER BY key, value) AS semantics
+    FROM upload_sets_semantics
+    WHERE upload_set_id = %(id)s
+    GROUP BY upload_set_id
+),
 upload_set_statuses AS (
     SELECT ps.upload_set_id,
         COUNT(ps.picture_id) AS nb_items,
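The new `semantics` CTE folds every tag of the upload set into a single JSON array, ordered by key then value so the output is stable. Parsed into the model's `semantics` field, the column looks like this (tag keys and values are made up for illustration):

    # Shape of the aggregated `semantics` column, as Python data
    semantics = [
        {"key": "exif.make", "value": "GoPro"},
        {"key": "capture.mode", "value": "walking"},
    ]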
@@ -280,13 +298,14 @@ upload_set_statuses AS (
 )
 SELECT u.*,
     COALESCE(us.nb_items, 0) AS nb_items,
+    COALESCE(s.semantics, '[]'::json) AS semantics,
     json_build_object(
         'broken', COALESCE(us.nb_broken, 0),
         'prepared', COALESCE(us.nb_prepared, 0),
         'not_processed', COALESCE(us.nb_not_processed, 0),
         'preparing', COALESCE(us.nb_preparing, 0),
         'rejected', (
-            SELECT count(*) FROM files 
+            SELECT count(*) FROM files
             WHERE upload_set_id = %(id)s AND rejection_status IS NOT NULL
         )
     ) AS items_status,
@@ -327,9 +346,10 @@ SELECT u.*,
     ) AS associated_collections
 FROM upload_sets u
 LEFT JOIN upload_set_statuses us on us.upload_set_id = u.id
-WHERE u.id = %(id)s"""
+LEFT JOIN semantics s on s.upload_set_id = u.id
+WHERE u.id = %(id)s AND is_upload_set_visible_by_user(u, %(account_to_query)s)"""
         ),
-        {"id": id},
+        {"id": id, "account_to_query": account_to_query},
         row_factory=class_row(UploadSet),
     )
 
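`account_to_query` carries the requesting account down to the SQL helpers (`is_picture_visible_by_user`, `is_sequence_visible_by_user`, `is_upload_set_visible_by_user`), so owners still see their `owner-only` content while other callers get `None` back. A usage sketch (how the caller obtains the current account is an assumption, not shown in this hunk):

    from typing import Optional
    from uuid import UUID

    from geovisio.utils import upload_set

    def fetch_visible_upload_set(us_id: UUID, requester: Optional[UUID]):
        # `requester` is the authenticated account id, or None for anonymous
        # callers; how it is obtained is an assumption, not shown here.
        us = upload_set.get_upload_set(us_id, account_to_query=requester)
        if us is None:
            # not-found and not-visible are indistinguishable on purpose
            ...
        return us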
@@ -360,44 +380,39 @@ def _parse_filter(filter: Optional[str]) -> SQL:
     return cql2.parse_cql2_filter(filter, FIELD_TO_SQL_FILTER)
 
 
-def list_upload_sets(account_id: UUID, limit: int = 100, filter: Optional[str] = None) -> UploadSets:
+def list_upload_sets(
+    account_id: UUID, limit: int = 100, filter: Optional[str] = None, account_to_query: Optional[UUID] = None
+) -> UploadSets:
     filter_sql = _parse_filter(filter)
     l = db.fetchall(
         current_app,
         SQL(
-            """SELECT 
+            """SELECT
     u.*,
     COALESCE(
         (
-            SELECT 
+            SELECT
                 json_agg(json_build_object(
-                    'id', ac.collection_id,
-                    'nb_items', ac.nb_items
+                    'id', s.id,
+                    'nb_items', s.nb_pictures
                 ))
-            FROM (
-                SELECT 
-                    sp.seq_id as collection_id,
-                    count(sp.pic_id) AS nb_items
-                FROM pictures p
-                JOIN sequences_pictures sp ON sp.pic_id = p.id
-                WHERE p.upload_set_id = u.id
-                GROUP BY sp.seq_id
-            ) ac
+            FROM sequences s
+            WHERE s.upload_set_id = u.id
         ),
         '[]'::json
     ) AS associated_collections,
     (
         SELECT count(*) AS nb
-        FROM pictures p 
+        FROM pictures p
         WHERE p.upload_set_id = u.id
     ) AS nb_items
 FROM upload_sets u
-WHERE account_id = %(account_id)s AND {filter}
+WHERE account_id = %(account_id)s AND is_upload_set_visible_by_user(u, %(account_to_query)s) AND {filter}
 ORDER BY created_at ASC
 LIMIT %(limit)s
 """
         ).format(filter=filter_sql),
-        {"account_id": account_id, "limit": limit},
+        {"account_id": account_id, "limit": limit, "account_to_query": account_to_query},
         row_factory=class_row(UploadSet),
     )
 
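Besides the visibility predicate, the associated-collections subquery now reads the denormalized `sequences.nb_pictures` counter instead of re-counting `sequences_pictures` rows on every request. Calling it might look like this (the CQL2 filter expression is an assumed example; `_parse_filter`/`FIELD_TO_SQL_FILTER` define the accepted fields):

    from uuid import UUID

    from geovisio.utils import upload_set

    def list_my_upload_sets(account_id: UUID):
        # Listing as the owner keeps owner-only sets visible; the filter
        # string is an assumption about the supported CQL2 fields.
        return upload_set.list_upload_sets(
            account_id=account_id,
            limit=50,
            filter="dispatched = true",
            account_to_query=account_id,
        )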
@@ -408,7 +423,7 @@ def ask_for_dispatch(upload_set_id: UUID):
     """Add a dispatch task to the job queue for the upload set. If there is already a task, postpone it."""
     with db.conn(current_app) as conn:
         conn.execute(
-            """INSERT INTO 
+            """INSERT INTO
                 job_queue(sequence_id, task)
             VALUES (%(upload_set_id)s, 'dispatch')
             ON CONFLICT (upload_set_id) DO UPDATE SET ts = CURRENT_TIMESTAMP""",
@@ -416,7 +431,13 @@ def ask_for_dispatch(upload_set_id: UUID):
         )
 
 
-def dispatch(upload_set_id: UUID):
+@dataclass
+class PicToDelete:
+    picture_id: UUID
+    detail: Optional[Dict] = None
+
+
+def dispatch(conn: psycopg.Connection, upload_set_id: UUID):
     """Finalize an upload set.
 
     For the moment we only create a collection around all the items of the upload set, but later we'll split the items into several collections
@@ -429,13 +450,15 @@ def dispatch(upload_set_id: UUID):
         raise Exception(f"Upload set {upload_set_id} not found")
 
     logger = getLoggerWithExtra("geovisio.upload_set", {"upload_set_id": str(upload_set_id)})
-    with db.conn(current_app) as conn:
-        with conn.transaction(), conn.cursor(row_factory=dict_row) as cursor:
-
-            # get all the pictures of the upload set
-            db_pics = cursor.execute(
-                SQL(
-                    """SELECT 
+    with conn.transaction(), conn.cursor(row_factory=dict_row) as cursor:
+        # we put a lock on the upload set, to avoid new semantics being added while dispatching it
+        # Note: I did not find a way to only put a lock on the upload_sets_semantics table, so we lock the whole upload_set row (and any child rows)
+        _us_lock = cursor.execute(SQL("SELECT id FROM upload_sets WHERE id = %s FOR UPDATE"), [upload_set_id])
+
+        # get all the pictures of the upload set
+        db_pics = cursor.execute(
+            SQL(
+                """SELECT
     p.id,
     p.ts,
     ST_X(p.geom) as lon,
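`dispatch()` no longer opens its own connection: the caller (the worker in geovisio/workers/runner_pictures.py, also changed in this release) passes one in, and the function immediately takes a `FOR UPDATE` row lock on the upload set so concurrent semantic-tag writes queue up behind the dispatch transaction. The pattern in isolation (generic sketch, not the package's code):

    import psycopg

    def do_work(conn: psycopg.Connection, upload_set_id) -> None:
        # Caller owns the connection; callee owns the transaction.
        with conn.transaction(), conn.cursor() as cur:
            # Blocks until any transaction holding this row lock commits,
            # serializing writers on the same upload set.
            cur.execute("SELECT id FROM upload_sets WHERE id = %s FOR UPDATE", (upload_set_id,))
            ...  # safe to reshuffle dependent rows here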
@@ -444,132 +467,198 @@ def dispatch(upload_set_id: UUID):
     p.metadata->>'originalFileName' as file_name,
     p.metadata,
     s.id as sequence_id,
-    f is null as has_no_file
+    f is null as has_no_file,
+    p.heading_computed
 FROM pictures p
 LEFT JOIN sequences_pictures sp ON sp.pic_id = p.id
 LEFT JOIN sequences s ON s.id = sp.seq_id
 LEFT JOIN files f ON f.picture_id = p.id
 WHERE p.upload_set_id = %(upload_set_id)s"""
-                ),
-                {"upload_set_id": upload_set_id},
-            ).fetchall()
-
-            # there is currently a bug where 2 pictures can be uploaded for the same file, so only 1 is associated to it.
-            # we want to delete one of them
-            # Those duplicates happen when a client sends an upload that times out, but the client retries the upload and the server is not aware of this timeout (the connection is not closed).
-            # Note: later, if we are confident the bug has been removed, we might clean this code.
-            pics_to_delete_bug = [p["id"] for p in db_pics if p["has_no_file"]]
-            db_pics = [p for p in db_pics if p["has_no_file"] is False]  # pictures without files will be deleted, we don't need them
-            pics_by_filename = {p["file_name"]: p for p in db_pics}
-
-            pics = [
-                geopic_sequence.Picture(
-                    p["file_name"],
-                    reader.GeoPicTags(
-                        lon=p["lon"],
-                        lat=p["lat"],
-                        ts=p["ts"],
-                        type=p["metadata"]["type"],
-                        heading=p["heading"],
-                        make=p["metadata"]["make"],
-                        model=p["metadata"]["model"],
-                        focal_length=p["metadata"]["focal_length"],
-                        crop=p["metadata"]["crop"],
-                        exif={},
-                    ),
-                )
-                for p in db_pics
-            ]
+            ),
+            {"upload_set_id": upload_set_id},
+        ).fetchall()
 
-            report = geopic_sequence.dispatch_pictures(
-                pics,
-                mergeParams=geopic_sequence.MergeParams(
-                    maxDistance=db_upload_set.duplicate_distance, maxRotationAngle=db_upload_set.duplicate_rotation
-                ),
-                sortMethod=db_upload_set.sort_method,
-                splitParams=geopic_sequence.SplitParams(
-                    maxDistance=db_upload_set.split_distance, maxTime=db_upload_set.split_time.total_seconds()
+        config = cursor.execute(
+            SQL(
+                "SELECT default_split_distance, default_split_time, default_duplicate_distance, default_duplicate_rotation FROM configurations"
+            )
+        ).fetchone()
+
+        # there is currently a bug where 2 pictures can be uploaded for the same file, so only 1 is associated to it.
+        # we want to delete one of them
+        # Those duplicates happen when a client sends an upload that times out, but the client retries the upload and the server is not aware of this timeout (the connection is not closed).
+        # Note: later, if we are confident the bug has been removed, we might clean this code.
+        pics_to_delete_bug = [PicToDelete(picture_id=p["id"]) for p in db_pics if p["has_no_file"]]
+        db_pics = [p for p in db_pics if p["has_no_file"] is False]  # pictures without files will be deleted, we don't need them
+        pics_by_filename = {p["file_name"]: p for p in db_pics}
+
+        pics = [
+            geopic_sequence.Picture(
+                p["file_name"],
+                reader.GeoPicTags(
+                    lon=p["lon"],
+                    lat=p["lat"],
+                    ts=p["ts"],
+                    type=p["metadata"]["type"],
+                    heading=p["heading"],
+                    make=p["metadata"]["make"],
+                    model=p["metadata"]["model"],
+                    focal_length=p["metadata"]["focal_length"],
+                    crop=p["metadata"]["crop"],
+                    exif={},
                 ),
+                heading_computed=p["heading_computed"],
             )
-            reused_sequence = set()
+            for p in db_pics
+        ]
 
-            pics_to_delete_duplicates = [pics_by_filename[p.filename]["id"] for p in report.duplicate_pictures or []]
-            pics_to_delete = pics_to_delete_duplicates + pics_to_delete_bug
-            if pics_to_delete:
-                logger.debug(
-                    f"nb duplicate pictures {len(pics_to_delete_duplicates)} {f' and {len(pics_to_delete_bug)} pictures without files' if pics_to_delete_bug else ''}"
-                )
-                logger.debug(f"duplicate pictures {[p.filename for p in report.duplicate_pictures or []]}")
+        split_params = None
+        if not db_upload_set.no_split:
+            distance = db_upload_set.split_distance if db_upload_set.split_distance is not None else config["default_split_distance"]
+            t = db_upload_set.split_time if db_upload_set.split_time is not None else config["default_split_time"]
+            if t is not None and distance is not None:
+                split_params = geopic_sequence.SplitParams(maxDistance=distance, maxTime=t.total_seconds())
+        merge_params = None
+        if not db_upload_set.no_deduplication:
+            distance = (
+                db_upload_set.duplicate_distance if db_upload_set.duplicate_distance is not None else config["default_duplicate_distance"]
+            )
+            rotation = (
+                db_upload_set.duplicate_rotation if db_upload_set.duplicate_rotation is not None else config["default_duplicate_rotation"]
+            )
+            if distance is not None and rotation is not None:
+                merge_params = geopic_sequence.MergeParams(maxDistance=distance, maxRotationAngle=rotation)
 
-            cursor.execute(SQL("CREATE TEMPORARY TABLE tmp_duplicates(picture_id UUID) ON COMMIT DROP"))
-            with cursor.copy("COPY tmp_duplicates(picture_id) FROM stdin;") as copy:
-                for p in pics_to_delete:
-                    copy.write_row((p,))
+        report = geopic_sequence.dispatch_pictures(
+            pics, mergeParams=merge_params, sortMethod=db_upload_set.sort_method, splitParams=split_params
+        )
+        reused_sequence = set()
+
+        pics_to_delete_duplicates = [
+            PicToDelete(
+                picture_id=pics_by_filename[d.picture.filename]["id"],
+                detail={
+                    "duplicate_of": str(pics_by_filename[d.duplicate_of.filename]["id"]),
+                    "distance": d.distance,
+                    "angle": d.angle,
+                },
+            )
+            for d in report.duplicate_pictures
+        ]
+        pics_to_delete = pics_to_delete_duplicates + pics_to_delete_bug
+        if pics_to_delete:
+            logger.debug(
+                f"nb duplicate pictures {len(pics_to_delete_duplicates)} {f' and {len(pics_to_delete_bug)} pictures without files' if pics_to_delete_bug else ''}"
+            )
+            logger.debug(f"duplicate pictures {[p.picture.filename for p in report.duplicate_pictures]}")
 
-            cursor.execute(
-                SQL(
-                    "UPDATE files SET rejection_status = 'capture_duplicate' WHERE picture_id IN (select picture_id from tmp_duplicates)"
-                )
+        cursor.execute(SQL("CREATE TEMPORARY TABLE tmp_duplicates(picture_id UUID, details JSONB) ON COMMIT DROP"))
+        with cursor.copy("COPY tmp_duplicates(picture_id, details) FROM stdin;") as copy:
+            for p in pics_to_delete:
+                copy.write_row((p.picture_id, Jsonb(p.detail) if p.detail else None))
+
+        cursor.execute(
+            SQL(
+                """UPDATE files SET
+                    rejection_status = 'capture_duplicate', rejection_details = d.details
+                FROM tmp_duplicates d
+                WHERE d.picture_id = files.picture_id"""
             )
+        )
+        # set all the pictures as waiting for deletion and add background jobs to delete them
+        # Note: we do not delete the picture's row because it can cause some deadlocks if some workers are preparing those pictures.
+        cursor.execute(SQL("UPDATE pictures SET status = 'waiting-for-delete' WHERE id IN (select picture_id FROM tmp_duplicates)"))
+        cursor.execute(
+            SQL(
+                """INSERT INTO job_queue(picture_to_delete_id, task)
+                SELECT picture_id, 'delete' FROM tmp_duplicates"""
+            )
+        )
+
+        number_title = len(report.sequences) > 1
+        existing_sequences = set(p["sequence_id"] for p in db_pics if p["sequence_id"])
+        new_sequence_ids = set()
+        for i, s in enumerate(report.sequences, start=1):
+            existing_sequence = next(
+                (seq for p in s.pictures if (seq := pics_by_filename[p.filename]["sequence_id"]) not in reused_sequence),
+                None,
+            )
+            # if some of the pictures were already in a sequence, we should not create a new one
+            if existing_sequence:
+                logger.info(f"sequence {existing_sequence} already contains pictures, we will not create a new one")
+                # we should wipe the sequences_pictures though
+                seq_id = existing_sequence
+                cursor.execute(
+                    SQL("DELETE FROM sequences_pictures WHERE seq_id = %(seq_id)s"),
+                    {"seq_id": seq_id},
                 )
-                # if some of the pictures were already in a sequence, we should not create a new one
-                if existing_sequence:
-                    logger.info(f"sequence {existing_sequence} already contains pictures, we will not create a new one")
-                    # we should wipe the sequences_pictures though
-                    seq_id = existing_sequence
-                    cursor.execute(
-                        SQL("DELETE FROM sequences_pictures WHERE seq_id = %(seq_id)s"),
-                        {"seq_id": seq_id},
+                reused_sequence.add(seq_id)
+                # Note: we do not update the sequences_semantics if reusing a sequence, because updates to the upload set's semantics are propagated to the existing sequences if there are some
+            else:
+                new_title = f"{db_upload_set.title}{f'-{i}' if number_title else ''}"
+                seq_id = cursor.execute(
+                    SQL(
+                        """INSERT INTO sequences(account_id, metadata, user_agent, upload_set_id, visibility)
+                        VALUES (%(account_id)s, %(metadata)s, %(user_agent)s, %(upload_set_id)s, %(visibility)s)
+                        RETURNING id"""
+                    ),
+                    {
+                        "account_id": db_upload_set.account_id,
+                        "metadata": Jsonb({"title": new_title}),
+                        "user_agent": db_upload_set.user_agent,
+                        "upload_set_id": db_upload_set.id,
+                        "visibility": db_upload_set.visibility,
+                    },
+                ).fetchone()
+                seq_id = seq_id["id"]
+
+                # Pass all semantics to the new sequence
+                copy_upload_set_semantics_to_sequence(cursor, db_upload_set.id, seq_id)
+                new_sequence_ids.add(seq_id)
+
+            with cursor.copy("COPY sequences_pictures(seq_id, pic_id, rank) FROM stdin;") as copy:
+                for i, p in enumerate(s.pictures, 1):
+                    copy.write_row(
+                        (seq_id, pics_by_filename[p.filename]["id"], i),
                     )
-                    reused_sequence.add(seq_id)
-                else:
-                    new_title = f"{db_upload_set.title}{f'-{i}' if number_title else ''}"
-                    seq_id = cursor.execute(
-                        SQL(
-                            """INSERT INTO sequences(account_id, metadata, user_agent)
-                            VALUES (%(account_id)s, %(metadata)s, %(user_agent)s)
-                            RETURNING id"""
-                        ),
-                        {
-                            "account_id": db_upload_set.account_id,
-                            "metadata": Jsonb({"title": new_title}),
-                            "user_agent": db_upload_set.user_agent,
-                        },
-                    ).fetchone()
-                    seq_id = seq_id["id"]
-
-                    new_sequence_ids.add(seq_id)
-
-                with cursor.copy("COPY sequences_pictures(seq_id, pic_id, rank) FROM stdin;") as copy:
-                    for i, p in enumerate(s.pictures, 1):
-                        copy.write_row(
-                            (seq_id, pics_by_filename[p.filename]["id"], i),
-                        )
 
-                sequences.add_finalization_job(cursor=cursor, seqId=seq_id)
+            sequences.add_finalization_job(cursor=cursor, seqId=seq_id)
 
-            # we can delete all the old sequences
-            sequences_to_delete = existing_sequences - new_sequence_ids
-            if sequences_to_delete:
-                logger.debug(f"sequences to delete = {sequences_to_delete} (existing = {existing_sequences}, new = {new_sequence_ids})")
-                conn.execute(SQL("DELETE FROM sequences_pictures WHERE seq_id = ANY(%(seq_ids)s)"), {"seq_ids": list(sequences_to_delete)})
-                conn.execute(
-                    SQL("UPDATE sequences SET status = 'deleted' WHERE id = ANY(%(seq_ids)s)"), {"seq_ids": list(sequences_to_delete)}
-                )
+        # we can delete all the old sequences
+        sequences_to_delete = existing_sequences - new_sequence_ids
+        if sequences_to_delete:
+            logger.debug(f"sequences to delete = {sequences_to_delete} (existing = {existing_sequences}, new = {new_sequence_ids})")
+            conn.execute(SQL("DELETE FROM sequences_pictures WHERE seq_id = ANY(%(seq_ids)s)"), {"seq_ids": list(sequences_to_delete)})
+            conn.execute(SQL("UPDATE sequences SET status = 'deleted' WHERE id = ANY(%(seq_ids)s)"), {"seq_ids": list(sequences_to_delete)})
 
-        for s in report.sequences_splits or []:
-            logger.debug(f"split = {s.prevPic.filename} -> {s.nextPic.filename} : {s.reason}")
-        conn.execute(SQL("UPDATE upload_sets SET dispatched = true WHERE id = %(upload_set_id)s"), {"upload_set_id": db_upload_set.id})
+    for s in report.sequences_splits or []:
+        logger.debug(f"split = {s.prevPic.filename} -> {s.nextPic.filename} : {s.reason}")
+    conn.execute(SQL("UPDATE upload_sets SET dispatched = true WHERE id = %(upload_set_id)s"), {"upload_set_id": db_upload_set.id})
+
+
+def copy_upload_set_semantics_to_sequence(cursor, db_upload_id: UUID, seq_id: UUID):
+    cursor.execute(
+        SQL(
+            """WITH upload_set_semantics AS (
+                SELECT key, value, upload_set_id, account_id
+                FROM upload_sets_semantics
+                WHERE upload_set_id = %(upload_set_id)s
+            ),
+            seq_sem AS (
+                INSERT INTO sequences_semantics(sequence_id, key, value)
+                SELECT %(seq_id)s, key, value
+                FROM upload_set_semantics
+            )
+            INSERT INTO sequences_semantics_history(sequence_id, account_id, ts, updates)
+            SELECT %(seq_id)s, account_id, NOW(), jsonb_build_object('key', key, 'value', value, 'action', 'add')
+            FROM upload_set_semantics
+            """
+        ),
+        {
+            "upload_set_id": db_upload_id,
+            "seq_id": seq_id,
+        },
+    )
 
 
 def insertFileInDatabase(
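Each tuning knob in the hunk above is resolved the same way: the `no_*` flag disables the step outright, an explicit per-upload-set value wins otherwise, and `NULL` columns fall back to the instance-wide defaults read from `configurations`; a step only runs if both of its parameters resolve to a value. Condensed into one hypothetical helper:

    from typing import Optional

    def effective(explicit: Optional[float], instance_default: Optional[float], disabled: bool) -> Optional[float]:
        """Hypothetical condensation of dispatch()'s precedence:
        disabled flag > per-upload-set value > instance default."""
        if disabled:
            return None
        return explicit if explicit is not None else instance_default

    # split runs only if BOTH distance and time resolve, mirroring the
    # `if t is not None and distance is not None` guard above

Note also that duplicates are no longer deleted inline: they are flipped to `waiting-for-delete` and handed to `job_queue`, which avoids deadlocking against workers still preparing those pictures.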
@@ -589,51 +678,59 @@ def insertFileInDatabase(
 
     # we check if there is already a file with this name in the upload set with an associated picture.
     # If there is no picture (because the picture has been rejected), we accept that the file is overridden
-    existing_file = cursor.execute(
-        SQL(
-            """SELECT picture_id, rejection_status
-            FROM files
-            WHERE upload_set_id = %(upload_set_id)s AND file_name = %(file_name)s AND picture_id IS NOT NULL"""
-        ),
-        params={
-            "upload_set_id": upload_set_id,
-            "file_name": file_name,
-        },
-    ).fetchone()
-    if existing_file:
-        raise errors.InvalidAPIUsage(
-            _("A different picture with the same name has already been added to this uploadset"),
-            status_code=409,
-            payload={"existing_item": {"id": existing_file["picture_id"]}},
-        )
+    with cursor.connection.transaction():
+        existing_file = cursor.execute(
+            SQL(
+                """SELECT picture_id, rejection_status
+                FROM files
+                WHERE upload_set_id = %(upload_set_id)s AND file_name = %(file_name)s AND picture_id IS NOT NULL"""
+            ),
+            params={
+                "upload_set_id": upload_set_id,
+                "file_name": file_name,
+            },
+        ).fetchone()
+        if existing_file:
+            raise errors.InvalidAPIUsage(
+                _("A different picture with the same name has already been added to this uploadset"),
+                status_code=409,
+                payload={"existing_item": {"id": existing_file["picture_id"]}},
+            )
 
-    f = cursor.execute(
-        SQL(
-            """INSERT INTO files(
-            upload_set_id, picture_id, file_type, file_name,
-            size, content_md5, rejection_status, rejection_message, rejection_details)
-            VALUES (
-            %(upload_set_id)s, %(picture_id)s, %(type)s, %(file_name)s,
-            %(size)s, %(content_md5)s, %(rejection_status)s, %(rejection_message)s, %(rejection_details)s)
-            ON CONFLICT (upload_set_id, file_name)
-            DO UPDATE SET picture_id = %(picture_id)s, size = %(size)s, content_md5 = %(content_md5)s,
-            rejection_status = %(rejection_status)s, rejection_message = %(rejection_message)s, rejection_details = %(rejection_details)s
-            WHERE files.picture_id IS NULL -- check again that we do not override an existing picture
-            RETURNING *"""
-        ),
-        params={
-            "upload_set_id": upload_set_id,
-            "type": file_type,
-            "picture_id": picture_id,
-            "file_name": file_name,
-            "size": size,
-            "content_md5": content_md5,
-            "rejection_status": rejection_status,
-            "rejection_message": rejection_message,
-            "rejection_details": Jsonb(rejection_details),
-        },
-    )
-    return UploadSetFile(**f.fetchone())
+        f = cursor.execute(
+            SQL(
+                """INSERT INTO files(
+                upload_set_id, picture_id, file_type, file_name,
+                size, content_md5, rejection_status, rejection_message, rejection_details)
+                VALUES (
+                %(upload_set_id)s, %(picture_id)s, %(type)s, %(file_name)s,
+                %(size)s, %(content_md5)s, %(rejection_status)s, %(rejection_message)s, %(rejection_details)s)
+                ON CONFLICT (upload_set_id, file_name)
+                DO UPDATE SET picture_id = %(picture_id)s, size = %(size)s, content_md5 = %(content_md5)s,
+                rejection_status = %(rejection_status)s, rejection_message = %(rejection_message)s, rejection_details = %(rejection_details)s
+                WHERE files.picture_id IS NULL -- check again that we do not override an existing picture
+                RETURNING *"""
+            ),
+            params={
+                "upload_set_id": upload_set_id,
+                "type": file_type,
+                "picture_id": picture_id,
+                "file_name": file_name,
+                "size": size,
+                "content_md5": content_md5,
+                "rejection_status": rejection_status,
+                "rejection_message": rejection_message,
+                "rejection_details": Jsonb(rejection_details),
+            },
+        )
+        u = f.fetchone()
+        if u is None:
+            logging.error(f"Impossible to add file {file_name} to uploadset {upload_set_id}")
+            raise errors.InvalidAPIUsage(
+                _("Impossible to add the picture to this uploadset"),
+                status_code=500,
+            )
+        return UploadSetFile(**u)
 
 
 def get_upload_set_files(upload_set_id: UUID) -> UploadSetFiles:
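The pre-check and the guarded upsert now share one transaction, and the `WHERE files.picture_id IS NULL` clause on the `DO UPDATE` re-checks the invariant under the row lock: if a concurrent upload attached a picture between the SELECT and the INSERT, the update matches nothing, `RETURNING` yields no row, and the new `None` check turns that into a clean 500 instead of a crash when `f.fetchone()` returns `None`. The guarded-upsert behaviour in isolation (generic sketch, columns as in the hunk above):

    def attach_picture(cur, us_id, file_name, pic_id):
        # ON CONFLICT ... DO UPDATE with a WHERE guard updates nothing when
        # the guard fails, so RETURNING produces no row and fetchone() is None.
        row = cur.execute(
            """INSERT INTO files(upload_set_id, file_name, picture_id)
               VALUES (%s, %s, %s)
               ON CONFLICT (upload_set_id, file_name)
               DO UPDATE SET picture_id = EXCLUDED.picture_id
               WHERE files.picture_id IS NULL  -- re-checked under the row lock
               RETURNING picture_id""",
            (us_id, file_name, pic_id),
        ).fetchone()
        if row is None:
            raise RuntimeError("another upload already attached a picture to this file name")
        return row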
@@ -642,15 +739,15 @@ def get_upload_set_files(upload_set_id: UUID) -> UploadSetFiles:
         current_app,
         SQL(
             """SELECT
-    upload_set_id, 
-    file_type, 
-    file_name, 
-    size, 
-    content_md5, 
+    upload_set_id,
+    file_type,
+    file_name,
+    size,
+    content_md5,
     rejection_status,
     rejection_message,
     rejection_details,
-    picture_id, 
+    picture_id,
     inserted_at
 FROM files
 WHERE upload_set_id = %(upload_set_id)s