geovisio 2.6.0__py3-none-any.whl → 2.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. geovisio/__init__.py +36 -7
  2. geovisio/admin_cli/cleanup.py +2 -2
  3. geovisio/admin_cli/db.py +1 -4
  4. geovisio/config_app.py +40 -1
  5. geovisio/db_migrations.py +24 -3
  6. geovisio/templates/main.html +13 -13
  7. geovisio/templates/viewer.html +3 -3
  8. geovisio/translations/de/LC_MESSAGES/messages.mo +0 -0
  9. geovisio/translations/de/LC_MESSAGES/messages.po +804 -0
  10. geovisio/translations/el/LC_MESSAGES/messages.mo +0 -0
  11. geovisio/translations/el/LC_MESSAGES/messages.po +685 -0
  12. geovisio/translations/en/LC_MESSAGES/messages.mo +0 -0
  13. geovisio/translations/en/LC_MESSAGES/messages.po +738 -0
  14. geovisio/translations/es/LC_MESSAGES/messages.mo +0 -0
  15. geovisio/translations/es/LC_MESSAGES/messages.po +778 -0
  16. geovisio/translations/fi/LC_MESSAGES/messages.mo +0 -0
  17. geovisio/translations/fi/LC_MESSAGES/messages.po +589 -0
  18. geovisio/translations/fr/LC_MESSAGES/messages.mo +0 -0
  19. geovisio/translations/fr/LC_MESSAGES/messages.po +814 -0
  20. geovisio/translations/hu/LC_MESSAGES/messages.mo +0 -0
  21. geovisio/translations/hu/LC_MESSAGES/messages.po +773 -0
  22. geovisio/translations/ko/LC_MESSAGES/messages.mo +0 -0
  23. geovisio/translations/ko/LC_MESSAGES/messages.po +685 -0
  24. geovisio/translations/messages.pot +694 -0
  25. geovisio/translations/nl/LC_MESSAGES/messages.mo +0 -0
  26. geovisio/translations/nl/LC_MESSAGES/messages.po +602 -0
  27. geovisio/utils/__init__.py +1 -1
  28. geovisio/utils/auth.py +50 -11
  29. geovisio/utils/db.py +65 -0
  30. geovisio/utils/excluded_areas.py +83 -0
  31. geovisio/utils/extent.py +30 -0
  32. geovisio/utils/fields.py +1 -1
  33. geovisio/utils/filesystems.py +0 -1
  34. geovisio/utils/link.py +14 -0
  35. geovisio/utils/params.py +20 -0
  36. geovisio/utils/pictures.py +110 -88
  37. geovisio/utils/reports.py +171 -0
  38. geovisio/utils/sequences.py +262 -126
  39. geovisio/utils/tokens.py +37 -42
  40. geovisio/utils/upload_set.py +642 -0
  41. geovisio/web/auth.py +37 -37
  42. geovisio/web/collections.py +304 -304
  43. geovisio/web/configuration.py +14 -0
  44. geovisio/web/docs.py +276 -15
  45. geovisio/web/excluded_areas.py +377 -0
  46. geovisio/web/items.py +169 -112
  47. geovisio/web/map.py +104 -36
  48. geovisio/web/params.py +69 -26
  49. geovisio/web/pictures.py +14 -31
  50. geovisio/web/reports.py +399 -0
  51. geovisio/web/rss.py +13 -7
  52. geovisio/web/stac.py +129 -134
  53. geovisio/web/tokens.py +98 -109
  54. geovisio/web/upload_set.py +771 -0
  55. geovisio/web/users.py +100 -73
  56. geovisio/web/utils.py +28 -9
  57. geovisio/workers/runner_pictures.py +241 -207
  58. {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/METADATA +17 -14
  59. geovisio-2.7.1.dist-info/RECORD +70 -0
  60. {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/WHEEL +1 -1
  61. geovisio-2.6.0.dist-info/RECORD +0 -41
  62. {geovisio-2.6.0.dist-info → geovisio-2.7.1.dist-info}/LICENSE +0 -0
geovisio/utils/upload_set.py (new file)
@@ -0,0 +1,642 @@
+ from enum import Enum
+ import logging
+ import psycopg.rows
+ from pydantic import BaseModel, ConfigDict, computed_field, Field, field_serializer
+ from geovisio.utils.extent import TemporalExtent
+ from uuid import UUID
+ from typing import Optional, List, Dict, Any
+ from datetime import datetime, timedelta
+ from geovisio.utils import db, sequences
+ from geovisio import errors
+ from geovisio.utils.link import make_link, Link
+ import psycopg
+ from psycopg.types.json import Jsonb
+ from psycopg.sql import SQL
+ from psycopg.rows import class_row, dict_row
+ from flask import current_app
+ from flask_babel import gettext as _
+ from geopic_tag_reader import sequence as geopic_sequence, reader
+
+
+ class AggregatedStatus(BaseModel):
+     """Aggregated status"""
+
+     prepared: int
+     """Number of pictures successfully processed"""
+     preparing: Optional[int]
+     """Number of pictures being processed"""
+     broken: Optional[int]
+     """Number of pictures that failed to be processed. It is likely a server problem."""
+     rejected: Optional[int] = None
+     """Number of pictures that were rejected by the server. It is likely a client problem."""
+     not_processed: Optional[int]
+     """Number of pictures that have not been processed yet"""
+
+     model_config = ConfigDict(use_attribute_docstrings=True)
+
+
+ class AssociatedCollection(BaseModel):
+     """Collection associated with an UploadSet"""
+
+     id: UUID
+     nb_items: int
+     extent: Optional[TemporalExtent] = None
+     title: Optional[str] = None
+     items_status: Optional[AggregatedStatus] = None
+     status: Optional[str] = Field(exclude=True, default=None)
+
+     @computed_field
+     @property
+     def links(self) -> List[Link]:
+         return [
+             make_link(rel="self", route="stac_collections.getCollection", collectionId=self.id),
+         ]
+
+     @computed_field
+     @property
+     def ready(self) -> Optional[bool]:
+         if self.items_status is None:
+             return None
+         return self.items_status.not_processed == 0 and self.status == "ready"
+
+
+ class UploadSet(BaseModel):
+     """An UploadSet represents a group of files sent in one upload. Those files will be distributed among one or more collections."""
+
+     id: UUID
+     created_at: datetime
+     completed: bool
+     dispatched: bool
+     account_id: UUID
+     title: str
+     estimated_nb_files: Optional[int] = None
+     sort_method: geopic_sequence.SortMethod
+     split_distance: int
+     split_time: timedelta
+     duplicate_distance: float
+     duplicate_rotation: int
+     metadata: Optional[Dict[str, Any]]
+     user_agent: Optional[str] = Field(exclude=True)
+     associated_collections: List[AssociatedCollection] = []
+     nb_items: int = 0
+     items_status: Optional[AggregatedStatus] = None
+
+     @computed_field
+     @property
+     def links(self) -> List[Link]:
+         return [
+             make_link(rel="self", route="upload_set.getUploadSet", upload_set_id=self.id),
+         ]
+
+     @computed_field
+     @property
+     def ready(self) -> bool:
+         return self.dispatched and all(c.ready for c in self.associated_collections)
+
+     model_config = ConfigDict(use_enum_values=True, ser_json_timedelta="float", use_attribute_docstrings=True)
+
+
+ class UploadSets(BaseModel):
+     upload_sets: List[UploadSet]
+
+
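The models above lean on two pydantic v2 features: `computed_field` (so `links` and `ready` are serialized alongside regular fields) and `ConfigDict(use_attribute_docstrings=True)` (so the attribute docstrings become field descriptions in the generated schema, which feeds the API docs). A minimal standalone sketch of that pattern, with illustrative names rather than geovisio's own, assuming pydantic >= 2.7:

from pydantic import BaseModel, ConfigDict, computed_field

class Status(BaseModel):
    """Illustrative model, not part of geovisio"""

    prepared: int
    """Number of items successfully processed"""
    not_processed: int
    """Number of items still waiting"""

    model_config = ConfigDict(use_attribute_docstrings=True)

    @computed_field  # included in serialization, like links/ready above
    @property
    def ready(self) -> bool:
        return self.not_processed == 0

s = Status(prepared=3, not_processed=0)
print(s.model_dump())  # {'prepared': 3, 'not_processed': 0, 'ready': True}
print(Status.model_json_schema()["properties"]["prepared"]["description"])
# Number of items successfully processed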
+ class FileType(Enum):
+     """Type of uploaded file"""
+
+     picture = "picture"
+     # Note: for the moment we only support pictures, but later we might accept more kinds of files (like GPX traces, videos, ...)
+
+
+ class FileRejectionStatusSeverity(Enum):
+     error = "error"
+     warning = "warning"
+     info = "info"
+
+
+ class FileRejectionStatus(Enum):
+     capture_duplicate = "capture_duplicate"
+     """capture_duplicate means there was another picture too near (in space and time)"""
+     file_duplicate = "file_duplicate"
+     """file_duplicate means the same file was already uploaded"""
+     invalid_file = "invalid_file"
+     """invalid_file means the file is not a valid JPEG"""
+     invalid_metadata = "invalid_metadata"
+     """invalid_metadata means the file has invalid metadata"""
+     other_error = "other_error"
+     """other_error means there was an error that is not related to the picture itself"""
+
+
+ class FileRejectionDetails(BaseModel):
+
+     missing_fields: List[str]
+     """Mandatory metadata missing from the file. Metadata can be `datetime` or `location`."""
+
+
+ class FileRejection(BaseModel):
+     """Details about a file rejection"""
+
+     reason: str
+     severity: FileRejectionStatusSeverity
+     message: Optional[str]
+     details: Optional[FileRejectionDetails]
+
+     model_config = ConfigDict(use_enum_values=True, use_attribute_docstrings=True)
+
+
+ class UploadSetFile(BaseModel):
+     """File uploaded in an UploadSet"""
+
+     picture_id: Optional[UUID] = None
+     file_name: str
+     content_md5: Optional[UUID] = None
+     inserted_at: datetime
+     upload_set_id: UUID = Field(..., exclude=True)
+     rejection_status: Optional[FileRejectionStatus] = Field(None, exclude=True)
+     rejection_message: Optional[str] = Field(None, exclude=True)
+     rejection_details: Optional[Dict[str, Any]] = Field(None, exclude=True)
+     file_type: Optional[FileType] = None
+     size: Optional[int] = None
+
+     @computed_field
+     @property
+     def links(self) -> List[Link]:
+         return [
+             make_link(rel="parent", route="upload_set.getUploadSet", upload_set_id=self.upload_set_id),
+         ]
+
+     @computed_field
+     @property
+     def rejected(self) -> Optional[FileRejection]:
+         if self.rejection_status is None:
+             return None
+         msg = None
+         severity = FileRejectionStatusSeverity.error
+         if self.rejection_message is None:
+             if self.rejection_status == FileRejectionStatus.capture_duplicate.value:
+                 msg = _("The picture is too similar to another one (nearby and taken almost at the same time)")
+                 severity = FileRejectionStatusSeverity.info
+             if self.rejection_status == FileRejectionStatus.invalid_file.value:
+                 msg = _("The sent file is not a valid JPEG")
+                 severity = FileRejectionStatusSeverity.error
+             if self.rejection_status == FileRejectionStatus.invalid_metadata.value:
+                 msg = _("The picture has invalid EXIF or XMP metadata, making it impossible to use")
+                 severity = FileRejectionStatusSeverity.error
+             if self.rejection_status == FileRejectionStatus.other_error.value:
+                 msg = _("Something went very wrong, but not due to the picture itself")
+                 severity = FileRejectionStatusSeverity.error
+         else:
+             msg = self.rejection_message
+         return FileRejection(reason=self.rejection_status, severity=severity, message=msg, details=self.rejection_details)
+
+     @field_serializer("content_md5")
+     def serialize_md5(self, md5: UUID, _info):
+         return md5.hex
+
+     model_config = ConfigDict(use_enum_values=True, use_attribute_docstrings=True)
+
+
+ class UploadSetFiles(BaseModel):
+     """List of files uploaded in an UploadSet"""
+
+     files: List[UploadSetFile]
+     upload_set_id: UUID = Field(..., exclude=True)
+
+     @computed_field
+     @property
+     def links(self) -> List[Link]:
+         return [
+             make_link(rel="self", route="upload_set.getUploadSet", upload_set_id=self.upload_set_id),
+         ]
+
+
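One subtlety in `rejected` above: because these models set `use_enum_values=True`, `rejection_status` holds the enum's plain string value, not the enum member, which is why it is compared against `FileRejectionStatus.capture_duplicate.value` rather than the member itself. A standalone illustration of that pydantic behaviour (illustrative names):

from enum import Enum
from pydantic import BaseModel, ConfigDict

class Color(Enum):
    red = "red"

class M(BaseModel):
    model_config = ConfigDict(use_enum_values=True)
    c: Color

m = M(c=Color.red)
print(repr(m.c))               # 'red' (a plain string, not the enum member)
print(m.c == Color.red)        # False
print(m.c == Color.red.value)  # True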
+ def get_simple_upload_set(id: UUID) -> Optional[UploadSet]:
+     """Get the DB representation of an UploadSet, without associated collections and statuses"""
+     u = db.fetchone(
+         current_app,
+         SQL("SELECT * FROM upload_sets WHERE id = %(id)s"),
+         {"id": id},
+         row_factory=class_row(UploadSet),
+     )
+
+     return u
+
+
+ def get_upload_set(id: UUID) -> Optional[UploadSet]:
+     """Get the UploadSet corresponding to the ID"""
+     db_upload_set = db.fetchone(
+         current_app,
+         SQL(
+             """WITH picture_last_job AS (
+     SELECT p.id as picture_id,
+         -- Note: to know if a picture is being processed, check the latest job_history entry for this picture
+         -- If there is no finished_at, the picture is still being processed
+         (MAX(ARRAY [started_at, finished_at])) AS last_job,
+         p.preparing_status,
+         p.status,
+         p.upload_set_id
+     FROM pictures p
+         LEFT JOIN job_history ON p.id = job_history.picture_id
+     WHERE p.upload_set_id = %(id)s
+     GROUP BY p.id
+ ),
+ picture_statuses AS (
+     SELECT
+         *,
+         (last_job[1] IS NOT NULL AND last_job[2] IS NULL) AS is_job_running
+     FROM picture_last_job psj
+ ),
+ associated_collections AS (
+     SELECT
+         ps.upload_set_id,
+         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'broken') AS nb_broken,
+         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'prepared') AS nb_prepared,
+         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'not-processed') AS nb_not_processed,
+         COUNT(ps.picture_id) FILTER (WHERE ps.is_job_running AND ps.status != 'waiting-for-delete') AS nb_preparing,
+         s.id as collection_id,
+         s.nb_pictures AS nb_items,
+         s.min_picture_ts AS mints,
+         s.max_picture_ts AS maxts,
+         s.metadata->>'title' AS title,
+         s.status AS status
+     FROM picture_statuses ps
+         JOIN sequences_pictures sp ON sp.pic_id = ps.picture_id
+         JOIN sequences s ON s.id = sp.seq_id
+     WHERE ps.upload_set_id = %(id)s AND s.status != 'deleted'
+     GROUP BY ps.upload_set_id,
+         s.id
+ ),
+ upload_set_statuses AS (
+     SELECT ps.upload_set_id,
+         COUNT(ps.picture_id) AS nb_items,
+         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'broken') AS nb_broken,
+         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'prepared') AS nb_prepared,
+         COUNT(ps.picture_id) FILTER (WHERE ps.preparing_status = 'not-processed') AS nb_not_processed,
+         COUNT(ps.picture_id) FILTER (WHERE ps.is_job_running) AS nb_preparing
+     FROM picture_statuses ps
+     GROUP BY ps.upload_set_id
+ )
+ SELECT u.*,
+     COALESCE(us.nb_items, 0) AS nb_items,
+     json_build_object(
+         'broken', COALESCE(us.nb_broken, 0),
+         'prepared', COALESCE(us.nb_prepared, 0),
+         'not_processed', COALESCE(us.nb_not_processed, 0),
+         'preparing', COALESCE(us.nb_preparing, 0),
+         'rejected', (
+             SELECT count(*) FROM files
+             WHERE upload_set_id = %(id)s AND rejection_status IS NOT NULL
+         )
+     ) AS items_status,
+     COALESCE(
+         (
+             SELECT json_agg(
+                     json_build_object(
+                         'id', ac.collection_id,
+                         'title', ac.title,
+                         'nb_items', ac.nb_items,
+                         'status', ac.status,
+                         'extent', json_build_object(
+                             'temporal', json_build_object(
+                                 'interval', json_build_array(
+                                     json_build_array(ac.mints, ac.maxts)
+                                 )
+                             )
+                         ),
+                         'items_status', json_build_object(
+                             'broken', ac.nb_broken,
+                             'prepared', ac.nb_prepared,
+                             'not_processed', ac.nb_not_processed,
+                             'preparing', ac.nb_preparing
+                         )
+                     )
+                 )
+             FROM associated_collections ac
+         ),
+         '[]'::json
+     ) AS associated_collections
+ FROM upload_sets u
+     LEFT JOIN upload_set_statuses us on us.upload_set_id = u.id
+ WHERE u.id = %(id)s"""
+         ),
+         {"id": id},
+         row_factory=class_row(UploadSet),
+     )
+
+     return db_upload_set
+
+
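The `-- Note:` comment in the query above describes the `last_job` trick: for each picture, the latest `job_history` row is selected, and a job that has a `started_at` but no `finished_at` counts as still running. A rough Python analogue of that intent (not the exact PostgreSQL array-comparison semantics):

from datetime import datetime

# one picture's job history: (started_at, finished_at)
jobs = [
    (datetime(2024, 1, 1, 10, 0), datetime(2024, 1, 1, 10, 5)),  # finished
    (datetime(2024, 1, 1, 11, 0), None),                         # started, not finished
]
started_at, finished_at = max(jobs, key=lambda j: j[0])  # latest job
is_job_running = started_at is not None and finished_at is None
print(is_job_running)  # True -> the picture counts as "preparing"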
+ FIELD_TO_SQL_FILTER = {
+     "completed": "completed",
+     "dispatched": "dispatched",
+ }
+
+
+ def _parse_filter(filter: Optional[str]) -> SQL:
+     """
+     Parse a filter string and return a SQL expression
+
+     >>> _parse_filter('')
+     SQL('TRUE')
+     >>> _parse_filter(None)
+     SQL('TRUE')
+     >>> _parse_filter('completed = TRUE')
+     SQL('(completed = True)')
+     >>> _parse_filter('completed = TRUE AND dispatched = FALSE')
+     SQL('((completed = True) AND (dispatched = False))')
+     """
+     if not filter:
+         return SQL("TRUE")
+     from pygeofilter.backends.sql import to_sql_where
+     from pygeofilter.parsers.cql2_text import parse as cql_parser
+
+     try:
+         filterAst = cql_parser(filter)
+         f = to_sql_where(filterAst, FIELD_TO_SQL_FILTER).replace('"', "")  # type: ignore
+         return SQL(f)  # type: ignore
+     except Exception:
+         logging.error(f"Unsupported filter parameter: {filter}")
+         raise errors.InvalidAPIUsage(_("Unsupported filter parameter"), status_code=400)
+
+
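`_parse_filter` delegates the heavy lifting to pygeofilter: the CQL2 text is parsed into an AST, translated into a SQL WHERE fragment, and the double quotes around field names are stripped so the fragment can be spliced into the query. A standalone sketch of that call chain, assuming pygeofilter is installed, reproducing the doctest output above:

from pygeofilter.parsers.cql2_text import parse
from pygeofilter.backends.sql import to_sql_where

ast = parse("completed = TRUE AND dispatched = FALSE")
where = to_sql_where(ast, {"completed": "completed", "dispatched": "dispatched"})
print(where.replace('"', ""))  # ((completed = True) AND (dispatched = False))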
+ def list_upload_sets(account_id: UUID, limit: int = 100, filter: Optional[str] = None) -> UploadSets:
+     filter_sql = _parse_filter(filter)
+     l = db.fetchall(
+         current_app,
+         SQL(
+             """SELECT
+     u.*,
+     COALESCE(
+         (
+             SELECT
+                 json_agg(json_build_object(
+                     'id', ac.collection_id,
+                     'nb_items', ac.nb_items
+                 ))
+             FROM (
+                 SELECT
+                     sp.seq_id as collection_id,
+                     count(sp.pic_id) AS nb_items
+                 FROM pictures p
+                     JOIN sequences_pictures sp ON sp.pic_id = p.id
+                 WHERE p.upload_set_id = u.id
+                 GROUP BY sp.seq_id
+             ) ac
+         ),
+         '[]'::json
+     ) AS associated_collections,
+     (
+         SELECT count(*) AS nb
+         FROM pictures p
+         WHERE p.upload_set_id = u.id
+     ) AS nb_items
+ FROM upload_sets u
+ WHERE account_id = %(account_id)s AND {filter}
+ ORDER BY created_at ASC
+ LIMIT %(limit)s
+ """
+         ).format(filter=filter_sql),
+         {"account_id": account_id, "limit": limit},
+         row_factory=class_row(UploadSet),
+     )
+
+     return UploadSets(upload_sets=l)
+
+
+ def ask_for_dispatch(upload_set_id: UUID):
+     """Add a dispatch task to the job queue for the upload set. If there is already a task, postpone it."""
+     with db.conn(current_app) as conn:
+         conn.execute(
+             """INSERT INTO
+                 job_queue(upload_set_id, task)
+             VALUES (%(upload_set_id)s, 'dispatch')
+             ON CONFLICT (upload_set_id) DO UPDATE SET ts = CURRENT_TIMESTAMP""",
+             {"upload_set_id": upload_set_id},
+         )
+
+
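The upsert above is a debounce: calling `ask_for_dispatch` again does not enqueue a second task, it only refreshes the pending task's timestamp, pushing the dispatch back while files are still arriving. A self-contained illustration of the pattern using sqlite3 so it runs anywhere (geovisio itself runs on PostgreSQL; the table here is a stand-in):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE job_queue(upload_set_id TEXT UNIQUE, task TEXT, ts INTEGER)")

def ask_for_dispatch(upload_set_id: str, now: int):
    conn.execute(
        """INSERT INTO job_queue(upload_set_id, task, ts) VALUES (?, 'dispatch', ?)
        ON CONFLICT (upload_set_id) DO UPDATE SET ts = excluded.ts""",
        (upload_set_id, now),
    )

ask_for_dispatch("u1", 1)
ask_for_dispatch("u1", 5)  # same task, postponed
print(conn.execute("SELECT * FROM job_queue").fetchall())  # [('u1', 'dispatch', 5)]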
+ def dispatch(upload_set_id: UUID):
+     """Finalize an upload set.
+
+     For the moment we only create a collection around all the items of the upload set, but later we'll split the items into several collections
+
+     Note: it is not a problem if some pictures are not yet prepared, since we only need the pictures' metadata to distribute them into collections
+     """
+
+     db_upload_set = get_simple_upload_set(upload_set_id)
+     if not db_upload_set:
+         raise Exception(f"Upload set {upload_set_id} not found")
+
+     with db.conn(current_app) as conn:
+         with conn.transaction(), conn.cursor(row_factory=dict_row) as cursor:
+
+             # get all the pictures of the upload set
+             db_pics = cursor.execute(
+                 SQL(
+                     """SELECT
+     p.id,
+     p.ts,
+     ST_X(p.geom) as lon,
+     ST_Y(p.geom) as lat,
+     p.heading as heading,
+     p.metadata->>'originalFileName' as file_name,
+     p.metadata,
+     s.id as sequence_id
+ FROM pictures p
+     LEFT JOIN sequences_pictures sp ON sp.pic_id = p.id
+     LEFT JOIN sequences s ON s.id = sp.seq_id
+ WHERE p.upload_set_id = %(upload_set_id)s"""
+                 ),
+                 {"upload_set_id": upload_set_id},
+             ).fetchall()
+
+             pics_by_filename = {p["file_name"]: p for p in db_pics}
+             pics = [
+                 geopic_sequence.Picture(
+                     p["file_name"],
+                     reader.GeoPicTags(
+                         lon=p["lon"],
+                         lat=p["lat"],
+                         ts=p["ts"],
+                         type=p["metadata"]["type"],
+                         heading=p["heading"],
+                         make=p["metadata"]["make"],
+                         model=p["metadata"]["model"],
+                         focal_length=p["metadata"]["focal_length"],
+                         crop=p["metadata"]["crop"],
+                         exif={},
+                     ),
+                 )
+                 for p in db_pics
+             ]
+
+             report = geopic_sequence.dispatch_pictures(
+                 pics,
+                 mergeParams=geopic_sequence.MergeParams(
+                     maxDistance=db_upload_set.duplicate_distance, maxRotationAngle=db_upload_set.duplicate_rotation
+                 ),
+                 sortMethod=db_upload_set.sort_method,
+                 splitParams=geopic_sequence.SplitParams(maxDistance=db_upload_set.split_distance, maxTime=db_upload_set.split_time.seconds),
+             )
+             reused_sequence = set()
+
+             pics_to_delete = [pics_by_filename[p.filename]["id"] for p in report.duplicate_pictures or []]
+             if pics_to_delete:
+                 logging.debug(f"For uploadset '{upload_set_id}', nb duplicate pictures {len(pics_to_delete)}")
+                 logging.debug(
+                     f"For uploadset '{upload_set_id}', duplicate pictures {[p.filename for p in report.duplicate_pictures or []]}"
+                 )
+
+                 cursor.execute(SQL("CREATE TEMPORARY TABLE tmp_duplicates(picture_id UUID) ON COMMIT DROP"))
+                 with cursor.copy("COPY tmp_duplicates(picture_id) FROM stdin;") as copy:
+                     for p in pics_to_delete:
+                         copy.write_row((p,))
+
+                 cursor.execute(
+                     SQL(
+                         "UPDATE files SET rejection_status = 'capture_duplicate' WHERE picture_id IN (select picture_id from tmp_duplicates)"
+                     )
+                 )
+                 # delete all pictures (the DB triggers will also add background jobs to delete the associated files)
+                 cursor.execute(SQL("DELETE FROM pictures WHERE id IN (select picture_id FROM tmp_duplicates)"))
+
+             for s in report.sequences:
+                 existing_sequence = next(
+                     (seq for p in s.pictures if (seq := pics_by_filename[p.filename]["sequence_id"]) not in reused_sequence),
+                     None,
+                 )
+                 # if some of the pictures were already in a sequence, we should not create a new one
+                 if existing_sequence:
+                     logging.info(
+                         f"For uploadset '{upload_set_id}', sequence {existing_sequence} already contains pictures, we will not create a new one"
+                     )
+                     # we should wipe the sequences_pictures though
+                     seq_id = existing_sequence
+                     cursor.execute(
+                         SQL("DELETE FROM sequences_pictures WHERE seq_id = %(seq_id)s"),
+                         {"seq_id": seq_id},
+                     )
+                     reused_sequence.add(seq_id)
+                 else:
+                     seq_id = cursor.execute(
+                         SQL(
+                             """INSERT INTO sequences(account_id, metadata, user_agent)
+                             VALUES (%(account_id)s, %(metadata)s, %(user_agent)s)
+                             RETURNING id"""
+                         ),
+                         {
+                             "account_id": db_upload_set.account_id,
+                             "metadata": Jsonb({"title": db_upload_set.title}),
+                             "user_agent": db_upload_set.user_agent,
+                         },
+                     ).fetchone()
+                     seq_id = seq_id["id"]
+
+                 with cursor.copy("COPY sequences_pictures(seq_id, pic_id, rank) FROM stdin;") as copy:
+                     for i, p in enumerate(s.pictures, 1):
+                         copy.write_row(
+                             (seq_id, pics_by_filename[p.filename]["id"], i),
+                         )
+
+                 sequences.add_finalization_job(cursor=cursor, seqId=seq_id)
+
+             for s in report.sequences_splits or []:
+                 logging.debug(f"For uploadset '{upload_set_id}', split = {s.prevPic.filename} -> {s.nextPic.filename} : {s.reason}")
+         conn.execute(SQL("UPDATE upload_sets SET dispatched = true WHERE id = %(upload_set_id)s"), {"upload_set_id": db_upload_set.id})
+
+
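For reference, a hedged standalone sketch of the geopic_tag_reader call at the heart of dispatch() above, mirroring the keyword arguments used there; it runs entirely in memory, no database needed. The GeoPicTags field values and the SortMethod member name are illustrative assumptions, check geopic-tag-reader's documentation before relying on them:

from datetime import datetime, timedelta, timezone
from geopic_tag_reader import sequence as geopic_sequence, reader

def pic(name, lon, ts):
    return geopic_sequence.Picture(
        name,
        reader.GeoPicTags(lon=lon, lat=48.85, ts=ts, type="flat", heading=0,
                          make=None, model=None, focal_length=None, crop=None, exif={}),
    )

t0 = datetime(2024, 1, 1, tzinfo=timezone.utc)
# four pictures a few metres and seconds apart -> expected to land in one sequence
pics = [pic(f"{i}.jpg", 2.35 + i * 1e-4, t0 + timedelta(seconds=10 * i)) for i in range(4)]

report = geopic_sequence.dispatch_pictures(
    pics,
    mergeParams=geopic_sequence.MergeParams(maxDistance=1.0, maxRotationAngle=30),
    sortMethod=geopic_sequence.SortMethod.time_asc,  # assumed member name
    splitParams=geopic_sequence.SplitParams(maxDistance=100, maxTime=60),
)
print(len(report.sequences), len(report.duplicate_pictures or []))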
+ def insertFileInDatabase(
+     *,
+     cursor: psycopg.Cursor[psycopg.rows.DictRow],
+     upload_set_id: UUID,
+     file_name: str,
+     content_md5: Optional[str] = None,
+     size: Optional[int] = None,
+     file_type: Optional[FileType] = None,
+     picture_id: Optional[UUID] = None,
+     rejection_status: Optional[FileRejectionStatus] = None,
+     rejection_message: Optional[str] = None,
+     rejection_details: Optional[Dict[str, Any]] = None,
+ ) -> UploadSetFile:
+     """Insert a file linked to an UploadSet into the database"""
+
+     f = cursor.execute(
+         SQL(
+             """INSERT INTO files(
+                 upload_set_id, picture_id, file_type, file_name,
+                 size, content_md5, rejection_status, rejection_message, rejection_details)
+             VALUES (
+                 %(upload_set_id)s, %(picture_id)s, %(type)s, %(file_name)s,
+                 %(size)s, %(content_md5)s, %(rejection_status)s, %(rejection_message)s, %(rejection_details)s)
+             ON CONFLICT (upload_set_id, file_name)
+             DO UPDATE SET picture_id = %(picture_id)s, size = %(size)s, content_md5 = %(content_md5)s,
+                 rejection_status = %(rejection_status)s, rejection_message = %(rejection_message)s, rejection_details = %(rejection_details)s
+             RETURNING *
+             """
+         ),
+         params={
+             "upload_set_id": upload_set_id,
+             "type": file_type,
+             "picture_id": picture_id,
+             "file_name": file_name,
+             "size": size,
+             "content_md5": content_md5,
+             "rejection_status": rejection_status,
+             "rejection_message": rejection_message,
+             "rejection_details": Jsonb(rejection_details),
+         },
+     )
+     return UploadSetFile(**f.fetchone())
+
+
+ def get_upload_set_files(upload_set_id: UUID) -> UploadSetFiles:
+     """Get the files of an UploadSet"""
+     files = db.fetchall(
+         current_app,
+         SQL(
+             """SELECT
+                 upload_set_id,
+                 file_type,
+                 file_name,
+                 size,
+                 content_md5,
+                 rejection_status,
+                 rejection_message,
+                 rejection_details,
+                 picture_id,
+                 inserted_at
+             FROM files
+             WHERE upload_set_id = %(upload_set_id)s
+             ORDER BY inserted_at"""
+         ),
+         {"upload_set_id": upload_set_id},
+         row_factory=dict_row,
+     )
+     return UploadSetFiles(files=files, upload_set_id=upload_set_id)
+
+
+ def delete(upload_set: UploadSet):
+     """Delete an UploadSet"""
+     logging.info(f"Asking for deletion of uploadset {upload_set.id}")
+     with db.conn(current_app) as conn:
+         # clean the job queue, to ensure no async runners are currently processing pictures/sequences/upload_sets
+         # Done outside the real deletion transaction to avoid triggering a deadlock
+         conn.execute(SQL("DELETE FROM job_queue WHERE picture_id IN (SELECT id FROM pictures where upload_set_id = %s)"), [upload_set.id])
+         for c in upload_set.associated_collections:
+             conn.execute(SQL("DELETE FROM job_queue WHERE sequence_id = %s"), [c.id])
+
+         with conn.transaction(), conn.cursor() as cursor:
+             for c in upload_set.associated_collections:
+                 # Mark all collections as deleted, but do not delete them
+                 # Note: we do not use utils.sequences.delete_collection here, since we also want to remove the pictures not associated with any collection
+                 cursor.execute(SQL("UPDATE sequences SET status = 'deleted' WHERE id = %s"), [c.id])
+
+             # after the tasks have been added to the queue, we delete the upload set, and this will delete all pictures associated with it
+             cursor.execute(SQL("DELETE FROM upload_sets WHERE id = %(upload_set_id)s"), {"upload_set_id": upload_set.id})