mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (38)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -263
  3. mapillary_tools/authenticate.py +47 -39
  4. mapillary_tools/commands/__main__.py +15 -16
  5. mapillary_tools/commands/upload.py +33 -4
  6. mapillary_tools/config.py +5 -0
  7. mapillary_tools/constants.py +127 -45
  8. mapillary_tools/exceptions.py +4 -0
  9. mapillary_tools/exif_read.py +2 -1
  10. mapillary_tools/exif_write.py +3 -1
  11. mapillary_tools/geo.py +16 -0
  12. mapillary_tools/geotag/base.py +6 -2
  13. mapillary_tools/geotag/factory.py +9 -1
  14. mapillary_tools/geotag/geotag_images_from_exiftool.py +1 -1
  15. mapillary_tools/geotag/geotag_images_from_gpx.py +0 -6
  16. mapillary_tools/geotag/geotag_videos_from_exiftool.py +30 -9
  17. mapillary_tools/geotag/options.py +4 -1
  18. mapillary_tools/geotag/utils.py +9 -12
  19. mapillary_tools/geotag/video_extractors/gpx.py +2 -1
  20. mapillary_tools/geotag/video_extractors/native.py +25 -0
  21. mapillary_tools/history.py +124 -7
  22. mapillary_tools/http.py +211 -0
  23. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  24. mapillary_tools/process_geotag_properties.py +35 -38
  25. mapillary_tools/process_sequence_properties.py +339 -322
  26. mapillary_tools/sample_video.py +1 -2
  27. mapillary_tools/serializer/description.py +68 -58
  28. mapillary_tools/serializer/gpx.py +1 -1
  29. mapillary_tools/upload.py +202 -207
  30. mapillary_tools/upload_api_v4.py +57 -47
  31. mapillary_tools/uploader.py +728 -285
  32. mapillary_tools/utils.py +57 -5
  33. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/METADATA +7 -6
  34. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/RECORD +38 -37
  35. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/WHEEL +0 -0
  36. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/entry_points.txt +0 -0
  37. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/licenses/LICENSE +0 -0
  38. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
 import concurrent.futures
-
 import dataclasses
 import io
 import json
 import logging
 import os
+import queue
 import struct
 import sys
 import tempfile
@@ -25,7 +25,22 @@ else:
 
 import requests
 
-from . import api_v4, config, constants, exif_write, types, upload_api_v4, utils
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    history,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+    VERSION,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
 from .serializer.description import (
     desc_file_to_exif,
     DescriptionJSONSerializer,
@@ -36,6 +51,25 @@ from .serializer.description import (
 LOG = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass(frozen=True)
+class UploadOptions:
+    user_items: config.UserItem
+    chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+    num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+    dry_run: bool = False
+    nofinish: bool = False
+    noresume: bool = False
+
+    def __post_init__(self):
+        if self.num_upload_workers <= 0:
+            raise ValueError(
+                f"Expect positive num_upload_workers but got {self.num_upload_workers}"
+            )
+
+        if self.chunk_size <= 0:
+            raise ValueError(f"Expect positive chunk_size but got {self.chunk_size}")
+
+
 class UploaderProgress(T.TypedDict, total=True):
     """
     Progress data that Uploader cares about.
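
Note: the frozen `UploadOptions` dataclass above gathers the settings (`chunk_size`, `dry_run`, worker count) that used to be individual `Uploader` constructor arguments, and `__post_init__` validates them eagerly. A minimal usage sketch, assuming the class is importable from `mapillary_tools.uploader` and using a placeholder token:

    from mapillary_tools.uploader import UploadOptions

    # Placeholder credentials; user_items only needs the keys this diff reads
    options = UploadOptions(
        user_items={"user_upload_token": "MLY|placeholder"},
        chunk_size=16 * 1024 * 1024,  # 16 MiB per chunk
        num_upload_workers=4,
        dry_run=True,
    )

    # Non-positive values are rejected at construction time
    try:
        UploadOptions(user_items={}, num_upload_workers=0)
    except ValueError as ex:
        print(ex)  # Expect positive num_upload_workers but got 0
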
@@ -60,7 +94,7 @@ class UploaderProgress(T.TypedDict, total=True):
     # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int
 
-    # An "upload_interrupted" will increase it. Reset to 0 if a chunk is uploaded
+    # An "upload_retrying" will increase it. Reset to 0 if a chunk is uploaded
     retries: int
 
     # Cluster ID after finishing the upload
@@ -92,7 +126,7 @@ class SequenceProgress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str
 
-    # Path to the Zipfile/BlackVue/CAMM
+    # Path to the image/video/zip
     import_path: str
 
 
@@ -120,13 +154,43 @@ class InvalidMapillaryZipFileError(SequenceError):
     pass
 
 
+# BELOW demonstrates the pseudocode for a typical upload workflow
+# and when upload events are emitted
+#################################################################
+# def pseudo_upload(metadata):
+#     emit("upload_start")
+#     while True:
+#         try:
+#             if is_sequence(metadata):
+#                 for image in metadata:
+#                     upload_stream(image.read())
+#                     emit("upload_progress")
+#             elif is_video(metadata):
+#                 offset = fetch_offset()
+#                 emit("upload_fetch_offset")
+#                 for chunk in metadata.read()[offset:]:
+#                     upload_stream(chunk)
+#                     emit("upload_progress")
+#         except BaseException as ex:  # Include KeyboardInterrupt
+#             if retryable(ex):
+#                 emit("upload_retrying")
+#                 continue
+#             else:
+#                 emit("upload_failed")
+#                 raise ex
+#         else:
+#             break
+#     emit("upload_end")
+#     finish_upload(data)
+#     emit("upload_finished")
 EventName = T.Literal[
     "upload_start",
     "upload_fetch_offset",
     "upload_progress",
+    "upload_retrying",
     "upload_end",
+    "upload_failed",
     "upload_finished",
-    "upload_interrupted",
 ]
 
 
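Note: the pseudocode above is the contract for the events each upload emits. A toy, self-contained sketch of an emitter honoring that contract (the real `EventEmitter` API in mapillary_tools is not shown in this diff, so the `on`/`emit` signatures here are assumptions):

    from collections import defaultdict

    class ToyEmitter:
        def __init__(self):
            self._handlers = defaultdict(list)

        def on(self, event, handler):
            self._handlers[event].append(handler)

        def emit(self, event, payload=None):
            for handler in self._handlers[event]:
                handler(payload)

    emitter = ToyEmitter()
    emitter.on("upload_progress", lambda p: print("progress:", p))
    emitter.on("upload_retrying", lambda p: print("retrying..."))

    # A successful upload emits, in order:
    #   upload_start -> (upload_fetch_offset) -> upload_progress* -> upload_end -> upload_finished
    emitter.emit("upload_progress", {"offset": 1024, "entity_size": 4096})
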
@@ -154,7 +218,131 @@ class UploadResult:
     error: Exception | None = None
 
 
-class ZipImageSequence:
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        # If uploaded in a random order, interrupted uploads have a higher chance to expire.
+        # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+        sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+        for idx, video_metadata in enumerate(sorted_video_metadatas):
+            LOG.debug(f"Checksum for video {video_metadata.filename}...")
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                if gopro_info is not None:
+                    camm_info.accl = gopro_info.accl or []
+                    camm_info.gyro = gopro_info.gyro or []
+                    camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        # If uploaded in a random order, interrupted uploads have a higher chance to expire.
+        # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+        sorted_zip_paths = sorted(zip_paths)
+
+        for idx, zip_path in enumerate(sorted_zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder, will be set in upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
     @classmethod
     def zip_images(
         cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
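
Note: `VideoUploader.upload_videos` and `ZipUploader.upload_zipfiles` are generators that pair each input with an `UploadResult` instead of raising mid-batch, so one bad file cannot abort the rest. A hedged consumption sketch (names per this diff; the token is a placeholder and `video_metadatas` would normally come from the processing step):

    from mapillary_tools.uploader import Uploader, UploadOptions, VideoUploader

    video_metadatas: list = []  # placeholder; normally produced by the process step
    uploader = Uploader(UploadOptions(user_items={"user_upload_token": "MLY|placeholder"}))

    for metadata, result in VideoUploader.upload_videos(uploader, video_metadatas):
        if result.error is not None:
            print(f"failed {metadata.filename}: {result.error}")
        else:
            print(f"uploaded {metadata.filename} as cluster {result.result}")
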
@@ -173,38 +361,93 @@ class ZipImageSequence:
         )
         with cls._wip_file_context(wip_zip_filename) as wip_path:
             with wip_path.open("wb") as wip_fp:
-                cls.zip_sequence_fp(sequence, wip_fp)
+                cls._zip_sequence_fp(sequence, wip_fp)
 
     @classmethod
-    @contextmanager
-    def _wip_file_context(cls, wip_path: Path):
-        try:
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
-        try:
-            yield wip_path
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-            with wip_path.open("rb") as fp:
-                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
 
-            done_path = wip_path.parent.joinpath(
-                _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+            yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
+        zip_path: Path,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        with zipfile.ZipFile(zip_path) as ziph:
+            namelist = ziph.namelist()
+            if not namelist:
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+        # Send a copy of the input progress to each upload session to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
+            "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
+        }
+
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
             )
 
-            try:
-                os.remove(done_path)
-            except FileNotFoundError:
-                pass
-            wip_path.rename(done_path)
-        finally:
-            try:
-                os.remove(wip_path)
-            except FileNotFoundError:
-                pass
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
 
     @classmethod
-    def zip_sequence_fp(
+    def _zip_sequence_fp(
         cls,
         sequence: T.Sequence[types.ImageMetadata],
         zip_fp: T.IO[bytes],
@@ -219,6 +462,8 @@ class ZipImageSequence:
                 f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
             )
 
+        if sequence:
+            LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
         sequence_md5sum = types.update_sequence_md5sum(sequence)
 
         with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
@@ -226,16 +471,18 @@ class ZipImageSequence:
             # Arcname should be unique, the name does not matter
             arcname = f"{idx}.jpg"
             zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            zipf.writestr(zipinfo, cls._dump_image_bytes(metadata))
+            zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
         assert len(sequence) == len(set(zipf.namelist()))
-        zipf.comment = json.dumps({"sequence_md5sum": sequence_md5sum}).encode(
-            "utf-8"
-        )
+        zipf.comment = json.dumps(
+            {"sequence_md5sum": sequence_md5sum},
+            sort_keys=True,
+            separators=(",", ":"),
+        ).encode("utf-8")
 
         return sequence_md5sum
 
     @classmethod
-    def extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
             comment = ziph.comment
 
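Note: serializing the zip comment with `sort_keys` and compact separators, combined with the fixed 1980-01-01 entry timestamp, keeps the archive bytes deterministic; that matters because the upload session key is derived from the file's md5, so re-zipping the same sequence can resume a previous session. A self-contained sketch of writing and reading the checksum comment:

    import io, json, zipfile

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zipf:
        # Fixed timestamp keeps the archive bytes reproducible across runs
        info = zipfile.ZipInfo("0.jpg", date_time=(1980, 1, 1, 0, 0, 0))
        zipf.writestr(info, b"fake image bytes")
        # Canonical JSON: key order and separators never vary
        zipf.comment = json.dumps(
            {"sequence_md5sum": "d41d8cd98f00b204e9800998ecf8427e"},
            sort_keys=True,
            separators=(",", ":"),
        ).encode("utf-8")

    with zipfile.ZipFile(io.BytesIO(buf.getvalue())) as ziph:
        print(json.loads(ziph.comment)["sequence_md5sum"])
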
@@ -258,237 +505,365 @@ class ZipImageSequence:
         return sequence_md5sum
 
     @classmethod
-    def _dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
         try:
-            edit = exif_write.ExifEdit(metadata.filename)
-        except struct.error as ex:
-            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
+        try:
+            yield wip_path
 
-        # The cast is to fix the type checker error
-        edit.add_image_description(
-            T.cast(
-                T.Dict,
-                desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata)),
+            with wip_path.open("rb") as fp:
+                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+
+            done_path = wip_path.parent.joinpath(
+                _suffix_session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
             )
-        )
 
-        try:
-            return edit.dump_image_bytes()
-        except struct.error as ex:
-            raise ExifError(
-                f"Failed to dump EXIF bytes: {ex}", metadata.filename
-            ) from ex
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
 
-    @classmethod
-    def upload_zipfile(
-        cls,
-        uploader: Uploader,
-        zip_path: Path,
-        progress: dict[str, T.Any] | None = None,
-    ) -> str:
-        if progress is None:
-            progress = {}
 
-        with zipfile.ZipFile(zip_path) as ziph:
-            namelist = ziph.namelist()
-            if not namelist:
-                raise InvalidMapillaryZipFileError("Zipfile has no files")
+class ImageSequenceUploader:
+    def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
+        self.upload_options = upload_options
+        self.emitter = emitter
 
-        with zip_path.open("rb") as zip_fp:
-            sequence_md5sum = cls.extract_sequence_md5sum(zip_fp)
+    def upload_images(
+        self, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-        # Send the copy of the input progress to each upload session, to avoid modifying the original one
-        mutable_progress: SequenceProgress = {
-            **T.cast(SequenceProgress, progress),
-            "sequence_image_count": len(namelist),
-            "sequence_md5sum": sequence_md5sum,
-            "file_type": types.FileType.ZIP.value,
-        }
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
 
-        with zip_path.open("rb") as zip_fp:
-            file_handle = uploader.upload_stream(
-                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+            sequence_progress: SequenceProgress = {
+                "sequence_idx": sequence_idx,
+                "total_sequence_count": len(sequences),
+                "sequence_image_count": len(sequence),
+                "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
+            }
+
+            try:
+                cluster_id = self._upload_sequence_and_finish(
+                    sequence,
+                    sequence_progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    def _upload_sequence_and_finish(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> str:
+        _validate_metadatas(sequence)
+
+        sequence_progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+        self.emitter.emit("upload_start", sequence_progress)
+
+        try:
+            # Retries will be handled in the call (but no upload event emissions)
+            image_file_handles = self._upload_images_parallel(
+                sequence, sequence_progress
             )
+        except BaseException as ex:  # Include KeyboardInterrupt
+            self.emitter.emit("upload_failed", sequence_progress)
+            raise ex
 
+        manifest_file_handle = self._upload_manifest(image_file_handles)
+
+        self.emitter.emit("upload_end", sequence_progress)
+
+        uploader = Uploader(self.upload_options, emitter=self.emitter)
         cluster_id = uploader.finish_upload(
-            file_handle,
-            api_v4.ClusterFileType.ZIP,
-            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=sequence_progress,
         )
 
         return cluster_id
 
-    @classmethod
-    def zip_images_and_upload(
-        cls,
-        uploader: Uploader,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
-    ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
+    def _upload_manifest(self, image_file_handles: T.Sequence[str]) -> str:
+        uploader = Uploader(self.upload_options)
 
-        sequences = types.group_and_sort_images(image_metadatas)
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
 
-        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-            try:
-                _validate_metadatas(sequence)
-            except Exception as ex:
-                yield sequence_uuid, UploadResult(error=ex)
-                continue
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
+                )
+            )
+            manifest_fp.seek(0, io.SEEK_SET)
+            return uploader.upload_stream(
+                manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+            )
 
-            with tempfile.NamedTemporaryFile() as fp:
-                try:
-                    sequence_md5sum = cls.zip_sequence_fp(sequence, fp)
-                except Exception as ex:
-                    yield sequence_uuid, UploadResult(error=ex)
-                    continue
+    def _upload_images_parallel(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> list[str]:
+        if not sequence:
+            return []
 
-                sequence_progress: SequenceProgress = {
-                    "sequence_idx": sequence_idx,
-                    "total_sequence_count": len(sequences),
-                    "sequence_image_count": len(sequence),
-                    "sequence_uuid": sequence_uuid,
-                    "file_type": types.FileType.ZIP.value,
-                    "sequence_md5sum": sequence_md5sum,
-                }
+        max_workers = min(self.upload_options.num_upload_workers, len(sequence))
 
-                mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+        # Lock is used to synchronize event emission
+        lock = threading.Lock()
 
-                try:
-                    file_handle = uploader.upload_stream(fp, progress=mutable_progress)
-                    cluster_id = uploader.finish_upload(
-                        file_handle,
-                        api_v4.ClusterFileType.ZIP,
-                        progress=mutable_progress,
-                    )
-                except Exception as ex:
-                    yield sequence_uuid, UploadResult(error=ex)
-                    continue
+        # Push all images into the queue
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]] = queue.Queue()
+        for idx, image_metadata in enumerate(sequence):
+            image_queue.put((idx, image_metadata))
+
+        upload_interrupted = threading.Event()
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(
+                    self._upload_images_from_queue,
+                    image_queue,
+                    lock,
+                    upload_interrupted,
+                    sequence_progress,
+                )
+                for _ in range(max_workers)
+            ]
 
-            yield sequence_uuid, UploadResult(result=cluster_id)
+            indexed_image_file_handles = []
 
-    @classmethod
-    def _upload_sequence(
-        cls,
-        uploader: Uploader,
-        sequence: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
-    ) -> str:
-        if progress is None:
-            progress = {}
+            try:
+                for future in futures:
+                    indexed_image_file_handles.extend(future.result())
+            except KeyboardInterrupt as ex:
+                upload_interrupted.set()
+                raise ex
 
-        # FIXME: This is a hack to disable the event emitter inside the uploader
-        uploader_without_emitter = uploader.copy_uploader_without_emitter()
+        # All tasks should be done here, so the join below is effectively an assertion
+        image_queue.join()
+        if sys.version_info >= (3, 13):
+            image_queue.shutdown()
 
-        lock = threading.Lock()
+        file_handles: list[str] = []
 
-        def _upload_image(image_metadata: types.ImageMetadata) -> str:
-            mutable_progress = {
-                **(progress or {}),
-                "filename": str(image_metadata.filename),
-            }
+        indexed_image_file_handles.sort()
 
-            bytes = cls._dump_image_bytes(image_metadata)
-            file_handle = uploader_without_emitter.upload_stream(
-                io.BytesIO(bytes), progress=mutable_progress
+        # Important to guarantee the order
+        assert len(indexed_image_file_handles) == len(sequence)
+        for expected_idx, (idx, file_handle) in enumerate(indexed_image_file_handles):
+            assert expected_idx == idx
+            file_handles.append(file_handle)
+
+        return file_handles
+
+    def _upload_images_from_queue(
+        self,
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]],
+        lock: threading.Lock,
+        upload_interrupted: threading.Event,
+        sequence_progress: dict[str, T.Any],
+    ) -> list[tuple[int, str]]:
+        indexed_file_handles = []
+
+        with api_v4.create_user_session(
+            self.upload_options.user_items["user_upload_token"]
+        ) as user_session:
+            single_image_uploader = SingleImageUploader(
+                self.upload_options, user_session=user_session
             )
 
-            mutable_progress["chunk_size"] = image_metadata.filesize
+            while True:
+                # All images are already pushed into the queue at this point
+                try:
+                    idx, image_metadata = image_queue.get_nowait()
+                except queue.Empty:
+                    break
+
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
+
+                # Create a new mutable progress to keep the sequence_progress immutable
+                image_progress = {
+                    **sequence_progress,
+                    "import_path": str(image_metadata.filename),
+                }
 
-            with lock:
-                uploader.emitter.emit("upload_progress", mutable_progress)
+                # image_progress will be updated during uploading
+                file_handle = single_image_uploader.upload(
+                    image_metadata, image_progress
+                )
 
-            return file_handle
+                # Update chunk_size (it was constant if set)
+                image_progress["chunk_size"] = image_metadata.filesize
 
-        _validate_metadatas(sequence)
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
 
-        progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+                with lock:
+                    self.emitter.emit("upload_progress", image_progress)
 
-        # TODO: assert sequence is sorted
+                indexed_file_handles.append((idx, file_handle))
 
-        uploader.emitter.emit("upload_start", progress)
+                image_queue.task_done()
 
-        with concurrent.futures.ThreadPoolExecutor(
-            max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
-        ) as executor:
-            image_file_handles = list(executor.map(_upload_image, sequence))
+        return indexed_file_handles
 
-        manifest = {
-            "version": "1",
-            "upload_type": "images",
-            "image_handles": image_file_handles,
-        }
 
-        with io.BytesIO() as manifest_fp:
-            manifest_fp.write(json.dumps(manifest).encode("utf-8"))
-            manifest_fp.seek(0, io.SEEK_SET)
-            manifest_file_handle = uploader_without_emitter.upload_stream(
-                manifest_fp, session_key=f"{uuid.uuid4().hex}.json"
+class SingleImageUploader:
+    def __init__(
+        self,
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
+    ):
+        self.upload_options = upload_options
+        self.user_session = user_session
+        self.cache = self._maybe_create_persistent_cache_instance(
+            self.upload_options.user_items, upload_options
+        )
+
+    def upload(
+        self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+    ) -> str:
+        image_bytes = self.dump_image_bytes(image_metadata)
+
+        uploader = Uploader(self.upload_options, user_session=self.user_session)
+
+        session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
+
+        file_handle = self._get_cached_file_handle(session_key)
+
+        if file_handle is None:
+            # image_progress will be updated during uploading
+            file_handle = uploader.upload_stream(
+                io.BytesIO(image_bytes),
+                session_key=session_key,
+                progress=image_progress,
             )
+            self._set_file_handle_cache(session_key, file_handle)
 
-        uploader.emitter.emit("upload_end", progress)
+        return file_handle
 
-        cluster_id = uploader.finish_upload(
-            manifest_file_handle,
-            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
-            progress=progress,
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
         )
 
-        return cluster_id
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
 
     @classmethod
-    def upload_images(
-        cls,
-        uploader: Uploader,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
-    ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
+    def _maybe_create_persistent_cache_instance(
+        cls, user_items: config.UserItem, upload_options: UploadOptions
+    ) -> history.PersistentCache | None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
+
+        if upload_options.dry_run:
+            LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+            return None
+
+        # Different python/CLI versions use different cache (dbm) formats.
+        # Separate them to avoid conflicts
+        py_version_parts = [str(part) for part in sys.version_info[:3]]
+        version = f"py_{'_'.join(py_version_parts)}_{VERSION}"
+
+        cache_path_dir = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(version)
+            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+            .joinpath(
+                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+            )
+        )
+        cache_path_dir.mkdir(parents=True, exist_ok=True)
+        cache_path = cache_path_dir.joinpath("cached_file_handles")
+
+        # Sanitize sensitive segments for logging
+        sanitized_cache_path = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(version)
+            .joinpath("***")
+            .joinpath("***")
+            .joinpath("cached_file_handles")
+        )
+        LOG.debug(f"File handle cache path: {sanitized_cache_path}")
 
-        sequences = types.group_and_sort_images(image_metadatas)
+        cache = history.PersistentCache(str(cache_path.resolve()))
+        cache.clear_expired()
 
-        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-            sequence_md5sum = types.update_sequence_md5sum(sequence)
+        return cache
 
-            sequence_progress: SequenceProgress = {
-                "sequence_idx": sequence_idx,
-                "total_sequence_count": len(sequences),
-                "sequence_image_count": len(sequence),
-                "sequence_uuid": sequence_uuid,
-                "file_type": types.FileType.IMAGE.value,
-                "sequence_md5sum": sequence_md5sum,
-            }
+    def _get_cached_file_handle(self, key: str) -> str | None:
+        if self.cache is None:
+            return None
 
-            mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+        if _is_uuid(key):
+            return None
 
-            try:
-                cluster_id = cls._upload_sequence(
-                    uploader, sequence, progress=mutable_progress
-                )
-            except Exception as ex:
-                yield sequence_uuid, UploadResult(error=ex)
-            else:
-                yield sequence_uuid, UploadResult(result=cluster_id)
+        return self.cache.get(key)
+
+    def _set_file_handle_cache(self, key: str, value: str) -> None:
+        if self.cache is None:
+            return
+
+        if _is_uuid(key):
+            return
+
+        self.cache.set(key, value)
 
 
 class Uploader:
     def __init__(
         self,
-        user_items: config.UserItem,
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
         emitter: EventEmitter | None = None,
-        chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-        dry_run=False,
     ):
-        self.user_items = user_items
+        self.upload_options = upload_options
+        self.user_session = user_session
        if emitter is None:
             # An empty event emitter that does nothing
             self.emitter = EventEmitter()
         else:
             self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
 
     def upload_stream(
         self,
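
Note: the parallel image upload above replaces the old `executor.map` with a shared queue of `(index, item)` pairs, so per-worker sessions can drain it and results can be re-ordered afterwards. A self-contained sketch of the same pattern with a stand-in upload function:

    import concurrent.futures
    import queue

    def upload_all(items: list[str], num_workers: int = 4) -> list[str]:
        # Tag each item with its index so results can be re-ordered later
        q: queue.Queue[tuple[int, str]] = queue.Queue()
        for idx, item in enumerate(items):
            q.put((idx, item))

        def worker() -> list[tuple[int, str]]:
            results = []
            while True:
                try:
                    idx, item = q.get_nowait()
                except queue.Empty:
                    break
                results.append((idx, f"handle-for-{item}"))  # stand-in for the real upload
                q.task_done()
            return results

        with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
            futures = [executor.submit(worker) for _ in range(num_workers)]
            indexed = [pair for f in futures for pair in f.result()]

        indexed.sort()  # restore the original sequence order
        return [handle for _, handle in indexed]

    print(upload_all(["a.jpg", "b.jpg", "c.jpg"]))
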
@@ -500,21 +875,13 @@ class Uploader:
             progress = {}
 
         if session_key is None:
-            fp.seek(0, io.SEEK_SET)
-            md5sum = utils.md5sum_fp(fp).hexdigest()
-            filetype = progress.get("file_type")
-            if filetype is not None:
-                session_key = _session_key(md5sum, types.FileType(filetype))
-            else:
-                session_key = md5sum
+            session_key = self._gen_session_key(fp, progress)
 
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
 
-        upload_service = self._create_upload_service(session_key)
-
         progress["entity_size"] = entity_size
-        progress["chunk_size"] = self.chunk_size
+        progress["chunk_size"] = self.upload_options.chunk_size
         progress["retries"] = 0
         progress["begin_offset"] = None
 
@@ -522,10 +889,24 @@ class Uploader:
 
         while True:
             try:
-                file_handle = self._upload_stream_retryable(
-                    upload_service, fp, T.cast(UploaderProgress, progress)
-                )
-            except Exception as ex:
+                if self.user_session is not None:
+                    file_handle = self._upload_stream_retryable(
+                        self.user_session,
+                        fp,
+                        session_key,
+                        T.cast(UploaderProgress, progress),
+                    )
+                else:
+                    with api_v4.create_user_session(
+                        self.upload_options.user_items["user_upload_token"]
+                    ) as user_session:
+                        file_handle = self._upload_stream_retryable(
+                            user_session,
+                            fp,
+                            session_key,
+                            T.cast(UploaderProgress, progress),
+                        )
+            except BaseException as ex:  # Include KeyboardInterrupt
                 self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
             else:
                 break
@@ -546,97 +927,102 @@ class Uploader:
         if progress is None:
             progress = {}
 
-        if self.dry_run:
+        if self.upload_options.dry_run or self.upload_options.nofinish:
             cluster_id = "0"
         else:
-            resp = api_v4.finish_upload(
-                self.user_items["user_upload_token"],
-                file_handle,
-                cluster_filetype,
-                organization_id=self.user_items.get("MAPOrganizationKey"),
-            )
-
-            data = resp.json()
-            cluster_id = data.get("cluster_id")
+            organization_id = self.upload_options.user_items.get("MAPOrganizationKey")
+
+            with api_v4.create_user_session(
+                self.upload_options.user_items["user_upload_token"]
+            ) as user_session:
+                resp = api_v4.finish_upload(
+                    user_session,
+                    file_handle,
+                    cluster_filetype,
+                    organization_id=organization_id,
+                )
 
-            # TODO: validate cluster_id
+            body = api_v4.jsonify_response(resp)
+            # TODO: Validate cluster_id
+            cluster_id = body.get("cluster_id")
 
         progress["cluster_id"] = cluster_id
         self.emitter.emit("upload_finished", progress)
 
         return cluster_id
 
-    def copy_uploader_without_emitter(self) -> Uploader:
-        return Uploader(
-            self.user_items,
-            emitter=None,
-            chunk_size=self.chunk_size,
-            dry_run=self.dry_run,
-        )
-
-    def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
+    def _create_upload_service(
+        self, user_session: requests.Session, session_key: str
+    ) -> upload_api_v4.UploadService:
         upload_service: upload_api_v4.UploadService
 
-        if self.dry_run:
+        if self.upload_options.dry_run:
             upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
             upload_service = upload_api_v4.FakeUploadService(
-                user_access_token=self.user_items["user_upload_token"],
-                session_key=session_key,
+                user_session,
+                session_key,
                 upload_path=Path(upload_path) if upload_path is not None else None,
             )
             LOG.info(
-                "Dry run mode enabled. Data will be uploaded to %s",
+                "Dry-run mode enabled, uploading to %s",
                 upload_service.upload_path.joinpath(session_key),
             )
         else:
-            upload_service = upload_api_v4.UploadService(
-                user_access_token=self.user_items["user_upload_token"],
-                session_key=session_key,
-            )
+            upload_service = upload_api_v4.UploadService(user_session, session_key)
 
         return upload_service
 
     def _handle_upload_exception(
-        self, ex: Exception, progress: UploaderProgress
+        self, ex: BaseException, progress: UploaderProgress
     ) -> None:
-        retries = progress["retries"]
+        retries = progress.get("retries", 0)
         begin_offset = progress.get("begin_offset")
-        chunk_size = progress["chunk_size"]
+        offset = progress.get("offset")
 
         if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-            self.emitter.emit("upload_interrupted", progress)
+            self.emitter.emit("upload_retrying", progress)
+
             LOG.warning(
-                # use %s instead of %d because offset could be None
-                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                chunk_size,
-                begin_offset,
-                ex.__class__.__name__,
-                str(ex),
+                f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
             )
+
             # Keep things immutable here. Will increment retries in the caller
             retries += 1
-            if _is_immediate_retry(ex):
+            if _is_immediate_retriable_exception(ex):
                 sleep_for = 0
             else:
                 sleep_for = min(2**retries, 16)
             LOG.info(
-                "Retrying in %d seconds (%d/%d)",
-                sleep_for,
-                retries,
-                constants.MAX_UPLOAD_RETRIES,
+                f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
             )
             if sleep_for:
                 time.sleep(sleep_for)
         else:
+            self.emitter.emit("upload_failed", progress)
             raise ex
 
+    @classmethod
+    def _upload_name(cls, progress: UploaderProgress):
+        # Strictly speaking these sequence properties should not be exposed in this context
+        # TODO: Maybe move these logging statements to event handlers
+        sequence_uuid: str | None = T.cast(
+            T.Union[str, None], progress.get("sequence_uuid")
+        )
+        import_path = T.cast(T.Union[str, None], progress.get("import_path"))
+        if sequence_uuid is not None:
+            if import_path is None:
+                name: str = f"sequence_{sequence_uuid}"
+            else:
+                name = f"sequence_{sequence_uuid}/{Path(import_path).name}"
+        else:
+            name = Path(import_path or "unknown").name
+        return name
+
     def _chunk_with_progress_emitted(
-        self,
-        stream: T.IO[bytes],
-        progress: UploaderProgress,
+        self, stream: T.IO[bytes], progress: UploaderProgress
     ) -> T.Generator[bytes, None, None]:
         for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
-            stream, self.chunk_size
+            stream, self.upload_options.chunk_size
         ):
             yield chunk
 
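Note: the retry schedule above is a plain exponential backoff capped at 16 seconds, skipped entirely for immediately-retriable errors such as an invalid offset. A worked illustration of the sleep sequence:

    def backoff_seconds(retries: int, immediate: bool = False) -> int:
        # Mirrors _handle_upload_exception: no sleep for immediate retries,
        # otherwise 2**retries seconds, capped at 16
        return 0 if immediate else min(2**retries, 16)

    # retries 1..5 -> 2, 4, 8, 16, 16 (the cap keeps later retries at 16s)
    print([backoff_seconds(r) for r in range(1, 6)])
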
@@ -649,11 +1035,21 @@ class Uploader:
 
     def _upload_stream_retryable(
         self,
-        upload_service: upload_api_v4.UploadService,
+        user_session: requests.Session,
         fp: T.IO[bytes],
-        progress: UploaderProgress,
+        session_key: str,
+        progress: UploaderProgress | None = None,
     ) -> str:
         """Upload the stream with safe retries guaranteed"""
+        if progress is None:
+            progress = T.cast(UploaderProgress, {})
+
+        upload_service = self._create_upload_service(user_session, session_key)
+
+        if "entity_size" not in progress:
+            fp.seek(0, io.SEEK_END)
+            entity_size = fp.tell()
+            progress["entity_size"] = entity_size
 
         begin_offset = upload_service.fetch_offset()
 
@@ -662,11 +1058,39 @@ class Uploader:
 
         self.emitter.emit("upload_fetch_offset", progress)
 
-        fp.seek(begin_offset, io.SEEK_SET)
+        # Estimate the read timeout
+        if not constants.MIN_UPLOAD_SPEED:
+            read_timeout = None
+        else:
+            remaining_bytes = abs(progress["entity_size"] - begin_offset)
+            read_timeout = max(
+                api_v4.REQUESTS_TIMEOUT,
+                remaining_bytes / constants.MIN_UPLOAD_SPEED,
+            )
 
+        # Upload from begin_offset
+        fp.seek(begin_offset, io.SEEK_SET)
         shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
 
-        return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)
+        # Start uploading
+        return upload_service.upload_shifted_chunks(
+            shifted_chunks, begin_offset, read_timeout=read_timeout
+        )
+
+    def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+        if self.upload_options.noresume:
+            # Generate a unique UUID for session_key when noresume is True
+            # to prevent resuming from previous uploads
+            session_key = f"{_prefixed_uuid4()}"
+        else:
+            fp.seek(0, io.SEEK_SET)
+            session_key = utils.md5sum_fp(fp).hexdigest()
+
+        filetype = progress.get("file_type")
+        if filetype is not None:
+            session_key = _suffix_session_key(session_key, types.FileType(filetype))
+
+        return session_key
 
 
 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
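
Note: the read timeout above scales with the bytes left to send, so a stalled connection fails in bounded time instead of hanging forever. A worked example with assumed constants (this diff does not show the values of `REQUESTS_TIMEOUT` or `MIN_UPLOAD_SPEED`):

    REQUESTS_TIMEOUT = 60          # assumed: base HTTP timeout, seconds
    MIN_UPLOAD_SPEED = 50 * 1024   # assumed: minimum acceptable speed, bytes/second

    def estimate_read_timeout(entity_size: int, begin_offset: int) -> float:
        remaining_bytes = abs(entity_size - begin_offset)
        # Enough time to push the remaining bytes at the minimum speed,
        # but never less than the base request timeout
        return max(REQUESTS_TIMEOUT, remaining_bytes / MIN_UPLOAD_SPEED)

    # 100 MiB remaining at >= 50 KiB/s -> 2048.0 seconds
    print(estimate_read_timeout(100 * 1024 * 1024, 0))
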
@@ -676,7 +1100,7 @@ def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
             raise FileNotFoundError(f"No such file {metadata.filename}")
 
 
-def _is_immediate_retry(ex: Exception):
+def _is_immediate_retriable_exception(ex: BaseException) -> bool:
     if (
         isinstance(ex, requests.HTTPError)
         and isinstance(ex.response, requests.Response)
@@ -689,8 +1113,10 @@
         # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
         return resp.get("debug_info", {}).get("retriable", False)
 
+    return False
+
 
-def _is_retriable_exception(ex: Exception):
+def _is_retriable_exception(ex: BaseException) -> bool:
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
         return True
 
@@ -709,19 +1135,36 @@
     return False
 
 
-def _session_key(
-    upload_md5sum: str, filetype: api_v4.ClusterFileType | types.FileType
+_SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+    api_v4.ClusterFileType.ZIP: ".zip",
+    api_v4.ClusterFileType.CAMM: ".mp4",
+    api_v4.ClusterFileType.BLACKVUE: ".mp4",
+    types.FileType.IMAGE: ".jpg",
+    types.FileType.ZIP: ".zip",
+    types.FileType.BLACKVUE: ".mp4",
+    types.FileType.CAMM: ".mp4",
+    types.FileType.GOPRO: ".mp4",
+    types.FileType.VIDEO: ".mp4",
+}
+
+
+def _suffix_session_key(
+    key: str, filetype: api_v4.ClusterFileType | types.FileType
 ) -> str:
-    _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
-        api_v4.ClusterFileType.ZIP: ".zip",
-        api_v4.ClusterFileType.CAMM: ".mp4",
-        api_v4.ClusterFileType.BLACKVUE: ".mp4",
-        types.FileType.IMAGE: ".jpg",
-        types.FileType.ZIP: ".zip",
-        types.FileType.BLACKVUE: ".mp4",
-        types.FileType.CAMM: ".mp4",
-        types.FileType.GOPRO: ".mp4",
-        types.FileType.VIDEO: ".mp4",
-    }
-
-    return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[filetype]}"
+    is_uuid_before = _is_uuid(key)
+
+    key = f"mly_tools_{key}{_SUFFIX_MAP[filetype]}"
+
+    assert _is_uuid(key) is is_uuid_before
+
+    return key
+
+
+def _prefixed_uuid4():
+    prefixed = f"uuid_{uuid.uuid4().hex}"
+    assert _is_uuid(prefixed)
+    return prefixed
+
+
+def _is_uuid(key: str) -> bool:
+    return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")
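
Note: session keys double as resume identifiers. Content-addressed keys (`mly_tools_<md5><suffix>`) let a re-run resume or deduplicate an earlier upload, while `uuid_`-prefixed keys (used for manifests and when `noresume` is set) are unique per run and are deliberately excluded from the file-handle cache by `_is_uuid`. A small self-contained illustration of the naming scheme:

    import hashlib
    import uuid

    def resumable_key(content: bytes, suffix: str = ".jpg") -> str:
        # Content-addressed: identical bytes always map to the same session key
        return f"mly_tools_{hashlib.md5(content).hexdigest()}{suffix}"

    def one_shot_key() -> str:
        # Unique every run, so no previous session is ever resumed
        return f"uuid_{uuid.uuid4().hex}"

    print(resumable_key(b"example image bytes"))  # mly_tools_<md5>.jpg
    print(one_shot_key())                         # uuid_<random hex>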