mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -263
  3. mapillary_tools/authenticate.py +46 -38
  4. mapillary_tools/commands/__main__.py +15 -16
  5. mapillary_tools/commands/upload.py +33 -4
  6. mapillary_tools/constants.py +127 -45
  7. mapillary_tools/exceptions.py +4 -0
  8. mapillary_tools/exif_read.py +2 -1
  9. mapillary_tools/exif_write.py +3 -1
  10. mapillary_tools/geo.py +16 -0
  11. mapillary_tools/geotag/base.py +6 -2
  12. mapillary_tools/geotag/factory.py +9 -1
  13. mapillary_tools/geotag/geotag_images_from_exiftool.py +1 -1
  14. mapillary_tools/geotag/geotag_images_from_gpx.py +0 -6
  15. mapillary_tools/geotag/geotag_videos_from_exiftool.py +30 -9
  16. mapillary_tools/geotag/utils.py +9 -12
  17. mapillary_tools/geotag/video_extractors/gpx.py +2 -1
  18. mapillary_tools/geotag/video_extractors/native.py +25 -0
  19. mapillary_tools/history.py +124 -7
  20. mapillary_tools/http.py +211 -0
  21. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  22. mapillary_tools/process_geotag_properties.py +31 -27
  23. mapillary_tools/process_sequence_properties.py +339 -322
  24. mapillary_tools/sample_video.py +1 -2
  25. mapillary_tools/serializer/description.py +56 -56
  26. mapillary_tools/serializer/gpx.py +1 -1
  27. mapillary_tools/upload.py +201 -205
  28. mapillary_tools/upload_api_v4.py +57 -47
  29. mapillary_tools/uploader.py +720 -285
  30. mapillary_tools/utils.py +57 -5
  31. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +7 -6
  32. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/RECORD +36 -35
  33. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +0 -0
  34. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
  35. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
  36. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
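The central API change in uploader.py below is that `Uploader` (and the new `VideoUploader`, `ZipUploader`, and `ImageSequenceUploader` classes) now takes a frozen `UploadOptions` dataclass instead of loose keyword arguments. A minimal sketch of constructing one — the token value and bare-dict `UserItem` here are hypothetical; real values come from mapillary_tools' authenticate flow:

```python
from mapillary_tools import uploader

# Hypothetical credentials for illustration only
user_items = {"user_upload_token": "MLY|example|token"}

options = uploader.UploadOptions(
    user_items,            # a config.UserItem dict
    num_upload_workers=4,  # must be > 0, validated in __post_init__
    dry_run=True,          # upload to a local fake endpoint instead of Mapillary
)
mly_uploader = uploader.Uploader(options)
```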
@@ -1,12 +1,12 @@
  from __future__ import annotations

  import concurrent.futures
-
  import dataclasses
  import io
  import json
  import logging
  import os
+ import queue
  import struct
  import sys
  import tempfile
@@ -25,7 +25,21 @@ else:

  import requests

- from . import api_v4, config, constants, exif_write, types, upload_api_v4, utils
+ from . import (
+     api_v4,
+     config,
+     constants,
+     exif_write,
+     geo,
+     history,
+     telemetry,
+     types,
+     upload_api_v4,
+     utils,
+ )
+ from .camm import camm_builder, camm_parser
+ from .gpmf import gpmf_parser
+ from .mp4 import simple_mp4_builder
  from .serializer.description import (
      desc_file_to_exif,
      DescriptionJSONSerializer,
@@ -36,6 +50,25 @@ from .serializer.description import (
  LOG = logging.getLogger(__name__)


+ @dataclasses.dataclass(frozen=True)
+ class UploadOptions:
+     user_items: config.UserItem
+     chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+     num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+     dry_run: bool = False
+     nofinish: bool = False
+     noresume: bool = False
+
+     def __post_init__(self):
+         if self.num_upload_workers <= 0:
+             raise ValueError(
+                 f"Expect positive num_upload_workers but got {self.num_upload_workers}"
+             )
+
+         if self.chunk_size <= 0:
+             raise ValueError(f"Expect positive chunk_size but got {self.chunk_size}")
+
+
  class UploaderProgress(T.TypedDict, total=True):
      """
      Progress data that Uploader cares about.
@@ -60,7 +93,7 @@ class UploaderProgress(T.TypedDict, total=True):
      # - offset == entity_size when "upload_end" or "upload_finished"
      entity_size: int

-     # An "upload_interrupted" will increase it. Reset to 0 if a chunk is uploaded
+     # An "upload_retrying" will increase it. Reset to 0 if a chunk is uploaded
      retries: int

      # Cluster ID after finishing the upload
@@ -92,7 +125,7 @@ class SequenceProgress(T.TypedDict, total=False):
      # MAPSequenceUUID. It is only available for directory uploading
      sequence_uuid: str

-     # Path to the Zipfile/BlackVue/CAMM
+     # Path to the image/video/zip
      import_path: str


@@ -120,13 +153,43 @@ class InvalidMapillaryZipFileError(SequenceError):
      pass


+ # BELOW demonstrates the pseudocode for a typical upload workflow
+ # and when upload events are emitted
+ #################################################################
+ # def pseudo_upload(metadata):
+ #     emit("upload_start")
+ #     while True:
+ #         try:
+ #             if is_sequence(metadata):
+ #                 for image in metadata:
+ #                     upload_stream(image.read())
+ #                     emit("upload_progress")
+ #             elif is_video(metadata):
+ #                 offset = fetch_offset()
+ #                 emit("upload_fetch_offset")
+ #                 for chunk in metadata.read()[offset:]:
+ #                     upload_stream(chunk)
+ #                     emit("upload_progress")
+ #         except BaseException as ex:  # Include KeyboardInterrupt
+ #             if retryable(ex):
+ #                 emit("upload_retrying")
+ #                 continue
+ #             else:
+ #                 emit("upload_failed")
+ #                 raise ex
+ #         else:
+ #             break
+ #     emit("upload_end")
+ #     finish_upload(data)
+ #     emit("upload_finished")
  EventName = T.Literal[
      "upload_start",
      "upload_fetch_offset",
      "upload_progress",
+     "upload_retrying",
      "upload_end",
+     "upload_failed",
      "upload_finished",
-     "upload_interrupted",
  ]


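The `upload_retrying` event replaces the old `upload_interrupted`, and `upload_failed` is new; the pseudocode above shows where each fires. A minimal, self-contained sketch of the emit/subscribe pattern these events imply — this toy `EventEmitter` is illustrative, not the one mapillary_tools ships:

```python
import typing as T

EventName = T.Literal[
    "upload_start", "upload_fetch_offset", "upload_progress",
    "upload_retrying", "upload_end", "upload_failed", "upload_finished",
]

class EventEmitter:
    def __init__(self) -> None:
        self._handlers: dict[str, list[T.Callable[[dict], None]]] = {}

    def on(self, event: EventName, handler: T.Callable[[dict], None]) -> None:
        # Register a handler for one event name
        self._handlers.setdefault(event, []).append(handler)

    def emit(self, event: EventName, payload: dict) -> None:
        # Call every handler registered for this event
        for handler in self._handlers.get(event, []):
            handler(payload)

emitter = EventEmitter()
emitter.on("upload_retrying", lambda p: print("retrying, attempt", p.get("retries")))
emitter.emit("upload_retrying", {"retries": 1})
```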
@@ -154,7 +217,131 @@ class UploadResult:
      error: Exception | None = None


- class ZipImageSequence:
+ class VideoUploader:
+     @classmethod
+     def upload_videos(
+         cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+     ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+         # If uploaded in a random order, interrupted uploads have a higher chance of expiring.
+         # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+         sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+         for idx, video_metadata in enumerate(sorted_video_metadatas):
+             LOG.debug(f"Checksum for video {video_metadata.filename}...")
+             try:
+                 video_metadata.update_md5sum()
+             except Exception as ex:
+                 yield video_metadata, UploadResult(error=ex)
+                 continue
+
+             assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+             progress: SequenceProgress = {
+                 "total_sequence_count": len(sorted_video_metadatas),
+                 "sequence_idx": idx,
+                 "file_type": video_metadata.filetype.value,
+                 "import_path": str(video_metadata.filename),
+                 "sequence_md5sum": video_metadata.md5sum,
+             }
+
+             try:
+                 with cls.build_camm_stream(video_metadata) as camm_fp:
+                     # Upload the mp4 stream
+                     file_handle = mly_uploader.upload_stream(
+                         T.cast(T.IO[bytes], camm_fp),
+                         progress=T.cast(T.Dict[str, T.Any], progress),
+                     )
+
+                 cluster_id = mly_uploader.finish_upload(
+                     file_handle,
+                     api_v4.ClusterFileType.CAMM,
+                     progress=T.cast(T.Dict[str, T.Any], progress),
+                 )
+             except Exception as ex:
+                 yield video_metadata, UploadResult(error=ex)
+             else:
+                 yield video_metadata, UploadResult(result=cluster_id)
+
+     @classmethod
+     @contextmanager
+     def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+         # Convert video metadata to CAMMInfo
+         camm_info = cls.prepare_camm_info(video_metadata)
+
+         # Create the CAMM sample generator
+         camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+         with video_metadata.filename.open("rb") as src_fp:
+             # Build the mp4 stream with the CAMM samples
+             yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+     @classmethod
+     def prepare_camm_info(
+         cls, video_metadata: types.VideoMetadata
+     ) -> camm_parser.CAMMInfo:
+         camm_info = camm_parser.CAMMInfo(
+             make=video_metadata.make or "", model=video_metadata.model or ""
+         )
+
+         for point in video_metadata.points:
+             if isinstance(point, telemetry.CAMMGPSPoint):
+                 if camm_info.gps is None:
+                     camm_info.gps = []
+                 camm_info.gps.append(point)
+
+             elif isinstance(point, telemetry.GPSPoint):
+                 # There is no proper CAMM entry for GoPro GPS
+                 if camm_info.mini_gps is None:
+                     camm_info.mini_gps = []
+                 camm_info.mini_gps.append(point)
+
+             elif isinstance(point, geo.Point):
+                 if camm_info.mini_gps is None:
+                     camm_info.mini_gps = []
+                 camm_info.mini_gps.append(point)
+             else:
+                 raise ValueError(f"Unknown point type: {point}")
+
+         if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+             if video_metadata.filetype is types.FileType.GOPRO:
+                 with video_metadata.filename.open("rb") as fp:
+                     gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                 if gopro_info is not None:
+                     camm_info.accl = gopro_info.accl or []
+                     camm_info.gyro = gopro_info.gyro or []
+                     camm_info.magn = gopro_info.magn or []
+
+         return camm_info
+
+
+ class ZipUploader:
+     @classmethod
+     def upload_zipfiles(
+         cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+     ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+         # If uploaded in a random order, interrupted uploads have a higher chance of expiring.
+         # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+         sorted_zip_paths = sorted(zip_paths)
+
+         for idx, zip_path in enumerate(sorted_zip_paths):
+             progress: SequenceProgress = {
+                 "total_sequence_count": len(sorted_zip_paths),
+                 "sequence_idx": idx,
+                 "import_path": str(zip_path),
+                 "file_type": types.FileType.ZIP.value,
+                 "sequence_md5sum": "",  # Placeholder, will be set in _upload_zipfile
+             }
+             try:
+                 cluster_id = cls._upload_zipfile(
+                     mly_uploader,
+                     zip_path,
+                     progress=T.cast(T.Dict[str, T.Any], progress),
+                 )
+             except Exception as ex:
+                 yield zip_path, UploadResult(error=ex)
+             else:
+                 yield zip_path, UploadResult(result=cluster_id)
+
      @classmethod
      def zip_images(
          cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
@@ -173,38 +360,93 @@ class ZipImageSequence:
          )
          with cls._wip_file_context(wip_zip_filename) as wip_path:
              with wip_path.open("wb") as wip_fp:
-                 cls.zip_sequence_fp(sequence, wip_fp)
+                 cls._zip_sequence_fp(sequence, wip_fp)

      @classmethod
-     @contextmanager
-     def _wip_file_context(cls, wip_path: Path):
-         try:
-             os.remove(wip_path)
-         except FileNotFoundError:
-             pass
-         try:
-             yield wip_path
+     def zip_images_and_upload(
+         cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+     ) -> T.Generator[tuple[str, UploadResult], None, None]:
+         sequences = types.group_and_sort_images(image_metadatas)

-             with wip_path.open("rb") as fp:
-                 upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+             try:
+                 _validate_metadatas(sequence)
+             except Exception as ex:
+                 yield sequence_uuid, UploadResult(error=ex)
+                 continue

-             done_path = wip_path.parent.joinpath(
-                 _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+             with tempfile.NamedTemporaryFile() as fp:
+                 try:
+                     sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                 except Exception as ex:
+                     yield sequence_uuid, UploadResult(error=ex)
+                     continue
+
+                 sequence_progress: SequenceProgress = {
+                     "sequence_idx": sequence_idx,
+                     "total_sequence_count": len(sequences),
+                     "sequence_image_count": len(sequence),
+                     "sequence_uuid": sequence_uuid,
+                     "file_type": types.FileType.ZIP.value,
+                     "sequence_md5sum": sequence_md5sum,
+                 }
+
+                 try:
+                     file_handle = uploader.upload_stream(
+                         fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                     )
+                     cluster_id = uploader.finish_upload(
+                         file_handle,
+                         api_v4.ClusterFileType.ZIP,
+                         progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                     )
+                 except Exception as ex:
+                     yield sequence_uuid, UploadResult(error=ex)
+                     continue
+
+                 yield sequence_uuid, UploadResult(result=cluster_id)
+
+     @classmethod
+     def _upload_zipfile(
+         cls,
+         uploader: Uploader,
+         zip_path: Path,
+         progress: dict[str, T.Any] | None = None,
+     ) -> str:
+         if progress is None:
+             progress = {}
+
+         with zipfile.ZipFile(zip_path) as ziph:
+             namelist = ziph.namelist()
+             if not namelist:
+                 raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+         with zip_path.open("rb") as zip_fp:
+             sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+         # Send a copy of the input progress to each upload session, to avoid modifying the original one
+         mutable_progress: SequenceProgress = {
+             **T.cast(SequenceProgress, progress),
+             "sequence_image_count": len(namelist),
+             "sequence_md5sum": sequence_md5sum,
+             "file_type": types.FileType.ZIP.value,
+         }
+
+         with zip_path.open("rb") as zip_fp:
+             file_handle = uploader.upload_stream(
+                 zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
              )

-             try:
-                 os.remove(done_path)
-             except FileNotFoundError:
-                 pass
-             wip_path.rename(done_path)
-         finally:
-             try:
-                 os.remove(wip_path)
-             except FileNotFoundError:
-                 pass
+         cluster_id = uploader.finish_upload(
+             file_handle,
+             api_v4.ClusterFileType.ZIP,
+             progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+         )
+
+         return cluster_id

      @classmethod
-     def zip_sequence_fp(
+     def _zip_sequence_fp(
          cls,
          sequence: T.Sequence[types.ImageMetadata],
          zip_fp: T.IO[bytes],
@@ -219,6 +461,8 @@ class ZipImageSequence:
                  f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
              )

+         if sequence:
+             LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
          sequence_md5sum = types.update_sequence_md5sum(sequence)

          with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
@@ -226,16 +470,18 @@ class ZipImageSequence:
                  # Arcname should be unique, the name does not matter
                  arcname = f"{idx}.jpg"
                  zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-                 zipf.writestr(zipinfo, cls._dump_image_bytes(metadata))
+                 zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
              assert len(sequence) == len(set(zipf.namelist()))
-             zipf.comment = json.dumps({"sequence_md5sum": sequence_md5sum}).encode(
-                 "utf-8"
-             )
+             zipf.comment = json.dumps(
+                 {"sequence_md5sum": sequence_md5sum},
+                 sort_keys=True,
+                 separators=(",", ":"),
+             ).encode("utf-8")

          return sequence_md5sum

      @classmethod
-     def extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+     def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
          with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
              comment = ziph.comment

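The archive comment is now written as canonical JSON (`sort_keys=True` plus compact separators), so identical sequences always produce byte-identical zip comments. A standalone sketch of reading the checksum back, mirroring what `_extract_sequence_md5sum` does (error handling omitted):

```python
import json
import zipfile

def read_sequence_md5sum(zip_path: str) -> str:
    # The comment was written as canonical JSON: {"sequence_md5sum": "..."}
    with zipfile.ZipFile(zip_path, "r") as ziph:
        comment = ziph.comment
    return json.loads(comment.decode("utf-8"))["sequence_md5sum"]
```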
@@ -258,237 +504,358 @@ class ZipImageSequence:
          return sequence_md5sum

      @classmethod
-     def _dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+     @contextmanager
+     def _wip_file_context(cls, wip_path: Path):
          try:
-             edit = exif_write.ExifEdit(metadata.filename)
-         except struct.error as ex:
-             raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+             os.remove(wip_path)
+         except FileNotFoundError:
+             pass
+         try:
+             yield wip_path

-         # The cast is to fix the type checker error
-         edit.add_image_description(
-             T.cast(
-                 T.Dict,
-                 desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata)),
+             with wip_path.open("rb") as fp:
+                 upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+
+             done_path = wip_path.parent.joinpath(
+                 _suffix_session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
              )
-         )

-         try:
-             return edit.dump_image_bytes()
-         except struct.error as ex:
-             raise ExifError(
-                 f"Failed to dump EXIF bytes: {ex}", metadata.filename
-             ) from ex
+             try:
+                 os.remove(done_path)
+             except FileNotFoundError:
+                 pass
+             wip_path.rename(done_path)
+         finally:
+             try:
+                 os.remove(wip_path)
+             except FileNotFoundError:
+                 pass

-     @classmethod
-     def upload_zipfile(
-         cls,
-         uploader: Uploader,
-         zip_path: Path,
-         progress: dict[str, T.Any] | None = None,
-     ) -> str:
-         if progress is None:
-             progress = {}

-         with zipfile.ZipFile(zip_path) as ziph:
-             namelist = ziph.namelist()
-             if not namelist:
-                 raise InvalidMapillaryZipFileError("Zipfile has no files")
+ class ImageSequenceUploader:
+     def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
+         self.upload_options = upload_options
+         self.emitter = emitter

-         with zip_path.open("rb") as zip_fp:
-             sequence_md5sum = cls.extract_sequence_md5sum(zip_fp)
+     def upload_images(
+         self, image_metadatas: T.Sequence[types.ImageMetadata]
+     ) -> T.Generator[tuple[str, UploadResult], None, None]:
+         sequences = types.group_and_sort_images(image_metadatas)

-         # Send the copy of the input progress to each upload session, to avoid modifying the original one
-         mutable_progress: SequenceProgress = {
-             **T.cast(SequenceProgress, progress),
-             "sequence_image_count": len(namelist),
-             "sequence_md5sum": sequence_md5sum,
-             "file_type": types.FileType.ZIP.value,
-         }
+         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+             LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+             sequence_md5sum = types.update_sequence_md5sum(sequence)

-         with zip_path.open("rb") as zip_fp:
-             file_handle = uploader.upload_stream(
-                 zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+             sequence_progress: SequenceProgress = {
+                 "sequence_idx": sequence_idx,
+                 "total_sequence_count": len(sequences),
+                 "sequence_image_count": len(sequence),
+                 "sequence_uuid": sequence_uuid,
+                 "file_type": types.FileType.IMAGE.value,
+                 "sequence_md5sum": sequence_md5sum,
+             }
+
+             try:
+                 cluster_id = self._upload_sequence_and_finish(
+                     sequence,
+                     sequence_progress=T.cast(dict[str, T.Any], sequence_progress),
+                 )
+             except Exception as ex:
+                 yield sequence_uuid, UploadResult(error=ex)
+             else:
+                 yield sequence_uuid, UploadResult(result=cluster_id)
+
+     def _upload_sequence_and_finish(
+         self,
+         sequence: T.Sequence[types.ImageMetadata],
+         sequence_progress: dict[str, T.Any],
+     ) -> str:
+         _validate_metadatas(sequence)
+
+         sequence_progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+         self.emitter.emit("upload_start", sequence_progress)
+
+         try:
+             # Retries will be handled in the call (but no upload event emissions)
+             image_file_handles = self._upload_images_parallel(
+                 sequence, sequence_progress
              )
+         except BaseException as ex:  # Include KeyboardInterrupt
+             self.emitter.emit("upload_failed", sequence_progress)
+             raise ex
+
+         manifest_file_handle = self._upload_manifest(image_file_handles)

+         self.emitter.emit("upload_end", sequence_progress)
+
+         uploader = Uploader(self.upload_options, emitter=self.emitter)
          cluster_id = uploader.finish_upload(
-             file_handle,
-             api_v4.ClusterFileType.ZIP,
-             progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+             manifest_file_handle,
+             api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+             progress=sequence_progress,
          )

          return cluster_id

-     @classmethod
-     def zip_images_and_upload(
-         cls,
-         uploader: Uploader,
-         image_metadatas: T.Sequence[types.ImageMetadata],
-         progress: dict[str, T.Any] | None = None,
-     ) -> T.Generator[tuple[str, UploadResult], None, None]:
-         if progress is None:
-             progress = {}
+     def _upload_manifest(self, image_file_handles: T.Sequence[str]) -> str:
+         uploader = Uploader(self.upload_options)

-         sequences = types.group_and_sort_images(image_metadatas)
+         manifest = {
+             "version": "1",
+             "upload_type": "images",
+             "image_handles": image_file_handles,
+         }

-         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-             try:
-                 _validate_metadatas(sequence)
-             except Exception as ex:
-                 yield sequence_uuid, UploadResult(error=ex)
-                 continue
+         with io.BytesIO() as manifest_fp:
+             manifest_fp.write(
+                 json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                     "utf-8"
+                 )
+             )
+             manifest_fp.seek(0, io.SEEK_SET)
+             return uploader.upload_stream(
+                 manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+             )

-             with tempfile.NamedTemporaryFile() as fp:
-                 try:
-                     sequence_md5sum = cls.zip_sequence_fp(sequence, fp)
-                 except Exception as ex:
-                     yield sequence_uuid, UploadResult(error=ex)
-                     continue
+     def _upload_images_parallel(
+         self,
+         sequence: T.Sequence[types.ImageMetadata],
+         sequence_progress: dict[str, T.Any],
+     ) -> list[str]:
+         if not sequence:
+             return []

-                 sequence_progress: SequenceProgress = {
-                     "sequence_idx": sequence_idx,
-                     "total_sequence_count": len(sequences),
-                     "sequence_image_count": len(sequence),
-                     "sequence_uuid": sequence_uuid,
-                     "file_type": types.FileType.ZIP.value,
-                     "sequence_md5sum": sequence_md5sum,
-                 }
+         max_workers = min(self.upload_options.num_upload_workers, len(sequence))

-                 mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+         # Lock is used to synchronize event emission
+         lock = threading.Lock()

-                 try:
-                     file_handle = uploader.upload_stream(fp, progress=mutable_progress)
-                     cluster_id = uploader.finish_upload(
-                         file_handle,
-                         api_v4.ClusterFileType.ZIP,
-                         progress=mutable_progress,
-                     )
-                 except Exception as ex:
-                     yield sequence_uuid, UploadResult(error=ex)
-                     continue
+         # Push all images into the queue
+         image_queue: queue.Queue[tuple[int, types.ImageMetadata]] = queue.Queue()
+         for idx, image_metadata in enumerate(sequence):
+             image_queue.put((idx, image_metadata))
+
+         upload_interrupted = threading.Event()
+
+         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+             futures = [
+                 executor.submit(
+                     self._upload_images_from_queue,
+                     image_queue,
+                     lock,
+                     upload_interrupted,
+                     sequence_progress,
+                 )
+                 for _ in range(max_workers)
+             ]

-                 yield sequence_uuid, UploadResult(result=cluster_id)
+             indexed_image_file_handles = []

-     @classmethod
-     def _upload_sequence(
-         cls,
-         uploader: Uploader,
-         sequence: T.Sequence[types.ImageMetadata],
-         progress: dict[str, T.Any] | None = None,
-     ) -> str:
-         if progress is None:
-             progress = {}
+             try:
+                 for future in futures:
+                     indexed_image_file_handles.extend(future.result())
+             except KeyboardInterrupt as ex:
+                 upload_interrupted.set()
+                 raise ex

-         # FIXME: This is a hack to disable the event emitter inside the uploader
-         uploader_without_emitter = uploader.copy_uploader_without_emitter()
+         # All tasks should be done here, so below is more like assertion
+         image_queue.join()
+         if sys.version_info >= (3, 13):
+             image_queue.shutdown()

-         lock = threading.Lock()
+         file_handles: list[str] = []

-         def _upload_image(image_metadata: types.ImageMetadata) -> str:
-             mutable_progress = {
-                 **(progress or {}),
-                 "filename": str(image_metadata.filename),
-             }
+         indexed_image_file_handles.sort()
+
+         # Important to guarantee the order
+         assert len(indexed_image_file_handles) == len(sequence)
+         for expected_idx, (idx, file_handle) in enumerate(indexed_image_file_handles):
+             assert expected_idx == idx
+             file_handles.append(file_handle)

-             bytes = cls._dump_image_bytes(image_metadata)
-             file_handle = uploader_without_emitter.upload_stream(
-                 io.BytesIO(bytes), progress=mutable_progress
+         return file_handles
+
+     def _upload_images_from_queue(
+         self,
+         image_queue: queue.Queue[tuple[int, types.ImageMetadata]],
+         lock: threading.Lock,
+         upload_interrupted: threading.Event,
+         sequence_progress: dict[str, T.Any],
+     ) -> list[tuple[int, str]]:
+         indexed_file_handles = []
+
+         with api_v4.create_user_session(
+             self.upload_options.user_items["user_upload_token"]
+         ) as user_session:
+             single_image_uploader = SingleImageUploader(
+                 self.upload_options, user_session=user_session
              )

-             mutable_progress["chunk_size"] = image_metadata.filesize
+             while True:
+                 # Assert that all images are already pushed into the queue
+                 try:
+                     idx, image_metadata = image_queue.get_nowait()
+                 except queue.Empty:
+                     break
+
+                 # Main thread will handle the interruption
+                 if upload_interrupted.is_set():
+                     break
+
+                 # Create a new mutable progress to keep the sequence_progress immutable
+                 image_progress = {
+                     **sequence_progress,
+                     "import_path": str(image_metadata.filename),
+                 }

-             with lock:
-                 uploader.emitter.emit("upload_progress", mutable_progress)
+                 # image_progress will be updated during uploading
+                 file_handle = single_image_uploader.upload(
+                     image_metadata, image_progress
+                 )

-             return file_handle
+                 # Update chunk_size (it was constant if set)
+                 image_progress["chunk_size"] = image_metadata.filesize

-         _validate_metadatas(sequence)
+                 # Main thread will handle the interruption
+                 if upload_interrupted.is_set():
+                     break

-         progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+                 with lock:
+                     self.emitter.emit("upload_progress", image_progress)

-         # TODO: assert sequence is sorted
+                 indexed_file_handles.append((idx, file_handle))

-         uploader.emitter.emit("upload_start", progress)
+                 image_queue.task_done()

-         with concurrent.futures.ThreadPoolExecutor(
-             max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
-         ) as executor:
-             image_file_handles = list(executor.map(_upload_image, sequence))
+         return indexed_file_handles

-         manifest = {
-             "version": "1",
-             "upload_type": "images",
-             "image_handles": image_file_handles,
-         }

-         with io.BytesIO() as manifest_fp:
-             manifest_fp.write(json.dumps(manifest).encode("utf-8"))
-             manifest_fp.seek(0, io.SEEK_SET)
-             manifest_file_handle = uploader_without_emitter.upload_stream(
-                 manifest_fp, session_key=f"{uuid.uuid4().hex}.json"
+ class SingleImageUploader:
+     def __init__(
+         self,
+         upload_options: UploadOptions,
+         user_session: requests.Session | None = None,
+     ):
+         self.upload_options = upload_options
+         self.user_session = user_session
+         self.cache = self._maybe_create_persistent_cache_instance(
+             self.upload_options.user_items, upload_options
+         )
+
+     def upload(
+         self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+     ) -> str:
+         image_bytes = self.dump_image_bytes(image_metadata)
+
+         uploader = Uploader(self.upload_options, user_session=self.user_session)
+
+         session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
+
+         file_handle = self._get_cached_file_handle(session_key)
+
+         if file_handle is None:
+             # image_progress will be updated during uploading
+             file_handle = uploader.upload_stream(
+                 io.BytesIO(image_bytes),
+                 session_key=session_key,
+                 progress=image_progress,
              )
+             self._set_file_handle_cache(session_key, file_handle)

-         uploader.emitter.emit("upload_end", progress)
+         return file_handle

-         cluster_id = uploader.finish_upload(
-             manifest_file_handle,
-             api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
-             progress=progress,
+     @classmethod
+     def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+         try:
+             edit = exif_write.ExifEdit(metadata.filename)
+         except struct.error as ex:
+             raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+         # The cast is to fix the type checker error
+         edit.add_image_description(
+             T.cast(
+                 T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+             )
          )

-         return cluster_id
+         try:
+             return edit.dump_image_bytes()
+         except struct.error as ex:
+             raise ExifError(
+                 f"Failed to dump EXIF bytes: {ex}", metadata.filename
+             ) from ex

      @classmethod
-     def upload_images(
-         cls,
-         uploader: Uploader,
-         image_metadatas: T.Sequence[types.ImageMetadata],
-         progress: dict[str, T.Any] | None = None,
-     ) -> T.Generator[tuple[str, UploadResult], None, None]:
-         if progress is None:
-             progress = {}
+     def _maybe_create_persistent_cache_instance(
+         cls, user_items: config.UserItem, upload_options: UploadOptions
+     ) -> history.PersistentCache | None:
+         if not constants.UPLOAD_CACHE_DIR:
+             LOG.debug(
+                 "Upload cache directory is set empty, skipping caching upload file handles"
+             )
+             return None

-         sequences = types.group_and_sort_images(image_metadatas)
+         if upload_options.dry_run:
+             LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+             return None

-         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-             sequence_md5sum = types.update_sequence_md5sum(sequence)
+         cache_path_dir = (
+             Path(constants.UPLOAD_CACHE_DIR)
+             .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+             .joinpath(
+                 user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+             )
+         )
+         cache_path_dir.mkdir(parents=True, exist_ok=True)
+         cache_path = cache_path_dir.joinpath("cached_file_handles")
+
+         # Sanitize sensitive segments for logging
+         sanitized_cache_path = (
+             Path(constants.UPLOAD_CACHE_DIR)
+             .joinpath("***")
+             .joinpath("***")
+             .joinpath("cached_file_handles")
+         )
+         LOG.debug(f"File handle cache path: {sanitized_cache_path}")

-             sequence_progress: SequenceProgress = {
-                 "sequence_idx": sequence_idx,
-                 "total_sequence_count": len(sequences),
-                 "sequence_image_count": len(sequence),
-                 "sequence_uuid": sequence_uuid,
-                 "file_type": types.FileType.IMAGE.value,
-                 "sequence_md5sum": sequence_md5sum,
-             }
+         cache = history.PersistentCache(str(cache_path.resolve()))
+         cache.clear_expired()

-             mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+         return cache

-             try:
-                 cluster_id = cls._upload_sequence(
-                     uploader, sequence, progress=mutable_progress
-                 )
-             except Exception as ex:
-                 yield sequence_uuid, UploadResult(error=ex)
-             else:
-                 yield sequence_uuid, UploadResult(result=cluster_id)
+     def _get_cached_file_handle(self, key: str) -> str | None:
+         if self.cache is None:
+             return None
+
+         if _is_uuid(key):
+             return None
+
+         return self.cache.get(key)
+
+     def _set_file_handle_cache(self, key: str, value: str) -> None:
+         if self.cache is None:
+             return
+
+         if _is_uuid(key):
+             return
+
+         self.cache.set(key, value)


  class Uploader:
      def __init__(
          self,
-         user_items: config.UserItem,
+         upload_options: UploadOptions,
+         user_session: requests.Session | None = None,
          emitter: EventEmitter | None = None,
-         chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-         dry_run=False,
      ):
-         self.user_items = user_items
+         self.upload_options = upload_options
+         self.user_session = user_session
          if emitter is None:
              # An empty event emitter that does nothing
              self.emitter = EventEmitter()
          else:
              self.emitter = emitter
-         self.chunk_size = chunk_size
-         self.dry_run = dry_run

      def upload_stream(
          self,
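The `ImageSequenceUploader` added in the hunk above fans images out to worker threads through a shared `queue.Queue`, collects `(index, file_handle)` pairs per worker, and re-sorts them so the manifest preserves sequence order. A stripped-down, runnable sketch of that pattern with generic work items standing in for image uploads:

```python
import concurrent.futures
import queue
import threading

def process_all(items: list[str], num_workers: int = 4) -> list[str]:
    # Pre-fill the queue with indexed work items
    work: "queue.Queue[tuple[int, str]]" = queue.Queue()
    for idx, item in enumerate(items):
        work.put((idx, item))

    interrupted = threading.Event()

    def worker() -> list[tuple[int, str]]:
        results = []
        while not interrupted.is_set():
            try:
                idx, item = work.get_nowait()
            except queue.Empty:
                break
            results.append((idx, item.upper()))  # stand-in for the actual upload
            work.task_done()
        return results

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as pool:
        futures = [pool.submit(worker) for _ in range(num_workers)]
        indexed = []
        try:
            for future in futures:
                indexed.extend(future.result())
        except KeyboardInterrupt:
            interrupted.set()  # tell workers to stop draining the queue
            raise

    indexed.sort()  # restore submission order, as the uploader does
    return [value for _, value in indexed]
```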
@@ -500,21 +867,13 @@ class Uploader:
              progress = {}

          if session_key is None:
-             fp.seek(0, io.SEEK_SET)
-             md5sum = utils.md5sum_fp(fp).hexdigest()
-             filetype = progress.get("file_type")
-             if filetype is not None:
-                 session_key = _session_key(md5sum, types.FileType(filetype))
-             else:
-                 session_key = md5sum
+             session_key = self._gen_session_key(fp, progress)

          fp.seek(0, io.SEEK_END)
          entity_size = fp.tell()

-         upload_service = self._create_upload_service(session_key)
-
          progress["entity_size"] = entity_size
-         progress["chunk_size"] = self.chunk_size
+         progress["chunk_size"] = self.upload_options.chunk_size
          progress["retries"] = 0
          progress["begin_offset"] = None

@@ -522,10 +881,24 @@ class Uploader:

          while True:
              try:
-                 file_handle = self._upload_stream_retryable(
-                     upload_service, fp, T.cast(UploaderProgress, progress)
-                 )
-             except Exception as ex:
+                 if self.user_session is not None:
+                     file_handle = self._upload_stream_retryable(
+                         self.user_session,
+                         fp,
+                         session_key,
+                         T.cast(UploaderProgress, progress),
+                     )
+                 else:
+                     with api_v4.create_user_session(
+                         self.upload_options.user_items["user_upload_token"]
+                     ) as user_session:
+                         file_handle = self._upload_stream_retryable(
+                             user_session,
+                             fp,
+                             session_key,
+                             T.cast(UploaderProgress, progress),
+                         )
+             except BaseException as ex:  # Include KeyboardInterrupt
                  self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
              else:
                  break
@@ -546,97 +919,102 @@ class Uploader:
          if progress is None:
              progress = {}

-         if self.dry_run:
+         if self.upload_options.dry_run or self.upload_options.nofinish:
              cluster_id = "0"
          else:
-             resp = api_v4.finish_upload(
-                 self.user_items["user_upload_token"],
-                 file_handle,
-                 cluster_filetype,
-                 organization_id=self.user_items.get("MAPOrganizationKey"),
-             )
-
-             data = resp.json()
-             cluster_id = data.get("cluster_id")
+             organization_id = self.upload_options.user_items.get("MAPOrganizationKey")
+
+             with api_v4.create_user_session(
+                 self.upload_options.user_items["user_upload_token"]
+             ) as user_session:
+                 resp = api_v4.finish_upload(
+                     user_session,
+                     file_handle,
+                     cluster_filetype,
+                     organization_id=organization_id,
+                 )

-             # TODO: validate cluster_id
+             body = api_v4.jsonify_response(resp)
+             # TODO: Validate cluster_id
+             cluster_id = body.get("cluster_id")

          progress["cluster_id"] = cluster_id
          self.emitter.emit("upload_finished", progress)

          return cluster_id

-     def copy_uploader_without_emitter(self) -> Uploader:
-         return Uploader(
-             self.user_items,
-             emitter=None,
-             chunk_size=self.chunk_size,
-             dry_run=self.dry_run,
-         )
-
-     def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
+     def _create_upload_service(
+         self, user_session: requests.Session, session_key: str
+     ) -> upload_api_v4.UploadService:
          upload_service: upload_api_v4.UploadService

-         if self.dry_run:
+         if self.upload_options.dry_run:
              upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
              upload_service = upload_api_v4.FakeUploadService(
-                 user_access_token=self.user_items["user_upload_token"],
-                 session_key=session_key,
+                 user_session,
+                 session_key,
                  upload_path=Path(upload_path) if upload_path is not None else None,
              )
              LOG.info(
-                 "Dry run mode enabled. Data will be uploaded to %s",
+                 "Dry-run mode enabled, uploading to %s",
                  upload_service.upload_path.joinpath(session_key),
              )
          else:
-             upload_service = upload_api_v4.UploadService(
-                 user_access_token=self.user_items["user_upload_token"],
-                 session_key=session_key,
-             )
+             upload_service = upload_api_v4.UploadService(user_session, session_key)

          return upload_service

      def _handle_upload_exception(
-         self, ex: Exception, progress: UploaderProgress
+         self, ex: BaseException, progress: UploaderProgress
      ) -> None:
-         retries = progress["retries"]
+         retries = progress.get("retries", 0)
          begin_offset = progress.get("begin_offset")
-         chunk_size = progress["chunk_size"]
+         offset = progress.get("offset")

          if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-             self.emitter.emit("upload_interrupted", progress)
+             self.emitter.emit("upload_retrying", progress)
+
              LOG.warning(
-                 # use %s instead of %d because offset could be None
-                 "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                 chunk_size,
-                 begin_offset,
-                 ex.__class__.__name__,
-                 str(ex),
+                 f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
              )
+
              # Keep things immutable here. Will increment retries in the caller
              retries += 1
-             if _is_immediate_retry(ex):
+             if _is_immediate_retriable_exception(ex):
                  sleep_for = 0
              else:
                  sleep_for = min(2**retries, 16)
              LOG.info(
-                 "Retrying in %d seconds (%d/%d)",
-                 sleep_for,
-                 retries,
-                 constants.MAX_UPLOAD_RETRIES,
+                 f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
              )
              if sleep_for:
                  time.sleep(sleep_for)
          else:
+             self.emitter.emit("upload_failed", progress)
              raise ex

+     @classmethod
+     def _upload_name(cls, progress: UploaderProgress):
+         # Strictly speaking these sequence properties should not be exposed in this context
+         # TODO: Maybe move these logging statements to event handlers
+         sequence_uuid: str | None = T.cast(
+             T.Union[str, None], progress.get("sequence_uuid")
+         )
+         import_path = T.cast(T.Union[str, None], progress.get("import_path"))
+         if sequence_uuid is not None:
+             if import_path is None:
+                 name: str = f"sequence_{sequence_uuid}"
+             else:
+                 name = f"sequence_{sequence_uuid}/{Path(import_path).name}"
+         else:
+             name = Path(import_path or "unknown").name
+         return name
+
      def _chunk_with_progress_emitted(
-         self,
-         stream: T.IO[bytes],
-         progress: UploaderProgress,
+         self, stream: T.IO[bytes], progress: UploaderProgress
      ) -> T.Generator[bytes, None, None]:
          for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
-             stream, self.chunk_size
+             stream, self.upload_options.chunk_size
          ):
              yield chunk

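The backoff in `_handle_upload_exception` above is exponential and capped at 16 seconds (`sleep_for = min(2**retries, 16)`), or zero when the server marks the error immediately retriable. Since the counter is incremented before the wait is computed, the schedule works out to:

```python
# Backoff schedule: exponential, capped at 16 seconds
print([min(2**retries, 16) for retries in range(1, 8)])
# [2, 4, 8, 16, 16, 16, 16]
```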
@@ -649,11 +1027,21 @@ class Uploader:

      def _upload_stream_retryable(
          self,
-         upload_service: upload_api_v4.UploadService,
+         user_session: requests.Session,
          fp: T.IO[bytes],
-         progress: UploaderProgress,
+         session_key: str,
+         progress: UploaderProgress | None = None,
      ) -> str:
          """Upload the stream with safe retries guaranteed"""
+         if progress is None:
+             progress = T.cast(UploaderProgress, {})
+
+         upload_service = self._create_upload_service(user_session, session_key)
+
+         if "entity_size" not in progress:
+             fp.seek(0, io.SEEK_END)
+             entity_size = fp.tell()
+             progress["entity_size"] = entity_size

          begin_offset = upload_service.fetch_offset()

@@ -662,11 +1050,39 @@ class Uploader:

          self.emitter.emit("upload_fetch_offset", progress)

-         fp.seek(begin_offset, io.SEEK_SET)
+         # Estimate the read timeout
+         if not constants.MIN_UPLOAD_SPEED:
+             read_timeout = None
+         else:
+             remaining_bytes = abs(progress["entity_size"] - begin_offset)
+             read_timeout = max(
+                 api_v4.REQUESTS_TIMEOUT,
+                 remaining_bytes / constants.MIN_UPLOAD_SPEED,
+             )

+         # Upload from begin_offset
+         fp.seek(begin_offset, io.SEEK_SET)
          shifted_chunks = self._chunk_with_progress_emitted(fp, progress)

-         return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)
+         # Start uploading
+         return upload_service.upload_shifted_chunks(
+             shifted_chunks, begin_offset, read_timeout=read_timeout
+         )
+
+     def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+         if self.upload_options.noresume:
+             # Generate a unique UUID for session_key when noresume is True
+             # to prevent resuming from previous uploads
+             session_key = f"{_prefixed_uuid4()}"
+         else:
+             fp.seek(0, io.SEEK_SET)
+             session_key = utils.md5sum_fp(fp).hexdigest()
+
+         filetype = progress.get("file_type")
+         if filetype is not None:
+             session_key = _suffix_session_key(session_key, types.FileType(filetype))
+
+         return session_key


  def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
@@ -676,7 +1092,7 @@ def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
              raise FileNotFoundError(f"No such file {metadata.filename}")


- def _is_immediate_retry(ex: Exception):
+ def _is_immediate_retriable_exception(ex: BaseException) -> bool:
      if (
          isinstance(ex, requests.HTTPError)
          and isinstance(ex.response, requests.Response)
@@ -689,8 +1105,10 @@ def _is_immediate_retry(ex: Exception):
          # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
          return resp.get("debug_info", {}).get("retriable", False)

+     return False
+

- def _is_retriable_exception(ex: Exception):
+ def _is_retriable_exception(ex: BaseException) -> bool:
      if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
          return True

@@ -709,19 +1127,36 @@ def _is_retriable_exception(ex: Exception):
      return False


- def _session_key(
-     upload_md5sum: str, filetype: api_v4.ClusterFileType | types.FileType
+ _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+     api_v4.ClusterFileType.ZIP: ".zip",
+     api_v4.ClusterFileType.CAMM: ".mp4",
+     api_v4.ClusterFileType.BLACKVUE: ".mp4",
+     types.FileType.IMAGE: ".jpg",
+     types.FileType.ZIP: ".zip",
+     types.FileType.BLACKVUE: ".mp4",
+     types.FileType.CAMM: ".mp4",
+     types.FileType.GOPRO: ".mp4",
+     types.FileType.VIDEO: ".mp4",
+ }
+
+
+ def _suffix_session_key(
+     key: str, filetype: api_v4.ClusterFileType | types.FileType
  ) -> str:
-     _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
-         api_v4.ClusterFileType.ZIP: ".zip",
-         api_v4.ClusterFileType.CAMM: ".mp4",
-         api_v4.ClusterFileType.BLACKVUE: ".mp4",
-         types.FileType.IMAGE: ".jpg",
-         types.FileType.ZIP: ".zip",
-         types.FileType.BLACKVUE: ".mp4",
-         types.FileType.CAMM: ".mp4",
-         types.FileType.GOPRO: ".mp4",
-         types.FileType.VIDEO: ".mp4",
-     }
-
-     return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[filetype]}"
+     is_uuid_before = _is_uuid(key)
+
+     key = f"mly_tools_{key}{_SUFFIX_MAP[filetype]}"
+
+     assert _is_uuid(key) is is_uuid_before
+
+     return key
+
+
+ def _prefixed_uuid4():
+     prefixed = f"uuid_{uuid.uuid4().hex}"
+     assert _is_uuid(prefixed)
+     return prefixed
+
+
+ def _is_uuid(key: str) -> bool:
+     return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")
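The session-key helpers encode resumability in the key itself: md5-derived keys (`mly_tools_<md5><suffix>`) are deterministic, so interrupted uploads can resume and file handles can be cached, while `uuid_`-prefixed keys are unique per run (manifests, and everything when `noresume` is set), and `_is_uuid` lets the cache and resume logic skip them. Illustrative values only, mirroring the helpers above:

```python
# Illustrative only; these are module-private helpers
print(_suffix_session_key("0123abcd", types.FileType.ZIP))
# mly_tools_0123abcd.zip  (deterministic -> resumable, cacheable)
print(_is_uuid(_prefixed_uuid4()))
# True  (random -> never resumed, never cached)
```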