mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -262
  3. mapillary_tools/authenticate.py +54 -46
  4. mapillary_tools/blackvue_parser.py +79 -22
  5. mapillary_tools/commands/__main__.py +15 -16
  6. mapillary_tools/commands/upload.py +33 -4
  7. mapillary_tools/config.py +38 -17
  8. mapillary_tools/constants.py +127 -43
  9. mapillary_tools/exceptions.py +4 -0
  10. mapillary_tools/exif_read.py +2 -1
  11. mapillary_tools/exif_write.py +3 -1
  12. mapillary_tools/exiftool_read_video.py +52 -15
  13. mapillary_tools/exiftool_runner.py +4 -24
  14. mapillary_tools/ffmpeg.py +406 -232
  15. mapillary_tools/geo.py +16 -0
  16. mapillary_tools/geotag/__init__.py +0 -0
  17. mapillary_tools/geotag/base.py +8 -4
  18. mapillary_tools/geotag/factory.py +106 -89
  19. mapillary_tools/geotag/geotag_images_from_exiftool.py +27 -20
  20. mapillary_tools/geotag/geotag_images_from_gpx.py +7 -6
  21. mapillary_tools/geotag/geotag_images_from_video.py +35 -0
  22. mapillary_tools/geotag/geotag_videos_from_exiftool.py +61 -14
  23. mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
  24. mapillary_tools/geotag/options.py +25 -3
  25. mapillary_tools/geotag/utils.py +9 -12
  26. mapillary_tools/geotag/video_extractors/base.py +1 -1
  27. mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
  28. mapillary_tools/geotag/video_extractors/gpx.py +61 -70
  29. mapillary_tools/geotag/video_extractors/native.py +34 -31
  30. mapillary_tools/history.py +128 -8
  31. mapillary_tools/http.py +211 -0
  32. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  33. mapillary_tools/process_geotag_properties.py +47 -35
  34. mapillary_tools/process_sequence_properties.py +340 -325
  35. mapillary_tools/sample_video.py +8 -8
  36. mapillary_tools/serializer/description.py +587 -0
  37. mapillary_tools/serializer/gpx.py +132 -0
  38. mapillary_tools/types.py +44 -610
  39. mapillary_tools/upload.py +327 -352
  40. mapillary_tools/upload_api_v4.py +125 -72
  41. mapillary_tools/uploader.py +797 -216
  42. mapillary_tools/utils.py +57 -5
  43. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +91 -34
  44. mapillary_tools-0.14.1.dist-info/RECORD +76 -0
  45. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +1 -1
  46. mapillary_tools-0.14.0a2.dist-info/RECORD +0 -72
  47. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
  48. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
  49. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,16 @@
  from __future__ import annotations

+ import concurrent.futures
  import dataclasses
  import io
  import json
  import logging
  import os
+ import queue
  import struct
+ import sys
  import tempfile
+ import threading
  import time
  import typing as T
  import uuid
@@ -14,14 +18,57 @@ import zipfile
  from contextlib import contextmanager
  from pathlib import Path

+ if sys.version_info >= (3, 11):
+     from typing import Required
+ else:
+     from typing_extensions import Required
+
  import requests

- from . import api_v4, constants, exif_write, types, upload_api_v4
+ from . import (
+     api_v4,
+     config,
+     constants,
+     exif_write,
+     geo,
+     history,
+     telemetry,
+     types,
+     upload_api_v4,
+     utils,
+ )
+ from .camm import camm_builder, camm_parser
+ from .gpmf import gpmf_parser
+ from .mp4 import simple_mp4_builder
+ from .serializer.description import (
+     desc_file_to_exif,
+     DescriptionJSONSerializer,
+     validate_image_desc,
+ )


  LOG = logging.getLogger(__name__)


+ @dataclasses.dataclass(frozen=True)
+ class UploadOptions:
+     user_items: config.UserItem
+     chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+     num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+     dry_run: bool = False
+     nofinish: bool = False
+     noresume: bool = False
+
+     def __post_init__(self):
+         if self.num_upload_workers <= 0:
+             raise ValueError(
+                 f"Expect positive num_upload_workers but got {self.num_upload_workers}"
+             )
+
+         if self.chunk_size <= 0:
+             raise ValueError(f"Expect positive chunk_size but got {self.chunk_size}")
+
+
  class UploaderProgress(T.TypedDict, total=True):
      """
      Progress data that Uploader cares about.
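The new UploadOptions dataclass gathers what used to be loose Uploader constructor arguments (chunk_size, dry_run) together with the new worker-count and nofinish/noresume flags, and rejects non-positive values eagerly in __post_init__. A construction sketch; the token value is a placeholder:

    options = UploadOptions(
        user_items={"user_upload_token": "MLY|0000|placeholder"},
        num_upload_workers=4,
        dry_run=True,
    )

    UploadOptions(user_items={}, chunk_size=0)
    # raises ValueError: Expect positive chunk_size but got 0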
@@ -46,7 +93,7 @@ class UploaderProgress(T.TypedDict, total=True):
      # - offset == entity_size when "upload_end" or "upload_finished"
      entity_size: int

-     # An "upload_interrupted" will increase it. Reset to 0 if a chunk is uploaded
+     # An "upload_retrying" will increase it. Reset to 0 if a chunk is uploaded
      retries: int

      # Cluster ID after finishing the upload
@@ -56,17 +103,21 @@ class UploaderProgress(T.TypedDict, total=True):
  class SequenceProgress(T.TypedDict, total=False):
      """Progress data at sequence level"""

-     # md5sum of the zipfile/BlackVue/CAMM in uploading
-     md5sum: str
+     # Used to check if it is uploaded or not
+     sequence_md5sum: Required[str]
+
+     # Used to resume from the previous upload,
+     # so it has to be a unique identifier (hash) of the upload content
+     upload_md5sum: str

      # File type
-     file_type: str
+     file_type: Required[str]

      # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
-     total_sequence_count: int
+     total_sequence_count: Required[int]

      # 0-based nth sequence. It is always 0 when uploading Zipfile/BlackVue/CAMM
-     sequence_idx: int
+     sequence_idx: Required[int]

      # How many images in the sequence. It's available only when uploading directories/Zipfiles
      sequence_image_count: int
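SequenceProgress is declared with total=False, so its keys are optional by default; the Required[...] markers added above (imported from typing on Python 3.11+, typing_extensions otherwise, per the import block earlier in this diff) opt individual keys back into being mandatory. A minimal sketch of the pattern:

    import sys
    import typing as T

    if sys.version_info >= (3, 11):
        from typing import Required
    else:
        from typing_extensions import Required

    class Progress(T.TypedDict, total=False):
        sequence_md5sum: Required[str]  # must always be present
        sequence_image_count: int       # may be omitted

    ok: Progress = {"sequence_md5sum": "d41d8cd9"}  # type-checks
    # bad: Progress = {}  # a type checker rejects this: sequence_md5sum is required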
@@ -74,7 +125,7 @@ class SequenceProgress(T.TypedDict, total=False):
      # MAPSequenceUUID. It is only available for directory uploading
      sequence_uuid: str

-     # Path to the Zipfile/BlackVue/CAMM
+     # Path to the image/video/zip
      import_path: str

@@ -102,13 +153,43 @@ class InvalidMapillaryZipFileError(SequenceError):
      pass


+ # BELOW demonstrates the pseudocode for a typical upload workflow
+ # and when upload events are emitted
+ #################################################################
+ # def pseudo_upload(metadata):
+ #     emit("upload_start")
+ #     while True:
+ #         try:
+ #             if is_sequence(metadata):
+ #                 for image in metadata:
+ #                     upload_stream(image.read())
+ #                     emit("upload_progress")
+ #             elif is_video(metadata):
+ #                 offset = fetch_offset()
+ #                 emit("upload_fetch_offset")
+ #                 for chunk in metadata.read()[offset:]:
+ #                     upload_stream(chunk)
+ #                     emit("upload_progress")
+ #         except BaseException as ex:  # Include KeyboardInterrupt
+ #             if retryable(ex):
+ #                 emit("upload_retrying")
+ #                 continue
+ #             else:
+ #                 emit("upload_failed")
+ #                 raise ex
+ #         else:
+ #             break
+ #     emit("upload_end")
+ #     finish_upload(metadata)
+ #     emit("upload_finished")
  EventName = T.Literal[
      "upload_start",
      "upload_fetch_offset",
      "upload_progress",
+     "upload_retrying",
      "upload_end",
+     "upload_failed",
      "upload_finished",
-     "upload_interrupted",
  ]

@@ -121,6 +202,7 @@ class EventEmitter:
      def on(self, event: EventName):
          def _wrap(callback):
              self.events.setdefault(event, []).append(callback)
+             return callback

          return _wrap
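Returning the callback from _wrap turns on() into a well-behaved decorator: the decorated function is no longer replaced by None, so one handler can be registered for several events by stacking decorators. A usage sketch against the renamed events (upload_retrying and upload_failed replace the old upload_interrupted):

    emitter = EventEmitter()

    @emitter.on("upload_retrying")
    @emitter.on("upload_failed")
    def log_event(progress):
        # Both registrations point at the same function because
        # on() now returns it instead of None
        print(progress.get("retries", 0), progress.get("offset"))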
@@ -135,7 +217,131 @@ class UploadResult:
      error: Exception | None = None


- class ZipImageSequence:
+ class VideoUploader:
+     @classmethod
+     def upload_videos(
+         cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+     ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+         # If uploaded in a random order, interrupted uploads have a higher chance to expire.
+         # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+         sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+         for idx, video_metadata in enumerate(sorted_video_metadatas):
+             LOG.debug(f"Checksum for video {video_metadata.filename}...")
+             try:
+                 video_metadata.update_md5sum()
+             except Exception as ex:
+                 yield video_metadata, UploadResult(error=ex)
+                 continue
+
+             assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+             progress: SequenceProgress = {
+                 "total_sequence_count": len(sorted_video_metadatas),
+                 "sequence_idx": idx,
+                 "file_type": video_metadata.filetype.value,
+                 "import_path": str(video_metadata.filename),
+                 "sequence_md5sum": video_metadata.md5sum,
+             }
+
+             try:
+                 with cls.build_camm_stream(video_metadata) as camm_fp:
+                     # Upload the mp4 stream
+                     file_handle = mly_uploader.upload_stream(
+                         T.cast(T.IO[bytes], camm_fp),
+                         progress=T.cast(T.Dict[str, T.Any], progress),
+                     )
+
+                 cluster_id = mly_uploader.finish_upload(
+                     file_handle,
+                     api_v4.ClusterFileType.CAMM,
+                     progress=T.cast(T.Dict[str, T.Any], progress),
+                 )
+             except Exception as ex:
+                 yield video_metadata, UploadResult(error=ex)
+             else:
+                 yield video_metadata, UploadResult(result=cluster_id)
+
+     @classmethod
+     @contextmanager
+     def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+         # Convert video metadata to CAMMInfo
+         camm_info = cls.prepare_camm_info(video_metadata)
+
+         # Create the CAMM sample generator
+         camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+         with video_metadata.filename.open("rb") as src_fp:
+             # Build the mp4 stream with the CAMM samples
+             yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+     @classmethod
+     def prepare_camm_info(
+         cls, video_metadata: types.VideoMetadata
+     ) -> camm_parser.CAMMInfo:
+         camm_info = camm_parser.CAMMInfo(
+             make=video_metadata.make or "", model=video_metadata.model or ""
+         )
+
+         for point in video_metadata.points:
+             if isinstance(point, telemetry.CAMMGPSPoint):
+                 if camm_info.gps is None:
+                     camm_info.gps = []
+                 camm_info.gps.append(point)
+
+             elif isinstance(point, telemetry.GPSPoint):
+                 # There is no proper CAMM entry for GoPro GPS
+                 if camm_info.mini_gps is None:
+                     camm_info.mini_gps = []
+                 camm_info.mini_gps.append(point)
+
+             elif isinstance(point, geo.Point):
+                 if camm_info.mini_gps is None:
+                     camm_info.mini_gps = []
+                 camm_info.mini_gps.append(point)
+             else:
+                 raise ValueError(f"Unknown point type: {point}")
+
+         if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+             if video_metadata.filetype is types.FileType.GOPRO:
+                 with video_metadata.filename.open("rb") as fp:
+                     gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                 if gopro_info is not None:
+                     camm_info.accl = gopro_info.accl or []
+                     camm_info.gyro = gopro_info.gyro or []
+                     camm_info.magn = gopro_info.magn or []
+
+         return camm_info
+
+
+ class ZipUploader:
+     @classmethod
+     def upload_zipfiles(
+         cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+     ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+         # If uploaded in a random order, interrupted uploads have a higher chance to expire.
+         # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+         sorted_zip_paths = sorted(zip_paths)
+
+         for idx, zip_path in enumerate(sorted_zip_paths):
+             progress: SequenceProgress = {
+                 "total_sequence_count": len(sorted_zip_paths),
+                 "sequence_idx": idx,
+                 "import_path": str(zip_path),
+                 "file_type": types.FileType.ZIP.value,
+                 "sequence_md5sum": "",  # Placeholder, will be set in _upload_zipfile
+             }
+             try:
+                 cluster_id = cls._upload_zipfile(
+                     mly_uploader,
+                     zip_path,
+                     progress=T.cast(T.Dict[str, T.Any], progress),
+                 )
+             except Exception as ex:
+                 yield zip_path, UploadResult(error=ex)
+             else:
+                 yield zip_path, UploadResult(result=cluster_id)
+
      @classmethod
      def zip_images(
          cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
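Both upload_videos and upload_zipfiles are generators yielding (input, UploadResult) pairs, so a caller can react to per-file failures without aborting the whole batch. A consumption sketch; the user_items value and the video_metadatas list are illustrative:

    options = UploadOptions(user_items={"user_upload_token": "MLY|0000|placeholder"})
    uploader = Uploader(options)

    for metadata, result in VideoUploader.upload_videos(uploader, video_metadatas):
        if result.error is not None:
            print(f"FAILED {metadata.filename}: {result.error}")
        else:
            print(f"OK {metadata.filename} -> cluster {result.result}")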
@@ -148,30 +354,105 @@ class ZipImageSequence:

          for sequence_uuid, sequence in sequences.items():
              _validate_metadatas(sequence)
-             upload_md5sum = types.update_sequence_md5sum(sequence)
-
              # For atomicity we write into a WIP file and then rename to the final file
              wip_zip_filename = zip_dir.joinpath(
                  f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
              )
-             filename = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
-             zip_filename = zip_dir.joinpath(filename)
-             with wip_file_context(wip_zip_filename, zip_filename) as wip_path:
+             with cls._wip_file_context(wip_zip_filename) as wip_path:
                  with wip_path.open("wb") as wip_fp:
-                     actual_md5sum = cls.zip_sequence_deterministically(sequence, wip_fp)
-                     assert actual_md5sum == upload_md5sum, "md5sum mismatch"
+                     cls._zip_sequence_fp(sequence, wip_fp)
+
+     @classmethod
+     def zip_images_and_upload(
+         cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+     ) -> T.Generator[tuple[str, UploadResult], None, None]:
+         sequences = types.group_and_sort_images(image_metadatas)
+
+         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+             try:
+                 _validate_metadatas(sequence)
+             except Exception as ex:
+                 yield sequence_uuid, UploadResult(error=ex)
+                 continue
+
+             with tempfile.NamedTemporaryFile() as fp:
+                 try:
+                     sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                 except Exception as ex:
+                     yield sequence_uuid, UploadResult(error=ex)
+                     continue
+
+                 sequence_progress: SequenceProgress = {
+                     "sequence_idx": sequence_idx,
+                     "total_sequence_count": len(sequences),
+                     "sequence_image_count": len(sequence),
+                     "sequence_uuid": sequence_uuid,
+                     "file_type": types.FileType.ZIP.value,
+                     "sequence_md5sum": sequence_md5sum,
+                 }
+
+                 try:
+                     file_handle = uploader.upload_stream(
+                         fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                     )
+                     cluster_id = uploader.finish_upload(
+                         file_handle,
+                         api_v4.ClusterFileType.ZIP,
+                         progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                     )
+                 except Exception as ex:
+                     yield sequence_uuid, UploadResult(error=ex)
+                     continue
+
+                 yield sequence_uuid, UploadResult(result=cluster_id)
+
+     @classmethod
+     def _upload_zipfile(
+         cls,
+         uploader: Uploader,
+         zip_path: Path,
+         progress: dict[str, T.Any] | None = None,
+     ) -> str:
+         if progress is None:
+             progress = {}
+
+         with zipfile.ZipFile(zip_path) as ziph:
+             namelist = ziph.namelist()
+             if not namelist:
+                 raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+         with zip_path.open("rb") as zip_fp:
+             sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+         # Send a copy of the input progress to each upload session, to avoid modifying the original one
+         mutable_progress: SequenceProgress = {
+             **T.cast(SequenceProgress, progress),
+             "sequence_image_count": len(namelist),
+             "sequence_md5sum": sequence_md5sum,
+             "file_type": types.FileType.ZIP.value,
+         }
+
+         with zip_path.open("rb") as zip_fp:
+             file_handle = uploader.upload_stream(
+                 zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+             )
+
+         cluster_id = uploader.finish_upload(
+             file_handle,
+             api_v4.ClusterFileType.ZIP,
+             progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+         )
+
+         return cluster_id

      @classmethod
-     def zip_sequence_deterministically(
+     def _zip_sequence_fp(
          cls,
          sequence: T.Sequence[types.ImageMetadata],
          zip_fp: T.IO[bytes],
      ) -> str:
          """
-         Write a sequence of ImageMetadata into the zipfile handle. It should guarantee
-         that the same sequence always produces the same zipfile, because the
-         sequence md5sum will be used to upload the zipfile or resume the upload.
-
+         Write a sequence of ImageMetadata into the zipfile handle.
          The sequence has to be one sequence and sorted.
          """

@@ -180,21 +461,27 @@ class ZipImageSequence:
              f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
          )

-         upload_md5sum = types.update_sequence_md5sum(sequence)
+         if sequence:
+             LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
+         sequence_md5sum = types.update_sequence_md5sum(sequence)

          with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
              for idx, metadata in enumerate(sequence):
-                 # Use {idx}.jpg (suffix does not matter) as the archive name to ensure the
-                 # resulting zipfile is deterministic. This determinism is based on the upload_md5sum,
-                 # which is derived from a list of image md5sums
-                 cls._write_imagebytes_in_zip(zipf, metadata, arcname=f"{idx}.jpg")
+                 # Arcname should be unique; the name itself does not matter
+                 arcname = f"{idx}.jpg"
+                 zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+                 zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
              assert len(sequence) == len(set(zipf.namelist()))
-             zipf.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
+             zipf.comment = json.dumps(
+                 {"sequence_md5sum": sequence_md5sum},
+                 sort_keys=True,
+                 separators=(",", ":"),
+             ).encode("utf-8")

-         return upload_md5sum
+         return sequence_md5sum

      @classmethod
-     def extract_upload_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+     def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
          with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
              comment = ziph.comment
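Writing every ZipInfo with the fixed DOS epoch timestamp (1980-01-01, the earliest date a zip entry can store) and serializing the comment with sort_keys plus compact separators keeps the archive bytes identical across runs, which is what makes the zip's md5-based session key stable for resuming. A standalone sketch of the same idea, with illustrative payloads:

    import hashlib
    import io
    import json
    import zipfile

    def build(payloads: list[bytes]) -> bytes:
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zipf:
            for idx, data in enumerate(payloads):
                # Fixed timestamp; otherwise each run embeds "now" and changes the bytes
                info = zipfile.ZipInfo(f"{idx}.jpg", date_time=(1980, 1, 1, 0, 0, 0))
                zipf.writestr(info, data)
            # Canonical JSON so key order cannot perturb the comment bytes
            zipf.comment = json.dumps(
                {"sequence_md5sum": "abc"}, sort_keys=True, separators=(",", ":")
            ).encode("utf-8")
        return buf.getvalue()

    a = hashlib.md5(build([b"x", b"y"])).hexdigest()
    b = hashlib.md5(build([b"x", b"y"])).hexdigest()
    assert a == b  # identical bytes, identical checksum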
@@ -209,162 +496,384 @@ class ZipImageSequence:
          except json.JSONDecodeError as ex:
              raise InvalidMapillaryZipFileError(str(ex)) from ex

-         upload_md5sum = zip_metadata.get("upload_md5sum")
+         sequence_md5sum = zip_metadata.get("sequence_md5sum")

-         if not upload_md5sum and not isinstance(upload_md5sum, str):
-             raise InvalidMapillaryZipFileError("No upload_md5sum found")
+         if not sequence_md5sum and not isinstance(sequence_md5sum, str):
+             raise InvalidMapillaryZipFileError("No sequence_md5sum found")

-         return upload_md5sum
+         return sequence_md5sum

      @classmethod
-     def _write_imagebytes_in_zip(
-         cls, zipf: zipfile.ZipFile, metadata: types.ImageMetadata, arcname: str
-     ):
+     @contextmanager
+     def _wip_file_context(cls, wip_path: Path):
          try:
-             edit = exif_write.ExifEdit(metadata.filename)
-         except struct.error as ex:
-             raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
-
-         # The cast is to fix the type checker error
-         edit.add_image_description(
-             T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
-         )
-
+             os.remove(wip_path)
+         except FileNotFoundError:
+             pass
          try:
-             image_bytes = edit.dump_image_bytes()
-         except struct.error as ex:
-             raise ExifError(
-                 f"Failed to dump EXIF bytes: {ex}", metadata.filename
-             ) from ex
-
-         zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-         zipf.writestr(zipinfo, image_bytes)
-
-     @classmethod
-     def prepare_zipfile_and_upload(
-         cls,
-         zip_path: Path,
-         uploader: Uploader,
-         progress: dict[str, T.Any] | None = None,
-     ) -> str:
-         if progress is None:
-             progress = {}
+             yield wip_path

-         with zipfile.ZipFile(zip_path) as ziph:
-             namelist = ziph.namelist()
-             if not namelist:
-                 raise InvalidMapillaryZipFileError("Zipfile has no files")
+             with wip_path.open("rb") as fp:
+                 upload_md5sum = utils.md5sum_fp(fp).hexdigest()

-         with zip_path.open("rb") as zip_fp:
-             upload_md5sum = cls.extract_upload_md5sum(zip_fp)
+             done_path = wip_path.parent.joinpath(
+                 _suffix_session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+             )

-         sequence_progress: SequenceProgress = {
-             "sequence_image_count": len(namelist),
-             "file_type": types.FileType.ZIP.value,
-             "md5sum": upload_md5sum,
-         }
+             try:
+                 os.remove(done_path)
+             except FileNotFoundError:
+                 pass
+             wip_path.rename(done_path)
+         finally:
+             try:
+                 os.remove(wip_path)
+             except FileNotFoundError:
+                 pass

-         session_key = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)

-         with zip_path.open("rb") as zip_fp:
-             return uploader.upload_stream(
-                 zip_fp,
-                 upload_api_v4.ClusterFileType.ZIP,
-                 session_key,
-                 # Send the copy of the input progress to each upload session, to avoid modifying the original one
-                 progress=T.cast(T.Dict[str, T.Any], {**progress, **sequence_progress}),
-             )
+ class ImageSequenceUploader:
+     def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
+         self.upload_options = upload_options
+         self.emitter = emitter

-     @classmethod
-     def prepare_images_and_upload(
-         cls,
-         image_metadatas: T.Sequence[types.ImageMetadata],
-         uploader: Uploader,
-         progress: dict[str, T.Any] | None = None,
+     def upload_images(
+         self, image_metadatas: T.Sequence[types.ImageMetadata]
      ) -> T.Generator[tuple[str, UploadResult], None, None]:
-         if progress is None:
-             progress = {}
-
          sequences = types.group_and_sort_images(image_metadatas)

          for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+             LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+             sequence_md5sum = types.update_sequence_md5sum(sequence)
+
              sequence_progress: SequenceProgress = {
                  "sequence_idx": sequence_idx,
                  "total_sequence_count": len(sequences),
                  "sequence_image_count": len(sequence),
                  "sequence_uuid": sequence_uuid,
                  "file_type": types.FileType.IMAGE.value,
+                 "sequence_md5sum": sequence_md5sum,
              }

              try:
-                 _validate_metadatas(sequence)
+                 cluster_id = self._upload_sequence_and_finish(
+                     sequence,
+                     sequence_progress=T.cast(dict[str, T.Any], sequence_progress),
+                 )
              except Exception as ex:
                  yield sequence_uuid, UploadResult(error=ex)
-                 continue
+             else:
+                 yield sequence_uuid, UploadResult(result=cluster_id)

-             with tempfile.NamedTemporaryFile() as fp:
-                 try:
-                     upload_md5sum = cls.zip_sequence_deterministically(sequence, fp)
-                 except Exception as ex:
-                     yield sequence_uuid, UploadResult(error=ex)
-                     continue
+     def _upload_sequence_and_finish(
+         self,
+         sequence: T.Sequence[types.ImageMetadata],
+         sequence_progress: dict[str, T.Any],
+     ) -> str:
+         _validate_metadatas(sequence)
+
+         sequence_progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+         self.emitter.emit("upload_start", sequence_progress)
+
+         try:
+             # Retries will be handled in the call (but no upload event emissions)
+             image_file_handles = self._upload_images_parallel(
+                 sequence, sequence_progress
+             )
+         except BaseException as ex:  # Include KeyboardInterrupt
+             self.emitter.emit("upload_failed", sequence_progress)
+             raise ex
+
+         manifest_file_handle = self._upload_manifest(image_file_handles)
+
+         self.emitter.emit("upload_end", sequence_progress)
+
+         uploader = Uploader(self.upload_options, emitter=self.emitter)
+         cluster_id = uploader.finish_upload(
+             manifest_file_handle,
+             api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+             progress=sequence_progress,
+         )
+
+         return cluster_id
+
+     def _upload_manifest(self, image_file_handles: T.Sequence[str]) -> str:
+         uploader = Uploader(self.upload_options)

-             sequence_progress["md5sum"] = upload_md5sum
+         manifest = {
+             "version": "1",
+             "upload_type": "images",
+             "image_handles": image_file_handles,
+         }
+
+         with io.BytesIO() as manifest_fp:
+             manifest_fp.write(
+                 json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                     "utf-8"
+                 )
+             )
+             manifest_fp.seek(0, io.SEEK_SET)
+             return uploader.upload_stream(
+                 manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+             )

-             session_key = _session_key(
-                 upload_md5sum, upload_api_v4.ClusterFileType.ZIP
-             )
+     def _upload_images_parallel(
+         self,
+         sequence: T.Sequence[types.ImageMetadata],
+         sequence_progress: dict[str, T.Any],
+     ) -> list[str]:
+         if not sequence:
+             return []
+
+         max_workers = min(self.upload_options.num_upload_workers, len(sequence))
+
+         # Lock is used to synchronize event emission
+         lock = threading.Lock()
+
+         # Push all images into the queue
+         image_queue: queue.Queue[tuple[int, types.ImageMetadata]] = queue.Queue()
+         for idx, image_metadata in enumerate(sequence):
+             image_queue.put((idx, image_metadata))
+
+         upload_interrupted = threading.Event()
+
+         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+             futures = [
+                 executor.submit(
+                     self._upload_images_from_queue,
+                     image_queue,
+                     lock,
+                     upload_interrupted,
+                     sequence_progress,
+                 )
+                 for _ in range(max_workers)
+             ]
+
+             indexed_image_file_handles = []
+
+             try:
+                 for future in futures:
+                     indexed_image_file_handles.extend(future.result())
+             except KeyboardInterrupt as ex:
+                 upload_interrupted.set()
+                 raise ex
+
+         # All tasks should be done here, so the below is more like an assertion
+         image_queue.join()
+         if sys.version_info >= (3, 13):
+             image_queue.shutdown()
+
+         file_handles: list[str] = []
+
+         indexed_image_file_handles.sort()
+
+         # Important to guarantee the order
+         assert len(indexed_image_file_handles) == len(sequence)
+         for expected_idx, (idx, file_handle) in enumerate(indexed_image_file_handles):
+             assert expected_idx == idx
+             file_handles.append(file_handle)

+         return file_handles
+
+     def _upload_images_from_queue(
+         self,
+         image_queue: queue.Queue[tuple[int, types.ImageMetadata]],
+         lock: threading.Lock,
+         upload_interrupted: threading.Event,
+         sequence_progress: dict[str, T.Any],
+     ) -> list[tuple[int, str]]:
+         indexed_file_handles = []
+
+         with api_v4.create_user_session(
+             self.upload_options.user_items["user_upload_token"]
+         ) as user_session:
+             single_image_uploader = SingleImageUploader(
+                 self.upload_options, user_session=user_session
+             )
+
+             while True:
+                 # All images were pushed into the queue upfront, so an empty queue means no more work
                  try:
-                     cluster_id = uploader.upload_stream(
-                         fp,
-                         upload_api_v4.ClusterFileType.ZIP,
-                         session_key,
-                         progress=T.cast(
-                             T.Dict[str, T.Any], {**progress, **sequence_progress}
-                         ),
-                     )
-                 except Exception as ex:
-                     yield sequence_uuid, UploadResult(error=ex)
-                     continue
+                     idx, image_metadata = image_queue.get_nowait()
+                 except queue.Empty:
+                     break
+
+                 # Main thread will handle the interruption
+                 if upload_interrupted.is_set():
+                     break
+
+                 # Create a new mutable progress to keep the sequence_progress immutable
+                 image_progress = {
+                     **sequence_progress,
+                     "import_path": str(image_metadata.filename),
+                 }
+
+                 # image_progress will be updated during uploading
+                 file_handle = single_image_uploader.upload(
+                     image_metadata, image_progress
+                 )

-                 yield sequence_uuid, UploadResult(result=cluster_id)
+                 # Update chunk_size (it was constant if set)
+                 image_progress["chunk_size"] = image_metadata.filesize
+
+                 # Main thread will handle the interruption
+                 if upload_interrupted.is_set():
+                     break
+
+                 with lock:
+                     self.emitter.emit("upload_progress", image_progress)
+
+                 indexed_file_handles.append((idx, file_handle))
+
+                 image_queue.task_done()
+
+         return indexed_file_handles
+
+
+ class SingleImageUploader:
+     def __init__(
+         self,
+         upload_options: UploadOptions,
+         user_session: requests.Session | None = None,
+     ):
+         self.upload_options = upload_options
+         self.user_session = user_session
+         self.cache = self._maybe_create_persistent_cache_instance(
+             self.upload_options.user_items, upload_options
+         )
+
+     def upload(
+         self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+     ) -> str:
+         image_bytes = self.dump_image_bytes(image_metadata)
+
+         uploader = Uploader(self.upload_options, user_session=self.user_session)
+
+         session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
+
+         file_handle = self._get_cached_file_handle(session_key)
+
+         if file_handle is None:
+             # image_progress will be updated during uploading
+             file_handle = uploader.upload_stream(
+                 io.BytesIO(image_bytes),
+                 session_key=session_key,
+                 progress=image_progress,
+             )
+             self._set_file_handle_cache(session_key, file_handle)
+
+         return file_handle
+
+     @classmethod
+     def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+         try:
+             edit = exif_write.ExifEdit(metadata.filename)
+         except struct.error as ex:
+             raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+         # The cast is to fix the type checker error
+         edit.add_image_description(
+             T.cast(
+                 T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+             )
+         )
+
+         try:
+             return edit.dump_image_bytes()
+         except struct.error as ex:
+             raise ExifError(
+                 f"Failed to dump EXIF bytes: {ex}", metadata.filename
+             ) from ex
+
+     @classmethod
+     def _maybe_create_persistent_cache_instance(
+         cls, user_items: config.UserItem, upload_options: UploadOptions
+     ) -> history.PersistentCache | None:
+         if not constants.UPLOAD_CACHE_DIR:
+             LOG.debug(
+                 "Upload cache directory is set empty, skipping caching upload file handles"
+             )
+             return None
+
+         if upload_options.dry_run:
+             LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+             return None
+
+         cache_path_dir = (
+             Path(constants.UPLOAD_CACHE_DIR)
+             .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+             .joinpath(
+                 user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+             )
+         )
+         cache_path_dir.mkdir(parents=True, exist_ok=True)
+         cache_path = cache_path_dir.joinpath("cached_file_handles")
+
+         # Sanitize sensitive segments for logging
+         sanitized_cache_path = (
+             Path(constants.UPLOAD_CACHE_DIR)
+             .joinpath("***")
+             .joinpath("***")
+             .joinpath("cached_file_handles")
+         )
+         LOG.debug(f"File handle cache path: {sanitized_cache_path}")
+
+         cache = history.PersistentCache(str(cache_path.resolve()))
+         cache.clear_expired()
+
+         return cache
+
+     def _get_cached_file_handle(self, key: str) -> str | None:
+         if self.cache is None:
+             return None
+
+         if _is_uuid(key):
+             return None
+
+         return self.cache.get(key)
+
+     def _set_file_handle_cache(self, key: str, value: str) -> None:
+         if self.cache is None:
+             return
+
+         if _is_uuid(key):
+             return
+
+         self.cache.set(key, value)


  class Uploader:
      def __init__(
          self,
-         user_items: types.UserItem,
+         upload_options: UploadOptions,
+         user_session: requests.Session | None = None,
          emitter: EventEmitter | None = None,
-         chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-         dry_run=False,
      ):
-         self.user_items = user_items
+         self.upload_options = upload_options
+         self.user_session = user_session
          if emitter is None:
              # An empty event emitter that does nothing
              self.emitter = EventEmitter()
          else:
              self.emitter = emitter
-         self.chunk_size = chunk_size
-         self.dry_run = dry_run

      def upload_stream(
          self,
          fp: T.IO[bytes],
-         cluster_filetype: upload_api_v4.ClusterFileType,
-         session_key: str,
+         session_key: str | None = None,
          progress: dict[str, T.Any] | None = None,
      ) -> str:
          if progress is None:
              progress = {}

+         if session_key is None:
+             session_key = self._gen_session_key(fp, progress)
+
          fp.seek(0, io.SEEK_END)
          entity_size = fp.tell()

-         upload_service = self._create_upload_service(session_key, cluster_filetype)
-
          progress["entity_size"] = entity_size
-         progress["chunk_size"] = self.chunk_size
+         progress["chunk_size"] = self.upload_options.chunk_size
          progress["retries"] = 0
          progress["begin_offset"] = None
@@ -372,10 +881,24 @@ class Uploader:

          while True:
              try:
-                 file_handle = self._upload_stream_retryable(
-                     upload_service, fp, T.cast(UploaderProgress, progress)
-                 )
-             except Exception as ex:
+                 if self.user_session is not None:
+                     file_handle = self._upload_stream_retryable(
+                         self.user_session,
+                         fp,
+                         session_key,
+                         T.cast(UploaderProgress, progress),
+                     )
+                 else:
+                     with api_v4.create_user_session(
+                         self.upload_options.user_items["user_upload_token"]
+                     ) as user_session:
+                         file_handle = self._upload_stream_retryable(
+                             user_session,
+                             fp,
+                             session_key,
+                             T.cast(UploaderProgress, progress),
+                         )
+             except BaseException as ex:  # Include KeyboardInterrupt
                  self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
              else:
                  break
@@ -384,75 +907,114 @@ class Uploader:

          self.emitter.emit("upload_end", progress)

-         # TODO: retry here
-         cluster_id = self._finish_upload_retryable(upload_service, file_handle)
-         progress["cluster_id"] = cluster_id
+         return file_handle
+
+     def finish_upload(
+         self,
+         file_handle: str,
+         cluster_filetype: api_v4.ClusterFileType,
+         progress: dict[str, T.Any] | None = None,
+     ) -> str:
+         """Finish upload with safe retries guaranteed"""
+         if progress is None:
+             progress = {}
+
+         if self.upload_options.dry_run or self.upload_options.nofinish:
+             cluster_id = "0"
+         else:
+             organization_id = self.upload_options.user_items.get("MAPOrganizationKey")
+
+             with api_v4.create_user_session(
+                 self.upload_options.user_items["user_upload_token"]
+             ) as user_session:
+                 resp = api_v4.finish_upload(
+                     user_session,
+                     file_handle,
+                     cluster_filetype,
+                     organization_id=organization_id,
+                 )

+             body = api_v4.jsonify_response(resp)
+             # TODO: Validate cluster_id
+             cluster_id = body.get("cluster_id")
+
+         progress["cluster_id"] = cluster_id
          self.emitter.emit("upload_finished", progress)

          return cluster_id

      def _create_upload_service(
-         self, session_key: str, cluster_filetype: upload_api_v4.ClusterFileType
+         self, user_session: requests.Session, session_key: str
      ) -> upload_api_v4.UploadService:
          upload_service: upload_api_v4.UploadService

-         if self.dry_run:
+         if self.upload_options.dry_run:
+             upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
              upload_service = upload_api_v4.FakeUploadService(
-                 user_access_token=self.user_items["user_upload_token"],
-                 session_key=session_key,
-                 cluster_filetype=cluster_filetype,
+                 user_session,
+                 session_key,
+                 upload_path=Path(upload_path) if upload_path is not None else None,
              )
-         else:
-             upload_service = upload_api_v4.UploadService(
-                 user_access_token=self.user_items["user_upload_token"],
-                 session_key=session_key,
-                 cluster_filetype=cluster_filetype,
+             LOG.info(
+                 "Dry-run mode enabled, uploading to %s",
+                 upload_service.upload_path.joinpath(session_key),
              )
+         else:
+             upload_service = upload_api_v4.UploadService(user_session, session_key)

          return upload_service

      def _handle_upload_exception(
-         self, ex: Exception, progress: UploaderProgress
+         self, ex: BaseException, progress: UploaderProgress
      ) -> None:
-         retries = progress["retries"]
+         retries = progress.get("retries", 0)
          begin_offset = progress.get("begin_offset")
-         chunk_size = progress["chunk_size"]
+         offset = progress.get("offset")

          if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-             self.emitter.emit("upload_interrupted", progress)
+             self.emitter.emit("upload_retrying", progress)
+
              LOG.warning(
-                 # use %s instead of %d because offset could be None
-                 "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                 chunk_size,
-                 begin_offset,
-                 ex.__class__.__name__,
-                 str(ex),
+                 f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
              )
+
              # Keep things immutable here. Will increment retries in the caller
              retries += 1
-             if _is_immediate_retry(ex):
+             if _is_immediate_retriable_exception(ex):
                  sleep_for = 0
              else:
                  sleep_for = min(2**retries, 16)
              LOG.info(
-                 "Retrying in %d seconds (%d/%d)",
-                 sleep_for,
-                 retries,
-                 constants.MAX_UPLOAD_RETRIES,
+                 f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
              )
              if sleep_for:
                  time.sleep(sleep_for)
          else:
+             self.emitter.emit("upload_failed", progress)
              raise ex

+     @classmethod
+     def _upload_name(cls, progress: UploaderProgress):
+         # Strictly speaking these sequence properties should not be exposed in this context
+         # TODO: Maybe move these logging statements to event handlers
+         sequence_uuid: str | None = T.cast(
+             T.Union[str, None], progress.get("sequence_uuid")
+         )
+         import_path = T.cast(T.Union[str, None], progress.get("import_path"))
+         if sequence_uuid is not None:
+             if import_path is None:
+                 name: str = f"sequence_{sequence_uuid}"
+             else:
+                 name = f"sequence_{sequence_uuid}/{Path(import_path).name}"
+         else:
+             name = Path(import_path or "unknown").name
+         return name
+
      def _chunk_with_progress_emitted(
-         self,
-         stream: T.IO[bytes],
-         progress: UploaderProgress,
+         self, stream: T.IO[bytes], progress: UploaderProgress
      ) -> T.Generator[bytes, None, None]:
          for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
-             stream, self.chunk_size
+             stream, self.upload_options.chunk_size
          ):
              yield chunk
1020
 
@@ -465,11 +1027,21 @@ class Uploader:

      def _upload_stream_retryable(
          self,
-         upload_service: upload_api_v4.UploadService,
+         user_session: requests.Session,
          fp: T.IO[bytes],
-         progress: UploaderProgress,
+         session_key: str,
+         progress: UploaderProgress | None = None,
      ) -> str:
          """Upload the stream with safe retries guaranteed"""
+         if progress is None:
+             progress = T.cast(UploaderProgress, {})
+
+         upload_service = self._create_upload_service(user_session, session_key)
+
+         if "entity_size" not in progress:
+             fp.seek(0, io.SEEK_END)
+             entity_size = fp.tell()
+             progress["entity_size"] = entity_size

          begin_offset = upload_service.fetch_offset()
@@ -478,64 +1050,49 @@ class Uploader:

          self.emitter.emit("upload_fetch_offset", progress)

-         fp.seek(begin_offset, io.SEEK_SET)
+         # Estimate the read timeout
+         if not constants.MIN_UPLOAD_SPEED:
+             read_timeout = None
+         else:
+             remaining_bytes = abs(progress["entity_size"] - begin_offset)
+             read_timeout = max(
+                 api_v4.REQUESTS_TIMEOUT,
+                 remaining_bytes / constants.MIN_UPLOAD_SPEED,
+             )

+         # Upload from begin_offset
+         fp.seek(begin_offset, io.SEEK_SET)
          shifted_chunks = self._chunk_with_progress_emitted(fp, progress)

-         return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)
-
-     def _finish_upload_retryable(
-         self, upload_service: upload_api_v4.UploadService, file_handle: str
-     ) -> str:
-         """Finish upload with safe retries guraranteed"""
+         # Start uploading
+         return upload_service.upload_shifted_chunks(
+             shifted_chunks, begin_offset, read_timeout=read_timeout
+         )

-         if self.dry_run:
-             cluster_id = "0"
+     def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+         if self.upload_options.noresume:
+             # Generate a unique UUID for session_key when noresume is True
+             # to prevent resuming from previous uploads
+             session_key = f"{_prefixed_uuid4()}"
          else:
-             resp = api_v4.finish_upload(
-                 self.user_items["user_upload_token"],
-                 file_handle,
-                 upload_service.cluster_filetype,
-                 organization_id=self.user_items.get("MAPOrganizationKey"),
-             )
+             fp.seek(0, io.SEEK_SET)
+             session_key = utils.md5sum_fp(fp).hexdigest()

-             data = resp.json()
-             cluster_id = data.get("cluster_id")
+         filetype = progress.get("file_type")
+         if filetype is not None:
+             session_key = _suffix_session_key(session_key, types.FileType(filetype))

-             # TODO: validate cluster_id
-
-             return cluster_id
+         return session_key


  def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
      for metadata in metadatas:
-         types.validate_image_desc(types.as_desc(metadata))
+         validate_image_desc(DescriptionJSONSerializer.as_desc(metadata))
          if not metadata.filename.is_file():
              raise FileNotFoundError(f"No such file {metadata.filename}")


- @contextmanager
- def wip_file_context(wip_path: Path, done_path: Path):
-     assert wip_path != done_path, "should not be the same file"
-     try:
-         os.remove(wip_path)
-     except FileNotFoundError:
-         pass
-     try:
-         yield wip_path
-         try:
-             os.remove(done_path)
-         except FileNotFoundError:
-             pass
-         wip_path.rename(done_path)
-     finally:
-         try:
-             os.remove(wip_path)
-         except FileNotFoundError:
-             pass
-
-
- def _is_immediate_retry(ex: Exception):
+ def _is_immediate_retriable_exception(ex: BaseException) -> bool:
      if (
          isinstance(ex, requests.HTTPError)
          and isinstance(ex.response, requests.Response)
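The read timeout introduced above scales with the bytes still to send: the remaining bytes divided by the configured floor speed MIN_UPLOAD_SPEED (bytes per second), never dropping below the base api_v4.REQUESTS_TIMEOUT. A worked sketch with illustrative values, not the package defaults:

    REQUESTS_TIMEOUT = 60             # seconds, illustrative
    MIN_UPLOAD_SPEED = 50 * 1024      # bytes/second, illustrative

    entity_size = 200 * 1024 * 1024   # 200 MiB upload
    begin_offset = 150 * 1024 * 1024  # 150 MiB already accepted by the server

    remaining_bytes = abs(entity_size - begin_offset)
    read_timeout = max(REQUESTS_TIMEOUT, remaining_bytes / MIN_UPLOAD_SPEED)
    print(read_timeout)  # 1024.0 seconds: 50 MiB at a floor of 50 KiB/s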
@@ -548,8 +1105,10 @@
          # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
          return resp.get("debug_info", {}).get("retriable", False)

+     return False
+

- def _is_retriable_exception(ex: Exception):
+ def _is_retriable_exception(ex: BaseException) -> bool:
      if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
          return True

@@ -568,14 +1127,36 @@
      return False


- _SUFFIX_MAP: dict[upload_api_v4.ClusterFileType, str] = {
-     upload_api_v4.ClusterFileType.ZIP: ".zip",
-     upload_api_v4.ClusterFileType.CAMM: ".mp4",
-     upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
+ _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+     api_v4.ClusterFileType.ZIP: ".zip",
+     api_v4.ClusterFileType.CAMM: ".mp4",
+     api_v4.ClusterFileType.BLACKVUE: ".mp4",
+     types.FileType.IMAGE: ".jpg",
+     types.FileType.ZIP: ".zip",
+     types.FileType.BLACKVUE: ".mp4",
+     types.FileType.CAMM: ".mp4",
+     types.FileType.GOPRO: ".mp4",
+     types.FileType.VIDEO: ".mp4",
  }


- def _session_key(
-     upload_md5sum: str, cluster_filetype: upload_api_v4.ClusterFileType
+ def _suffix_session_key(
+     key: str, filetype: api_v4.ClusterFileType | types.FileType
  ) -> str:
-     return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[cluster_filetype]}"
+     is_uuid_before = _is_uuid(key)
+
+     key = f"mly_tools_{key}{_SUFFIX_MAP[filetype]}"
+
+     assert _is_uuid(key) is is_uuid_before
+
+     return key
+
+
+ def _prefixed_uuid4():
+     prefixed = f"uuid_{uuid.uuid4().hex}"
+     assert _is_uuid(prefixed)
+     return prefixed
+
+
+ def _is_uuid(key: str) -> bool:
+     return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")
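These session-key helpers are what make resume and caching work together: by default a key is the md5 of the upload content plus a filetype suffix, so re-running the same upload lands in the same session, while noresume substitutes a uuid_-prefixed key that _is_uuid lets the cache layer skip. A sketch of the two shapes, using the module's private helpers with an illustrative md5:

    print(_suffix_session_key("0" * 32, types.FileType.ZIP))
    # mly_tools_00000000000000000000000000000000.zip  (content-addressed, resumable)

    key = _prefixed_uuid4()           # uuid_<uuid4 hex>, fresh every run
    assert _is_uuid(key)              # never cached, never resumed
    assert not _is_uuid("mly_tools_" + "0" * 32 + ".zip")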