mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +287 -22
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +17 -8
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +408 -416
  61. mapillary_tools/upload_api_v4.py +172 -174
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
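
Note: the bulk of this release is the rewrite of mapillary_tools/uploader.py (+804 -284), whose full diff follows. The callback-driven Uploader constructor is replaced by a frozen UploadOptions dataclass plus dedicated VideoUploader, ZipUploader, ImageSequenceUploader, and SingleImageUploader classes that yield per-item UploadResult values instead of aborting the whole batch on the first error. Below is a minimal usage sketch against the new API, inferred only from the signatures visible in this diff; the token and zip path are hypothetical placeholders, and the exact shape of config.UserItem is not shown here:

import typing as T
from pathlib import Path

from mapillary_tools import uploader

# Hypothetical credentials; real tokens come from `mapillary_tools authenticate`
user_items = {"user_upload_token": "MLY|0000|fake"}

options = uploader.UploadOptions(user_items=user_items, dry_run=True)
emitter = uploader.EventEmitter()

# EventEmitter.on() now returns the callback (added in this release),
# so it can be stacked as a decorator
@emitter.on("upload_progress")
def log_progress(progress: T.Dict[str, T.Any]) -> None:
    print(f"{progress.get('offset', 0)}/{progress.get('entity_size', 0)} bytes")

mly_uploader = uploader.Uploader(options, emitter=emitter)
results = uploader.ZipUploader.upload_zipfiles(mly_uploader, [Path("seq.zip")])
for zip_path, result in results:
    if result.error is not None:
        print(f"{zip_path}: failed: {result.error}")
    else:
        print(f"{zip_path}: cluster {result.result}")
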
@@ -1,8 +1,16 @@
+from __future__ import annotations
+
+import concurrent.futures
+
+import dataclasses
 import io
 import json
 import logging
 import os
+import struct
+import sys
 import tempfile
+import threading
 import time
 import typing as T
 import uuid
@@ -10,33 +18,96 @@ import zipfile
 from contextlib import contextmanager
 from pathlib import Path

-import jsonschema
+if sys.version_info >= (3, 11):
+    from typing import Required
+else:
+    from typing_extensions import Required
+
 import requests

-from . import constants, exif_write, types, upload_api_v4, utils
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    history,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
+from .serializer.description import (
+    desc_file_to_exif,
+    DescriptionJSONSerializer,
+    validate_image_desc,
+)


 LOG = logging.getLogger(__name__)


-class Progress(T.TypedDict, total=False):
-    # The size of the chunk, in bytes, that has been uploaded in the last request
+@dataclasses.dataclass(frozen=True)
+class UploadOptions:
+    user_items: config.UserItem
+    chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+    dry_run: bool = False
+    nofinish: bool = False
+    noresume: bool = False
+
+
+class UploaderProgress(T.TypedDict, total=True):
+    """
+    Progress data that Uploader cares about.
+    """
+
+    # The size, in bytes, of the last chunk that has been read and uploaded
     chunk_size: int

-    # File type
-    file_type: str
+    # The initial offset returned by the upload service, which is also the offset
+    # the uploader starts uploading from.
+    # Assert:
+    # - 0 <= begin_offset <= offset <= entity_size
+    # - non-None after at least one successful "upload_fetch_offset"
+    begin_offset: int | None

-    # How many bytes has been uploaded so far since "upload_start"
+    # How many bytes of the file have been uploaded so far
     offset: int

-    # Size in bytes of the zipfile/BlackVue/CAMM
+    # Size in bytes of the file (i.e. fp.tell() after seeking to the end)
+    # NOTE: it can differ from the filesize in the file system
+    # Assert:
+    # - offset == entity_size at "upload_end" or "upload_finished"
     entity_size: int

+    # An "upload_interrupted" will increase it. Reset to 0 once a chunk is uploaded
+    retries: int
+
+    # Cluster ID after finishing the upload
+    cluster_id: str
+
+
+class SequenceProgress(T.TypedDict, total=False):
+    """Progress data at the sequence level"""
+
+    # Used to check if it is uploaded or not
+    sequence_md5sum: Required[str]
+
+    # Used to resume from the previous upload,
+    # so it has to be a unique identifier (hash) of the upload content
+    upload_md5sum: str
+
+    # File type
+    file_type: Required[str]
+
     # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
-    total_sequence_count: int
+    total_sequence_count: Required[int]

     # 0-based nth sequence. It is always 0 when uploading Zipfile/BlackVue/CAMM
-    sequence_idx: int
+    sequence_idx: Required[int]

     # How many images in the sequence. It's available only when uploading directories/Zipfiles
     sequence_image_count: int
@@ -44,20 +115,31 @@ class Progress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str

-    # An "upload_interrupted" will increase it. Reset to 0 if the chunk is uploaded
-    retries: int
-
-    # md5sum of the zipfile/BlackVue/CAMM in uploading
-    md5sum: str
-
     # Path to the Zipfile/BlackVue/CAMM
     import_path: str

-    # Cluster ID after finishing the upload
-    cluster_id: str

+class Progress(SequenceProgress, UploaderProgress):
+    pass
+
+
+class SequenceError(Exception):
+    """
+    Base class for sequence specific errors. These errors will cause the
+    current sequence upload to fail but will not interrupt the overall upload
+    process for other sequences.
+    """

-class UploadCancelled(Exception):
+    pass
+
+
+class ExifError(SequenceError):
+    def __init__(self, message: str, image_path: Path):
+        super().__init__(message)
+        self.image_path = image_path
+
+
+class InvalidMapillaryZipFileError(SequenceError):
     pass


@@ -65,14 +147,15 @@ EventName = T.Literal[
     "upload_start",
     "upload_fetch_offset",
     "upload_progress",
+    "upload_interrupted",
     "upload_end",
+    "upload_failed",
     "upload_finished",
-    "upload_interrupted",
 ]


 class EventEmitter:
-    events: T.Dict[EventName, T.List]
+    events: dict[EventName, list]

     def __init__(self):
         self.events = {}
@@ -80,6 +163,7 @@ class EventEmitter:
     def on(self, event: EventName):
         def _wrap(callback):
             self.events.setdefault(event, []).append(callback)
+            return callback

         return _wrap

@@ -88,237 +172,731 @@ class EventEmitter:
             callback(*args, **kwargs)


-class Uploader:
-    def __init__(
-        self,
-        user_items: types.UserItem,
-        emitter: T.Optional[EventEmitter] = None,
-        chunk_size: int = upload_api_v4.DEFAULT_CHUNK_SIZE,
-        dry_run=False,
-    ):
-        jsonschema.validate(instance=user_items, schema=types.UserItemSchema)
-        self.user_items = user_items
-        self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
+@dataclasses.dataclass
+class UploadResult:
+    result: str | None = None
+    error: Exception | None = None

-    def upload_zipfile(
-        self,
+
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        # If uploaded in a random order, interrupted uploads have a higher chance to expire.
+        # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+        sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+        for idx, video_metadata in enumerate(sorted_video_metadatas):
+            LOG.debug(f"Checksum for video {video_metadata.filename}...")
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                    if gopro_info is not None:
+                        camm_info.accl = gopro_info.accl or []
+                        camm_info.gyro = gopro_info.gyro or []
+                        camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        # If uploaded in a random order, interrupted uploads have a higher chance to expire.
+        # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+        sorted_zip_paths = sorted(zip_paths)
+
+        for idx, zip_path in enumerate(sorted_zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder; will be set in _upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
+    @classmethod
+    def zip_images(
+        cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
+    ) -> None:
+        """
+        Group images into sequences and zip each sequence into a zipfile.
+        """
+        sequences = types.group_and_sort_images(metadatas)
+        os.makedirs(zip_dir, exist_ok=True)
+
+        for sequence_uuid, sequence in sequences.items():
+            _validate_metadatas(sequence)
+            # For atomicity we write into a WIP file and then rename to the final file
+            wip_zip_filename = zip_dir.joinpath(
+                f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
+            )
+            with cls._wip_file_context(wip_zip_filename) as wip_path:
+                with wip_path.open("wb") as wip_fp:
+                    cls._zip_sequence_fp(sequence, wip_fp)
+
+    @classmethod
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
+
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
+
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue

+            yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
         zip_path: Path,
-        event_payload: T.Optional[Progress] = None,
-    ) -> T.Optional[str]:
-        if event_payload is None:
-            event_payload = {}
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}

         with zipfile.ZipFile(zip_path) as ziph:
             namelist = ziph.namelist()
             if not namelist:
-                LOG.warning("Skipping empty zipfile: %s", zip_path)
-                return None
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)

-        final_event_payload: Progress = {
-            **event_payload,  # type: ignore
+        # Send a copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
             "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
         }

-        with zip_path.open("rb") as fp:
-            upload_md5sum = _extract_upload_md5sum(fp)
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+            )
+
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
+
+    @classmethod
+    def _zip_sequence_fp(
+        cls,
+        sequence: T.Sequence[types.ImageMetadata],
+        zip_fp: T.IO[bytes],
+    ) -> str:
+        """
+        Write a sequence of ImageMetadata into the zipfile handle.
+        The input must be a single sequence, already sorted.
+        """
+
+        sequence_groups = types.group_and_sort_images(sequence)
+        assert len(sequence_groups) == 1, (
+            f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
+        )
+
+        if sequence:
+            LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
+        sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+        with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
+            for idx, metadata in enumerate(sequence):
+                # Arcnames should be unique; the actual name does not matter
+                arcname = f"{idx}.jpg"
+                zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+                zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
+            assert len(sequence) == len(set(zipf.namelist()))
+            zipf.comment = json.dumps(
+                {"sequence_md5sum": sequence_md5sum},
+                sort_keys=True,
+                separators=(",", ":"),
+            ).encode("utf-8")
+
+        return sequence_md5sum
+
+    @classmethod
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+        with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
+            comment = ziph.comment
+
+        if not comment:
+            raise InvalidMapillaryZipFileError("No comment found in the zipfile")
+
+        try:
+            decoded = comment.decode("utf-8")
+            zip_metadata = json.loads(decoded)
+        except UnicodeDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+        except json.JSONDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+
+        sequence_md5sum = zip_metadata.get("sequence_md5sum")
+
+        if not sequence_md5sum or not isinstance(sequence_md5sum, str):
+            raise InvalidMapillaryZipFileError("No sequence_md5sum found")
+
+        return sequence_md5sum

-        if upload_md5sum is None:
-            with zip_path.open("rb") as fp:
+    @classmethod
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
+        try:
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
+        try:
+            yield wip_path
+
+            with wip_path.open("rb") as fp:
                 upload_md5sum = utils.md5sum_fp(fp).hexdigest()

-        with zip_path.open("rb") as fp:
-            return self.upload_stream(
-                fp,
-                upload_api_v4.ClusterFileType.ZIP,
-                upload_md5sum,
-                event_payload=final_event_payload,
+            done_path = wip_path.parent.joinpath(
+                _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
             )

-    def upload_images(
-        self,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        event_payload: T.Optional[Progress] = None,
-    ) -> T.Dict[str, str]:
-        if event_payload is None:
-            event_payload = {}
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
+

-        _validate_metadatas(image_metadatas)
+class ImageSequenceUploader:
+    @classmethod
+    def upload_images(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
         sequences = types.group_and_sort_images(image_metadatas)
-        ret: T.Dict[str, str] = {}
+
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-            final_event_payload: Progress = {
-                **event_payload,  # type: ignore
+            LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+            sequence_progress: SequenceProgress = {
                 "sequence_idx": sequence_idx,
                 "total_sequence_count": len(sequences),
                 "sequence_image_count": len(sequence),
                 "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
             }
-            for metadata in sequence:
-                metadata.update_md5sum()
-            upload_md5sum = types.sequence_md5sum(sequence)
-            with tempfile.NamedTemporaryFile() as fp:
-                _zip_sequence_fp(sequence, fp, upload_md5sum)
-                cluster_id = self.upload_stream(
-                    fp,
-                    upload_api_v4.ClusterFileType.ZIP,
-                    upload_md5sum,
-                    final_event_payload,
+
+            try:
+                cluster_id = cls._upload_sequence(
+                    uploader,
+                    sequence,
+                    progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_sequence(
+        cls,
+        uploader: Uploader,
+        sequence: T.Sequence[types.ImageMetadata],
+        progress: dict[str, T.Any],
+    ) -> str:
+        _validate_metadatas(sequence)
+
+        progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+        uploader.emitter.emit("upload_start", progress)
+
+        single_image_uploader = SingleImageUploader(uploader, progress=progress)
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
+        ) as executor:
+            image_file_handles = list(
+                executor.map(single_image_uploader.upload, sequence)
+            )
+
+        manifest_file_handle = cls._upload_manifest(uploader, image_file_handles)
+
+        uploader.emitter.emit("upload_end", progress)
+
+        cluster_id = uploader.finish_upload(
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=progress,
+        )
+
+        return cluster_id
+
+    @classmethod
+    def _upload_manifest(
+        cls, uploader: Uploader, image_file_handles: T.Sequence[str]
+    ) -> str:
+        uploader_without_emitter = Uploader(uploader.upload_options)
+
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
+
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
                 )
-                if cluster_id is not None:
-                    ret[sequence_uuid] = cluster_id
-        return ret
+            )
+            manifest_fp.seek(0, io.SEEK_SET)
+            return uploader_without_emitter.upload_stream(
+                manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+            )
+
+
+class SingleImageUploader:
+    def __init__(
+        self,
+        uploader: Uploader,
+        progress: dict[str, T.Any] | None = None,
+    ):
+        self.uploader = uploader
+        self.progress = progress or {}
+        self.lock = threading.Lock()
+        self.cache = self._maybe_create_persistent_cache_instance(
+            uploader.upload_options.user_items
+        )
+
+    def upload(self, image_metadata: types.ImageMetadata) -> str:
+        mutable_progress = {
+            **(self.progress or {}),
+            "filename": str(image_metadata.filename),
+        }
+
+        image_bytes = self.dump_image_bytes(image_metadata)
+
+        uploader_without_emitter = Uploader(self.uploader.upload_options)
+
+        session_key = uploader_without_emitter._gen_session_key(
+            io.BytesIO(image_bytes), mutable_progress
+        )
+
+        file_handle = self._file_handle_cache_get(session_key)
+
+        if file_handle is None:
+            file_handle = uploader_without_emitter.upload_stream(
+                io.BytesIO(image_bytes),
+                session_key=session_key,
+                progress=mutable_progress,
+            )
+            self._file_handle_cache_set(session_key, file_handle)
+
+        # Override chunk_size with the actual filesize
+        mutable_progress["chunk_size"] = image_metadata.filesize
+
+        with self.lock:
+            self.uploader.emitter.emit("upload_progress", mutable_progress)
+
+        return file_handle
+
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
+        )
+
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
+
+    @classmethod
+    def _maybe_create_persistent_cache_instance(
+        cls, user_items: config.UserItem
+    ) -> history.PersistentCache | None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
+
+        cache_path_dir = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+            .joinpath(
+                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+            )
+        )
+        cache_path_dir.mkdir(parents=True, exist_ok=True)
+        cache_path = cache_path_dir.joinpath("cached_file_handles")
+        LOG.debug(f"File handle cache path: {cache_path}")
+
+        cache = history.PersistentCache(str(cache_path.resolve()))
+        cache.clear_expired()
+
+        return cache
+
+    def _file_handle_cache_get(self, key: str) -> str | None:
+        if self.cache is None:
+            return None
+
+        if _is_uuid(key):
+            return None
+
+        return self.cache.get(key)
+
+    def _file_handle_cache_set(self, key: str, value: str) -> None:
+        if self.cache is None:
+            return
+
+        if _is_uuid(key):
+            return
+
+        self.cache.set(key, value)
+
+
+class Uploader:
+    def __init__(
+        self, upload_options: UploadOptions, emitter: EventEmitter | None = None
+    ):
+        self.upload_options = upload_options
+        if emitter is None:
+            # An empty event emitter that does nothing
+            self.emitter = EventEmitter()
+        else:
+            self.emitter = emitter

     def upload_stream(
         self,
         fp: T.IO[bytes],
-        cluster_filetype: upload_api_v4.ClusterFileType,
-        upload_md5sum: str,
-        event_payload: T.Optional[Progress] = None,
-    ) -> T.Optional[str]:
-        if event_payload is None:
-            event_payload = {}
+        session_key: str | None = None,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        if session_key is None:
+            session_key = self._gen_session_key(fp, progress)

         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()

-        SUFFIX_MAP: T.Dict[upload_api_v4.ClusterFileType, str] = {
-            upload_api_v4.ClusterFileType.ZIP: ".zip",
-            upload_api_v4.ClusterFileType.CAMM: ".mp4",
-            upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
-        }
-        session_key = f"mly_tools_{upload_md5sum}{SUFFIX_MAP[cluster_filetype]}"
-
-        if self.dry_run:
-            upload_service: upload_api_v4.UploadService = (
-                upload_api_v4.FakeUploadService(
-                    user_access_token=self.user_items["user_upload_token"],
-                    session_key=session_key,
-                    organization_id=self.user_items.get("MAPOrganizationKey"),
-                    cluster_filetype=cluster_filetype,
-                    chunk_size=self.chunk_size,
+        progress["entity_size"] = entity_size
+        progress["chunk_size"] = self.upload_options.chunk_size
+        progress["retries"] = 0
+        progress["begin_offset"] = None
+
+        self.emitter.emit("upload_start", progress)
+
+        upload_service = self._create_upload_service(session_key)
+
+        while True:
+            try:
+                file_handle = self._upload_stream_retryable(
+                    upload_service, fp, T.cast(UploaderProgress, progress)
                 )
+            except Exception as ex:
+                self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
+            except BaseException as ex:
+                self.emitter.emit("upload_failed", progress)
+                raise ex
+            else:
+                break
+
+            progress["retries"] += 1
+
+        self.emitter.emit("upload_end", progress)
+
+        return file_handle
+
+    def finish_upload(
+        self,
+        file_handle: str,
+        cluster_filetype: api_v4.ClusterFileType,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        """Finish the upload with safe retries guaranteed"""
+        if progress is None:
+            progress = {}
+
+        if self.upload_options.dry_run or self.upload_options.nofinish:
+            cluster_id = "0"
+        else:
+            resp = api_v4.finish_upload(
+                self.upload_options.user_items["user_upload_token"],
+                file_handle,
+                cluster_filetype,
+                organization_id=self.upload_options.user_items.get(
+                    "MAPOrganizationKey"
+                ),
+            )
+
+            body = api_v4.jsonify_response(resp)
+            # TODO: Validate cluster_id
+            cluster_id = body.get("cluster_id")
+
+        progress["cluster_id"] = cluster_id
+        self.emitter.emit("upload_finished", progress)
+
+        return cluster_id
+
+    def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
+        upload_service: upload_api_v4.UploadService
+
+        if self.upload_options.dry_run:
+            upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
+            upload_service = upload_api_v4.FakeUploadService(
+                user_access_token=self.upload_options.user_items["user_upload_token"],
+                session_key=session_key,
+                upload_path=Path(upload_path) if upload_path is not None else None,
+            )
+            LOG.info(
+                "Dry run mode enabled. Data will be uploaded to %s",
+                upload_service.upload_path.joinpath(session_key),
             )
         else:
             upload_service = upload_api_v4.UploadService(
-                user_access_token=self.user_items["user_upload_token"],
+                user_access_token=self.upload_options.user_items["user_upload_token"],
                 session_key=session_key,
-                organization_id=self.user_items.get("MAPOrganizationKey"),
-                cluster_filetype=cluster_filetype,
-                chunk_size=self.chunk_size,
             )

-        final_event_payload: Progress = {
-            **event_payload,  # type: ignore
-            "entity_size": entity_size,
-            "md5sum": upload_md5sum,
-        }
+        return upload_service

-        try:
-            return _upload_stream(
-                upload_service,
-                fp,
-                event_payload=final_event_payload,
-                emitter=self.emitter,
+    def _handle_upload_exception(
+        self, ex: Exception, progress: UploaderProgress
+    ) -> None:
+        retries = progress.get("retries", 0)
+        begin_offset = progress.get("begin_offset")
+        offset = progress.get("offset")
+
+        if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
+            self.emitter.emit("upload_interrupted", progress)
+            LOG.warning(
+                f"Error uploading at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
             )
-        except UploadCancelled:
-            return None
+            # Keep things immutable here. Retries are incremented in the caller
+            retries += 1
+            if _is_immediate_retriable_exception(ex):
+                sleep_for = 0
+            else:
+                sleep_for = min(2**retries, 16)
+            LOG.info(
+                f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
+            )
+            if sleep_for:
+                time.sleep(sleep_for)
+        else:
+            self.emitter.emit("upload_failed", progress)
+            raise ex
+
+    def _chunk_with_progress_emitted(
+        self,
+        stream: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> T.Generator[bytes, None, None]:
+        for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
+            stream, self.upload_options.chunk_size
+        ):
+            yield chunk
+
+            progress["offset"] += len(chunk)
+            progress["chunk_size"] = len(chunk)
+            # Whenever a chunk is uploaded, reset retries
+            progress["retries"] = 0
+
+            self.emitter.emit("upload_progress", progress)
+
+    def _upload_stream_retryable(
+        self,
+        upload_service: upload_api_v4.UploadService,
+        fp: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> str:
+        """Upload the stream with safe retries guaranteed"""
+
+        begin_offset = upload_service.fetch_offset()
+
+        progress["begin_offset"] = begin_offset
+        progress["offset"] = begin_offset
+
+        if not constants.MIN_UPLOAD_SPEED:
+            read_timeout = None
+        else:
+            remaining_bytes = abs(progress["entity_size"] - begin_offset)
+            read_timeout = max(
+                api_v4.REQUESTS_TIMEOUT, remaining_bytes / constants.MIN_UPLOAD_SPEED
+            )
+
+        self.emitter.emit("upload_fetch_offset", progress)
+
+        fp.seek(begin_offset, io.SEEK_SET)
+
+        shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
+
+        return upload_service.upload_shifted_chunks(
+            shifted_chunks, begin_offset, read_timeout=read_timeout
+        )
+
+    def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+        if self.upload_options.noresume:
+            # Generate a unique UUID for the session_key when noresume is True
+            # to prevent resuming from previous uploads
+            session_key = f"{_prefixed_uuid4()}"
+        else:
+            fp.seek(0, io.SEEK_SET)
+            session_key = utils.md5sum_fp(fp).hexdigest()
+
+        filetype = progress.get("file_type")
+        if filetype is not None:
+            session_key = _session_key(session_key, types.FileType(filetype))
+
+        return session_key


 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
     for metadata in metadatas:
-        types.validate_image_desc(types.as_desc(metadata))
+        validate_image_desc(DescriptionJSONSerializer.as_desc(metadata))
         if not metadata.filename.is_file():
             raise FileNotFoundError(f"No such file {metadata.filename}")


-@contextmanager
-def wip_file_context(wip_path: Path, done_path: Path):
-    assert wip_path != done_path, "should not be the same file"
-    try:
-        os.remove(wip_path)
-    except FileNotFoundError:
-        pass
-    try:
-        yield wip_path
-        try:
-            os.remove(done_path)
-        except FileNotFoundError:
-            pass
-        wip_path.rename(done_path)
-    finally:
-        try:
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
-
-
-def zip_images(
-    metadatas: T.List[types.ImageMetadata],
-    zip_dir: Path,
-) -> None:
-    _validate_metadatas(metadatas)
-    sequences = types.group_and_sort_images(metadatas)
-    os.makedirs(zip_dir, exist_ok=True)
-    for sequence_uuid, sequence in sequences.items():
-        for metadata in sequence:
-            metadata.update_md5sum()
-        upload_md5sum = types.sequence_md5sum(sequence)
-        timestamp = int(time.time())
-        wip_zip_filename = zip_dir.joinpath(
-            f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{timestamp}"
-        )
-        zip_filename = zip_dir.joinpath(f"mly_tools_{upload_md5sum}.zip")
-        with wip_file_context(wip_zip_filename, zip_filename) as wip_dir:
-            with wip_dir.open("wb") as fp:
-                _zip_sequence_fp(sequence, fp, upload_md5sum)
-
-
-def _zip_sequence_fp(
-    sequence: T.Sequence[types.ImageMetadata],
-    fp: T.IO[bytes],
-    upload_md5sum: str,
-) -> None:
-    arcname_idx = 0
-    arcnames = set()
-    with zipfile.ZipFile(fp, "w", zipfile.ZIP_DEFLATED) as ziph:
-        for metadata in sequence:
-            edit = exif_write.ExifEdit(metadata.filename)
-            # The cast is to fix the type checker error
-            edit.add_image_description(
-                T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
-            )
-            image_bytes = edit.dump_image_bytes()
-            arcname: str = metadata.filename.name
-            # make sure the arcname is unique, otherwise zipfile.extractAll will eliminate duplicated ones
-            while arcname in arcnames:
-                arcname_idx += 1
-                arcname = (
-                    f"{metadata.filename.stem}_{arcname_idx}{metadata.filename.suffix}"
-                )
-            arcnames.add(arcname)
-            zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            ziph.writestr(zipinfo, image_bytes)
-        ziph.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
-        assert len(sequence) == len(set(ziph.namelist()))
-
-
-def _extract_upload_md5sum(fp: T.IO[bytes]) -> T.Optional[str]:
-    with zipfile.ZipFile(fp, "r", zipfile.ZIP_DEFLATED) as ziph:
-        comment = ziph.comment
-    if not comment:
-        return None
-    try:
-        upload_md5sum = json.loads(comment.decode("utf-8")).get("upload_md5sum")
-    except Exception:
-        return None
-    if not upload_md5sum:
-        return None
-    return str(upload_md5sum)
-
-
-def _is_immediate_retry(ex: Exception):
+def _is_immediate_retriable_exception(ex: Exception) -> bool:
     if (
         isinstance(ex, requests.HTTPError)
         and isinstance(ex.response, requests.Response)
@@ -331,8 +909,10 @@ def _is_immediate_retry(ex: Exception):
         # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
         return resp.get("debug_info", {}).get("retriable", False)

+    return False
+

-def _is_retriable_exception(ex: Exception):
+def _is_retriable_exception(ex: Exception) -> bool:
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
         return True

@@ -351,89 +931,29 @@ def _is_retriable_exception(ex: Exception):
351
931
  return False
352
932
 
353
933
 
354
- def _setup_callback(emitter: EventEmitter, mutable_payload: Progress):
355
- def _callback(chunk: bytes, _):
356
- assert isinstance(emitter, EventEmitter)
357
- mutable_payload["offset"] += len(chunk)
358
- mutable_payload["chunk_size"] = len(chunk)
359
- emitter.emit("upload_progress", mutable_payload)
360
-
361
- return _callback
362
-
363
-
364
- def _upload_stream(
365
- upload_service: upload_api_v4.UploadService,
366
- fp: T.IO[bytes],
367
- event_payload: T.Optional[Progress] = None,
368
- emitter: T.Optional[EventEmitter] = None,
934
+ def _session_key(
935
+ upload_md5sum: str, filetype: api_v4.ClusterFileType | types.FileType
369
936
  ) -> str:
370
- retries = 0
371
-
372
- if event_payload is None:
373
- event_payload = {}
374
-
375
- mutable_payload = T.cast(Progress, {**event_payload})
376
-
377
- # when it progresses, we reset retries
378
- def _reset_retries(_, __):
379
- nonlocal retries
380
- retries = 0
381
-
382
- if emitter:
383
- emitter.emit("upload_start", mutable_payload)
937
+ _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
938
+ api_v4.ClusterFileType.ZIP: ".zip",
939
+ api_v4.ClusterFileType.CAMM: ".mp4",
940
+ api_v4.ClusterFileType.BLACKVUE: ".mp4",
941
+ types.FileType.IMAGE: ".jpg",
942
+ types.FileType.ZIP: ".zip",
943
+ types.FileType.BLACKVUE: ".mp4",
944
+ types.FileType.CAMM: ".mp4",
945
+ types.FileType.GOPRO: ".mp4",
946
+ types.FileType.VIDEO: ".mp4",
947
+ }
384
948
 
385
- while True:
386
- fp.seek(0, io.SEEK_SET)
387
- begin_offset: T.Optional[int] = None
388
- try:
389
- begin_offset = upload_service.fetch_offset()
390
- upload_service.callbacks = [_reset_retries]
391
- if emitter:
392
- mutable_payload["offset"] = begin_offset
393
- mutable_payload["retries"] = retries
394
- emitter.emit("upload_fetch_offset", mutable_payload)
395
- upload_service.callbacks.append(
396
- _setup_callback(emitter, mutable_payload)
397
- )
398
- file_handle = upload_service.upload(fp, offset=begin_offset)
399
- except Exception as ex:
400
- if retries < constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
401
- if emitter:
402
- emitter.emit("upload_interrupted", mutable_payload)
403
- LOG.warning(
404
- # use %s instead of %d because offset could be None
405
- "Error uploading chunk_size %d at begin_offset %s: %s: %s",
406
- upload_service.chunk_size,
407
- begin_offset,
408
- ex.__class__.__name__,
409
- str(ex),
410
- )
411
- retries += 1
412
- if _is_immediate_retry(ex):
413
- sleep_for = 0
414
- else:
415
- sleep_for = min(2**retries, 16)
416
- LOG.info(
417
- "Retrying in %d seconds (%d/%d)",
418
- sleep_for,
419
- retries,
420
- constants.MAX_UPLOAD_RETRIES,
421
- )
422
- if sleep_for:
423
- time.sleep(sleep_for)
424
- else:
425
- raise ex
426
- else:
427
- break
949
+ return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[filetype]}"
428
950
 
429
- if emitter:
430
- emitter.emit("upload_end", mutable_payload)
431
951
 
432
- # TODO: retry here
433
- cluster_id = upload_service.finish(file_handle)
952
+ def _prefixed_uuid4():
953
+ prefixed = f"uuid_{uuid.uuid4().hex}"
954
+ assert _is_uuid(prefixed)
955
+ return prefixed
434
956
 
435
- if emitter:
436
- mutable_payload["cluster_id"] = cluster_id
437
- emitter.emit("upload_finished", mutable_payload)
438
957
 
439
- return cluster_id
958
+ def _is_uuid(session_key: str) -> bool:
959
+ return session_key.startswith("uuid_")