mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +106 -7
  3. mapillary_tools/authenticate.py +325 -64
  4. mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +425 -177
  7. mapillary_tools/commands/__main__.py +2 -0
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +18 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +18 -9
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +28 -12
  15. mapillary_tools/constants.py +46 -4
  16. mapillary_tools/exceptions.py +34 -35
  17. mapillary_tools/exif_read.py +158 -53
  18. mapillary_tools/exiftool_read.py +19 -5
  19. mapillary_tools/exiftool_read_video.py +12 -1
  20. mapillary_tools/exiftool_runner.py +77 -0
  21. mapillary_tools/geo.py +148 -107
  22. mapillary_tools/geotag/factory.py +298 -0
  23. mapillary_tools/geotag/geotag_from_generic.py +152 -11
  24. mapillary_tools/geotag/geotag_images_from_exif.py +43 -124
  25. mapillary_tools/geotag/geotag_images_from_exiftool.py +66 -70
  26. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +32 -48
  27. mapillary_tools/geotag/geotag_images_from_gpx.py +41 -116
  28. mapillary_tools/geotag/geotag_images_from_gpx_file.py +15 -96
  29. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -2
  30. mapillary_tools/geotag/geotag_images_from_video.py +46 -46
  31. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +98 -92
  32. mapillary_tools/geotag/geotag_videos_from_gpx.py +140 -0
  33. mapillary_tools/geotag/geotag_videos_from_video.py +149 -181
  34. mapillary_tools/geotag/options.py +159 -0
  35. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +194 -171
  36. mapillary_tools/history.py +3 -11
  37. mapillary_tools/mp4/io_utils.py +0 -1
  38. mapillary_tools/mp4/mp4_sample_parser.py +11 -3
  39. mapillary_tools/mp4/simple_mp4_parser.py +0 -10
  40. mapillary_tools/process_geotag_properties.py +151 -386
  41. mapillary_tools/process_sequence_properties.py +554 -202
  42. mapillary_tools/sample_video.py +8 -15
  43. mapillary_tools/telemetry.py +24 -12
  44. mapillary_tools/types.py +80 -22
  45. mapillary_tools/upload.py +311 -261
  46. mapillary_tools/upload_api_v4.py +55 -95
  47. mapillary_tools/uploader.py +396 -254
  48. mapillary_tools/utils.py +26 -0
  49. mapillary_tools/video_data_extraction/extract_video_data.py +17 -36
  50. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +34 -19
  51. mapillary_tools/video_data_extraction/extractors/camm_parser.py +41 -17
  52. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +4 -1
  53. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +1 -2
  54. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +37 -22
  55. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/METADATA +3 -2
  56. mapillary_tools-0.14.0a1.dist-info/RECORD +78 -0
  57. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/WHEEL +1 -1
  58. mapillary_tools/geotag/utils.py +0 -26
  59. mapillary_tools-0.13.3.dist-info/RECORD +0 -75
  60. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  61. /mapillary_tools/{geotag → gpmf}/gps_filter.py +0 -0
  62. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/entry_points.txt +0 -0
  63. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info/licenses}/LICENSE +0 -0
  64. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,11 @@
+from __future__ import annotations
+
+import dataclasses
 import io
 import json
 import logging
 import os
+import struct
 import tempfile
 import time
 import typing as T
@@ -10,28 +14,54 @@ import zipfile
 from contextlib import contextmanager
 from pathlib import Path
 
-import jsonschema
 import requests
 
-from . import constants, exif_write, types, upload_api_v4, utils
+from . import api_v4, constants, exif_write, types, upload_api_v4
 
 
 LOG = logging.getLogger(__name__)
 
 
-class Progress(T.TypedDict, total=False):
-    # The size of the chunk, in bytes, that has been uploaded in the last request
+class UploaderProgress(T.TypedDict, total=True):
+    """
+    Progress data that Uploader cares about.
+    """
+
+    # The size, in bytes, of the last chunk that has been read and uploaded
     chunk_size: int
 
-    # File type
-    file_type: str
+    # The initial offset returned by the upload service, which is also the offset
+    # the uploader starts uploading from.
+    # Assert:
+    # - 0 <= begin_offset <= offset <= entity_size
+    # - Non-None after at least one successful "upload_fetch_offset"
+    begin_offset: int | None
 
-    # How many bytes has been uploaded so far since "upload_start"
+    # How many bytes of the file have been uploaded so far
     offset: int
 
-    # Size in bytes of the zipfile/BlackVue/CAMM
+    # Size in bytes of the file (i.e. fp.tell() after seeking to the end)
+    # NOTE: It differs from the file size in the file system
+    # Assert:
+    # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int
 
+    # An "upload_interrupted" event increases it; reset to 0 whenever a chunk is uploaded
+    retries: int
+
+    # Cluster ID after finishing the upload
+    cluster_id: str
+
+
+class SequenceProgress(T.TypedDict, total=False):
+    """Progress data at the sequence level"""
+
+    # md5sum of the zipfile/BlackVue/CAMM being uploaded
+    md5sum: str
+
+    # File type
+    file_type: str
+
     # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
     total_sequence_count: int
 
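The Assert comments above are the invariants the uploader maintains between events. A hypothetical checker that a test or an emitter callback could run against any UploaderProgress payload (the function name is illustrative, not part of the package):

    def check_progress_invariants(progress: dict) -> None:
        # Documented invariants: 0 <= begin_offset <= offset <= entity_size,
        # and begin_offset becomes non-None after "upload_fetch_offset"
        begin_offset = progress["begin_offset"]
        if begin_offset is not None:
            assert 0 <= begin_offset <= progress["offset"] <= progress["entity_size"]
        assert progress["retries"] >= 0
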
@@ -44,20 +74,31 @@ class Progress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str
 
-    # An "upload_interrupted" will increase it. Reset to 0 if the chunk is uploaded
-    retries: int
-
-    # md5sum of the zipfile/BlackVue/CAMM in uploading
-    md5sum: str
-
     # Path to the Zipfile/BlackVue/CAMM
     import_path: str
 
-    # Cluster ID after finishing the upload
-    cluster_id: str
 
+class Progress(SequenceProgress, UploaderProgress):
+    pass
+
+
+class SequenceError(Exception):
+    """
+    Base class for sequence-specific errors. These errors cause the
+    current sequence upload to fail but do not interrupt the overall upload
+    process for other sequences.
+    """
 
-class UploadCancelled(Exception):
+    pass
+
+
+class ExifError(SequenceError):
+    def __init__(self, message: str, image_path: Path):
+        super().__init__(message)
+        self.image_path = image_path
+
+
+class InvalidMapillaryZipFileError(SequenceError):
     pass
 
 
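The SequenceError hierarchy is what lets a multi-sequence upload degrade gracefully: a failure is confined to its sequence and the loop moves on, as prepare_images_and_upload does further down. A minimal sketch of that contract (upload_one_sequence is an illustrative stand-in):

    def upload_all(sequences: dict) -> None:
        for sequence_uuid, sequence in sequences.items():
            try:
                # may raise ExifError, InvalidMapillaryZipFileError, ...
                upload_one_sequence(sequence)
            except SequenceError as ex:
                # Fail this sequence only; the remaining sequences still upload
                LOG.warning("Skipping sequence %s: %s", sequence_uuid, ex)
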
@@ -72,7 +113,7 @@ EventName = T.Literal[
 
 
 class EventEmitter:
-    events: T.Dict[EventName, T.List]
+    events: dict[EventName, list]
 
     def __init__(self):
         self.events = {}
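Only the annotation changes here (typing aliases give way to builtin generics), but EventEmitter is the hub for all progress reporting below. A sketch of hooking into it, assuming its registration method (not shown in this hunk) follows the usual on(event) decorator pattern:

    emitter = EventEmitter()

    @emitter.on("upload_progress")
    def _print_progress(progress: dict) -> None:
        # offset and entity_size come from the UploaderProgress payload above
        percent = 100 * progress["offset"] / max(progress["entity_size"], 1)
        print(f"uploaded {percent:.1f}%")

    # user_items: a types.UserItem holding a valid user_upload_token
    uploader = Uploader(user_items, emitter=emitter)
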
@@ -88,142 +129,382 @@ class EventEmitter:
             callback(*args, **kwargs)
 
 
-class Uploader:
-    def __init__(
-        self,
-        user_items: types.UserItem,
-        emitter: T.Optional[EventEmitter] = None,
-        chunk_size: int = upload_api_v4.DEFAULT_CHUNK_SIZE,
-        dry_run=False,
+@dataclasses.dataclass
+class UploadResult:
+    result: str | None = None
+    error: Exception | None = None
+
+
+class ZipImageSequence:
+    @classmethod
+    def zip_images(
+        cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
+    ) -> None:
+        """
+        Group images into sequences and zip each sequence into a zipfile.
+        """
+        sequences = types.group_and_sort_images(metadatas)
+        os.makedirs(zip_dir, exist_ok=True)
+
+        for sequence_uuid, sequence in sequences.items():
+            _validate_metadatas(sequence)
+            upload_md5sum = types.update_sequence_md5sum(sequence)
+
+            # For atomicity we write into a WIP file and then rename it to the final file
+            wip_zip_filename = zip_dir.joinpath(
+                f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
+            )
+            filename = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
+            zip_filename = zip_dir.joinpath(filename)
+            with wip_file_context(wip_zip_filename, zip_filename) as wip_path:
+                with wip_path.open("wb") as wip_fp:
+                    actual_md5sum = cls.zip_sequence_deterministically(sequence, wip_fp)
+                    assert actual_md5sum == upload_md5sum, "md5sum mismatch"
+
+    @classmethod
+    def zip_sequence_deterministically(
+        cls,
+        sequence: T.Sequence[types.ImageMetadata],
+        zip_fp: T.IO[bytes],
+    ) -> str:
+        """
+        Write a sequence of ImageMetadata into the zipfile handle. It should guarantee
+        that the same sequence always produces the same zipfile, because the
+        sequence md5sum will be used to upload the zipfile or resume the upload.
+
+        The sequence must be a single, sorted sequence.
+        """
+
+        sequence_groups = types.group_and_sort_images(sequence)
+        assert len(sequence_groups) == 1, (
+            f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
+        )
+
+        upload_md5sum = types.update_sequence_md5sum(sequence)
+
+        with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
+            for idx, metadata in enumerate(sequence):
+                # Use {idx}.jpg (the suffix does not matter) as the archive name to
+                # ensure the resulting zipfile is deterministic. This determinism is
+                # based on the upload_md5sum, which is derived from the image md5sums
+                cls._write_imagebytes_in_zip(zipf, metadata, arcname=f"{idx}.jpg")
+            assert len(sequence) == len(set(zipf.namelist()))
+            zipf.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
+
+        return upload_md5sum
+
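The fixed ZipInfo timestamp (1980-01-01, written by _write_imagebytes_in_zip below) and the index-based archive names are what make the output deterministic: zipping the same sorted sequence twice yields byte-identical files, so the md5sum-derived session key stays stable across runs and interrupted uploads can resume. A self-contained sketch of the same property using only the standard library (deterministic_zip is illustrative):

    import io, json, zipfile

    def deterministic_zip(blobs: list) -> bytes:
        buf = io.BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zipf:
            for idx, blob in enumerate(blobs):
                # Without a fixed date_time, ZipInfo stamps the current time
                # and the bytes would differ on every run
                info = zipfile.ZipInfo(f"{idx}.jpg", date_time=(1980, 1, 1, 0, 0, 0))
                zipf.writestr(info, blob)
            zipf.comment = json.dumps({"upload_md5sum": "<md5>"}).encode("utf-8")
        return buf.getvalue()

    assert deterministic_zip([b"a", b"b"]) == deterministic_zip([b"a", b"b"])
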
+    @classmethod
+    def extract_upload_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+        with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
+            comment = ziph.comment
+
+        if not comment:
+            raise InvalidMapillaryZipFileError("No comment found in the zipfile")
+
+        try:
+            decoded = comment.decode("utf-8")
+            zip_metadata = json.loads(decoded)
+        except UnicodeDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+        except json.JSONDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+
+        upload_md5sum = zip_metadata.get("upload_md5sum")
+
+        if not upload_md5sum or not isinstance(upload_md5sum, str):
+            raise InvalidMapillaryZipFileError("No upload_md5sum found")
+
+        return upload_md5sum
+
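Because foreign zipfiles (or ones from older releases, which fell back to a whole-file md5) may lack the JSON comment, InvalidMapillaryZipFileError doubles as a cheap validity probe. A hypothetical helper on top of the method above:

    from pathlib import Path

    def is_resumable_mapillary_zip(path: Path) -> bool:
        # True only when the zip carries a parseable upload_md5sum comment
        with path.open("rb") as fp:
            try:
                ZipImageSequence.extract_upload_md5sum(fp)
            except InvalidMapillaryZipFileError:
                return False
        return True
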
+    @classmethod
+    def _write_imagebytes_in_zip(
+        cls, zipf: zipfile.ZipFile, metadata: types.ImageMetadata, arcname: str
     ):
-        jsonschema.validate(instance=user_items, schema=types.UserItemSchema)
-        self.user_items = user_items
-        self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
 
-    def upload_zipfile(
-        self,
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
+        )
+
+        try:
+            image_bytes = edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
+
+        zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+        zipf.writestr(zipinfo, image_bytes)
+
+    @classmethod
+    def prepare_zipfile_and_upload(
+        cls,
         zip_path: Path,
-        event_payload: T.Optional[Progress] = None,
-    ) -> T.Optional[str]:
-        if event_payload is None:
-            event_payload = {}
+        uploader: Uploader,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
 
         with zipfile.ZipFile(zip_path) as ziph:
             namelist = ziph.namelist()
             if not namelist:
-                LOG.warning("Skipping empty zipfile: %s", zip_path)
-                return None
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            upload_md5sum = cls.extract_upload_md5sum(zip_fp)
 
-        final_event_payload: Progress = {
-            **event_payload,  # type: ignore
+        sequence_progress: SequenceProgress = {
             "sequence_image_count": len(namelist),
+            "file_type": types.FileType.ZIP.value,
+            "md5sum": upload_md5sum,
         }
 
-        with zip_path.open("rb") as fp:
-            upload_md5sum = _extract_upload_md5sum(fp)
-
-        if upload_md5sum is None:
-            with zip_path.open("rb") as fp:
-                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+        session_key = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
 
-        with zip_path.open("rb") as fp:
-            return self.upload_stream(
-                fp,
+        with zip_path.open("rb") as zip_fp:
+            return uploader.upload_stream(
+                zip_fp,
                 upload_api_v4.ClusterFileType.ZIP,
-                upload_md5sum,
-                event_payload=final_event_payload,
+                session_key,
+                # Send a copy of the input progress to each upload session to avoid mutating the original
+                progress=T.cast(T.Dict[str, T.Any], {**progress, **sequence_progress}),
             )
 
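zip_images and prepare_zipfile_and_upload split the workflow into an offline zipping step and a later upload step that share the md5sum-derived naming. A hypothetical end-to-end run (metadatas and user_items are assumed inputs):

    from pathlib import Path

    # metadatas: list of types.ImageMetadata from the process step;
    # user_items: a types.UserItem holding a valid user_upload_token
    ZipImageSequence.zip_images(metadatas, Path("zips"))

    uploader = Uploader(user_items, dry_run=True)  # dry_run avoids real network traffic
    for zip_path in sorted(Path("zips").glob("mly_tools_*.zip")):
        cluster_id = ZipImageSequence.prepare_zipfile_and_upload(zip_path, uploader)
        print(zip_path.name, "->", cluster_id)
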
-    def upload_images(
-        self,
+    @classmethod
+    def prepare_images_and_upload(
+        cls,
         image_metadatas: T.Sequence[types.ImageMetadata],
-        event_payload: T.Optional[Progress] = None,
-    ) -> T.Dict[str, str]:
-        if event_payload is None:
-            event_payload = {}
+        uploader: Uploader,
+        progress: dict[str, T.Any] | None = None,
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        if progress is None:
+            progress = {}
 
-        _validate_metadatas(image_metadatas)
         sequences = types.group_and_sort_images(image_metadatas)
-        ret: T.Dict[str, str] = {}
+
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-            final_event_payload: Progress = {
-                **event_payload,  # type: ignore
+            sequence_progress: SequenceProgress = {
                 "sequence_idx": sequence_idx,
                 "total_sequence_count": len(sequences),
                 "sequence_image_count": len(sequence),
                 "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
             }
-            for metadata in sequence:
-                metadata.update_md5sum()
-            upload_md5sum = types.sequence_md5sum(sequence)
+
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
+
             with tempfile.NamedTemporaryFile() as fp:
-                _zip_sequence_fp(sequence, fp, upload_md5sum)
-                cluster_id = self.upload_stream(
-                    fp,
-                    upload_api_v4.ClusterFileType.ZIP,
-                    upload_md5sum,
-                    final_event_payload,
+                try:
+                    upload_md5sum = cls.zip_sequence_deterministically(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress["md5sum"] = upload_md5sum
+
+                session_key = _session_key(
+                    upload_md5sum, upload_api_v4.ClusterFileType.ZIP
                 )
-            if cluster_id is not None:
-                ret[sequence_uuid] = cluster_id
-        return ret
+
+                try:
+                    cluster_id = uploader.upload_stream(
+                        fp,
+                        upload_api_v4.ClusterFileType.ZIP,
+                        session_key,
+                        progress=T.cast(
+                            T.Dict[str, T.Any], {**progress, **sequence_progress}
+                        ),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+
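Unlike the old upload_images, where a single bad sequence aborted the whole run, prepare_images_and_upload is a generator yielding one UploadResult per sequence, so callers decide how to report failures. A sketch of consuming it (same assumed inputs as above):

    uploader = Uploader(user_items)
    for sequence_uuid, result in ZipImageSequence.prepare_images_and_upload(
        metadatas, uploader
    ):
        if result.error is not None:
            # SequenceError subclasses such as ExifError land here too
            print(f"sequence {sequence_uuid} failed: {result.error}")
        else:
            print(f"sequence {sequence_uuid} -> cluster {result.result}")
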
+class Uploader:
+    def __init__(
+        self,
+        user_items: types.UserItem,
+        emitter: EventEmitter | None = None,
+        chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
+        dry_run=False,
+    ):
+        self.user_items = user_items
+        if emitter is None:
+            # An empty event emitter that does nothing
+            self.emitter = EventEmitter()
+        else:
+            self.emitter = emitter
+        self.chunk_size = chunk_size
+        self.dry_run = dry_run
 
     def upload_stream(
         self,
         fp: T.IO[bytes],
         cluster_filetype: upload_api_v4.ClusterFileType,
-        upload_md5sum: str,
-        event_payload: T.Optional[Progress] = None,
-    ) -> T.Optional[str]:
-        if event_payload is None:
-            event_payload = {}
+        session_key: str,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
 
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
 
-        SUFFIX_MAP: T.Dict[upload_api_v4.ClusterFileType, str] = {
-            upload_api_v4.ClusterFileType.ZIP: ".zip",
-            upload_api_v4.ClusterFileType.CAMM: ".mp4",
-            upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
-        }
-        session_key = f"mly_tools_{upload_md5sum}{SUFFIX_MAP[cluster_filetype]}"
+        upload_service = self._create_upload_service(session_key, cluster_filetype)
 
-        if self.dry_run:
-            upload_service: upload_api_v4.UploadService = (
-                upload_api_v4.FakeUploadService(
-                    user_access_token=self.user_items["user_upload_token"],
-                    session_key=session_key,
-                    organization_id=self.user_items.get("MAPOrganizationKey"),
-                    cluster_filetype=cluster_filetype,
-                    chunk_size=self.chunk_size,
+        progress["entity_size"] = entity_size
+        progress["chunk_size"] = self.chunk_size
+        progress["retries"] = 0
+        progress["begin_offset"] = None
+
+        self.emitter.emit("upload_start", progress)
+
+        while True:
+            try:
+                file_handle = self._upload_stream_retryable(
+                    upload_service, fp, T.cast(UploaderProgress, progress)
                 )
+            except Exception as ex:
+                self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
+            else:
+                break
+
+            progress["retries"] += 1
+
+        self.emitter.emit("upload_end", progress)
+
+        # TODO: retry here
+        cluster_id = self._finish_upload_retryable(upload_service, file_handle)
+        progress["cluster_id"] = cluster_id
+
+        self.emitter.emit("upload_finished", progress)
+
+        return cluster_id
+
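Reading upload_stream top to bottom gives the full event order for one file. A comment-only summary, derived from the calls above (an observation of this code, not a documented API guarantee):

    # upload_start         once, after the progress payload is initialized
    # upload_fetch_offset   once per attempt, inside _upload_stream_retryable
    # upload_progress       after each chunk that reaches the server
    # upload_interrupted    on each retriable failure, before backing off
    # upload_end            once the stream is fully uploaded
    # upload_finished       after finish_upload returns the cluster ID
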
+    def _create_upload_service(
+        self, session_key: str, cluster_filetype: upload_api_v4.ClusterFileType
+    ) -> upload_api_v4.UploadService:
+        upload_service: upload_api_v4.UploadService
+
+        if self.dry_run:
+            upload_service = upload_api_v4.FakeUploadService(
+                user_access_token=self.user_items["user_upload_token"],
+                session_key=session_key,
+                cluster_filetype=cluster_filetype,
             )
         else:
             upload_service = upload_api_v4.UploadService(
                 user_access_token=self.user_items["user_upload_token"],
                 session_key=session_key,
-                organization_id=self.user_items.get("MAPOrganizationKey"),
                 cluster_filetype=cluster_filetype,
-                chunk_size=self.chunk_size,
             )
 
-        final_event_payload: Progress = {
-            **event_payload,  # type: ignore
-            "entity_size": entity_size,
-            "md5sum": upload_md5sum,
-        }
+        return upload_service
+
+    def _handle_upload_exception(
+        self, ex: Exception, progress: UploaderProgress
+    ) -> None:
+        retries = progress["retries"]
+        begin_offset = progress.get("begin_offset")
+        chunk_size = progress["chunk_size"]
+
+        if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
+            self.emitter.emit("upload_interrupted", progress)
+            LOG.warning(
+                # use %s instead of %d because offset could be None
+                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
+                chunk_size,
+                begin_offset,
+                ex.__class__.__name__,
+                str(ex),
+            )
+            # Keep things immutable here; the caller increments retries
+            retries += 1
+            if _is_immediate_retry(ex):
+                sleep_for = 0
+            else:
+                sleep_for = min(2**retries, 16)
+            LOG.info(
+                "Retrying in %d seconds (%d/%d)",
+                sleep_for,
+                retries,
+                constants.MAX_UPLOAD_RETRIES,
+            )
+            if sleep_for:
+                time.sleep(sleep_for)
+        else:
+            raise ex
 
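Unless the failure qualifies for an immediate retry, the wait is min(2**retries, 16) seconds, i.e. 2, 4, 8, 16, 16, ... until MAX_UPLOAD_RETRIES is exhausted. The same policy in isolation (the constant value is illustrative; the real one lives in constants.py):

    MAX_UPLOAD_RETRIES = 10  # illustrative

    def backoff_seconds(retries: int, immediate: bool = False) -> int:
        # Mirrors _handle_upload_exception: exponential growth capped at 16s
        return 0 if immediate else min(2**retries, 16)

    assert [backoff_seconds(r) for r in range(1, 6)] == [2, 4, 8, 16, 16]
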
-        try:
-            return _upload_stream(
-                upload_service,
-                fp,
-                event_payload=final_event_payload,
-                emitter=self.emitter,
+    def _chunk_with_progress_emitted(
+        self,
+        stream: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> T.Generator[bytes, None, None]:
+        for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
+            stream, self.chunk_size
+        ):
+            yield chunk
+
+            progress["offset"] += len(chunk)
+            progress["chunk_size"] = len(chunk)
+            # Whenever a chunk is uploaded, reset retries
+            progress["retries"] = 0
+
+            self.emitter.emit("upload_progress", progress)
+
+    def _upload_stream_retryable(
+        self,
+        upload_service: upload_api_v4.UploadService,
+        fp: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> str:
+        """Upload the stream with safe retries guaranteed"""
+
+        begin_offset = upload_service.fetch_offset()
+
+        progress["begin_offset"] = begin_offset
+        progress["offset"] = begin_offset
+
+        self.emitter.emit("upload_fetch_offset", progress)
+
+        fp.seek(begin_offset, io.SEEK_SET)
+
+        shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
+
+        return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)
+
+    def _finish_upload_retryable(
+        self, upload_service: upload_api_v4.UploadService, file_handle: str
+    ) -> str:
+        """Finish the upload with safe retries guaranteed"""
+
+        if self.dry_run:
+            cluster_id = "0"
+        else:
+            resp = api_v4.finish_upload(
+                self.user_items["user_upload_token"],
+                file_handle,
+                upload_service.cluster_filetype,
+                organization_id=self.user_items.get("MAPOrganizationKey"),
             )
-        except UploadCancelled:
-            return None
+
+            data = resp.json()
+            cluster_id = data.get("cluster_id")
+
+        # TODO: validate cluster_id
+
+        return cluster_id
 
 
 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
@@ -254,70 +535,6 @@ def wip_file_context(wip_path: Path, done_path: Path):
         pass
 
 
-def zip_images(
-    metadatas: T.List[types.ImageMetadata],
-    zip_dir: Path,
-) -> None:
-    _validate_metadatas(metadatas)
-    sequences = types.group_and_sort_images(metadatas)
-    os.makedirs(zip_dir, exist_ok=True)
-    for sequence_uuid, sequence in sequences.items():
-        for metadata in sequence:
-            metadata.update_md5sum()
-        upload_md5sum = types.sequence_md5sum(sequence)
-        timestamp = int(time.time())
-        wip_zip_filename = zip_dir.joinpath(
-            f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{timestamp}"
-        )
-        zip_filename = zip_dir.joinpath(f"mly_tools_{upload_md5sum}.zip")
-        with wip_file_context(wip_zip_filename, zip_filename) as wip_dir:
-            with wip_dir.open("wb") as fp:
-                _zip_sequence_fp(sequence, fp, upload_md5sum)
-
-
-def _zip_sequence_fp(
-    sequence: T.Sequence[types.ImageMetadata],
-    fp: T.IO[bytes],
-    upload_md5sum: str,
-) -> None:
-    arcname_idx = 0
-    arcnames = set()
-    with zipfile.ZipFile(fp, "w", zipfile.ZIP_DEFLATED) as ziph:
-        for metadata in sequence:
-            edit = exif_write.ExifEdit(metadata.filename)
-            # The cast is to fix the type checker error
-            edit.add_image_description(
-                T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
-            )
-            image_bytes = edit.dump_image_bytes()
-            arcname: str = metadata.filename.name
-            # make sure the arcname is unique, otherwise zipfile.extractAll will eliminate duplicated ones
-            while arcname in arcnames:
-                arcname_idx += 1
-                arcname = (
-                    f"{metadata.filename.stem}_{arcname_idx}{metadata.filename.suffix}"
-                )
-            arcnames.add(arcname)
-            zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            ziph.writestr(zipinfo, image_bytes)
-        ziph.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
-        assert len(sequence) == len(set(ziph.namelist()))
-
-
-def _extract_upload_md5sum(fp: T.IO[bytes]) -> T.Optional[str]:
-    with zipfile.ZipFile(fp, "r", zipfile.ZIP_DEFLATED) as ziph:
-        comment = ziph.comment
-    if not comment:
-        return None
-    try:
-        upload_md5sum = json.loads(comment.decode("utf-8")).get("upload_md5sum")
-    except Exception:
-        return None
-    if not upload_md5sum:
-        return None
-    return str(upload_md5sum)
-
-
 def _is_immediate_retry(ex: Exception):
     if (
         isinstance(ex, requests.HTTPError)
@@ -351,89 +568,14 @@ def _is_retriable_exception(ex: Exception):
     return False
 
 
-def _setup_callback(emitter: EventEmitter, mutable_payload: Progress):
-    def _callback(chunk: bytes, _):
-        assert isinstance(emitter, EventEmitter)
-        mutable_payload["offset"] += len(chunk)
-        mutable_payload["chunk_size"] = len(chunk)
-        emitter.emit("upload_progress", mutable_payload)
+_SUFFIX_MAP: dict[upload_api_v4.ClusterFileType, str] = {
+    upload_api_v4.ClusterFileType.ZIP: ".zip",
+    upload_api_v4.ClusterFileType.CAMM: ".mp4",
+    upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
+}
 
-    return _callback
 
-
-def _upload_stream(
-    upload_service: upload_api_v4.UploadService,
-    fp: T.IO[bytes],
-    event_payload: T.Optional[Progress] = None,
-    emitter: T.Optional[EventEmitter] = None,
+def _session_key(
+    upload_md5sum: str, cluster_filetype: upload_api_v4.ClusterFileType
 ) -> str:
-    retries = 0
-
-    if event_payload is None:
-        event_payload = {}
-
-    mutable_payload = T.cast(Progress, {**event_payload})
-
-    # when it progresses, we reset retries
-    def _reset_retries(_, __):
-        nonlocal retries
-        retries = 0
-
-    if emitter:
-        emitter.emit("upload_start", mutable_payload)
-
-    while True:
-        fp.seek(0, io.SEEK_SET)
-        begin_offset: T.Optional[int] = None
-        try:
-            begin_offset = upload_service.fetch_offset()
-            upload_service.callbacks = [_reset_retries]
-            if emitter:
-                mutable_payload["offset"] = begin_offset
-                mutable_payload["retries"] = retries
-                emitter.emit("upload_fetch_offset", mutable_payload)
-                upload_service.callbacks.append(
-                    _setup_callback(emitter, mutable_payload)
-                )
-            file_handle = upload_service.upload(fp, offset=begin_offset)
-        except Exception as ex:
-            if retries < constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-                if emitter:
-                    emitter.emit("upload_interrupted", mutable_payload)
-                LOG.warning(
-                    # use %s instead of %d because offset could be None
-                    "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                    upload_service.chunk_size,
-                    begin_offset,
-                    ex.__class__.__name__,
-                    str(ex),
-                )
-                retries += 1
-                if _is_immediate_retry(ex):
-                    sleep_for = 0
-                else:
-                    sleep_for = min(2**retries, 16)
-                LOG.info(
-                    "Retrying in %d seconds (%d/%d)",
-                    sleep_for,
-                    retries,
-                    constants.MAX_UPLOAD_RETRIES,
-                )
-                if sleep_for:
-                    time.sleep(sleep_for)
-            else:
-                raise ex
-        else:
-            break
-
-    if emitter:
-        emitter.emit("upload_end", mutable_payload)
-
-    # TODO: retry here
-    cluster_id = upload_service.finish(file_handle)
-
-    if emitter:
-        mutable_payload["cluster_id"] = cluster_id
-        emitter.emit("upload_finished", mutable_payload)
-
-    return cluster_id
+    return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[cluster_filetype]}"