mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.0b1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries; it is provided for informational purposes only.
Files changed (38)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +1 -0
  3. mapillary_tools/authenticate.py +9 -9
  4. mapillary_tools/blackvue_parser.py +79 -22
  5. mapillary_tools/config.py +38 -17
  6. mapillary_tools/constants.py +2 -0
  7. mapillary_tools/exiftool_read_video.py +52 -15
  8. mapillary_tools/exiftool_runner.py +4 -24
  9. mapillary_tools/ffmpeg.py +406 -232
  10. mapillary_tools/geotag/__init__.py +0 -0
  11. mapillary_tools/geotag/base.py +2 -2
  12. mapillary_tools/geotag/factory.py +97 -88
  13. mapillary_tools/geotag/geotag_images_from_exiftool.py +26 -19
  14. mapillary_tools/geotag/geotag_images_from_gpx.py +13 -6
  15. mapillary_tools/geotag/geotag_images_from_video.py +35 -0
  16. mapillary_tools/geotag/geotag_videos_from_exiftool.py +39 -13
  17. mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
  18. mapillary_tools/geotag/options.py +25 -3
  19. mapillary_tools/geotag/video_extractors/base.py +1 -1
  20. mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
  21. mapillary_tools/geotag/video_extractors/gpx.py +60 -70
  22. mapillary_tools/geotag/video_extractors/native.py +9 -31
  23. mapillary_tools/history.py +4 -1
  24. mapillary_tools/process_geotag_properties.py +16 -8
  25. mapillary_tools/process_sequence_properties.py +9 -11
  26. mapillary_tools/sample_video.py +7 -6
  27. mapillary_tools/serializer/description.py +587 -0
  28. mapillary_tools/serializer/gpx.py +132 -0
  29. mapillary_tools/types.py +44 -610
  30. mapillary_tools/upload.py +176 -197
  31. mapillary_tools/upload_api_v4.py +94 -51
  32. mapillary_tools/uploader.py +284 -138
  33. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/METADATA +87 -31
  34. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/RECORD +38 -35
  35. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/WHEEL +1 -1
  36. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/entry_points.txt +0 -0
  37. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/licenses/LICENSE +0 -0
  38. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py

@@ -1,12 +1,16 @@
  from __future__ import annotations

+ import concurrent.futures
+
  import dataclasses
  import io
  import json
  import logging
  import os
  import struct
+ import sys
  import tempfile
+ import threading
  import time
  import typing as T
  import uuid
@@ -14,9 +18,19 @@ import zipfile
  from contextlib import contextmanager
  from pathlib import Path

+ if sys.version_info >= (3, 11):
+     from typing import Required
+ else:
+     from typing_extensions import Required
+
  import requests

- from . import api_v4, constants, exif_write, types, upload_api_v4
+ from . import api_v4, config, constants, exif_write, types, upload_api_v4, utils
+ from .serializer.description import (
+     desc_file_to_exif,
+     DescriptionJSONSerializer,
+     validate_image_desc,
+ )


  LOG = logging.getLogger(__name__)
@@ -56,17 +70,21 @@ class UploaderProgress(T.TypedDict, total=True):
  class SequenceProgress(T.TypedDict, total=False):
      """Progress data at sequence level"""

-     # md5sum of the zipfile/BlackVue/CAMM in uploading
-     md5sum: str
+     # Used to check if it is uploaded or not
+     sequence_md5sum: Required[str]
+
+     # Used to resume from the previous upload,
+     # so it has to be a unique identifier (hash) of the upload content
+     upload_md5sum: str

      # File type
-     file_type: str
+     file_type: Required[str]

      # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
-     total_sequence_count: int
+     total_sequence_count: Required[int]

      # 0-based nth sequence. It is always 0 when uploading Zipfile/BlackVue/CAMM
-     sequence_idx: int
+     sequence_idx: Required[int]

      # How many images in the sequence. It's available only when uploading directories/Zipfiles
      sequence_image_count: int
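
The switch to Required[...] above inverts the TypedDict default: with total=False every key is optional unless marked Required. A minimal sketch of the semantics (class and field names here are illustrative, not from mapillary_tools):

    import sys
    import typing as T

    if sys.version_info >= (3, 11):
        from typing import Required
    else:
        from typing_extensions import Required

    class Progress(T.TypedDict, total=False):
        file_type: Required[str]  # must always be present
        upload_md5sum: str        # may be omitted

    ok: Progress = {"file_type": "zip"}       # type-checks: upload_md5sum is optional
    bad: Progress = {"upload_md5sum": "abc"}  # a type checker flags this: file_type missing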
@@ -121,6 +139,7 @@ class EventEmitter:
      def on(self, event: EventName):
          def _wrap(callback):
              self.events.setdefault(event, []).append(callback)
+             return callback

          return _wrap

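The added return callback matters because EventEmitter.on is used as a decorator, and a decorator that returns None rebinds the decorated name to None. A minimal sketch of the usage this fixes, assuming EventEmitter() takes no constructor arguments:

    emitter = EventEmitter()

    @emitter.on("upload_start")
    def log_start(payload):
        print("upload started:", payload)

    # Without "return callback", log_start would be None here;
    # with it, the handler stays callable after registration.
    log_start({"entity_size": 1024})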
@@ -148,30 +167,50 @@ class ZipImageSequence:

          for sequence_uuid, sequence in sequences.items():
              _validate_metadatas(sequence)
-             upload_md5sum = types.update_sequence_md5sum(sequence)
-
              # For atomicity we write into a WIP file and then rename to the final file
              wip_zip_filename = zip_dir.joinpath(
                  f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
              )
-             filename = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
-             zip_filename = zip_dir.joinpath(filename)
-             with wip_file_context(wip_zip_filename, zip_filename) as wip_path:
+             with cls._wip_file_context(wip_zip_filename) as wip_path:
                  with wip_path.open("wb") as wip_fp:
-                     actual_md5sum = cls.zip_sequence_deterministically(sequence, wip_fp)
-                     assert actual_md5sum == upload_md5sum, "md5sum mismatch"
+                     cls.zip_sequence_fp(sequence, wip_fp)
+
+     @classmethod
+     @contextmanager
+     def _wip_file_context(cls, wip_path: Path):
+         try:
+             os.remove(wip_path)
+         except FileNotFoundError:
+             pass
+         try:
+             yield wip_path
+
+             with wip_path.open("rb") as fp:
+                 upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+
+             done_path = wip_path.parent.joinpath(
+                 _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+             )
+
+             try:
+                 os.remove(done_path)
+             except FileNotFoundError:
+                 pass
+             wip_path.rename(done_path)
+         finally:
+             try:
+                 os.remove(wip_path)
+             except FileNotFoundError:
+                 pass

      @classmethod
-     def zip_sequence_deterministically(
+     def zip_sequence_fp(
          cls,
          sequence: T.Sequence[types.ImageMetadata],
          zip_fp: T.IO[bytes],
      ) -> str:
          """
-         Write a sequence of ImageMetadata into the zipfile handle. It should guarantee
-         that the same sequence always produces the same zipfile, because the
-         sequence md5sum will be used to upload the zipfile or resume the upload.
-
+         Write a sequence of ImageMetadata into the zipfile handle.
          The sequence has to be one sequence and sorted.
          """

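_wip_file_context is the classic write-to-temp-then-rename recipe: bytes land under a hidden work-in-progress name and are renamed to a final, content-hash-derived name only once complete, so readers never observe a partial zip. A standalone sketch of the same recipe, with hashlib.md5 standing in for utils.md5sum_fp:

    import hashlib
    import os
    from pathlib import Path

    def write_atomically(data: bytes, out_dir: Path) -> Path:
        wip = out_dir / f".wip_{os.getpid()}"
        try:
            wip.write_bytes(data)
            digest = hashlib.md5(data).hexdigest()
            done = out_dir / f"mly_tools_{digest}.zip"
            wip.rename(done)  # atomic within one filesystem on POSIX
            return done
        finally:
            wip.unlink(missing_ok=True)  # no-op after a successful rename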
@@ -180,21 +219,23 @@ class ZipImageSequence:
                  f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
              )

-         upload_md5sum = types.update_sequence_md5sum(sequence)
+         sequence_md5sum = types.update_sequence_md5sum(sequence)

          with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
              for idx, metadata in enumerate(sequence):
-                 # Use {idx}.jpg (suffix does not matter) as the archive name to ensure the
-                 # resulting zipfile is deterministic. This determinism is based on the upload_md5sum,
-                 # which is derived from a list of image md5sums
-                 cls._write_imagebytes_in_zip(zipf, metadata, arcname=f"{idx}.jpg")
+                 # Arcname should be unique, the name does not matter
+                 arcname = f"{idx}.jpg"
+                 zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+                 zipf.writestr(zipinfo, cls._dump_image_bytes(metadata))
              assert len(sequence) == len(set(zipf.namelist()))
-             zipf.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
+             zipf.comment = json.dumps({"sequence_md5sum": sequence_md5sum}).encode(
+                 "utf-8"
+             )

-         return upload_md5sum
+         return sequence_md5sum

      @classmethod
-     def extract_upload_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+     def extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
          with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
              comment = ziph.comment

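Storing sequence_md5sum in the zip archive comment keeps the checksum inside the artifact itself with no sidecar file, and the fixed date_time=(1980, 1, 1, 0, 0, 0) keeps entry headers stable across runs. A self-contained round-trip of the comment trick:

    import io
    import json
    import zipfile

    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
        zf.writestr(zipfile.ZipInfo("0.jpg", date_time=(1980, 1, 1, 0, 0, 0)), b"...")
        zf.comment = json.dumps({"sequence_md5sum": "abc123"}).encode("utf-8")

    buf.seek(0)
    with zipfile.ZipFile(buf, "r") as zf:
        meta = json.loads(zf.comment.decode("utf-8"))
    assert meta["sequence_md5sum"] == "abc123"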
@@ -209,17 +250,15 @@ class ZipImageSequence:
              except json.JSONDecodeError as ex:
                  raise InvalidMapillaryZipFileError(str(ex)) from ex

-         upload_md5sum = zip_metadata.get("upload_md5sum")
+         sequence_md5sum = zip_metadata.get("sequence_md5sum")

-         if not upload_md5sum and not isinstance(upload_md5sum, str):
-             raise InvalidMapillaryZipFileError("No upload_md5sum found")
+         if not sequence_md5sum and not isinstance(sequence_md5sum, str):
+             raise InvalidMapillaryZipFileError("No sequence_md5sum found")

-         return upload_md5sum
+         return sequence_md5sum

      @classmethod
-     def _write_imagebytes_in_zip(
-         cls, zipf: zipfile.ZipFile, metadata: types.ImageMetadata, arcname: str
-     ):
+     def _dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
          try:
              edit = exif_write.ExifEdit(metadata.filename)
          except struct.error as ex:
@@ -227,24 +266,24 @@ class ZipImageSequence:

          # The cast is to fix the type checker error
          edit.add_image_description(
-             T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
+             T.cast(
+                 T.Dict,
+                 desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata)),
+             )
          )

          try:
-             image_bytes = edit.dump_image_bytes()
+             return edit.dump_image_bytes()
          except struct.error as ex:
              raise ExifError(
                  f"Failed to dump EXIF bytes: {ex}", metadata.filename
              ) from ex

-         zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-         zipf.writestr(zipinfo, image_bytes)
-
      @classmethod
-     def prepare_zipfile_and_upload(
+     def upload_zipfile(
          cls,
-         zip_path: Path,
          uploader: Uploader,
+         zip_path: Path,
          progress: dict[str, T.Any] | None = None,
      ) -> str:
          if progress is None:
@@ -256,30 +295,34 @@ class ZipImageSequence:
              raise InvalidMapillaryZipFileError("Zipfile has no files")

          with zip_path.open("rb") as zip_fp:
-             upload_md5sum = cls.extract_upload_md5sum(zip_fp)
+             sequence_md5sum = cls.extract_sequence_md5sum(zip_fp)

-         sequence_progress: SequenceProgress = {
+         # Send a copy of the input progress to each upload session, to avoid modifying the original one
+         mutable_progress: SequenceProgress = {
+             **T.cast(SequenceProgress, progress),
              "sequence_image_count": len(namelist),
+             "sequence_md5sum": sequence_md5sum,
              "file_type": types.FileType.ZIP.value,
-             "md5sum": upload_md5sum,
          }

-         session_key = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
-
          with zip_path.open("rb") as zip_fp:
-             return uploader.upload_stream(
-                 zip_fp,
-                 upload_api_v4.ClusterFileType.ZIP,
-                 session_key,
-                 # Send the copy of the input progress to each upload session, to avoid modifying the original one
-                 progress=T.cast(T.Dict[str, T.Any], {**progress, **sequence_progress}),
+             file_handle = uploader.upload_stream(
+                 zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
              )

+         cluster_id = uploader.finish_upload(
+             file_handle,
+             api_v4.ClusterFileType.ZIP,
+             progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+         )
+
+         return cluster_id
+
      @classmethod
-     def prepare_images_and_upload(
+     def zip_images_and_upload(
          cls,
-         image_metadatas: T.Sequence[types.ImageMetadata],
          uploader: Uploader,
+         image_metadatas: T.Sequence[types.ImageMetadata],
          progress: dict[str, T.Any] | None = None,
      ) -> T.Generator[tuple[str, UploadResult], None, None]:
          if progress is None:
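upload_zipfile now splits the old single upload_stream call into a two-step flow: upload_stream pushes the bytes and returns an opaque file handle, and finish_upload exchanges that handle plus a ClusterFileType for a cluster id. A sketch of the flow using only names from this diff (token and path are placeholders):

    from pathlib import Path

    uploader = Uploader({"user_upload_token": "TOKEN"}, dry_run=True)

    with Path("sequence.zip").open("rb") as fp:
        file_handle = uploader.upload_stream(fp)

    cluster_id = uploader.finish_upload(file_handle, api_v4.ClusterFileType.ZIP)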
@@ -288,14 +331,6 @@
          sequences = types.group_and_sort_images(image_metadatas)

          for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-             sequence_progress: SequenceProgress = {
-                 "sequence_idx": sequence_idx,
-                 "total_sequence_count": len(sequences),
-                 "sequence_image_count": len(sequence),
-                 "sequence_uuid": sequence_uuid,
-                 "file_type": types.FileType.IMAGE.value,
-             }
-
              try:
                  _validate_metadatas(sequence)
              except Exception as ex:
@@ -304,25 +339,28 @@

              with tempfile.NamedTemporaryFile() as fp:
                  try:
-                     upload_md5sum = cls.zip_sequence_deterministically(sequence, fp)
+                     sequence_md5sum = cls.zip_sequence_fp(sequence, fp)
                  except Exception as ex:
                      yield sequence_uuid, UploadResult(error=ex)
                      continue

-                 sequence_progress["md5sum"] = upload_md5sum
+                 sequence_progress: SequenceProgress = {
+                     "sequence_idx": sequence_idx,
+                     "total_sequence_count": len(sequences),
+                     "sequence_image_count": len(sequence),
+                     "sequence_uuid": sequence_uuid,
+                     "file_type": types.FileType.ZIP.value,
+                     "sequence_md5sum": sequence_md5sum,
+                 }

-                 session_key = _session_key(
-                     upload_md5sum, upload_api_v4.ClusterFileType.ZIP
-                 )
+                 mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}

                  try:
-                     cluster_id = uploader.upload_stream(
-                         fp,
-                         upload_api_v4.ClusterFileType.ZIP,
-                         session_key,
-                         progress=T.cast(
-                             T.Dict[str, T.Any], {**progress, **sequence_progress}
-                         ),
+                     file_handle = uploader.upload_stream(fp, progress=mutable_progress)
+                     cluster_id = uploader.finish_upload(
+                         file_handle,
+                         api_v4.ClusterFileType.ZIP,
+                         progress=mutable_progress,
                      )
                  except Exception as ex:
                      yield sequence_uuid, UploadResult(error=ex)
@@ -330,11 +368,115 @@

                  yield sequence_uuid, UploadResult(result=cluster_id)

+     @classmethod
+     def _upload_sequence(
+         cls,
+         uploader: Uploader,
+         sequence: T.Sequence[types.ImageMetadata],
+         progress: dict[str, T.Any] | None = None,
+     ) -> str:
+         if progress is None:
+             progress = {}
+
+         # FIXME: This is a hack to disable the event emitter inside the uploader
+         uploader_without_emitter = uploader.copy_uploader_without_emitter()
+
+         lock = threading.Lock()
+
+         def _upload_image(image_metadata: types.ImageMetadata) -> str:
+             mutable_progress = {
+                 **(progress or {}),
+                 "filename": str(image_metadata.filename),
+             }
+
+             image_bytes = cls._dump_image_bytes(image_metadata)
+             file_handle = uploader_without_emitter.upload_stream(
+                 io.BytesIO(image_bytes), progress=mutable_progress
+             )
+
+             mutable_progress["chunk_size"] = image_metadata.filesize
+
+             with lock:
+                 uploader.emitter.emit("upload_progress", mutable_progress)
+
+             return file_handle
+
+         _validate_metadatas(sequence)
+
+         progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+
+         # TODO: assert sequence is sorted
+
+         uploader.emitter.emit("upload_start", progress)
+
+         with concurrent.futures.ThreadPoolExecutor(
+             max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
+         ) as executor:
+             image_file_handles = list(executor.map(_upload_image, sequence))
+
+         manifest = {
+             "version": "1",
+             "upload_type": "images",
+             "image_handles": image_file_handles,
+         }
+
+         with io.BytesIO() as manifest_fp:
+             manifest_fp.write(json.dumps(manifest).encode("utf-8"))
+             manifest_fp.seek(0, io.SEEK_SET)
+             manifest_file_handle = uploader_without_emitter.upload_stream(
+                 manifest_fp, session_key=f"{uuid.uuid4().hex}.json"
+             )
+
+         uploader.emitter.emit("upload_end", progress)
+
+         cluster_id = uploader.finish_upload(
+             manifest_file_handle,
+             api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+             progress=progress,
+         )
+
+         return cluster_id
+
+     @classmethod
+     def upload_images(
+         cls,
+         uploader: Uploader,
+         image_metadatas: T.Sequence[types.ImageMetadata],
+         progress: dict[str, T.Any] | None = None,
+     ) -> T.Generator[tuple[str, UploadResult], None, None]:
+         if progress is None:
+             progress = {}
+
+         sequences = types.group_and_sort_images(image_metadatas)
+
+         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+             sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+             sequence_progress: SequenceProgress = {
+                 "sequence_idx": sequence_idx,
+                 "total_sequence_count": len(sequences),
+                 "sequence_image_count": len(sequence),
+                 "sequence_uuid": sequence_uuid,
+                 "file_type": types.FileType.IMAGE.value,
+                 "sequence_md5sum": sequence_md5sum,
+             }
+
+             mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+
+             try:
+                 cluster_id = cls._upload_sequence(
+                     uploader, sequence, progress=mutable_progress
+                 )
+             except Exception as ex:
+                 yield sequence_uuid, UploadResult(error=ex)
+             else:
+                 yield sequence_uuid, UploadResult(result=cluster_id)
+

  class Uploader:
      def __init__(
          self,
-         user_items: types.UserItem,
+         user_items: config.UserItem,
          emitter: EventEmitter | None = None,
          chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
          dry_run=False,
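_upload_sequence fans the per-image uploads out across a thread pool but still needs image_handles in sequence order for the manifest; executor.map guarantees that, yielding results in input order regardless of completion order. A toy demonstration, with fake_upload standing in for upload_stream:

    import concurrent.futures
    import random
    import time

    def fake_upload(name: str) -> str:
        time.sleep(random.random() / 10)  # completes in arbitrary order
        return f"handle-for-{name}"

    images = ["0.jpg", "1.jpg", "2.jpg", "3.jpg"]
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        handles = list(executor.map(fake_upload, images))

    assert handles == [f"handle-for-{n}" for n in images]  # input order kept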
@@ -351,17 +493,25 @@ class Uploader:
      def upload_stream(
          self,
          fp: T.IO[bytes],
-         cluster_filetype: upload_api_v4.ClusterFileType,
-         session_key: str,
+         session_key: str | None = None,
          progress: dict[str, T.Any] | None = None,
      ) -> str:
          if progress is None:
              progress = {}

+         if session_key is None:
+             fp.seek(0, io.SEEK_SET)
+             md5sum = utils.md5sum_fp(fp).hexdigest()
+             filetype = progress.get("file_type")
+             if filetype is not None:
+                 session_key = _session_key(md5sum, types.FileType(filetype))
+             else:
+                 session_key = md5sum
+
          fp.seek(0, io.SEEK_END)
          entity_size = fp.tell()

-         upload_service = self._create_upload_service(session_key, cluster_filetype)
+         upload_service = self._create_upload_service(session_key)

          progress["entity_size"] = entity_size
          progress["chunk_size"] = self.chunk_size
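When session_key is omitted, upload_stream now derives it from the stream's md5, so re-uploading identical content resumes the same upload session rather than starting a fresh one. The hashing step in isolation (this chunked helper is an assumption standing in for utils.md5sum_fp):

    import hashlib
    import io

    def md5sum_fp(fp, chunk_size=1024 * 1024):
        md5 = hashlib.md5()
        for chunk in iter(lambda: fp.read(chunk_size), b""):
            md5.update(chunk)
        return md5

    fp = io.BytesIO(b"same bytes -> same session key -> resumable upload")
    fp.seek(0, io.SEEK_SET)
    session_key = md5sum_fp(fp).hexdigest()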
@@ -384,30 +534,64 @@ class Uploader:

          self.emitter.emit("upload_end", progress)

-         # TODO: retry here
-         cluster_id = self._finish_upload_retryable(upload_service, file_handle)
-         progress["cluster_id"] = cluster_id
+         return file_handle
+
+     def finish_upload(
+         self,
+         file_handle: str,
+         cluster_filetype: api_v4.ClusterFileType,
+         progress: dict[str, T.Any] | None = None,
+     ) -> str:
+         """Finish upload with safe retries guaranteed"""
+         if progress is None:
+             progress = {}

+         if self.dry_run:
+             cluster_id = "0"
+         else:
+             resp = api_v4.finish_upload(
+                 self.user_items["user_upload_token"],
+                 file_handle,
+                 cluster_filetype,
+                 organization_id=self.user_items.get("MAPOrganizationKey"),
+             )
+
+             data = resp.json()
+             cluster_id = data.get("cluster_id")
+
+             # TODO: validate cluster_id
+
+         progress["cluster_id"] = cluster_id
          self.emitter.emit("upload_finished", progress)

          return cluster_id

-     def _create_upload_service(
-         self, session_key: str, cluster_filetype: upload_api_v4.ClusterFileType
-     ) -> upload_api_v4.UploadService:
+     def copy_uploader_without_emitter(self) -> Uploader:
+         return Uploader(
+             self.user_items,
+             emitter=None,
+             chunk_size=self.chunk_size,
+             dry_run=self.dry_run,
+         )
+
+     def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
          upload_service: upload_api_v4.UploadService

          if self.dry_run:
+             upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
              upload_service = upload_api_v4.FakeUploadService(
                  user_access_token=self.user_items["user_upload_token"],
                  session_key=session_key,
-                 cluster_filetype=cluster_filetype,
+                 upload_path=Path(upload_path) if upload_path is not None else None,
+             )
+             LOG.info(
+                 "Dry run mode enabled. Data will be uploaded to %s",
+                 upload_service.upload_path.joinpath(session_key),
              )
          else:
              upload_service = upload_api_v4.UploadService(
                  user_access_token=self.user_items["user_upload_token"],
                  session_key=session_key,
-                 cluster_filetype=cluster_filetype,
              )

          return upload_service
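In dry-run mode data is written to a local directory rather than the API, and MAPILLARY_UPLOAD_ENDPOINT (read via os.getenv above) can redirect where it lands. Illustrative use only, poking the private _create_upload_service for demonstration:

    import os

    os.environ["MAPILLARY_UPLOAD_ENDPOINT"] = "/tmp/mly_uploads"
    uploader = Uploader({"user_upload_token": "TOKEN"}, dry_run=True)
    service = uploader._create_upload_service("mly_tools_abc.zip")
    # service.upload_path is Path("/tmp/mly_uploads"); the LOG.info above
    # reports the final destination /tmp/mly_uploads/mly_tools_abc.zip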
@@ -484,57 +668,14 @@ class Uploader:

          return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)

-     def _finish_upload_retryable(
-         self, upload_service: upload_api_v4.UploadService, file_handle: str
-     ) -> str:
-         """Finish upload with safe retries guraranteed"""
-
-         if self.dry_run:
-             cluster_id = "0"
-         else:
-             resp = api_v4.finish_upload(
-                 self.user_items["user_upload_token"],
-                 file_handle,
-                 upload_service.cluster_filetype,
-                 organization_id=self.user_items.get("MAPOrganizationKey"),
-             )
-
-             data = resp.json()
-             cluster_id = data.get("cluster_id")
-
-             # TODO: validate cluster_id
-
-         return cluster_id
-

  def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
      for metadata in metadatas:
-         types.validate_image_desc(types.as_desc(metadata))
+         validate_image_desc(DescriptionJSONSerializer.as_desc(metadata))
          if not metadata.filename.is_file():
              raise FileNotFoundError(f"No such file {metadata.filename}")


- @contextmanager
- def wip_file_context(wip_path: Path, done_path: Path):
-     assert wip_path != done_path, "should not be the same file"
-     try:
-         os.remove(wip_path)
-     except FileNotFoundError:
-         pass
-     try:
-         yield wip_path
-         try:
-             os.remove(done_path)
-         except FileNotFoundError:
-             pass
-         wip_path.rename(done_path)
-     finally:
-         try:
-             os.remove(wip_path)
-         except FileNotFoundError:
-             pass
-
-
  def _is_immediate_retry(ex: Exception):
      if (
          isinstance(ex, requests.HTTPError)
@@ -568,14 +709,19 @@ def _is_retriable_exception(ex: Exception):
      return False


- _SUFFIX_MAP: dict[upload_api_v4.ClusterFileType, str] = {
-     upload_api_v4.ClusterFileType.ZIP: ".zip",
-     upload_api_v4.ClusterFileType.CAMM: ".mp4",
-     upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
- }
-
-
  def _session_key(
-     upload_md5sum: str, cluster_filetype: upload_api_v4.ClusterFileType
+     upload_md5sum: str, filetype: api_v4.ClusterFileType | types.FileType
  ) -> str:
-     return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[cluster_filetype]}"
+     _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+         api_v4.ClusterFileType.ZIP: ".zip",
+         api_v4.ClusterFileType.CAMM: ".mp4",
+         api_v4.ClusterFileType.BLACKVUE: ".mp4",
+         types.FileType.IMAGE: ".jpg",
+         types.FileType.ZIP: ".zip",
+         types.FileType.BLACKVUE: ".mp4",
+         types.FileType.CAMM: ".mp4",
+         types.FileType.GOPRO: ".mp4",
+         types.FileType.VIDEO: ".mp4",
+     }
+
+     return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[filetype]}"
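
Inlining _SUFFIX_MAP lets _session_key accept plain types.FileType values alongside api_v4.ClusterFileType. By example (hash value illustrative):

    key = _session_key("9e107d9d372bb6826bd81d3542a419d6", types.FileType.ZIP)
    assert key == "mly_tools_9e107d9d372bb6826bd81d3542a419d6.zip"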