mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +66 -263
- mapillary_tools/authenticate.py +46 -38
- mapillary_tools/commands/__main__.py +15 -16
- mapillary_tools/commands/upload.py +33 -4
- mapillary_tools/constants.py +127 -45
- mapillary_tools/exceptions.py +4 -0
- mapillary_tools/exif_read.py +2 -1
- mapillary_tools/exif_write.py +3 -1
- mapillary_tools/geo.py +16 -0
- mapillary_tools/geotag/base.py +6 -2
- mapillary_tools/geotag/factory.py +9 -1
- mapillary_tools/geotag/geotag_images_from_exiftool.py +1 -1
- mapillary_tools/geotag/geotag_images_from_gpx.py +0 -6
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +30 -9
- mapillary_tools/geotag/utils.py +9 -12
- mapillary_tools/geotag/video_extractors/gpx.py +2 -1
- mapillary_tools/geotag/video_extractors/native.py +25 -0
- mapillary_tools/history.py +124 -7
- mapillary_tools/http.py +211 -0
- mapillary_tools/mp4/construct_mp4_parser.py +8 -2
- mapillary_tools/process_geotag_properties.py +31 -27
- mapillary_tools/process_sequence_properties.py +339 -322
- mapillary_tools/sample_video.py +1 -2
- mapillary_tools/serializer/description.py +56 -56
- mapillary_tools/serializer/gpx.py +1 -1
- mapillary_tools/upload.py +201 -205
- mapillary_tools/upload_api_v4.py +57 -47
- mapillary_tools/uploader.py +720 -285
- mapillary_tools/utils.py +57 -5
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +7 -6
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/RECORD +36 -35
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +0 -0
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py
CHANGED
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
 import concurrent.futures
-
 import dataclasses
 import io
 import json
 import logging
 import os
+import queue
 import struct
 import sys
 import tempfile
@@ -25,7 +25,21 @@ else:
 
 import requests
 
-from . import
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    history,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
 from .serializer.description import (
     desc_file_to_exif,
     DescriptionJSONSerializer,
@@ -36,6 +50,25 @@ from .serializer.description import (
 LOG = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass(frozen=True)
+class UploadOptions:
+    user_items: config.UserItem
+    chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+    num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+    dry_run: bool = False
+    nofinish: bool = False
+    noresume: bool = False
+
+    def __post_init__(self):
+        if self.num_upload_workers <= 0:
+            raise ValueError(
+                f"Expect positive num_upload_workers but got {self.num_upload_workers}"
+            )
+
+        if self.chunk_size <= 0:
+            raise ValueError(f"Expect positive chunk_size but got {self.chunk_size}")
+
+
 class UploaderProgress(T.TypedDict, total=True):
     """
     Progress data that Uploader cares about.
@@ -60,7 +93,7 @@ class UploaderProgress(T.TypedDict, total=True):
     # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int
 
-    # An "
+    # An "upload_retrying" will increase it. Reset to 0 if a chunk is uploaded
     retries: int
 
     # Cluster ID after finishing the upload
@@ -92,7 +125,7 @@ class SequenceProgress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str
 
-    # Path to the
+    # Path to the image/video/zip
     import_path: str
 
 
@@ -120,13 +153,43 @@ class InvalidMapillaryZipFileError(SequenceError):
     pass
 
 
+# BELOW demonstrates the pseudocode for a typical upload workflow
+# and when upload events are emitted
+#################################################################
+# def pseudo_upload(metadata):
+#     emit("upload_start")
+#     while True:
+#         try:
+#             if is_sequence(metadata):
+#                 for image in metadata:
+#                     upload_stream(image.read())
+#                     emit("upload_progress")
+#             elif is_video(metadata):
+#                 offset = fetch_offset()
+#                 emit("upload_fetch_offset")
+#                 for chunk in metadata.read()[offset:]:
+#                     upload_stream(chunk)
+#                     emit("upload_progress")
+#         except BaseException as ex:  # Include KeyboardInterrupt
+#             if retryable(ex):
+#                 emit("upload_retrying")
+#                 continue
+#             else:
+#                 emit("upload_failed")
+#                 raise ex
+#         else:
+#             break
+#     emit("upload_end")
+#     finish_upload(data)
+#     emit("upload_finished")
 EventName = T.Literal[
     "upload_start",
     "upload_fetch_offset",
     "upload_progress",
+    "upload_retrying",
     "upload_end",
+    "upload_failed",
     "upload_finished",
-    "upload_interrupted",
 ]
 
 
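The hunk above replaces the old "upload_interrupted" event with "upload_retrying" and "upload_failed" and documents the emission order in pseudocode. As an illustration only (not part of the diff), a consumer could observe the new events roughly as below, assuming EventEmitter and its on() decorator remain importable from mapillary_tools.uploader as in current releases; the handler names and bodies are placeholders:

    from mapillary_tools import uploader

    emitter = uploader.EventEmitter()

    @emitter.on("upload_retrying")
    def _on_retry(payload: dict) -> None:
        # payload carries UploaderProgress fields such as "retries" and "begin_offset"
        print("retrying, attempt", payload.get("retries", 0))

    @emitter.on("upload_failed")
    def _on_failed(payload: dict) -> None:
        print("upload failed for", payload.get("import_path", "unknown"))
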
@@ -154,7 +217,131 @@ class UploadResult:
     error: Exception | None = None
 
 
-class
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+        sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+        for idx, video_metadata in enumerate(sorted_video_metadatas):
+            LOG.debug(f"Checksum for video {video_metadata.filename}...")
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                    if gopro_info is not None:
+                        camm_info.accl = gopro_info.accl or []
+                        camm_info.gyro = gopro_info.gyro or []
+                        camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+        sorted_zip_paths = sorted(zip_paths)
+
+        for idx, zip_path in enumerate(sorted_zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder, will be set in upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
     @classmethod
     def zip_images(
         cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
@@ -173,38 +360,93 @@ class ZipImageSequence:
         )
         with cls._wip_file_context(wip_zip_filename) as wip_path:
             with wip_path.open("wb") as wip_fp:
-                cls.
+                cls._zip_sequence_fp(sequence, wip_fp)
 
     @classmethod
-
-
-
-
-        except FileNotFoundError:
-            pass
-        try:
-            yield wip_path
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-
-
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
 
-
-
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
+        zip_path: Path,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        with zipfile.ZipFile(zip_path) as ziph:
+            namelist = ziph.namelist()
+            if not namelist:
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+        # Send the copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
+            "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
+        }
+
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
             )
 
-
-
-
-
-
-
-
-                os.remove(wip_path)
-            except FileNotFoundError:
-                pass
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
 
     @classmethod
-    def
+    def _zip_sequence_fp(
         cls,
         sequence: T.Sequence[types.ImageMetadata],
         zip_fp: T.IO[bytes],
@@ -219,6 +461,8 @@ class ZipImageSequence:
                 f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
             )
 
+        if sequence:
+            LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
         sequence_md5sum = types.update_sequence_md5sum(sequence)
 
         with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
@@ -226,16 +470,18 @@ class ZipImageSequence:
             # Arcname should be unique, the name does not matter
             arcname = f"{idx}.jpg"
             zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            zipf.writestr(zipinfo,
+            zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
         assert len(sequence) == len(set(zipf.namelist()))
-        zipf.comment = json.dumps(
-            "
-
+        zipf.comment = json.dumps(
+            {"sequence_md5sum": sequence_md5sum},
+            sort_keys=True,
+            separators=(",", ":"),
+        ).encode("utf-8")
 
         return sequence_md5sum
 
     @classmethod
-    def
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
             comment = ziph.comment
 
@@ -258,237 +504,358 @@ class ZipImageSequence:
         return sequence_md5sum
 
     @classmethod
-
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
         try:
-
-        except
-
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
+        try:
+            yield wip_path
 
-
-
-
-
-
+            with wip_path.open("rb") as fp:
+                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+
+            done_path = wip_path.parent.joinpath(
+                _suffix_session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
             )
-            )
 
-
-
-
-
-
-
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
 
-    @classmethod
-    def upload_zipfile(
-        cls,
-        uploader: Uploader,
-        zip_path: Path,
-        progress: dict[str, T.Any] | None = None,
-    ) -> str:
-        if progress is None:
-            progress = {}
 
-
-
-
-
+class ImageSequenceUploader:
+    def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
+        self.upload_options = upload_options
+        self.emitter = emitter
 
-
-
+    def upload_images(
+        self, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-
-
-
-            "sequence_image_count": len(namelist),
-            "sequence_md5sum": sequence_md5sum,
-            "file_type": types.FileType.ZIP.value,
-        }
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
 
-
-
-
+            sequence_progress: SequenceProgress = {
+                "sequence_idx": sequence_idx,
+                "total_sequence_count": len(sequences),
+                "sequence_image_count": len(sequence),
+                "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
+            }
+
+            try:
+                cluster_id = self._upload_sequence_and_finish(
+                    sequence,
+                    sequence_progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    def _upload_sequence_and_finish(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> str:
+        _validate_metadatas(sequence)
+
+        sequence_progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+        self.emitter.emit("upload_start", sequence_progress)
+
+        try:
+            # Retries will be handled in the call (but no upload event emissions)
+            image_file_handles = self._upload_images_parallel(
+                sequence, sequence_progress
             )
+        except BaseException as ex:  # Include KeyboardInterrupt
+            self.emitter.emit("upload_failed", sequence_progress)
+            raise ex
+
+        manifest_file_handle = self._upload_manifest(image_file_handles)
 
+        self.emitter.emit("upload_end", sequence_progress)
+
+        uploader = Uploader(self.upload_options, emitter=self.emitter)
         cluster_id = uploader.finish_upload(
-
-            api_v4.ClusterFileType.
-            progress=
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=sequence_progress,
        )
 
         return cluster_id
 
-
-
-        cls,
-        uploader: Uploader,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
-    ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
+    def _upload_manifest(self, image_file_handles: T.Sequence[str]) -> str:
+        uploader = Uploader(self.upload_options)
 
-
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
 
-
-
-
-
-
-
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
+                )
+            )
+            manifest_fp.seek(0, io.SEEK_SET)
+            return uploader.upload_stream(
+                manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+            )
 
-
-
-
-
-
-
+    def _upload_images_parallel(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> list[str]:
+        if not sequence:
+            return []
 
-
-            "sequence_idx": sequence_idx,
-            "total_sequence_count": len(sequences),
-            "sequence_image_count": len(sequence),
-            "sequence_uuid": sequence_uuid,
-            "file_type": types.FileType.ZIP.value,
-            "sequence_md5sum": sequence_md5sum,
-        }
+        max_workers = min(self.upload_options.num_upload_workers, len(sequence))
 
-
+        # Lock is used to synchronize event emission
+        lock = threading.Lock()
 
-
-
-
-
-
-
-
-
-
-
+        # Push all images into the queue
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]] = queue.Queue()
+        for idx, image_metadata in enumerate(sequence):
+            image_queue.put((idx, image_metadata))
+
+        upload_interrupted = threading.Event()
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(
+                    self._upload_images_from_queue,
+                    image_queue,
+                    lock,
+                    upload_interrupted,
+                    sequence_progress,
+                )
+                for _ in range(max_workers)
+            ]
 
-
+            indexed_image_file_handles = []
 
-
-
-
-
-
-
-    ) -> str:
-        if progress is None:
-            progress = {}
+            try:
+                for future in futures:
+                    indexed_image_file_handles.extend(future.result())
+            except KeyboardInterrupt as ex:
+                upload_interrupted.set()
+                raise ex
 
-        #
-
+        # All tasks should be done here, so below is more like assertion
+        image_queue.join()
+        if sys.version_info >= (3, 13):
+            image_queue.shutdown()
 
-
+        file_handles: list[str] = []
 
-
-
-
-
-
+        indexed_image_file_handles.sort()
+
+        # Important to guarantee the order
+        assert len(indexed_image_file_handles) == len(sequence)
+        for expected_idx, (idx, file_handle) in enumerate(indexed_image_file_handles):
+            assert expected_idx == idx
+            file_handles.append(file_handle)
 
-
-
-
+        return file_handles
+
+    def _upload_images_from_queue(
+        self,
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]],
+        lock: threading.Lock,
+        upload_interrupted: threading.Event,
+        sequence_progress: dict[str, T.Any],
+    ) -> list[tuple[int, str]]:
+        indexed_file_handles = []
+
+        with api_v4.create_user_session(
+            self.upload_options.user_items["user_upload_token"]
+        ) as user_session:
+            single_image_uploader = SingleImageUploader(
+                self.upload_options, user_session=user_session
             )
 
-
-
+            while True:
+                # Assert that all images are already pushed into the queue
+                try:
+                    idx, image_metadata = image_queue.get_nowait()
+                except queue.Empty:
+                    break
+
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
+
+                # Create a new mutatble progress to keep the sequence_progress immutable
+                image_progress = {
+                    **sequence_progress,
+                    "import_path": str(image_metadata.filename),
+                }
 
-
-
+                # image_progress will be updated during uploading
+                file_handle = single_image_uploader.upload(
+                    image_metadata, image_progress
+                )
 
-
+                # Update chunk_size (it was constant if set)
+                image_progress["chunk_size"] = image_metadata.filesize
 
-
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
 
-
+                with lock:
+                    self.emitter.emit("upload_progress", image_progress)
 
-
+                indexed_file_handles.append((idx, file_handle))
 
-
+                image_queue.task_done()
 
-
-            max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
-        ) as executor:
-            image_file_handles = list(executor.map(_upload_image, sequence))
+        return indexed_file_handles
 
-        manifest = {
-            "version": "1",
-            "upload_type": "images",
-            "image_handles": image_file_handles,
-        }
 
-
-
-
-
-
+class SingleImageUploader:
+    def __init__(
+        self,
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
+    ):
+        self.upload_options = upload_options
+        self.user_session = user_session
+        self.cache = self._maybe_create_persistent_cache_instance(
+            self.upload_options.user_items, upload_options
+        )
+
+    def upload(
+        self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+    ) -> str:
+        image_bytes = self.dump_image_bytes(image_metadata)
+
+        uploader = Uploader(self.upload_options, user_session=self.user_session)
+
+        session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
+
+        file_handle = self._get_cached_file_handle(session_key)
+
+        if file_handle is None:
+            # image_progress will be updated during uploading
+            file_handle = uploader.upload_stream(
+                io.BytesIO(image_bytes),
+                session_key=session_key,
+                progress=image_progress,
             )
+            self._set_file_handle_cache(session_key, file_handle)
 
-
+        return file_handle
 
-
-
-
-
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
         )
 
-
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
 
     @classmethod
-    def
-        cls,
-
-
-
-
-
-
+    def _maybe_create_persistent_cache_instance(
+        cls, user_items: config.UserItem, upload_options: UploadOptions
+    ) -> history.PersistentCache | None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
 
-
+        if upload_options.dry_run:
+            LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+            return None
 
-
-
+        cache_path_dir = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+            .joinpath(
+                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+            )
+        )
+        cache_path_dir.mkdir(parents=True, exist_ok=True)
+        cache_path = cache_path_dir.joinpath("cached_file_handles")
+
+        # Sanitize sensitive segments for logging
+        sanitized_cache_path = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath("***")
+            .joinpath("***")
+            .joinpath("cached_file_handles")
+        )
+        LOG.debug(f"File handle cache path: {sanitized_cache_path}")
 
-
-
-            "total_sequence_count": len(sequences),
-            "sequence_image_count": len(sequence),
-            "sequence_uuid": sequence_uuid,
-            "file_type": types.FileType.IMAGE.value,
-            "sequence_md5sum": sequence_md5sum,
-        }
+        cache = history.PersistentCache(str(cache_path.resolve()))
+        cache.clear_expired()
 
-
+        return cache
 
-
-
-
-
-
-
-
-
+    def _get_cached_file_handle(self, key: str) -> str | None:
+        if self.cache is None:
+            return None
+
+        if _is_uuid(key):
+            return None
+
+        return self.cache.get(key)
+
+    def _set_file_handle_cache(self, key: str, value: str) -> None:
+        if self.cache is None:
+            return
+
+        if _is_uuid(key):
+            return
+
+        self.cache.set(key, value)
 
 
 class Uploader:
     def __init__(
         self,
-
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
         emitter: EventEmitter | None = None,
-        chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-        dry_run=False,
     ):
-        self.
+        self.upload_options = upload_options
+        self.user_session = user_session
         if emitter is None:
             # An empty event emitter that does nothing
             self.emitter = EventEmitter()
         else:
             self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
 
     def upload_stream(
         self,
@@ -500,21 +867,13 @@ class Uploader:
             progress = {}
 
         if session_key is None:
-
-            md5sum = utils.md5sum_fp(fp).hexdigest()
-            filetype = progress.get("file_type")
-            if filetype is not None:
-                session_key = _session_key(md5sum, types.FileType(filetype))
-            else:
-                session_key = md5sum
+            session_key = self._gen_session_key(fp, progress)
 
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
 
-        upload_service = self._create_upload_service(session_key)
-
         progress["entity_size"] = entity_size
-        progress["chunk_size"] = self.chunk_size
+        progress["chunk_size"] = self.upload_options.chunk_size
         progress["retries"] = 0
         progress["begin_offset"] = None
 
@@ -522,10 +881,24 @@ class Uploader:
 
         while True:
             try:
-
-
-
-
+                if self.user_session is not None:
+                    file_handle = self._upload_stream_retryable(
+                        self.user_session,
+                        fp,
+                        session_key,
+                        T.cast(UploaderProgress, progress),
+                    )
+                else:
+                    with api_v4.create_user_session(
+                        self.upload_options.user_items["user_upload_token"]
+                    ) as user_session:
+                        file_handle = self._upload_stream_retryable(
+                            user_session,
+                            fp,
+                            session_key,
+                            T.cast(UploaderProgress, progress),
+                        )
+            except BaseException as ex:  # Include KeyboardInterrupt
                 self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
             else:
                 break
@@ -546,97 +919,102 @@ class Uploader:
         if progress is None:
             progress = {}
 
-        if self.dry_run:
+        if self.upload_options.dry_run or self.upload_options.nofinish:
             cluster_id = "0"
         else:
-
-
-
-
-
-
-
-
-
+            organization_id = self.upload_options.user_items.get("MAPOrganizationKey")
+
+            with api_v4.create_user_session(
+                self.upload_options.user_items["user_upload_token"]
+            ) as user_session:
+                resp = api_v4.finish_upload(
+                    user_session,
+                    file_handle,
+                    cluster_filetype,
+                    organization_id=organization_id,
+                )
 
-
+            body = api_v4.jsonify_response(resp)
+            # TODO: Validate cluster_id
+            cluster_id = body.get("cluster_id")
 
         progress["cluster_id"] = cluster_id
         self.emitter.emit("upload_finished", progress)
 
         return cluster_id
 
-    def
-
-
-            emitter=None,
-            chunk_size=self.chunk_size,
-            dry_run=self.dry_run,
-        )
-
-    def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
+    def _create_upload_service(
+        self, user_session: requests.Session, session_key: str
+    ) -> upload_api_v4.UploadService:
         upload_service: upload_api_v4.UploadService
 
-        if self.dry_run:
+        if self.upload_options.dry_run:
             upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
             upload_service = upload_api_v4.FakeUploadService(
-
-                session_key
+                user_session,
+                session_key,
                 upload_path=Path(upload_path) if upload_path is not None else None,
             )
             LOG.info(
-                "Dry
+                "Dry-run mode enabled, uploading to %s",
                 upload_service.upload_path.joinpath(session_key),
             )
         else:
-            upload_service = upload_api_v4.UploadService(
-                user_access_token=self.user_items["user_upload_token"],
-                session_key=session_key,
-            )
+            upload_service = upload_api_v4.UploadService(user_session, session_key)
 
         return upload_service
 
     def _handle_upload_exception(
-        self, ex:
+        self, ex: BaseException, progress: UploaderProgress
     ) -> None:
-        retries = progress
+        retries = progress.get("retries", 0)
         begin_offset = progress.get("begin_offset")
-
+        offset = progress.get("offset")
 
         if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-            self.emitter.emit("
+            self.emitter.emit("upload_retrying", progress)
+
             LOG.warning(
-
-                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                chunk_size,
-                begin_offset,
-                ex.__class__.__name__,
-                str(ex),
+                f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
             )
+
             # Keep things immutable here. Will increment retries in the caller
             retries += 1
-            if
+            if _is_immediate_retriable_exception(ex):
                 sleep_for = 0
             else:
                 sleep_for = min(2**retries, 16)
             LOG.info(
-                "Retrying in
-                sleep_for,
-                retries,
-                constants.MAX_UPLOAD_RETRIES,
+                f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
             )
             if sleep_for:
                 time.sleep(sleep_for)
         else:
+            self.emitter.emit("upload_failed", progress)
             raise ex
 
+    @classmethod
+    def _upload_name(cls, progress: UploaderProgress):
+        # Strictly speaking these sequence properties should not be exposed in this context
+        # TODO: Maybe move these logging statements to event handlers
+        sequence_uuid: str | None = T.cast(
+            T.Union[str, None], progress.get("sequence_uuid")
+        )
+        import_path = T.cast(T.Union[str, None], progress.get("import_path"))
+        if sequence_uuid is not None:
+            if import_path is None:
+                name: str = f"sequence_{sequence_uuid}"
+            else:
+                name = f"sequence_{sequence_uuid}/{Path(import_path).name}"
+        else:
+            name = Path(import_path or "unknown").name
+        return name
+
     def _chunk_with_progress_emitted(
-        self,
-        stream: T.IO[bytes],
-        progress: UploaderProgress,
+        self, stream: T.IO[bytes], progress: UploaderProgress
     ) -> T.Generator[bytes, None, None]:
         for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
-            stream, self.chunk_size
+            stream, self.upload_options.chunk_size
         ):
             yield chunk
 
@@ -649,11 +1027,21 @@ class Uploader:
 
     def _upload_stream_retryable(
         self,
-
+        user_session: requests.Session,
         fp: T.IO[bytes],
-
+        session_key: str,
+        progress: UploaderProgress | None = None,
     ) -> str:
         """Upload the stream with safe retries guraranteed"""
+        if progress is None:
+            progress = T.cast(UploaderProgress, {})
+
+        upload_service = self._create_upload_service(user_session, session_key)
+
+        if "entity_size" not in progress:
+            fp.seek(0, io.SEEK_END)
+            entity_size = fp.tell()
+            progress["entity_size"] = entity_size
 
         begin_offset = upload_service.fetch_offset()
 
@@ -662,11 +1050,39 @@ class Uploader:
 
         self.emitter.emit("upload_fetch_offset", progress)
 
-
+        # Estimate the read timeout
+        if not constants.MIN_UPLOAD_SPEED:
+            read_timeout = None
+        else:
+            remaining_bytes = abs(progress["entity_size"] - begin_offset)
+            read_timeout = max(
+                api_v4.REQUESTS_TIMEOUT,
+                remaining_bytes / constants.MIN_UPLOAD_SPEED,
+            )
 
+        # Upload from begin_offset
+        fp.seek(begin_offset, io.SEEK_SET)
         shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
 
-
+        # Start uploading
+        return upload_service.upload_shifted_chunks(
+            shifted_chunks, begin_offset, read_timeout=read_timeout
+        )
+
+    def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+        if self.upload_options.noresume:
+            # Generate a unique UUID for session_key when noresume is True
+            # to prevent resuming from previous uploads
+            session_key = f"{_prefixed_uuid4()}"
+        else:
+            fp.seek(0, io.SEEK_SET)
+            session_key = utils.md5sum_fp(fp).hexdigest()
+
+        filetype = progress.get("file_type")
+        if filetype is not None:
+            session_key = _suffix_session_key(session_key, types.FileType(filetype))
+
+        return session_key
 
 
 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
@@ -676,7 +1092,7 @@ def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
             raise FileNotFoundError(f"No such file {metadata.filename}")
 
 
-def
+def _is_immediate_retriable_exception(ex: BaseException) -> bool:
     if (
         isinstance(ex, requests.HTTPError)
         and isinstance(ex.response, requests.Response)
@@ -689,8 +1105,10 @@ def _is_immediate_retry(ex: Exception):
         # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
         return resp.get("debug_info", {}).get("retriable", False)
 
+    return False
+
 
-def _is_retriable_exception(ex:
+def _is_retriable_exception(ex: BaseException) -> bool:
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
         return True
 
@@ -709,19 +1127,36 @@ def _is_retriable_exception(ex: Exception):
     return False
 
 
-
-
+_SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+    api_v4.ClusterFileType.ZIP: ".zip",
+    api_v4.ClusterFileType.CAMM: ".mp4",
+    api_v4.ClusterFileType.BLACKVUE: ".mp4",
+    types.FileType.IMAGE: ".jpg",
+    types.FileType.ZIP: ".zip",
+    types.FileType.BLACKVUE: ".mp4",
+    types.FileType.CAMM: ".mp4",
+    types.FileType.GOPRO: ".mp4",
+    types.FileType.VIDEO: ".mp4",
+}
+
+
+def _suffix_session_key(
+    key: str, filetype: api_v4.ClusterFileType | types.FileType
 ) -> str:
-
-
-
-
-
-
-
-
-
-
-    }
-
-    return
+    is_uuid_before = _is_uuid(key)
+
+    key = f"mly_tools_{key}{_SUFFIX_MAP[filetype]}"
+
+    assert _is_uuid(key) is is_uuid_before
+
+    return key
+
+
+def _prefixed_uuid4():
+    prefixed = f"uuid_{uuid.uuid4().hex}"
+    assert _is_uuid(prefixed)
+    return prefixed
+
+
+def _is_uuid(key: str) -> bool:
+    return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")