mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.2__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +66 -263
- mapillary_tools/authenticate.py +47 -39
- mapillary_tools/commands/__main__.py +15 -16
- mapillary_tools/commands/upload.py +33 -4
- mapillary_tools/config.py +5 -0
- mapillary_tools/constants.py +127 -45
- mapillary_tools/exceptions.py +4 -0
- mapillary_tools/exif_read.py +2 -1
- mapillary_tools/exif_write.py +3 -1
- mapillary_tools/geo.py +16 -0
- mapillary_tools/geotag/base.py +6 -2
- mapillary_tools/geotag/factory.py +9 -1
- mapillary_tools/geotag/geotag_images_from_exiftool.py +1 -1
- mapillary_tools/geotag/geotag_images_from_gpx.py +0 -6
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +30 -9
- mapillary_tools/geotag/options.py +4 -1
- mapillary_tools/geotag/utils.py +9 -12
- mapillary_tools/geotag/video_extractors/gpx.py +2 -1
- mapillary_tools/geotag/video_extractors/native.py +25 -0
- mapillary_tools/history.py +124 -7
- mapillary_tools/http.py +211 -0
- mapillary_tools/mp4/construct_mp4_parser.py +8 -2
- mapillary_tools/process_geotag_properties.py +35 -38
- mapillary_tools/process_sequence_properties.py +339 -322
- mapillary_tools/sample_video.py +1 -2
- mapillary_tools/serializer/description.py +68 -58
- mapillary_tools/serializer/gpx.py +1 -1
- mapillary_tools/upload.py +202 -207
- mapillary_tools/upload_api_v4.py +57 -47
- mapillary_tools/uploader.py +728 -285
- mapillary_tools/utils.py +57 -5
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/METADATA +7 -6
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/RECORD +38 -37
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/WHEEL +0 -0
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/licenses/LICENSE +0 -0
- {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py CHANGED
@@ -1,12 +1,12 @@
 from __future__ import annotations
 
 import concurrent.futures
-
 import dataclasses
 import io
 import json
 import logging
 import os
+import queue
 import struct
 import sys
 import tempfile
@@ -25,7 +25,22 @@ else:
 
 import requests
 
-from . import
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    history,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+    VERSION,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
 from .serializer.description import (
     desc_file_to_exif,
     DescriptionJSONSerializer,
@@ -36,6 +51,25 @@ from .serializer.description import (
 LOG = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass(frozen=True)
+class UploadOptions:
+    user_items: config.UserItem
+    chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+    num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+    dry_run: bool = False
+    nofinish: bool = False
+    noresume: bool = False
+
+    def __post_init__(self):
+        if self.num_upload_workers <= 0:
+            raise ValueError(
+                f"Expect positive num_upload_workers but got {self.num_upload_workers}"
+            )
+
+        if self.chunk_size <= 0:
+            raise ValueError(f"Expect positive chunk_size but got {self.chunk_size}")
+
+
 class UploaderProgress(T.TypedDict, total=True):
     """
     Progress data that Uploader cares about.
@@ -60,7 +94,7 @@ class UploaderProgress(T.TypedDict, total=True):
     # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int
 
-    # An "
+    # An "upload_retrying" will increase it. Reset to 0 if a chunk is uploaded
     retries: int
 
     # Cluster ID after finishing the upload
@@ -92,7 +126,7 @@ class SequenceProgress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str
 
-    # Path to the
+    # Path to the image/video/zip
    import_path: str
 
 
@@ -120,13 +154,43 @@ class InvalidMapillaryZipFileError(SequenceError):
     pass
 
 
+# BELOW demonstrates the pseudocode for a typical upload workflow
+# and when upload events are emitted
+#################################################################
+# def pseudo_upload(metadata):
+#     emit("upload_start")
+#     while True:
+#         try:
+#             if is_sequence(metadata):
+#                 for image in metadata:
+#                     upload_stream(image.read())
+#                     emit("upload_progress")
+#             elif is_video(metadata):
+#                 offset = fetch_offset()
+#                 emit("upload_fetch_offset")
+#                 for chunk in metadata.read()[offset:]:
+#                     upload_stream(chunk)
+#                     emit("upload_progress")
+#         except BaseException as ex:  # Include KeyboardInterrupt
+#             if retryable(ex):
+#                 emit("upload_retrying")
+#                 continue
+#             else:
+#                 emit("upload_failed")
+#                 raise ex
+#         else:
+#             break
+#     emit("upload_end")
+#     finish_upload(data)
+#     emit("upload_finished")
 EventName = T.Literal[
     "upload_start",
     "upload_fetch_offset",
     "upload_progress",
+    "upload_retrying",
     "upload_end",
+    "upload_failed",
     "upload_finished",
-    "upload_interrupted",
 ]
 
 
@@ -154,7 +218,131 @@ class UploadResult:
     error: Exception | None = None
 
 
-class
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+        sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+        for idx, video_metadata in enumerate(sorted_video_metadatas):
+            LOG.debug(f"Checksum for video {video_metadata.filename}...")
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                if gopro_info is not None:
+                    camm_info.accl = gopro_info.accl or []
+                    camm_info.gyro = gopro_info.gyro or []
+                    camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+        sorted_zip_paths = sorted(zip_paths)
+
+        for idx, zip_path in enumerate(sorted_zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder, will be set in upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
     @classmethod
     def zip_images(
         cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
@@ -173,38 +361,93 @@ class ZipImageSequence:
         )
         with cls._wip_file_context(wip_zip_filename) as wip_path:
             with wip_path.open("wb") as wip_fp:
-                cls.
+                cls._zip_sequence_fp(sequence, wip_fp)
 
     @classmethod
-
-
-
-
-        except FileNotFoundError:
-            pass
-        try:
-            yield wip_path
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-
-
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
 
-
-
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
+        zip_path: Path,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        with zipfile.ZipFile(zip_path) as ziph:
+            namelist = ziph.namelist()
+            if not namelist:
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+        # Send the copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
+            "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
+        }
+
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
             )
 
-
-
-
-
-
-
-
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
 
     @classmethod
-    def
+    def _zip_sequence_fp(
         cls,
         sequence: T.Sequence[types.ImageMetadata],
         zip_fp: T.IO[bytes],
@@ -219,6 +462,8 @@ class ZipImageSequence:
                 f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
             )
 
+        if sequence:
+            LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
         sequence_md5sum = types.update_sequence_md5sum(sequence)
 
         with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
@@ -226,16 +471,18 @@ class ZipImageSequence:
                 # Arcname should be unique, the name does not matter
                 arcname = f"{idx}.jpg"
                 zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-                zipf.writestr(zipinfo,
+                zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
             assert len(sequence) == len(set(zipf.namelist()))
-            zipf.comment = json.dumps(
-                "
-
+            zipf.comment = json.dumps(
+                {"sequence_md5sum": sequence_md5sum},
+                sort_keys=True,
+                separators=(",", ":"),
+            ).encode("utf-8")
 
         return sequence_md5sum
 
     @classmethod
-    def
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
             comment = ziph.comment
 
@@ -258,237 +505,365 @@ class ZipImageSequence:
         return sequence_md5sum
 
     @classmethod
-
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
         try:
-
-        except
-
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
+        try:
+            yield wip_path
 
-
-
-
-
-
+            with wip_path.open("rb") as fp:
+                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+
+            done_path = wip_path.parent.joinpath(
+                _suffix_session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
             )
-            )
 
-
-
-
-
-
-
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
 
-    @classmethod
-    def upload_zipfile(
-        cls,
-        uploader: Uploader,
-        zip_path: Path,
-        progress: dict[str, T.Any] | None = None,
-    ) -> str:
-        if progress is None:
-            progress = {}
 
-
-
-
-
+class ImageSequenceUploader:
+    def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
+        self.upload_options = upload_options
+        self.emitter = emitter
 
-
-
+    def upload_images(
+        self, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
 
-
-
-
-            "sequence_image_count": len(namelist),
-            "sequence_md5sum": sequence_md5sum,
-            "file_type": types.FileType.ZIP.value,
-        }
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
 
-
-
-
+            sequence_progress: SequenceProgress = {
+                "sequence_idx": sequence_idx,
+                "total_sequence_count": len(sequences),
+                "sequence_image_count": len(sequence),
+                "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
+            }
+
+            try:
+                cluster_id = self._upload_sequence_and_finish(
+                    sequence,
+                    sequence_progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    def _upload_sequence_and_finish(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> str:
+        _validate_metadatas(sequence)
+
+        sequence_progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+        self.emitter.emit("upload_start", sequence_progress)
+
+        try:
+            # Retries will be handled in the call (but no upload event emissions)
+            image_file_handles = self._upload_images_parallel(
+                sequence, sequence_progress
             )
+        except BaseException as ex:  # Include KeyboardInterrupt
+            self.emitter.emit("upload_failed", sequence_progress)
+            raise ex
 
+        manifest_file_handle = self._upload_manifest(image_file_handles)
+
+        self.emitter.emit("upload_end", sequence_progress)
+
+        uploader = Uploader(self.upload_options, emitter=self.emitter)
         cluster_id = uploader.finish_upload(
-
-            api_v4.ClusterFileType.
-            progress=
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=sequence_progress,
         )
 
         return cluster_id
 
-
-
-        cls,
-        uploader: Uploader,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        progress: dict[str, T.Any] | None = None,
-    ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
+    def _upload_manifest(self, image_file_handles: T.Sequence[str]) -> str:
+        uploader = Uploader(self.upload_options)
 
-
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
 
-
-
-
-
-
-
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
+                )
+            )
+            manifest_fp.seek(0, io.SEEK_SET)
+            return uploader.upload_stream(
+                manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+            )
 
-
-
-
-
-
-
+    def _upload_images_parallel(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> list[str]:
+        if not sequence:
+            return []
 
-
-            "sequence_idx": sequence_idx,
-            "total_sequence_count": len(sequences),
-            "sequence_image_count": len(sequence),
-            "sequence_uuid": sequence_uuid,
-            "file_type": types.FileType.ZIP.value,
-            "sequence_md5sum": sequence_md5sum,
-        }
+        max_workers = min(self.upload_options.num_upload_workers, len(sequence))
 
-
+        # Lock is used to synchronize event emission
+        lock = threading.Lock()
 
-
-
-
-
-
-
-
-
-
-
+        # Push all images into the queue
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]] = queue.Queue()
+        for idx, image_metadata in enumerate(sequence):
+            image_queue.put((idx, image_metadata))
+
+        upload_interrupted = threading.Event()
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(
+                    self._upload_images_from_queue,
+                    image_queue,
+                    lock,
+                    upload_interrupted,
+                    sequence_progress,
+                )
+                for _ in range(max_workers)
+            ]
 
-
+            indexed_image_file_handles = []
 
-
-
-
-
-
-
-    ) -> str:
-        if progress is None:
-            progress = {}
+            try:
+                for future in futures:
+                    indexed_image_file_handles.extend(future.result())
+            except KeyboardInterrupt as ex:
+                upload_interrupted.set()
+                raise ex
 
-        #
-
+        # All tasks should be done here, so below is more like assertion
+        image_queue.join()
+        if sys.version_info >= (3, 13):
+            image_queue.shutdown()
 
-
+        file_handles: list[str] = []
 
-
-        mutable_progress = {
-            **(progress or {}),
-            "filename": str(image_metadata.filename),
-        }
+        indexed_image_file_handles.sort()
 
-
-
-
+        # Important to guarantee the order
+        assert len(indexed_image_file_handles) == len(sequence)
+        for expected_idx, (idx, file_handle) in enumerate(indexed_image_file_handles):
+            assert expected_idx == idx
+            file_handles.append(file_handle)
+
+        return file_handles
+
+    def _upload_images_from_queue(
+        self,
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]],
+        lock: threading.Lock,
+        upload_interrupted: threading.Event,
+        sequence_progress: dict[str, T.Any],
+    ) -> list[tuple[int, str]]:
+        indexed_file_handles = []
+
+        with api_v4.create_user_session(
+            self.upload_options.user_items["user_upload_token"]
+        ) as user_session:
+            single_image_uploader = SingleImageUploader(
+                self.upload_options, user_session=user_session
             )
 
-
+            while True:
+                # Assert that all images are already pushed into the queue
+                try:
+                    idx, image_metadata = image_queue.get_nowait()
+                except queue.Empty:
+                    break
+
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
+
+                # Create a new mutatble progress to keep the sequence_progress immutable
+                image_progress = {
+                    **sequence_progress,
+                    "import_path": str(image_metadata.filename),
+                }
 
-
-
+                # image_progress will be updated during uploading
+                file_handle = single_image_uploader.upload(
+                    image_metadata, image_progress
+                )
 
-
+                # Update chunk_size (it was constant if set)
+                image_progress["chunk_size"] = image_metadata.filesize
 
-
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
 
-
+                with lock:
+                    self.emitter.emit("upload_progress", image_progress)
 
-
+                indexed_file_handles.append((idx, file_handle))
 
-
+                image_queue.task_done()
 
-
-            max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
-        ) as executor:
-            image_file_handles = list(executor.map(_upload_image, sequence))
+        return indexed_file_handles
 
-        manifest = {
-            "version": "1",
-            "upload_type": "images",
-            "image_handles": image_file_handles,
-        }
 
-
-
-
-
-
+class SingleImageUploader:
+    def __init__(
+        self,
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
+    ):
+        self.upload_options = upload_options
+        self.user_session = user_session
+        self.cache = self._maybe_create_persistent_cache_instance(
+            self.upload_options.user_items, upload_options
+        )
+
+    def upload(
+        self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+    ) -> str:
+        image_bytes = self.dump_image_bytes(image_metadata)
+
+        uploader = Uploader(self.upload_options, user_session=self.user_session)
+
+        session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
+
+        file_handle = self._get_cached_file_handle(session_key)
+
+        if file_handle is None:
+            # image_progress will be updated during uploading
+            file_handle = uploader.upload_stream(
+                io.BytesIO(image_bytes),
+                session_key=session_key,
+                progress=image_progress,
             )
+            self._set_file_handle_cache(session_key, file_handle)
 
-
+        return file_handle
 
-
-
-
-
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
         )
 
-
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
 
     @classmethod
-    def
-        cls,
-
-
-
-
-
-
+    def _maybe_create_persistent_cache_instance(
+        cls, user_items: config.UserItem, upload_options: UploadOptions
+    ) -> history.PersistentCache | None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
+
+        if upload_options.dry_run:
+            LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+            return None
+
+        # Different python/CLI versions use different cache (dbm) formats.
+        # Separate them to avoid conflicts
+        py_version_parts = [str(part) for part in sys.version_info[:3]]
+        version = f"py_{'_'.join(py_version_parts)}_{VERSION}"
+
+        cache_path_dir = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(version)
+            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+            .joinpath(
+                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+            )
+        )
+        cache_path_dir.mkdir(parents=True, exist_ok=True)
+        cache_path = cache_path_dir.joinpath("cached_file_handles")
+
+        # Sanitize sensitive segments for logging
+        sanitized_cache_path = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(version)
+            .joinpath("***")
+            .joinpath("***")
+            .joinpath("cached_file_handles")
+        )
+        LOG.debug(f"File handle cache path: {sanitized_cache_path}")
 
-
+        cache = history.PersistentCache(str(cache_path.resolve()))
+        cache.clear_expired()
 
-
-        sequence_md5sum = types.update_sequence_md5sum(sequence)
+        return cache
 
-
-
-
-            "sequence_image_count": len(sequence),
-            "sequence_uuid": sequence_uuid,
-            "file_type": types.FileType.IMAGE.value,
-            "sequence_md5sum": sequence_md5sum,
-        }
+    def _get_cached_file_handle(self, key: str) -> str | None:
+        if self.cache is None:
+            return None
 
-
+        if _is_uuid(key):
+            return None
 
-
-
-
-
-
-
-
-
+        return self.cache.get(key)
+
+    def _set_file_handle_cache(self, key: str, value: str) -> None:
+        if self.cache is None:
+            return
+
+        if _is_uuid(key):
+            return
+
+        self.cache.set(key, value)
 
 
 class Uploader:
     def __init__(
         self,
-
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
         emitter: EventEmitter | None = None,
-        chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-        dry_run=False,
     ):
-        self.
+        self.upload_options = upload_options
+        self.user_session = user_session
         if emitter is None:
             # An empty event emitter that does nothing
             self.emitter = EventEmitter()
         else:
             self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
 
     def upload_stream(
         self,
@@ -500,21 +875,13 @@ class Uploader:
             progress = {}
 
         if session_key is None:
-
-            md5sum = utils.md5sum_fp(fp).hexdigest()
-            filetype = progress.get("file_type")
-            if filetype is not None:
-                session_key = _session_key(md5sum, types.FileType(filetype))
-            else:
-                session_key = md5sum
+            session_key = self._gen_session_key(fp, progress)
 
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
 
-        upload_service = self._create_upload_service(session_key)
-
         progress["entity_size"] = entity_size
-        progress["chunk_size"] = self.chunk_size
+        progress["chunk_size"] = self.upload_options.chunk_size
         progress["retries"] = 0
         progress["begin_offset"] = None
 
@@ -522,10 +889,24 @@
 
         while True:
             try:
-
-
-
-
+                if self.user_session is not None:
+                    file_handle = self._upload_stream_retryable(
+                        self.user_session,
+                        fp,
+                        session_key,
+                        T.cast(UploaderProgress, progress),
+                    )
+                else:
+                    with api_v4.create_user_session(
+                        self.upload_options.user_items["user_upload_token"]
+                    ) as user_session:
+                        file_handle = self._upload_stream_retryable(
+                            user_session,
+                            fp,
+                            session_key,
+                            T.cast(UploaderProgress, progress),
+                        )
+            except BaseException as ex:  # Include KeyboardInterrupt
                 self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
             else:
                 break
@@ -546,97 +927,102 @@ class Uploader:
         if progress is None:
             progress = {}
 
-        if self.dry_run:
+        if self.upload_options.dry_run or self.upload_options.nofinish:
             cluster_id = "0"
         else:
-
-
-
-
-
-
-
-
-
+            organization_id = self.upload_options.user_items.get("MAPOrganizationKey")
+
+            with api_v4.create_user_session(
+                self.upload_options.user_items["user_upload_token"]
+            ) as user_session:
+                resp = api_v4.finish_upload(
+                    user_session,
+                    file_handle,
+                    cluster_filetype,
+                    organization_id=organization_id,
+                )
 
-
+            body = api_v4.jsonify_response(resp)
+            # TODO: Validate cluster_id
+            cluster_id = body.get("cluster_id")
 
         progress["cluster_id"] = cluster_id
         self.emitter.emit("upload_finished", progress)
 
         return cluster_id
 
-    def
-
-
-            emitter=None,
-            chunk_size=self.chunk_size,
-            dry_run=self.dry_run,
-        )
-
-    def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
+    def _create_upload_service(
+        self, user_session: requests.Session, session_key: str
+    ) -> upload_api_v4.UploadService:
         upload_service: upload_api_v4.UploadService
 
-        if self.dry_run:
+        if self.upload_options.dry_run:
             upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
             upload_service = upload_api_v4.FakeUploadService(
-
-                session_key
+                user_session,
+                session_key,
                 upload_path=Path(upload_path) if upload_path is not None else None,
             )
             LOG.info(
-                "Dry
+                "Dry-run mode enabled, uploading to %s",
                 upload_service.upload_path.joinpath(session_key),
             )
         else:
-            upload_service = upload_api_v4.UploadService(
-                user_access_token=self.user_items["user_upload_token"],
-                session_key=session_key,
-            )
+            upload_service = upload_api_v4.UploadService(user_session, session_key)
 
         return upload_service
 
     def _handle_upload_exception(
-        self, ex:
+        self, ex: BaseException, progress: UploaderProgress
     ) -> None:
-        retries = progress
+        retries = progress.get("retries", 0)
         begin_offset = progress.get("begin_offset")
-
+        offset = progress.get("offset")
 
         if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-            self.emitter.emit("
+            self.emitter.emit("upload_retrying", progress)
+
             LOG.warning(
-
-                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                chunk_size,
-                begin_offset,
-                ex.__class__.__name__,
-                str(ex),
+                f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
             )
+
            # Keep things immutable here. Will increment retries in the caller
            retries += 1
-            if
+            if _is_immediate_retriable_exception(ex):
                sleep_for = 0
            else:
                sleep_for = min(2**retries, 16)
            LOG.info(
-                "Retrying in
-                sleep_for,
-                retries,
-                constants.MAX_UPLOAD_RETRIES,
+                f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
            )
            if sleep_for:
                time.sleep(sleep_for)
        else:
+            self.emitter.emit("upload_failed", progress)
            raise ex
 
+    @classmethod
+    def _upload_name(cls, progress: UploaderProgress):
+        # Strictly speaking these sequence properties should not be exposed in this context
+        # TODO: Maybe move these logging statements to event handlers
+        sequence_uuid: str | None = T.cast(
+            T.Union[str, None], progress.get("sequence_uuid")
+        )
+        import_path = T.cast(T.Union[str, None], progress.get("import_path"))
+        if sequence_uuid is not None:
+            if import_path is None:
+                name: str = f"sequence_{sequence_uuid}"
+            else:
+                name = f"sequence_{sequence_uuid}/{Path(import_path).name}"
+        else:
+            name = Path(import_path or "unknown").name
+        return name
+
     def _chunk_with_progress_emitted(
-        self,
-        stream: T.IO[bytes],
-        progress: UploaderProgress,
+        self, stream: T.IO[bytes], progress: UploaderProgress
     ) -> T.Generator[bytes, None, None]:
         for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
-            stream, self.chunk_size
+            stream, self.upload_options.chunk_size
         ):
             yield chunk
 
@@ -649,11 +1035,21 @@ class Uploader:
 
     def _upload_stream_retryable(
         self,
-
+        user_session: requests.Session,
         fp: T.IO[bytes],
-
+        session_key: str,
+        progress: UploaderProgress | None = None,
     ) -> str:
         """Upload the stream with safe retries guraranteed"""
+        if progress is None:
+            progress = T.cast(UploaderProgress, {})
+
+        upload_service = self._create_upload_service(user_session, session_key)
+
+        if "entity_size" not in progress:
+            fp.seek(0, io.SEEK_END)
+            entity_size = fp.tell()
+            progress["entity_size"] = entity_size
 
         begin_offset = upload_service.fetch_offset()
 
@@ -662,11 +1058,39 @@
 
         self.emitter.emit("upload_fetch_offset", progress)
 
-
+        # Estimate the read timeout
+        if not constants.MIN_UPLOAD_SPEED:
+            read_timeout = None
+        else:
+            remaining_bytes = abs(progress["entity_size"] - begin_offset)
+            read_timeout = max(
+                api_v4.REQUESTS_TIMEOUT,
+                remaining_bytes / constants.MIN_UPLOAD_SPEED,
+            )
 
+        # Upload from begin_offset
+        fp.seek(begin_offset, io.SEEK_SET)
         shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
 
-
+        # Start uploading
+        return upload_service.upload_shifted_chunks(
+            shifted_chunks, begin_offset, read_timeout=read_timeout
+        )
+
+    def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+        if self.upload_options.noresume:
+            # Generate a unique UUID for session_key when noresume is True
+            # to prevent resuming from previous uploads
+            session_key = f"{_prefixed_uuid4()}"
+        else:
+            fp.seek(0, io.SEEK_SET)
+            session_key = utils.md5sum_fp(fp).hexdigest()
+
+        filetype = progress.get("file_type")
+        if filetype is not None:
+            session_key = _suffix_session_key(session_key, types.FileType(filetype))
+
+        return session_key
 
 
 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
@@ -676,7 +1100,7 @@ def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
             raise FileNotFoundError(f"No such file {metadata.filename}")
 
 
-def
+def _is_immediate_retriable_exception(ex: BaseException) -> bool:
     if (
         isinstance(ex, requests.HTTPError)
         and isinstance(ex.response, requests.Response)
@@ -689,8 +1113,10 @@ def _is_immediate_retry(ex: Exception):
        # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
        return resp.get("debug_info", {}).get("retriable", False)
 
+    return False
+
 
-def _is_retriable_exception(ex:
+def _is_retriable_exception(ex: BaseException) -> bool:
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
         return True
 
@@ -709,19 +1135,36 @@ def _is_retriable_exception(ex: Exception):
     return False
 
 
-
-
+_SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+    api_v4.ClusterFileType.ZIP: ".zip",
+    api_v4.ClusterFileType.CAMM: ".mp4",
+    api_v4.ClusterFileType.BLACKVUE: ".mp4",
+    types.FileType.IMAGE: ".jpg",
+    types.FileType.ZIP: ".zip",
+    types.FileType.BLACKVUE: ".mp4",
+    types.FileType.CAMM: ".mp4",
+    types.FileType.GOPRO: ".mp4",
+    types.FileType.VIDEO: ".mp4",
+}
+
+
+def _suffix_session_key(
+    key: str, filetype: api_v4.ClusterFileType | types.FileType
 ) -> str:
-
-
-
-
-
-
-
-
-
-
-    }
-
-    return
+    is_uuid_before = _is_uuid(key)
+
+    key = f"mly_tools_{key}{_SUFFIX_MAP[filetype]}"
+
+    assert _is_uuid(key) is is_uuid_before
+
+    return key
+
+
+def _prefixed_uuid4():
+    prefixed = f"uuid_{uuid.uuid4().hex}"
+    assert _is_uuid(prefixed)
+    return prefixed
+
+
+def _is_uuid(key: str) -> bool:
+    return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")