mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +66 -262
- mapillary_tools/authenticate.py +54 -46
- mapillary_tools/blackvue_parser.py +79 -22
- mapillary_tools/commands/__main__.py +15 -16
- mapillary_tools/commands/upload.py +33 -4
- mapillary_tools/config.py +38 -17
- mapillary_tools/constants.py +127 -43
- mapillary_tools/exceptions.py +4 -0
- mapillary_tools/exif_read.py +2 -1
- mapillary_tools/exif_write.py +3 -1
- mapillary_tools/exiftool_read_video.py +52 -15
- mapillary_tools/exiftool_runner.py +4 -24
- mapillary_tools/ffmpeg.py +406 -232
- mapillary_tools/geo.py +16 -0
- mapillary_tools/geotag/__init__.py +0 -0
- mapillary_tools/geotag/base.py +8 -4
- mapillary_tools/geotag/factory.py +106 -89
- mapillary_tools/geotag/geotag_images_from_exiftool.py +27 -20
- mapillary_tools/geotag/geotag_images_from_gpx.py +7 -6
- mapillary_tools/geotag/geotag_images_from_video.py +35 -0
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +61 -14
- mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
- mapillary_tools/geotag/options.py +25 -3
- mapillary_tools/geotag/utils.py +9 -12
- mapillary_tools/geotag/video_extractors/base.py +1 -1
- mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
- mapillary_tools/geotag/video_extractors/gpx.py +61 -70
- mapillary_tools/geotag/video_extractors/native.py +34 -31
- mapillary_tools/history.py +128 -8
- mapillary_tools/http.py +211 -0
- mapillary_tools/mp4/construct_mp4_parser.py +8 -2
- mapillary_tools/process_geotag_properties.py +47 -35
- mapillary_tools/process_sequence_properties.py +340 -325
- mapillary_tools/sample_video.py +8 -8
- mapillary_tools/serializer/description.py +587 -0
- mapillary_tools/serializer/gpx.py +132 -0
- mapillary_tools/types.py +44 -610
- mapillary_tools/upload.py +327 -352
- mapillary_tools/upload_api_v4.py +125 -72
- mapillary_tools/uploader.py +797 -216
- mapillary_tools/utils.py +57 -5
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +91 -34
- mapillary_tools-0.14.1.dist-info/RECORD +76 -0
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +1 -1
- mapillary_tools-0.14.0a2.dist-info/RECORD +0 -72
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py
CHANGED
@@ -1,12 +1,16 @@
 from __future__ import annotations
 
+import concurrent.futures
 import dataclasses
 import io
 import json
 import logging
 import os
+import queue
 import struct
+import sys
 import tempfile
+import threading
 import time
 import typing as T
 import uuid
@@ -14,14 +18,57 @@ import zipfile
 from contextlib import contextmanager
 from pathlib import Path
 
+if sys.version_info >= (3, 11):
+    from typing import Required
+else:
+    from typing_extensions import Required
+
 import requests
 
-from . import
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    history,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
+from .serializer.description import (
+    desc_file_to_exif,
+    DescriptionJSONSerializer,
+    validate_image_desc,
+)
 
 
 LOG = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass(frozen=True)
+class UploadOptions:
+    user_items: config.UserItem
+    chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+    num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+    dry_run: bool = False
+    nofinish: bool = False
+    noresume: bool = False
+
+    def __post_init__(self):
+        if self.num_upload_workers <= 0:
+            raise ValueError(
+                f"Expect positive num_upload_workers but got {self.num_upload_workers}"
+            )
+
+        if self.chunk_size <= 0:
+            raise ValueError(f"Expect positive chunk_size but got {self.chunk_size}")
+
+
 class UploaderProgress(T.TypedDict, total=True):
     """
     Progress data that Uploader cares about.
@@ -46,7 +93,7 @@ class UploaderProgress(T.TypedDict, total=True):
     # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int
 
-    # An "
+    # An "upload_retrying" will increase it. Reset to 0 if a chunk is uploaded
     retries: int
 
     # Cluster ID after finishing the upload
@@ -56,17 +103,21 @@ class UploaderProgress(T.TypedDict, total=True):
 class SequenceProgress(T.TypedDict, total=False):
     """Progress data at sequence level"""
 
-    #
-
+    # Used to check if it is uploaded or not
+    sequence_md5sum: Required[str]
+
+    # Used to resume from the previous upload,
+    # so it has to an unique identifier (hash) of the upload content
+    upload_md5sum: str
 
     # File type
-    file_type: str
+    file_type: Required[str]
 
     # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
-    total_sequence_count: int
+    total_sequence_count: Required[int]
 
     # 0-based nth sequence. It is always 0 when uploading Zipfile/BlackVue/CAMM
-    sequence_idx: int
+    sequence_idx: Required[int]
 
     # How many images in the sequence. It's available only when uploading directories/Zipfiles
     sequence_image_count: int
@@ -74,7 +125,7 @@ class SequenceProgress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str
 
-    # Path to the
+    # Path to the image/video/zip
    import_path: str
 
 
@@ -102,13 +153,43 @@ class InvalidMapillaryZipFileError(SequenceError):
     pass
 
 
+# BELOW demonstrates the pseudocode for a typical upload workflow
+# and when upload events are emitted
+#################################################################
+# def pseudo_upload(metadata):
+#     emit("upload_start")
+#     while True:
+#         try:
+#             if is_sequence(metadata):
+#                 for image in metadata:
+#                     upload_stream(image.read())
+#                     emit("upload_progress")
+#             elif is_video(metadata):
+#                 offset = fetch_offset()
+#                 emit("upload_fetch_offset")
+#                 for chunk in metadata.read()[offset:]:
+#                     upload_stream(chunk)
+#                     emit("upload_progress")
+#         except BaseException as ex:  # Include KeyboardInterrupt
+#             if retryable(ex):
+#                 emit("upload_retrying")
+#                 continue
+#             else:
+#                 emit("upload_failed")
+#                 raise ex
+#         else:
+#             break
+#     emit("upload_end")
+#     finish_upload(data)
+#     emit("upload_finished")
 EventName = T.Literal[
     "upload_start",
     "upload_fetch_offset",
     "upload_progress",
+    "upload_retrying",
     "upload_end",
+    "upload_failed",
     "upload_finished",
-    "upload_interrupted",
 ]
 
 
@@ -121,6 +202,7 @@ class EventEmitter:
     def on(self, event: EventName):
         def _wrap(callback):
             self.events.setdefault(event, []).append(callback)
+            return callback
 
         return _wrap
 
@@ -135,7 +217,131 @@ class UploadResult:
     error: Exception | None = None
 
 
-class
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+        sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+        for idx, video_metadata in enumerate(sorted_video_metadatas):
+            LOG.debug(f"Checksum for video {video_metadata.filename}...")
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                if gopro_info is not None:
+                    camm_info.accl = gopro_info.accl or []
+                    camm_info.gyro = gopro_info.gyro or []
+                    camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+        sorted_zip_paths = sorted(zip_paths)
+
+        for idx, zip_path in enumerate(sorted_zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder, will be set in upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
     @classmethod
     def zip_images(
         cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
@@ -148,30 +354,105 @@ class ZipImageSequence:
 
         for sequence_uuid, sequence in sequences.items():
             _validate_metadatas(sequence)
-            upload_md5sum = types.update_sequence_md5sum(sequence)
-
             # For atomicity we write into a WIP file and then rename to the final file
             wip_zip_filename = zip_dir.joinpath(
                 f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
             )
-
-            zip_filename = zip_dir.joinpath(filename)
-            with wip_file_context(wip_zip_filename, zip_filename) as wip_path:
+            with cls._wip_file_context(wip_zip_filename) as wip_path:
                 with wip_path.open("wb") as wip_fp:
-
-
+                    cls._zip_sequence_fp(sequence, wip_fp)
+
+    @classmethod
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
+
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
+
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
+        zip_path: Path,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        with zipfile.ZipFile(zip_path) as ziph:
+            namelist = ziph.namelist()
+            if not namelist:
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)
+
+        # Send the copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
+            "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
+        }
+
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+            )
+
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
 
     @classmethod
-    def
+    def _zip_sequence_fp(
         cls,
         sequence: T.Sequence[types.ImageMetadata],
         zip_fp: T.IO[bytes],
     ) -> str:
         """
-        Write a sequence of ImageMetadata into the zipfile handle.
-        that the same sequence always produces the same zipfile, because the
-        sequence md5sum will be used to upload the zipfile or resume the upload.
-
+        Write a sequence of ImageMetadata into the zipfile handle.
         The sequence has to be one sequence and sorted.
         """
 
@@ -180,21 +461,27 @@ class ZipImageSequence:
                 f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
             )
 
-
+        if sequence:
+            LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
+        sequence_md5sum = types.update_sequence_md5sum(sequence)
 
         with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
             for idx, metadata in enumerate(sequence):
-                #
-
-
-
+                # Arcname should be unique, the name does not matter
+                arcname = f"{idx}.jpg"
+                zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+                zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
             assert len(sequence) == len(set(zipf.namelist()))
-            zipf.comment = json.dumps(
+            zipf.comment = json.dumps(
+                {"sequence_md5sum": sequence_md5sum},
+                sort_keys=True,
+                separators=(",", ":"),
+            ).encode("utf-8")
 
-        return
+        return sequence_md5sum
 
     @classmethod
-    def
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
             comment = ziph.comment
 
@@ -209,162 +496,384 @@ class ZipImageSequence:
         except json.JSONDecodeError as ex:
             raise InvalidMapillaryZipFileError(str(ex)) from ex
 
-
+        sequence_md5sum = zip_metadata.get("sequence_md5sum")
 
-        if not
-            raise InvalidMapillaryZipFileError("No
+        if not sequence_md5sum and not isinstance(sequence_md5sum, str):
+            raise InvalidMapillaryZipFileError("No sequence_md5sum found")
 
-        return
+        return sequence_md5sum
 
     @classmethod
-
-
-    ):
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
         try:
-
-        except
-
-
-        # The cast is to fix the type checker error
-        edit.add_image_description(
-            T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
-        )
-
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
         try:
-
-        except struct.error as ex:
-            raise ExifError(
-                f"Failed to dump EXIF bytes: {ex}", metadata.filename
-            ) from ex
-
-            zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            zipf.writestr(zipinfo, image_bytes)
-
-    @classmethod
-    def prepare_zipfile_and_upload(
-        cls,
-        zip_path: Path,
-        uploader: Uploader,
-        progress: dict[str, T.Any] | None = None,
-    ) -> str:
-        if progress is None:
-            progress = {}
+            yield wip_path
 
-
-
-            if not namelist:
-                raise InvalidMapillaryZipFileError("Zipfile has no files")
+            with wip_path.open("rb") as fp:
+                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
 
-
-
+            done_path = wip_path.parent.joinpath(
+                _suffix_session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+            )
 
-
-
-
-
-
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
 
-        session_key = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
 
-
-
-
-
-            session_key,
-            # Send the copy of the input progress to each upload session, to avoid modifying the original one
-            progress=T.cast(T.Dict[str, T.Any], {**progress, **sequence_progress}),
-        )
+class ImageSequenceUploader:
+    def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
+        self.upload_options = upload_options
+        self.emitter = emitter
 
-
-
-        cls,
-        image_metadatas: T.Sequence[types.ImageMetadata],
-        uploader: Uploader,
-        progress: dict[str, T.Any] | None = None,
+    def upload_images(
+        self, image_metadatas: T.Sequence[types.ImageMetadata]
     ) -> T.Generator[tuple[str, UploadResult], None, None]:
-        if progress is None:
-            progress = {}
-
         sequences = types.group_and_sort_images(image_metadatas)
 
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
+
             sequence_progress: SequenceProgress = {
                 "sequence_idx": sequence_idx,
                 "total_sequence_count": len(sequences),
                 "sequence_image_count": len(sequence),
                 "sequence_uuid": sequence_uuid,
                 "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
             }
 
             try:
-
+                cluster_id = self._upload_sequence_and_finish(
+                    sequence,
+                    sequence_progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
             except Exception as ex:
                 yield sequence_uuid, UploadResult(error=ex)
-
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
 
-
-
-
-
-
-
+    def _upload_sequence_and_finish(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> str:
+        _validate_metadatas(sequence)
+
+        sequence_progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+        self.emitter.emit("upload_start", sequence_progress)
+
+        try:
+            # Retries will be handled in the call (but no upload event emissions)
+            image_file_handles = self._upload_images_parallel(
+                sequence, sequence_progress
+            )
+        except BaseException as ex:  # Include KeyboardInterrupt
+            self.emitter.emit("upload_failed", sequence_progress)
+            raise ex
+
+        manifest_file_handle = self._upload_manifest(image_file_handles)
+
+        self.emitter.emit("upload_end", sequence_progress)
+
+        uploader = Uploader(self.upload_options, emitter=self.emitter)
+        cluster_id = uploader.finish_upload(
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=sequence_progress,
+        )
+
+        return cluster_id
+
+    def _upload_manifest(self, image_file_handles: T.Sequence[str]) -> str:
+        uploader = Uploader(self.upload_options)
 
-
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
+
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
+                )
+            )
+            manifest_fp.seek(0, io.SEEK_SET)
+            return uploader.upload_stream(
+                manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+            )
 
-
-
+    def _upload_images_parallel(
+        self,
+        sequence: T.Sequence[types.ImageMetadata],
+        sequence_progress: dict[str, T.Any],
+    ) -> list[str]:
+        if not sequence:
+            return []
+
+        max_workers = min(self.upload_options.num_upload_workers, len(sequence))
+
+        # Lock is used to synchronize event emission
+        lock = threading.Lock()
+
+        # Push all images into the queue
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]] = queue.Queue()
+        for idx, image_metadata in enumerate(sequence):
+            image_queue.put((idx, image_metadata))
+
+        upload_interrupted = threading.Event()
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            futures = [
+                executor.submit(
+                    self._upload_images_from_queue,
+                    image_queue,
+                    lock,
+                    upload_interrupted,
+                    sequence_progress,
                 )
+                for _ in range(max_workers)
+            ]
+
+            indexed_image_file_handles = []
+
+            try:
+                for future in futures:
+                    indexed_image_file_handles.extend(future.result())
+            except KeyboardInterrupt as ex:
+                upload_interrupted.set()
+                raise ex
+
+        # All tasks should be done here, so below is more like assertion
+        image_queue.join()
+        if sys.version_info >= (3, 13):
+            image_queue.shutdown()
+
+        file_handles: list[str] = []
+
+        indexed_image_file_handles.sort()
+
+        # Important to guarantee the order
+        assert len(indexed_image_file_handles) == len(sequence)
+        for expected_idx, (idx, file_handle) in enumerate(indexed_image_file_handles):
+            assert expected_idx == idx
+            file_handles.append(file_handle)
 
+        return file_handles
+
+    def _upload_images_from_queue(
+        self,
+        image_queue: queue.Queue[tuple[int, types.ImageMetadata]],
+        lock: threading.Lock,
+        upload_interrupted: threading.Event,
+        sequence_progress: dict[str, T.Any],
+    ) -> list[tuple[int, str]]:
+        indexed_file_handles = []
+
+        with api_v4.create_user_session(
+            self.upload_options.user_items["user_upload_token"]
+        ) as user_session:
+            single_image_uploader = SingleImageUploader(
+                self.upload_options, user_session=user_session
+            )
+
+            while True:
+                # Assert that all images are already pushed into the queue
                 try:
-
-
-
-
-
-
-
-
-
-
-
+                    idx, image_metadata = image_queue.get_nowait()
+                except queue.Empty:
+                    break
+
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
+
+                # Create a new mutatble progress to keep the sequence_progress immutable
+                image_progress = {
+                    **sequence_progress,
+                    "import_path": str(image_metadata.filename),
+                }
+
+                # image_progress will be updated during uploading
+                file_handle = single_image_uploader.upload(
+                    image_metadata, image_progress
+                )
 
-
+                # Update chunk_size (it was constant if set)
+                image_progress["chunk_size"] = image_metadata.filesize
+
+                # Main thread will handle the interruption
+                if upload_interrupted.is_set():
+                    break
+
+                with lock:
+                    self.emitter.emit("upload_progress", image_progress)
+
+                indexed_file_handles.append((idx, file_handle))
+
+                image_queue.task_done()
+
+        return indexed_file_handles
+
+
+class SingleImageUploader:
+    def __init__(
+        self,
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
+    ):
+        self.upload_options = upload_options
+        self.user_session = user_session
+        self.cache = self._maybe_create_persistent_cache_instance(
+            self.upload_options.user_items, upload_options
+        )
+
+    def upload(
+        self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+    ) -> str:
+        image_bytes = self.dump_image_bytes(image_metadata)
+
+        uploader = Uploader(self.upload_options, user_session=self.user_session)
+
+        session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
+
+        file_handle = self._get_cached_file_handle(session_key)
+
+        if file_handle is None:
+            # image_progress will be updated during uploading
+            file_handle = uploader.upload_stream(
+                io.BytesIO(image_bytes),
+                session_key=session_key,
+                progress=image_progress,
+            )
+            self._set_file_handle_cache(session_key, file_handle)
+
+        return file_handle
+
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
+        )
+
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
+
+    @classmethod
+    def _maybe_create_persistent_cache_instance(
+        cls, user_items: config.UserItem, upload_options: UploadOptions
+    ) -> history.PersistentCache | None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
+
+        if upload_options.dry_run:
+            LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+            return None
+
+        cache_path_dir = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+            .joinpath(
+                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+            )
+        )
+        cache_path_dir.mkdir(parents=True, exist_ok=True)
+        cache_path = cache_path_dir.joinpath("cached_file_handles")
+
+        # Sanitize sensitive segments for logging
+        sanitized_cache_path = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath("***")
+            .joinpath("***")
+            .joinpath("cached_file_handles")
+        )
+        LOG.debug(f"File handle cache path: {sanitized_cache_path}")
+
+        cache = history.PersistentCache(str(cache_path.resolve()))
+        cache.clear_expired()
+
+        return cache
+
+    def _get_cached_file_handle(self, key: str) -> str | None:
+        if self.cache is None:
+            return None
+
+        if _is_uuid(key):
+            return None
+
+        return self.cache.get(key)
+
+    def _set_file_handle_cache(self, key: str, value: str) -> None:
+        if self.cache is None:
+            return
+
+        if _is_uuid(key):
+            return
+
+        self.cache.set(key, value)
 
 
 class Uploader:
     def __init__(
         self,
-
+        upload_options: UploadOptions,
+        user_session: requests.Session | None = None,
         emitter: EventEmitter | None = None,
-        chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-        dry_run=False,
     ):
-        self.
+        self.upload_options = upload_options
+        self.user_session = user_session
         if emitter is None:
             # An empty event emitter that does nothing
             self.emitter = EventEmitter()
         else:
             self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
 
     def upload_stream(
         self,
         fp: T.IO[bytes],
-
-        session_key: str,
+        session_key: str | None = None,
         progress: dict[str, T.Any] | None = None,
     ) -> str:
         if progress is None:
             progress = {}
 
+        if session_key is None:
+            session_key = self._gen_session_key(fp, progress)
+
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
 
-        upload_service = self._create_upload_service(session_key, cluster_filetype)
-
         progress["entity_size"] = entity_size
-        progress["chunk_size"] = self.chunk_size
+        progress["chunk_size"] = self.upload_options.chunk_size
         progress["retries"] = 0
         progress["begin_offset"] = None
 
@@ -372,10 +881,24 @@ class Uploader:
 
         while True:
             try:
-
-
-
-
+                if self.user_session is not None:
+                    file_handle = self._upload_stream_retryable(
+                        self.user_session,
+                        fp,
+                        session_key,
+                        T.cast(UploaderProgress, progress),
+                    )
+                else:
+                    with api_v4.create_user_session(
+                        self.upload_options.user_items["user_upload_token"]
+                    ) as user_session:
+                        file_handle = self._upload_stream_retryable(
+                            user_session,
+                            fp,
+                            session_key,
+                            T.cast(UploaderProgress, progress),
+                        )
+            except BaseException as ex:  # Include KeyboardInterrupt
                 self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
             else:
                 break
@@ -384,75 +907,114 @@ class Uploader:
 
         self.emitter.emit("upload_end", progress)
 
-
-
-
+        return file_handle
+
+    def finish_upload(
+        self,
+        file_handle: str,
+        cluster_filetype: api_v4.ClusterFileType,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        """Finish upload with safe retries guraranteed"""
+        if progress is None:
+            progress = {}
+
+        if self.upload_options.dry_run or self.upload_options.nofinish:
+            cluster_id = "0"
+        else:
+            organization_id = self.upload_options.user_items.get("MAPOrganizationKey")
+
+            with api_v4.create_user_session(
+                self.upload_options.user_items["user_upload_token"]
+            ) as user_session:
+                resp = api_v4.finish_upload(
+                    user_session,
+                    file_handle,
+                    cluster_filetype,
+                    organization_id=organization_id,
+                )
 
+            body = api_v4.jsonify_response(resp)
+            # TODO: Validate cluster_id
+            cluster_id = body.get("cluster_id")
+
+        progress["cluster_id"] = cluster_id
         self.emitter.emit("upload_finished", progress)
 
         return cluster_id
 
     def _create_upload_service(
-        self,
+        self, user_session: requests.Session, session_key: str
     ) -> upload_api_v4.UploadService:
         upload_service: upload_api_v4.UploadService
 
-        if self.dry_run:
+        if self.upload_options.dry_run:
+            upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
             upload_service = upload_api_v4.FakeUploadService(
-
-                session_key
-
+                user_session,
+                session_key,
+                upload_path=Path(upload_path) if upload_path is not None else None,
             )
-
-
-
-                session_key=session_key,
-                cluster_filetype=cluster_filetype,
+            LOG.info(
+                "Dry-run mode enabled, uploading to %s",
+                upload_service.upload_path.joinpath(session_key),
             )
+        else:
+            upload_service = upload_api_v4.UploadService(user_session, session_key)
 
         return upload_service
 
     def _handle_upload_exception(
-        self, ex:
+        self, ex: BaseException, progress: UploaderProgress
     ) -> None:
-        retries = progress
+        retries = progress.get("retries", 0)
         begin_offset = progress.get("begin_offset")
-
+        offset = progress.get("offset")
 
         if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-            self.emitter.emit("
+            self.emitter.emit("upload_retrying", progress)
+
             LOG.warning(
-
-                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                chunk_size,
-                begin_offset,
-                ex.__class__.__name__,
-                str(ex),
+                f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
             )
+
             # Keep things immutable here. Will increment retries in the caller
             retries += 1
-            if
+            if _is_immediate_retriable_exception(ex):
                 sleep_for = 0
             else:
                 sleep_for = min(2**retries, 16)
             LOG.info(
-                "Retrying in
-                sleep_for,
-                retries,
-                constants.MAX_UPLOAD_RETRIES,
+                f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
             )
             if sleep_for:
                 time.sleep(sleep_for)
         else:
+            self.emitter.emit("upload_failed", progress)
             raise ex
 
+    @classmethod
+    def _upload_name(cls, progress: UploaderProgress):
+        # Strictly speaking these sequence properties should not be exposed in this context
+        # TODO: Maybe move these logging statements to event handlers
+        sequence_uuid: str | None = T.cast(
+            T.Union[str, None], progress.get("sequence_uuid")
+        )
+        import_path = T.cast(T.Union[str, None], progress.get("import_path"))
+        if sequence_uuid is not None:
+            if import_path is None:
+                name: str = f"sequence_{sequence_uuid}"
+            else:
+                name = f"sequence_{sequence_uuid}/{Path(import_path).name}"
+        else:
+            name = Path(import_path or "unknown").name
+        return name
+
     def _chunk_with_progress_emitted(
-        self,
-        stream: T.IO[bytes],
-        progress: UploaderProgress,
+        self, stream: T.IO[bytes], progress: UploaderProgress
     ) -> T.Generator[bytes, None, None]:
         for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
-            stream, self.chunk_size
+            stream, self.upload_options.chunk_size
        ):
             yield chunk
 
@@ -465,11 +1027,21 @@ class Uploader:
 
     def _upload_stream_retryable(
         self,
-
+        user_session: requests.Session,
         fp: T.IO[bytes],
-
+        session_key: str,
+        progress: UploaderProgress | None = None,
     ) -> str:
         """Upload the stream with safe retries guraranteed"""
+        if progress is None:
+            progress = T.cast(UploaderProgress, {})
+
+        upload_service = self._create_upload_service(user_session, session_key)
+
+        if "entity_size" not in progress:
+            fp.seek(0, io.SEEK_END)
+            entity_size = fp.tell()
+            progress["entity_size"] = entity_size
 
         begin_offset = upload_service.fetch_offset()
 
@@ -478,64 +1050,49 @@ class Uploader:
 
         self.emitter.emit("upload_fetch_offset", progress)
 
-
+        # Estimate the read timeout
+        if not constants.MIN_UPLOAD_SPEED:
+            read_timeout = None
+        else:
+            remaining_bytes = abs(progress["entity_size"] - begin_offset)
+            read_timeout = max(
+                api_v4.REQUESTS_TIMEOUT,
+                remaining_bytes / constants.MIN_UPLOAD_SPEED,
+            )
 
+        # Upload from begin_offset
+        fp.seek(begin_offset, io.SEEK_SET)
         shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
 
-
-
-
-
-    ) -> str:
-        """Finish upload with safe retries guraranteed"""
+        # Start uploading
+        return upload_service.upload_shifted_chunks(
+            shifted_chunks, begin_offset, read_timeout=read_timeout
+        )
 
-
-
+    def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+        if self.upload_options.noresume:
+            # Generate a unique UUID for session_key when noresume is True
+            # to prevent resuming from previous uploads
+            session_key = f"{_prefixed_uuid4()}"
         else:
-
-
-            file_handle,
-            upload_service.cluster_filetype,
-            organization_id=self.user_items.get("MAPOrganizationKey"),
-        )
+            fp.seek(0, io.SEEK_SET)
+            session_key = utils.md5sum_fp(fp).hexdigest()
 
-
-
+        filetype = progress.get("file_type")
+        if filetype is not None:
+            session_key = _suffix_session_key(session_key, types.FileType(filetype))
 
-
-
-        return cluster_id
+        return session_key
 
 
 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
     for metadata in metadatas:
-
+        validate_image_desc(DescriptionJSONSerializer.as_desc(metadata))
         if not metadata.filename.is_file():
             raise FileNotFoundError(f"No such file {metadata.filename}")
 
 
-
-def wip_file_context(wip_path: Path, done_path: Path):
-    assert wip_path != done_path, "should not be the same file"
-    try:
-        os.remove(wip_path)
-    except FileNotFoundError:
-        pass
-    try:
-        yield wip_path
-        try:
-            os.remove(done_path)
-        except FileNotFoundError:
-            pass
-        wip_path.rename(done_path)
-    finally:
-        try:
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
-
-
-def _is_immediate_retry(ex: Exception):
+def _is_immediate_retriable_exception(ex: BaseException) -> bool:
     if (
         isinstance(ex, requests.HTTPError)
         and isinstance(ex.response, requests.Response)
@@ -548,8 +1105,10 @@ def _is_immediate_retry(ex: Exception):
         # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
         return resp.get("debug_info", {}).get("retriable", False)
 
+    return False
+
 
-def _is_retriable_exception(ex:
+def _is_retriable_exception(ex: BaseException) -> bool:
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
         return True
 
@@ -568,14 +1127,36 @@ def _is_retriable_exception(ex: Exception):
     return False
 
 
-_SUFFIX_MAP: dict[
-
-
-
+_SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+    api_v4.ClusterFileType.ZIP: ".zip",
+    api_v4.ClusterFileType.CAMM: ".mp4",
+    api_v4.ClusterFileType.BLACKVUE: ".mp4",
+    types.FileType.IMAGE: ".jpg",
+    types.FileType.ZIP: ".zip",
+    types.FileType.BLACKVUE: ".mp4",
+    types.FileType.CAMM: ".mp4",
+    types.FileType.GOPRO: ".mp4",
+    types.FileType.VIDEO: ".mp4",
 }
 
 
-def
-
+def _suffix_session_key(
+    key: str, filetype: api_v4.ClusterFileType | types.FileType
 ) -> str:
-
+    is_uuid_before = _is_uuid(key)
+
+    key = f"mly_tools_{key}{_SUFFIX_MAP[filetype]}"
+
+    assert _is_uuid(key) is is_uuid_before
+
+    return key
+
+
+def _prefixed_uuid4():
+    prefixed = f"uuid_{uuid.uuid4().hex}"
+    assert _is_uuid(prefixed)
+    return prefixed
+
+
+def _is_uuid(key: str) -> bool:
+    return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")