mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +198 -55
- mapillary_tools/authenticate.py +326 -64
- mapillary_tools/blackvue_parser.py +195 -0
- mapillary_tools/camm/camm_builder.py +55 -97
- mapillary_tools/camm/camm_parser.py +429 -181
- mapillary_tools/commands/__main__.py +10 -6
- mapillary_tools/commands/authenticate.py +8 -1
- mapillary_tools/commands/process.py +27 -51
- mapillary_tools/commands/process_and_upload.py +18 -5
- mapillary_tools/commands/sample_video.py +2 -3
- mapillary_tools/commands/upload.py +44 -13
- mapillary_tools/commands/video_process_and_upload.py +19 -5
- mapillary_tools/config.py +65 -26
- mapillary_tools/constants.py +141 -18
- mapillary_tools/exceptions.py +37 -34
- mapillary_tools/exif_read.py +221 -116
- mapillary_tools/exif_write.py +10 -8
- mapillary_tools/exiftool_read.py +33 -42
- mapillary_tools/exiftool_read_video.py +97 -47
- mapillary_tools/exiftool_runner.py +57 -0
- mapillary_tools/ffmpeg.py +417 -242
- mapillary_tools/geo.py +158 -118
- mapillary_tools/geotag/__init__.py +0 -1
- mapillary_tools/geotag/base.py +147 -0
- mapillary_tools/geotag/factory.py +307 -0
- mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
- mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
- mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
- mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
- mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
- mapillary_tools/geotag/geotag_images_from_video.py +88 -51
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
- mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
- mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
- mapillary_tools/geotag/image_extractors/base.py +18 -0
- mapillary_tools/geotag/image_extractors/exif.py +60 -0
- mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
- mapillary_tools/geotag/options.py +182 -0
- mapillary_tools/geotag/utils.py +52 -16
- mapillary_tools/geotag/video_extractors/base.py +18 -0
- mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
- mapillary_tools/geotag/video_extractors/gpx.py +116 -0
- mapillary_tools/geotag/video_extractors/native.py +160 -0
- mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
- mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
- mapillary_tools/history.py +134 -20
- mapillary_tools/mp4/construct_mp4_parser.py +17 -10
- mapillary_tools/mp4/io_utils.py +0 -1
- mapillary_tools/mp4/mp4_sample_parser.py +36 -28
- mapillary_tools/mp4/simple_mp4_builder.py +10 -9
- mapillary_tools/mp4/simple_mp4_parser.py +13 -22
- mapillary_tools/process_geotag_properties.py +184 -414
- mapillary_tools/process_sequence_properties.py +594 -225
- mapillary_tools/sample_video.py +20 -26
- mapillary_tools/serializer/description.py +587 -0
- mapillary_tools/serializer/gpx.py +132 -0
- mapillary_tools/telemetry.py +26 -13
- mapillary_tools/types.py +98 -611
- mapillary_tools/upload.py +411 -387
- mapillary_tools/upload_api_v4.py +167 -142
- mapillary_tools/uploader.py +804 -284
- mapillary_tools/utils.py +49 -18
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
- mapillary_tools-0.14.0.dist-info/RECORD +75 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
- mapillary_tools/geotag/blackvue_parser.py +0 -118
- mapillary_tools/geotag/geotag_from_generic.py +0 -22
- mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
- mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
- mapillary_tools/video_data_extraction/cli_options.py +0 -22
- mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
- mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
- mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
- mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
- mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
- mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
- mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
- mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
- mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
- mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
- mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
- mapillary_tools-0.13.3.dist-info/RECORD +0 -75
- /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
- {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py
CHANGED
@@ -1,8 +1,16 @@
+from __future__ import annotations
+
+import concurrent.futures
+
+import dataclasses
 import io
 import json
 import logging
 import os
+import struct
+import sys
 import tempfile
+import threading
 import time
 import typing as T
 import uuid
@@ -10,33 +18,96 @@ import zipfile
 from contextlib import contextmanager
 from pathlib import Path

-
+if sys.version_info >= (3, 11):
+    from typing import Required
+else:
+    from typing_extensions import Required
+
 import requests

-from . import
+from . import (
+    api_v4,
+    config,
+    constants,
+    exif_write,
+    geo,
+    history,
+    telemetry,
+    types,
+    upload_api_v4,
+    utils,
+)
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
+from .mp4 import simple_mp4_builder
+from .serializer.description import (
+    desc_file_to_exif,
+    DescriptionJSONSerializer,
+    validate_image_desc,
+)


 LOG = logging.getLogger(__name__)


-
-
+@dataclasses.dataclass(frozen=True)
+class UploadOptions:
+    user_items: config.UserItem
+    chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
+    dry_run: bool = False
+    nofinish: bool = False
+    noresume: bool = False
+
+
+class UploaderProgress(T.TypedDict, total=True):
+    """
+    Progress data that Uploader cares about.
+    """
+
+    # The size, in bytes, of the last chunk that has been read and upload
     chunk_size: int

-    #
-
+    # The initial offset returned by the upload service, which is also the offset
+    # uploader start uploading from.
+    # Assert:
+    # - 0 <= begin_offset <= offset <= entity_size
+    # - Be non-None after at least a successful "upload_fetch_offset"
+    begin_offset: int | None

-    # How many bytes has been uploaded so far
+    # How many bytes of the file has been uploaded so far
     offset: int

-    # Size in bytes of the
+    # Size in bytes of the file (i.e. fp.tell() after seek to the end)
+    # NOTE: It's different from filesize in file system
+    # Assert:
+    # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int

+    # An "upload_interrupted" will increase it. Reset to 0 if a chunk is uploaded
+    retries: int
+
+    # Cluster ID after finishing the upload
+    cluster_id: str
+
+
+class SequenceProgress(T.TypedDict, total=False):
+    """Progress data at sequence level"""
+
+    # Used to check if it is uploaded or not
+    sequence_md5sum: Required[str]
+
+    # Used to resume from the previous upload,
+    # so it has to an unique identifier (hash) of the upload content
+    upload_md5sum: str
+
+    # File type
+    file_type: Required[str]
+
     # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
-    total_sequence_count: int
+    total_sequence_count: Required[int]

     # 0-based nth sequence. It is always 0 when uploading Zipfile/BlackVue/CAMM
-    sequence_idx: int
+    sequence_idx: Required[int]

     # How many images in the sequence. It's available only when uploading directories/Zipfiles
     sequence_image_count: int
@@ -44,20 +115,31 @@ class Progress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str

-    # An "upload_interrupted" will increase it. Reset to 0 if the chunk is uploaded
-    retries: int
-
-    # md5sum of the zipfile/BlackVue/CAMM in uploading
-    md5sum: str
-
     # Path to the Zipfile/BlackVue/CAMM
     import_path: str

-    # Cluster ID after finishing the upload
-    cluster_id: str

+class Progress(SequenceProgress, UploaderProgress):
+    pass
+
+
+class SequenceError(Exception):
+    """
+    Base class for sequence specific errors. These errors will cause the
+    current sequence upload to fail but will not interrupt the overall upload
+    process for other sequences.
+    """

-
+    pass
+
+
+class ExifError(SequenceError):
+    def __init__(self, message: str, image_path: Path):
+        super().__init__(message)
+        self.image_path = image_path
+
+
+class InvalidMapillaryZipFileError(SequenceError):
     pass


@@ -65,14 +147,15 @@ EventName = T.Literal[
     "upload_start",
     "upload_fetch_offset",
     "upload_progress",
+    "upload_interrupted",
     "upload_end",
+    "upload_failed",
     "upload_finished",
-    "upload_interrupted",
 ]


 class EventEmitter:
-    events:
+    events: dict[EventName, list]

     def __init__(self):
         self.events = {}
@@ -80,6 +163,7 @@ class EventEmitter:
     def on(self, event: EventName):
         def _wrap(callback):
             self.events.setdefault(event, []).append(callback)
+            return callback

         return _wrap

@@ -88,237 +172,731 @@ class EventEmitter:
             callback(*args, **kwargs)


-
-
-
-
-        emitter: T.Optional[EventEmitter] = None,
-        chunk_size: int = upload_api_v4.DEFAULT_CHUNK_SIZE,
-        dry_run=False,
-    ):
-        jsonschema.validate(instance=user_items, schema=types.UserItemSchema)
-        self.user_items = user_items
-        self.emitter = emitter
-        self.chunk_size = chunk_size
-        self.dry_run = dry_run
+@dataclasses.dataclass
+class UploadResult:
+    result: str | None = None
+    error: Exception | None = None

-
-
+
+class VideoUploader:
+    @classmethod
+    def upload_videos(
+        cls, mly_uploader: Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+    ) -> T.Generator[tuple[types.VideoMetadata, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort videos to make sure interrupted uploads are resumed as early as possible
+        sorted_video_metadatas = sorted(video_metadatas, key=lambda m: m.filename)
+
+        for idx, video_metadata in enumerate(sorted_video_metadatas):
+            LOG.debug(f"Checksum for video {video_metadata.filename}...")
+            try:
+                video_metadata.update_md5sum()
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+                continue
+
+            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_video_metadatas),
+                "sequence_idx": idx,
+                "file_type": video_metadata.filetype.value,
+                "import_path": str(video_metadata.filename),
+                "sequence_md5sum": video_metadata.md5sum,
+            }
+
+            try:
+                with cls.build_camm_stream(video_metadata) as camm_fp:
+                    # Upload the mp4 stream
+                    file_handle = mly_uploader.upload_stream(
+                        T.cast(T.IO[bytes], camm_fp),
+                        progress=T.cast(T.Dict[str, T.Any], progress),
+                    )
+
+                cluster_id = mly_uploader.finish_upload(
+                    file_handle,
+                    api_v4.ClusterFileType.CAMM,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield video_metadata, UploadResult(error=ex)
+            else:
+                yield video_metadata, UploadResult(result=cluster_id)
+
+    @classmethod
+    @contextmanager
+    def build_camm_stream(cls, video_metadata: types.VideoMetadata):
+        # Convert video metadata to CAMMInfo
+        camm_info = cls.prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        with video_metadata.filename.open("rb") as src_fp:
+            # Build the mp4 stream with the CAMM samples
+            yield simple_mp4_builder.transform_mp4(src_fp, camm_sample_generator)
+
+    @classmethod
+    def prepare_camm_info(
+        cls, video_metadata: types.VideoMetadata
+    ) -> camm_parser.CAMMInfo:
+        camm_info = camm_parser.CAMMInfo(
+            make=video_metadata.make or "", model=video_metadata.model or ""
+        )
+
+        for point in video_metadata.points:
+            if isinstance(point, telemetry.CAMMGPSPoint):
+                if camm_info.gps is None:
+                    camm_info.gps = []
+                camm_info.gps.append(point)
+
+            elif isinstance(point, telemetry.GPSPoint):
+                # There is no proper CAMM entry for GoPro GPS
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+
+            elif isinstance(point, geo.Point):
+                if camm_info.mini_gps is None:
+                    camm_info.mini_gps = []
+                camm_info.mini_gps.append(point)
+            else:
+                raise ValueError(f"Unknown point type: {point}")
+
+        if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+            if video_metadata.filetype is types.FileType.GOPRO:
+                with video_metadata.filename.open("rb") as fp:
+                    gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+                if gopro_info is not None:
+                    camm_info.accl = gopro_info.accl or []
+                    camm_info.gyro = gopro_info.gyro or []
+                    camm_info.magn = gopro_info.magn or []
+
+        return camm_info
+
+
+class ZipUploader:
+    @classmethod
+    def upload_zipfiles(
+        cls, mly_uploader: Uploader, zip_paths: T.Sequence[Path]
+    ) -> T.Generator[tuple[Path, UploadResult], None, None]:
+        # If upload in a random order, then interrupted uploads has a higher chance to expire.
+        # Therefore sort zipfiles to make sure interrupted uploads are resumed as early as possible
+        sorted_zip_paths = sorted(zip_paths)
+
+        for idx, zip_path in enumerate(sorted_zip_paths):
+            progress: SequenceProgress = {
+                "total_sequence_count": len(sorted_zip_paths),
+                "sequence_idx": idx,
+                "import_path": str(zip_path),
+                "file_type": types.FileType.ZIP.value,
+                "sequence_md5sum": "",  # Placeholder, will be set in upload_zipfile
+            }
+            try:
+                cluster_id = cls._upload_zipfile(
+                    mly_uploader,
+                    zip_path,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+            except Exception as ex:
+                yield zip_path, UploadResult(error=ex)
+            else:
+                yield zip_path, UploadResult(result=cluster_id)
+
+    @classmethod
+    def zip_images(
+        cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
+    ) -> None:
+        """
+        Group images into sequences and zip each sequence into a zipfile.
+        """
+        sequences = types.group_and_sort_images(metadatas)
+        os.makedirs(zip_dir, exist_ok=True)
+
+        for sequence_uuid, sequence in sequences.items():
+            _validate_metadatas(sequence)
+            # For atomicity we write into a WIP file and then rename to the final file
+            wip_zip_filename = zip_dir.joinpath(
+                f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
+            )
+            with cls._wip_file_context(wip_zip_filename) as wip_path:
+                with wip_path.open("wb") as wip_fp:
+                    cls._zip_sequence_fp(sequence, wip_fp)
+
+    @classmethod
+    def zip_images_and_upload(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        sequences = types.group_and_sort_images(image_metadatas)
+
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
+
+            with tempfile.NamedTemporaryFile() as fp:
+                try:
+                    sequence_md5sum = cls._zip_sequence_fp(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
+
+                try:
+                    file_handle = uploader.upload_stream(
+                        fp, progress=T.cast(T.Dict[str, T.Any], sequence_progress)
+                    )
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=T.cast(T.Dict[str, T.Any], sequence_progress),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+            yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_zipfile(
+        cls,
+        uploader: Uploader,
         zip_path: Path,
-
-    ) ->
-        if
-
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}

         with zipfile.ZipFile(zip_path) as ziph:
             namelist = ziph.namelist()
             if not namelist:
-
-
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            sequence_md5sum = cls._extract_sequence_md5sum(zip_fp)

-
-
+        # Send the copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
             "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
+            "file_type": types.FileType.ZIP.value,
         }

-        with zip_path.open("rb") as
-
+        with zip_path.open("rb") as zip_fp:
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
+            )
+
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
+
+    @classmethod
+    def _zip_sequence_fp(
+        cls,
+        sequence: T.Sequence[types.ImageMetadata],
+        zip_fp: T.IO[bytes],
+    ) -> str:
+        """
+        Write a sequence of ImageMetadata into the zipfile handle.
+        The sequence has to be one sequence and sorted.
+        """
+
+        sequence_groups = types.group_and_sort_images(sequence)
+        assert len(sequence_groups) == 1, (
+            f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
+        )
+
+        if sequence:
+            LOG.debug(f"Checksum for sequence {sequence[0].MAPSequenceUUID}...")
+        sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+        with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
+            for idx, metadata in enumerate(sequence):
+                # Arcname should be unique, the name does not matter
+                arcname = f"{idx}.jpg"
+                zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+                zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
+            assert len(sequence) == len(set(zipf.namelist()))
+            zipf.comment = json.dumps(
+                {"sequence_md5sum": sequence_md5sum},
+                sort_keys=True,
+                separators=(",", ":"),
+            ).encode("utf-8")
+
+        return sequence_md5sum
+
+    @classmethod
+    def _extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+        with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
+            comment = ziph.comment
+
+        if not comment:
+            raise InvalidMapillaryZipFileError("No comment found in the zipfile")
+
+        try:
+            decoded = comment.decode("utf-8")
+            zip_metadata = json.loads(decoded)
+        except UnicodeDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+        except json.JSONDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+
+        sequence_md5sum = zip_metadata.get("sequence_md5sum")
+
+        if not sequence_md5sum and not isinstance(sequence_md5sum, str):
+            raise InvalidMapillaryZipFileError("No sequence_md5sum found")
+
+        return sequence_md5sum

-
-
+    @classmethod
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
+        try:
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
+        try:
+            yield wip_path
+
+            with wip_path.open("rb") as fp:
                 upload_md5sum = utils.md5sum_fp(fp).hexdigest()

-
-
-                fp,
-                upload_api_v4.ClusterFileType.ZIP,
-                upload_md5sum,
-                event_payload=final_event_payload,
+            done_path = wip_path.parent.joinpath(
+                _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
             )

-
-
-
-
-
-
-
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
+

-
+class ImageSequenceUploader:
+    @classmethod
+    def upload_images(
+        cls, uploader: Uploader, image_metadatas: T.Sequence[types.ImageMetadata]
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
         sequences = types.group_and_sort_images(image_metadatas)
-
+
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-
-
+            LOG.debug(f"Checksum for image sequence {sequence_uuid}...")
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+            sequence_progress: SequenceProgress = {
                 "sequence_idx": sequence_idx,
                 "total_sequence_count": len(sequences),
                 "sequence_image_count": len(sequence),
                 "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
             }
-
-
-
-
-
-
-
-
-
-
+
+            try:
+                cluster_id = cls._upload_sequence(
+                    uploader,
+                    sequence,
+                    progress=T.cast(dict[str, T.Any], sequence_progress),
+                )
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+    @classmethod
+    def _upload_sequence(
+        cls,
+        uploader: Uploader,
+        sequence: T.Sequence[types.ImageMetadata],
+        progress: dict[str, T.Any],
+    ) -> str:
+        _validate_metadatas(sequence)
+
+        progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+        uploader.emitter.emit("upload_start", progress)
+
+        single_image_uploader = SingleImageUploader(uploader, progress=progress)
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
+        ) as executor:
+            image_file_handles = list(
+                executor.map(single_image_uploader.upload, sequence)
+            )
+
+        manifest_file_handle = cls._upload_manifest(uploader, image_file_handles)
+
+        uploader.emitter.emit("upload_end", progress)
+
+        cluster_id = uploader.finish_upload(
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=progress,
+        )
+
+        return cluster_id
+
+    @classmethod
+    def _upload_manifest(
+        cls, uploader: Uploader, image_file_handles: T.Sequence[str]
+    ) -> str:
+        uploader_without_emitter = Uploader(uploader.upload_options)
+
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
+
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(
+                json.dumps(manifest, sort_keys=True, separators=(",", ":")).encode(
+                    "utf-8"
                 )
-
-
-
+            )
+            manifest_fp.seek(0, io.SEEK_SET)
+            return uploader_without_emitter.upload_stream(
+                manifest_fp, session_key=f"{_prefixed_uuid4()}.json"
+            )
+
+
+class SingleImageUploader:
+    def __init__(
+        self,
+        uploader: Uploader,
+        progress: dict[str, T.Any] | None = None,
+    ):
+        self.uploader = uploader
+        self.progress = progress or {}
+        self.lock = threading.Lock()
+        self.cache = self._maybe_create_persistent_cache_instance(
+            uploader.upload_options.user_items
+        )
+
+    def upload(self, image_metadata: types.ImageMetadata) -> str:
+        mutable_progress = {
+            **(self.progress or {}),
+            "filename": str(image_metadata.filename),
+        }
+
+        image_bytes = self.dump_image_bytes(image_metadata)
+
+        uploader_without_emitter = Uploader(self.uploader.upload_options)
+
+        session_key = uploader_without_emitter._gen_session_key(
+            io.BytesIO(image_bytes), mutable_progress
+        )
+
+        file_handle = self._file_handle_cache_get(session_key)
+
+        if file_handle is None:
+            file_handle = uploader_without_emitter.upload_stream(
+                io.BytesIO(image_bytes),
+                session_key=session_key,
+                progress=mutable_progress,
+            )
+            self._file_handle_cache_set(session_key, file_handle)
+
+        # Override chunk_size with the actual filesize
+        mutable_progress["chunk_size"] = image_metadata.filesize
+
+        with self.lock:
+            self.uploader.emitter.emit("upload_progress", mutable_progress)
+
+        return file_handle
+
+    @classmethod
+    def dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex
+
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(
+                T.Dict, desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata))
+            )
+        )
+
+        try:
+            return edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
+
+    @classmethod
+    def _maybe_create_persistent_cache_instance(
+        cls, user_items: config.UserItem
+    ) -> history.PersistentCache | None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
+
+        cache_path_dir = (
+            Path(constants.UPLOAD_CACHE_DIR)
+            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
+            .joinpath(
+                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
+            )
+        )
+        cache_path_dir.mkdir(parents=True, exist_ok=True)
+        cache_path = cache_path_dir.joinpath("cached_file_handles")
+        LOG.debug(f"File handle cache path: {cache_path}")
+
+        cache = history.PersistentCache(str(cache_path.resolve()))
+        cache.clear_expired()
+
+        return cache
+
+    def _file_handle_cache_get(self, key: str) -> str | None:
+        if self.cache is None:
+            return None
+
+        if _is_uuid(key):
+            return None
+
+        return self.cache.get(key)
+
+    def _file_handle_cache_set(self, key: str, value: str) -> None:
+        if self.cache is None:
+            return
+
+        if _is_uuid(key):
+            return
+
+        self.cache.set(key, value)
+
+
+class Uploader:
+    def __init__(
+        self, upload_options: UploadOptions, emitter: EventEmitter | None = None
+    ):
+        self.upload_options = upload_options
+        if emitter is None:
+            # An empty event emitter that does nothing
+            self.emitter = EventEmitter()
+        else:
+            self.emitter = emitter

     def upload_stream(
         self,
         fp: T.IO[bytes],
-
-
-
-
-
-
+        session_key: str | None = None,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        if session_key is None:
+            session_key = self._gen_session_key(fp, progress)

         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()

-
-
-
-
-
-
-
-
-
-
-
-
-
-            cluster_filetype=cluster_filetype,
-            chunk_size=self.chunk_size,
+        progress["entity_size"] = entity_size
+        progress["chunk_size"] = self.upload_options.chunk_size
+        progress["retries"] = 0
+        progress["begin_offset"] = None
+
+        self.emitter.emit("upload_start", progress)
+
+        upload_service = self._create_upload_service(session_key)
+
+        while True:
+            try:
+                file_handle = self._upload_stream_retryable(
+                    upload_service, fp, T.cast(UploaderProgress, progress)
                 )
+            except Exception as ex:
+                self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
+            except BaseException as ex:
+                self.emitter.emit("upload_failed", progress)
+                raise ex
+            else:
+                break
+
+            progress["retries"] += 1
+
+        self.emitter.emit("upload_end", progress)
+
+        return file_handle
+
+    def finish_upload(
+        self,
+        file_handle: str,
+        cluster_filetype: api_v4.ClusterFileType,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        """Finish upload with safe retries guraranteed"""
+        if progress is None:
+            progress = {}
+
+        if self.upload_options.dry_run or self.upload_options.nofinish:
+            cluster_id = "0"
+        else:
+            resp = api_v4.finish_upload(
+                self.upload_options.user_items["user_upload_token"],
+                file_handle,
+                cluster_filetype,
+                organization_id=self.upload_options.user_items.get(
+                    "MAPOrganizationKey"
+                ),
+            )
+
+            body = api_v4.jsonify_response(resp)
+            # TODO: Validate cluster_id
+            cluster_id = body.get("cluster_id")
+
+        progress["cluster_id"] = cluster_id
+        self.emitter.emit("upload_finished", progress)
+
+        return cluster_id
+
+    def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
+        upload_service: upload_api_v4.UploadService
+
+        if self.upload_options.dry_run:
+            upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
+            upload_service = upload_api_v4.FakeUploadService(
+                user_access_token=self.upload_options.user_items["user_upload_token"],
+                session_key=session_key,
+                upload_path=Path(upload_path) if upload_path is not None else None,
+            )
+            LOG.info(
+                "Dry run mode enabled. Data will be uploaded to %s",
+                upload_service.upload_path.joinpath(session_key),
             )
         else:
             upload_service = upload_api_v4.UploadService(
-                user_access_token=self.user_items["user_upload_token"],
+                user_access_token=self.upload_options.user_items["user_upload_token"],
                 session_key=session_key,
-                organization_id=self.user_items.get("MAPOrganizationKey"),
-                cluster_filetype=cluster_filetype,
-                chunk_size=self.chunk_size,
             )

-
-            **event_payload,  # type: ignore
-            "entity_size": entity_size,
-            "md5sum": upload_md5sum,
-        }
+        return upload_service

-
-
-
-
-
-
+    def _handle_upload_exception(
+        self, ex: Exception, progress: UploaderProgress
+    ) -> None:
+        retries = progress.get("retries", 0)
+        begin_offset = progress.get("begin_offset")
+        offset = progress.get("offset")
+
+        if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
+            self.emitter.emit("upload_interrupted", progress)
+            LOG.warning(
+                f"Error uploading at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
             )
-
-
+            # Keep things immutable here. Will increment retries in the caller
+            retries += 1
+            if _is_immediate_retriable_exception(ex):
+                sleep_for = 0
+            else:
+                sleep_for = min(2**retries, 16)
+            LOG.info(
+                f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
+            )
+            if sleep_for:
+                time.sleep(sleep_for)
+        else:
+            self.emitter.emit("upload_failed", progress)
+            raise ex
+
+    def _chunk_with_progress_emitted(
+        self,
+        stream: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> T.Generator[bytes, None, None]:
+        for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
+            stream, self.upload_options.chunk_size
+        ):
+            yield chunk
+
+            progress["offset"] += len(chunk)
+            progress["chunk_size"] = len(chunk)
+            # Whenever a chunk is uploaded, reset retries
+            progress["retries"] = 0
+
+            self.emitter.emit("upload_progress", progress)
+
+    def _upload_stream_retryable(
+        self,
+        upload_service: upload_api_v4.UploadService,
+        fp: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> str:
+        """Upload the stream with safe retries guraranteed"""
+
+        begin_offset = upload_service.fetch_offset()
+
+        progress["begin_offset"] = begin_offset
+        progress["offset"] = begin_offset
+
+        if not constants.MIN_UPLOAD_SPEED:
+            read_timeout = None
+        else:
+            remaining_bytes = abs(progress["entity_size"] - begin_offset)
+            read_timeout = max(
+                api_v4.REQUESTS_TIMEOUT, remaining_bytes / constants.MIN_UPLOAD_SPEED
+            )
+
+        self.emitter.emit("upload_fetch_offset", progress)
+
+        fp.seek(begin_offset, io.SEEK_SET)
+
+        shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
+
+        return upload_service.upload_shifted_chunks(
+            shifted_chunks, begin_offset, read_timeout=read_timeout
+        )
+
+    def _gen_session_key(self, fp: T.IO[bytes], progress: dict[str, T.Any]) -> str:
+        if self.upload_options.noresume:
+            # Generate a unique UUID for session_key when noresume is True
+            # to prevent resuming from previous uploads
+            session_key = f"{_prefixed_uuid4()}"
+        else:
+            fp.seek(0, io.SEEK_SET)
+            session_key = utils.md5sum_fp(fp).hexdigest()
+
+        filetype = progress.get("file_type")
+        if filetype is not None:
+            session_key = _session_key(session_key, types.FileType(filetype))
+
+        return session_key


 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
     for metadata in metadatas:
-
+        validate_image_desc(DescriptionJSONSerializer.as_desc(metadata))
         if not metadata.filename.is_file():
             raise FileNotFoundError(f"No such file {metadata.filename}")


-
-def wip_file_context(wip_path: Path, done_path: Path):
-    assert wip_path != done_path, "should not be the same file"
-    try:
-        os.remove(wip_path)
-    except FileNotFoundError:
-        pass
-    try:
-        yield wip_path
-        try:
-            os.remove(done_path)
-        except FileNotFoundError:
-            pass
-        wip_path.rename(done_path)
-    finally:
-        try:
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
-
-
-def zip_images(
-    metadatas: T.List[types.ImageMetadata],
-    zip_dir: Path,
-) -> None:
-    _validate_metadatas(metadatas)
-    sequences = types.group_and_sort_images(metadatas)
-    os.makedirs(zip_dir, exist_ok=True)
-    for sequence_uuid, sequence in sequences.items():
-        for metadata in sequence:
-            metadata.update_md5sum()
-        upload_md5sum = types.sequence_md5sum(sequence)
-        timestamp = int(time.time())
-        wip_zip_filename = zip_dir.joinpath(
-            f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{timestamp}"
-        )
-        zip_filename = zip_dir.joinpath(f"mly_tools_{upload_md5sum}.zip")
-        with wip_file_context(wip_zip_filename, zip_filename) as wip_dir:
-            with wip_dir.open("wb") as fp:
-                _zip_sequence_fp(sequence, fp, upload_md5sum)
-
-
-def _zip_sequence_fp(
-    sequence: T.Sequence[types.ImageMetadata],
-    fp: T.IO[bytes],
-    upload_md5sum: str,
-) -> None:
-    arcname_idx = 0
-    arcnames = set()
-    with zipfile.ZipFile(fp, "w", zipfile.ZIP_DEFLATED) as ziph:
-        for metadata in sequence:
-            edit = exif_write.ExifEdit(metadata.filename)
-            # The cast is to fix the type checker error
-            edit.add_image_description(
-                T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
-            )
-            image_bytes = edit.dump_image_bytes()
-            arcname: str = metadata.filename.name
-            # make sure the arcname is unique, otherwise zipfile.extractAll will eliminate duplicated ones
-            while arcname in arcnames:
-                arcname_idx += 1
-                arcname = (
-                    f"{metadata.filename.stem}_{arcname_idx}{metadata.filename.suffix}"
-                )
-            arcnames.add(arcname)
-            zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            ziph.writestr(zipinfo, image_bytes)
-        ziph.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
-        assert len(sequence) == len(set(ziph.namelist()))
-
-
-def _extract_upload_md5sum(fp: T.IO[bytes]) -> T.Optional[str]:
-    with zipfile.ZipFile(fp, "r", zipfile.ZIP_DEFLATED) as ziph:
-        comment = ziph.comment
-        if not comment:
-            return None
-        try:
-            upload_md5sum = json.loads(comment.decode("utf-8")).get("upload_md5sum")
-        except Exception:
-            return None
-        if not upload_md5sum:
-            return None
-        return str(upload_md5sum)
-
-
-def _is_immediate_retry(ex: Exception):
+def _is_immediate_retriable_exception(ex: Exception) -> bool:
     if (
         isinstance(ex, requests.HTTPError)
         and isinstance(ex.response, requests.Response)
@@ -331,8 +909,10 @@ def _is_immediate_retry(ex: Exception):
         # resp: {"debug_info":{"retriable":true,"type":"OffsetInvalidError","message":"Request starting offset is invalid"}}
         return resp.get("debug_info", {}).get("retriable", False)

+    return False
+

-def _is_retriable_exception(ex: Exception):
+def _is_retriable_exception(ex: Exception) -> bool:
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
         return True

@@ -351,89 +931,29 @@ def _is_retriable_exception(ex: Exception):
     return False


-def
-
-    assert isinstance(emitter, EventEmitter)
-        mutable_payload["offset"] += len(chunk)
-        mutable_payload["chunk_size"] = len(chunk)
-        emitter.emit("upload_progress", mutable_payload)
-
-        return _callback
-
-
-def _upload_stream(
-    upload_service: upload_api_v4.UploadService,
-    fp: T.IO[bytes],
-    event_payload: T.Optional[Progress] = None,
-    emitter: T.Optional[EventEmitter] = None,
+def _session_key(
+    upload_md5sum: str, filetype: api_v4.ClusterFileType | types.FileType
 ) -> str:
-
-
-
-
-
-
-
-
-
-
-
-
-    if emitter:
-        emitter.emit("upload_start", mutable_payload)
+    _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+        api_v4.ClusterFileType.ZIP: ".zip",
+        api_v4.ClusterFileType.CAMM: ".mp4",
+        api_v4.ClusterFileType.BLACKVUE: ".mp4",
+        types.FileType.IMAGE: ".jpg",
+        types.FileType.ZIP: ".zip",
+        types.FileType.BLACKVUE: ".mp4",
+        types.FileType.CAMM: ".mp4",
+        types.FileType.GOPRO: ".mp4",
+        types.FileType.VIDEO: ".mp4",
+    }

-
-    fp.seek(0, io.SEEK_SET)
-    begin_offset: T.Optional[int] = None
-    try:
-        begin_offset = upload_service.fetch_offset()
-        upload_service.callbacks = [_reset_retries]
-        if emitter:
-            mutable_payload["offset"] = begin_offset
-            mutable_payload["retries"] = retries
-            emitter.emit("upload_fetch_offset", mutable_payload)
-        upload_service.callbacks.append(
-            _setup_callback(emitter, mutable_payload)
-        )
-        file_handle = upload_service.upload(fp, offset=begin_offset)
-    except Exception as ex:
-        if retries < constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-            if emitter:
-                emitter.emit("upload_interrupted", mutable_payload)
-            LOG.warning(
-                # use %s instead of %d because offset could be None
-                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                upload_service.chunk_size,
-                begin_offset,
-                ex.__class__.__name__,
-                str(ex),
-            )
-            retries += 1
-            if _is_immediate_retry(ex):
-                sleep_for = 0
-            else:
-                sleep_for = min(2**retries, 16)
-            LOG.info(
-                "Retrying in %d seconds (%d/%d)",
-                sleep_for,
-                retries,
-                constants.MAX_UPLOAD_RETRIES,
-            )
-            if sleep_for:
-                time.sleep(sleep_for)
-        else:
-            raise ex
-    else:
-        break
+    return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[filetype]}"

-    if emitter:
-        emitter.emit("upload_end", mutable_payload)

-
-
+def _prefixed_uuid4():
+    prefixed = f"uuid_{uuid.uuid4().hex}"
+    assert _is_uuid(prefixed)
+    return prefixed

-    if emitter:
-        mutable_payload["cluster_id"] = cluster_id
-        emitter.emit("upload_finished", mutable_payload)

-
+def _is_uuid(session_key: str) -> bool:
+    return session_key.startswith("uuid_")