mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a1__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +235 -14
- mapillary_tools/authenticate.py +325 -64
- mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
- mapillary_tools/camm/camm_builder.py +55 -97
- mapillary_tools/camm/camm_parser.py +425 -177
- mapillary_tools/commands/__main__.py +11 -4
- mapillary_tools/commands/authenticate.py +8 -1
- mapillary_tools/commands/process.py +27 -51
- mapillary_tools/commands/process_and_upload.py +19 -5
- mapillary_tools/commands/sample_video.py +2 -3
- mapillary_tools/commands/upload.py +18 -9
- mapillary_tools/commands/video_process_and_upload.py +19 -5
- mapillary_tools/config.py +28 -12
- mapillary_tools/constants.py +46 -4
- mapillary_tools/exceptions.py +34 -35
- mapillary_tools/exif_read.py +158 -53
- mapillary_tools/exiftool_read.py +19 -5
- mapillary_tools/exiftool_read_video.py +12 -1
- mapillary_tools/exiftool_runner.py +77 -0
- mapillary_tools/geo.py +148 -107
- mapillary_tools/geotag/factory.py +298 -0
- mapillary_tools/geotag/geotag_from_generic.py +152 -11
- mapillary_tools/geotag/geotag_images_from_exif.py +43 -124
- mapillary_tools/geotag/geotag_images_from_exiftool.py +66 -70
- mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +32 -48
- mapillary_tools/geotag/geotag_images_from_gpx.py +41 -116
- mapillary_tools/geotag/geotag_images_from_gpx_file.py +15 -96
- mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -2
- mapillary_tools/geotag/geotag_images_from_video.py +46 -46
- mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +98 -92
- mapillary_tools/geotag/geotag_videos_from_gpx.py +140 -0
- mapillary_tools/geotag/geotag_videos_from_video.py +149 -181
- mapillary_tools/geotag/options.py +159 -0
- mapillary_tools/{geotag → gpmf}/gpmf_parser.py +194 -171
- mapillary_tools/history.py +3 -11
- mapillary_tools/mp4/io_utils.py +0 -1
- mapillary_tools/mp4/mp4_sample_parser.py +11 -3
- mapillary_tools/mp4/simple_mp4_parser.py +0 -10
- mapillary_tools/process_geotag_properties.py +151 -386
- mapillary_tools/process_sequence_properties.py +554 -202
- mapillary_tools/sample_video.py +8 -15
- mapillary_tools/telemetry.py +24 -12
- mapillary_tools/types.py +80 -22
- mapillary_tools/upload.py +316 -298
- mapillary_tools/upload_api_v4.py +55 -122
- mapillary_tools/uploader.py +396 -254
- mapillary_tools/utils.py +26 -0
- mapillary_tools/video_data_extraction/extract_video_data.py +17 -36
- mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +34 -19
- mapillary_tools/video_data_extraction/extractors/camm_parser.py +41 -17
- mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +4 -1
- mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +1 -2
- mapillary_tools/video_data_extraction/extractors/gopro_parser.py +37 -22
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a1.dist-info}/METADATA +3 -2
- mapillary_tools-0.14.0a1.dist-info/RECORD +78 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a1.dist-info}/WHEEL +1 -1
- mapillary_tools/geotag/utils.py +0 -26
- mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
- /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
- /mapillary_tools/{geotag → gpmf}/gps_filter.py +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a1.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a1.dist-info/licenses}/LICENSE +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a1.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py
CHANGED
@@ -1,7 +1,11 @@
+from __future__ import annotations
+
+import dataclasses
 import io
 import json
 import logging
 import os
+import struct
 import tempfile
 import time
 import typing as T
@@ -10,28 +14,54 @@ import zipfile
 from contextlib import contextmanager
 from pathlib import Path

-import jsonschema
 import requests

-from . import constants, exif_write, types, upload_api_v4
+from . import api_v4, constants, exif_write, types, upload_api_v4


 LOG = logging.getLogger(__name__)


-class Progress(T.TypedDict, total=False):
-
+class UploaderProgress(T.TypedDict, total=True):
+    """
+    Progress data that Uploader cares about.
+    """
+
+    # The size, in bytes, of the last chunk that has been read and upload
     chunk_size: int

-    #
-
+    # The initial offset returned by the upload service, which is also the offset
+    # uploader start uploading from.
+    # Assert:
+    # - 0 <= begin_offset <= offset <= entity_size
+    # - Be non-None after at least a successful "upload_fetch_offset"
+    begin_offset: int | None

-    # How many bytes has been uploaded so far
+    # How many bytes of the file has been uploaded so far
     offset: int

-    # Size in bytes of the
+    # Size in bytes of the file (i.e. fp.tell() after seek to the end)
+    # NOTE: It's different from filesize in file system
+    # Assert:
+    # - offset == entity_size when "upload_end" or "upload_finished"
     entity_size: int

+    # An "upload_interrupted" will increase it. Reset to 0 if a chunk is uploaded
+    retries: int
+
+    # Cluster ID after finishing the upload
+    cluster_id: str
+
+
+class SequenceProgress(T.TypedDict, total=False):
+    """Progress data at sequence level"""
+
+    # md5sum of the zipfile/BlackVue/CAMM in uploading
+    md5sum: str
+
+    # File type
+    file_type: str
+
     # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
     total_sequence_count: int

@@ -44,20 +74,31 @@ class Progress(T.TypedDict, total=False):
     # MAPSequenceUUID. It is only available for directory uploading
     sequence_uuid: str

-    # An "upload_interrupted" will increase it. Reset to 0 if the chunk is uploaded
-    retries: int
-
-    # md5sum of the zipfile/BlackVue/CAMM in uploading
-    md5sum: str
-
     # Path to the Zipfile/BlackVue/CAMM
     import_path: str

-    # Cluster ID after finishing the upload
-    cluster_id: str

+class Progress(SequenceProgress, UploaderProgress):
+    pass
+
+
+class SequenceError(Exception):
+    """
+    Base class for sequence specific errors. These errors will cause the
+    current sequence upload to fail but will not interrupt the overall upload
+    process for other sequences.
+    """

-
+    pass
+
+
+class ExifError(SequenceError):
+    def __init__(self, message: str, image_path: Path):
+        super().__init__(message)
+        self.image_path = image_path
+
+
+class InvalidMapillaryZipFileError(SequenceError):
     pass


@@ -72,7 +113,7 @@ EventName = T.Literal[


 class EventEmitter:
-    events:
+    events: dict[EventName, list]

     def __init__(self):
         self.events = {}
@@ -88,142 +129,382 @@ class EventEmitter:
             callback(*args, **kwargs)


-
-
-
-
-
-
-
+@dataclasses.dataclass
+class UploadResult:
+    result: str | None = None
+    error: Exception | None = None
+
+
+class ZipImageSequence:
+    @classmethod
+    def zip_images(
+        cls, metadatas: T.Sequence[types.ImageMetadata], zip_dir: Path
+    ) -> None:
+        """
+        Group images into sequences and zip each sequence into a zipfile.
+        """
+        sequences = types.group_and_sort_images(metadatas)
+        os.makedirs(zip_dir, exist_ok=True)
+
+        for sequence_uuid, sequence in sequences.items():
+            _validate_metadatas(sequence)
+            upload_md5sum = types.update_sequence_md5sum(sequence)
+
+            # For atomicity we write into a WIP file and then rename to the final file
+            wip_zip_filename = zip_dir.joinpath(
+                f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
+            )
+            filename = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
+            zip_filename = zip_dir.joinpath(filename)
+            with wip_file_context(wip_zip_filename, zip_filename) as wip_path:
+                with wip_path.open("wb") as wip_fp:
+                    actual_md5sum = cls.zip_sequence_deterministically(sequence, wip_fp)
+                    assert actual_md5sum == upload_md5sum, "md5sum mismatch"
+
+    @classmethod
+    def zip_sequence_deterministically(
+        cls,
+        sequence: T.Sequence[types.ImageMetadata],
+        zip_fp: T.IO[bytes],
+    ) -> str:
+        """
+        Write a sequence of ImageMetadata into the zipfile handle. It should guarantee
+        that the same sequence always produces the same zipfile, because the
+        sequence md5sum will be used to upload the zipfile or resume the upload.
+
+        The sequence has to be one sequence and sorted.
+        """
+
+        sequence_groups = types.group_and_sort_images(sequence)
+        assert len(sequence_groups) == 1, (
+            f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
+        )
+
+        upload_md5sum = types.update_sequence_md5sum(sequence)
+
+        with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
+            for idx, metadata in enumerate(sequence):
+                # Use {idx}.jpg (suffix does not matter) as the archive name to ensure the
+                # resulting zipfile is deterministic. This determinism is based on the upload_md5sum,
+                # which is derived from a list of image md5sums
+                cls._write_imagebytes_in_zip(zipf, metadata, arcname=f"{idx}.jpg")
+            assert len(sequence) == len(set(zipf.namelist()))
+            zipf.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
+
+        return upload_md5sum
+
+    @classmethod
+    def extract_upload_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
+        with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
+            comment = ziph.comment
+
+        if not comment:
+            raise InvalidMapillaryZipFileError("No comment found in the zipfile")
+
+        try:
+            decoded = comment.decode("utf-8")
+            zip_metadata = json.loads(decoded)
+        except UnicodeDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+        except json.JSONDecodeError as ex:
+            raise InvalidMapillaryZipFileError(str(ex)) from ex
+
+        upload_md5sum = zip_metadata.get("upload_md5sum")
+
+        if not upload_md5sum and not isinstance(upload_md5sum, str):
+            raise InvalidMapillaryZipFileError("No upload_md5sum found")
+
+        return upload_md5sum
+
+    @classmethod
+    def _write_imagebytes_in_zip(
+        cls, zipf: zipfile.ZipFile, metadata: types.ImageMetadata, arcname: str
     ):
-
-
-
-
-        self.dry_run = dry_run
+        try:
+            edit = exif_write.ExifEdit(metadata.filename)
+        except struct.error as ex:
+            raise ExifError(f"Failed to load EXIF: {ex}", metadata.filename) from ex

-
-
+        # The cast is to fix the type checker error
+        edit.add_image_description(
+            T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
+        )
+
+        try:
+            image_bytes = edit.dump_image_bytes()
+        except struct.error as ex:
+            raise ExifError(
+                f"Failed to dump EXIF bytes: {ex}", metadata.filename
+            ) from ex
+
+        zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+        zipf.writestr(zipinfo, image_bytes)
+
+    @classmethod
+    def prepare_zipfile_and_upload(
+        cls,
         zip_path: Path,
-
-
-
-
+        uploader: Uploader,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}

         with zipfile.ZipFile(zip_path) as ziph:
             namelist = ziph.namelist()
             if not namelist:
-
-
+                raise InvalidMapillaryZipFileError("Zipfile has no files")
+
+        with zip_path.open("rb") as zip_fp:
+            upload_md5sum = cls.extract_upload_md5sum(zip_fp)

-
-            **event_payload,  # type: ignore
+        sequence_progress: SequenceProgress = {
             "sequence_image_count": len(namelist),
+            "file_type": types.FileType.ZIP.value,
+            "md5sum": upload_md5sum,
         }

-
-            upload_md5sum = _extract_upload_md5sum(fp)
-
-        if upload_md5sum is None:
-            with zip_path.open("rb") as fp:
-                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+        session_key = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)

-        with zip_path.open("rb") as
-            return
-
+        with zip_path.open("rb") as zip_fp:
+            return uploader.upload_stream(
+                zip_fp,
                 upload_api_v4.ClusterFileType.ZIP,
-
-
+                session_key,
+                # Send the copy of the input progress to each upload session, to avoid modifying the original one
+                progress=T.cast(T.Dict[str, T.Any], {**progress, **sequence_progress}),
             )

-
-
+    @classmethod
+    def prepare_images_and_upload(
+        cls,
         image_metadatas: T.Sequence[types.ImageMetadata],
-
-
-
-
+        uploader: Uploader,
+        progress: dict[str, T.Any] | None = None,
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        if progress is None:
+            progress = {}

-        _validate_metadatas(image_metadatas)
         sequences = types.group_and_sort_images(image_metadatas)
-
+
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-
-                **event_payload,  # type: ignore
+            sequence_progress: SequenceProgress = {
                 "sequence_idx": sequence_idx,
                 "total_sequence_count": len(sequences),
                 "sequence_image_count": len(sequence),
                 "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
             }
-
-
-
+
+            try:
+                _validate_metadatas(sequence)
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+                continue
+
             with tempfile.NamedTemporaryFile() as fp:
-
-
-
-
-
-
+                try:
+                    upload_md5sum = cls.zip_sequence_deterministically(sequence, fp)
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                sequence_progress["md5sum"] = upload_md5sum
+
+                session_key = _session_key(
+                    upload_md5sum, upload_api_v4.ClusterFileType.ZIP
                 )
-
-
-
+
+                try:
+                    cluster_id = uploader.upload_stream(
+                        fp,
+                        upload_api_v4.ClusterFileType.ZIP,
+                        session_key,
+                        progress=T.cast(
+                            T.Dict[str, T.Any], {**progress, **sequence_progress}
+                        ),
+                    )
+                except Exception as ex:
+                    yield sequence_uuid, UploadResult(error=ex)
+                    continue
+
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
+
+class Uploader:
+    def __init__(
+        self,
+        user_items: types.UserItem,
+        emitter: EventEmitter | None = None,
+        chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
+        dry_run=False,
+    ):
+        self.user_items = user_items
+        if emitter is None:
+            # An empty event emitter that does nothing
+            self.emitter = EventEmitter()
+        else:
+            self.emitter = emitter
+        self.chunk_size = chunk_size
+        self.dry_run = dry_run

     def upload_stream(
         self,
         fp: T.IO[bytes],
         cluster_filetype: upload_api_v4.ClusterFileType,
-
-
-    ) ->
-        if
-
+        session_key: str,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}

         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()

-
-            upload_api_v4.ClusterFileType.ZIP: ".zip",
-            upload_api_v4.ClusterFileType.CAMM: ".mp4",
-            upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
-        }
-        session_key = f"mly_tools_{upload_md5sum}{SUFFIX_MAP[cluster_filetype]}"
+        upload_service = self._create_upload_service(session_key, cluster_filetype)

-
-
-
-
-
-
-
-
+        progress["entity_size"] = entity_size
+        progress["chunk_size"] = self.chunk_size
+        progress["retries"] = 0
+        progress["begin_offset"] = None
+
+        self.emitter.emit("upload_start", progress)
+
+        while True:
+            try:
+                file_handle = self._upload_stream_retryable(
+                    upload_service, fp, T.cast(UploaderProgress, progress)
                 )
+            except Exception as ex:
+                self._handle_upload_exception(ex, T.cast(UploaderProgress, progress))
+            else:
+                break
+
+            progress["retries"] += 1
+
+        self.emitter.emit("upload_end", progress)
+
+        # TODO: retry here
+        cluster_id = self._finish_upload_retryable(upload_service, file_handle)
+        progress["cluster_id"] = cluster_id
+
+        self.emitter.emit("upload_finished", progress)
+
+        return cluster_id
+
+    def _create_upload_service(
+        self, session_key: str, cluster_filetype: upload_api_v4.ClusterFileType
+    ) -> upload_api_v4.UploadService:
+        upload_service: upload_api_v4.UploadService
+
+        if self.dry_run:
+            upload_service = upload_api_v4.FakeUploadService(
+                user_access_token=self.user_items["user_upload_token"],
+                session_key=session_key,
+                cluster_filetype=cluster_filetype,
             )
         else:
             upload_service = upload_api_v4.UploadService(
                 user_access_token=self.user_items["user_upload_token"],
                 session_key=session_key,
-                organization_id=self.user_items.get("MAPOrganizationKey"),
                 cluster_filetype=cluster_filetype,
-                chunk_size=self.chunk_size,
             )

-
-
-
-
-
+        return upload_service
+
+    def _handle_upload_exception(
+        self, ex: Exception, progress: UploaderProgress
+    ) -> None:
+        retries = progress["retries"]
+        begin_offset = progress.get("begin_offset")
+        chunk_size = progress["chunk_size"]
+
+        if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
+            self.emitter.emit("upload_interrupted", progress)
+            LOG.warning(
+                # use %s instead of %d because offset could be None
+                "Error uploading chunk_size %d at begin_offset %s: %s: %s",
+                chunk_size,
+                begin_offset,
+                ex.__class__.__name__,
+                str(ex),
+            )
+            # Keep things immutable here. Will increment retries in the caller
+            retries += 1
+            if _is_immediate_retry(ex):
+                sleep_for = 0
+            else:
+                sleep_for = min(2**retries, 16)
+            LOG.info(
+                "Retrying in %d seconds (%d/%d)",
+                sleep_for,
+                retries,
+                constants.MAX_UPLOAD_RETRIES,
+            )
+            if sleep_for:
+                time.sleep(sleep_for)
+        else:
+            raise ex

-
-
-
-
-
-
+    def _chunk_with_progress_emitted(
+        self,
+        stream: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> T.Generator[bytes, None, None]:
+        for chunk in upload_api_v4.UploadService.chunkize_byte_stream(
+            stream, self.chunk_size
+        ):
+            yield chunk
+
+            progress["offset"] += len(chunk)
+            progress["chunk_size"] = len(chunk)
+            # Whenever a chunk is uploaded, reset retries
+            progress["retries"] = 0
+
+            self.emitter.emit("upload_progress", progress)
+
+    def _upload_stream_retryable(
+        self,
+        upload_service: upload_api_v4.UploadService,
+        fp: T.IO[bytes],
+        progress: UploaderProgress,
+    ) -> str:
+        """Upload the stream with safe retries guraranteed"""
+
+        begin_offset = upload_service.fetch_offset()
+
+        progress["begin_offset"] = begin_offset
+        progress["offset"] = begin_offset
+
+        self.emitter.emit("upload_fetch_offset", progress)
+
+        fp.seek(begin_offset, io.SEEK_SET)
+
+        shifted_chunks = self._chunk_with_progress_emitted(fp, progress)
+
+        return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)
+
+    def _finish_upload_retryable(
+        self, upload_service: upload_api_v4.UploadService, file_handle: str
+    ) -> str:
+        """Finish upload with safe retries guraranteed"""
+
+        if self.dry_run:
+            cluster_id = "0"
+        else:
+            resp = api_v4.finish_upload(
+                self.user_items["user_upload_token"],
+                file_handle,
+                upload_service.cluster_filetype,
+                organization_id=self.user_items.get("MAPOrganizationKey"),
             )
-
-
+
+            data = resp.json()
+            cluster_id = data.get("cluster_id")
+
+        # TODO: validate cluster_id
+
+        return cluster_id


 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
@@ -254,70 +535,6 @@ def wip_file_context(wip_path: Path, done_path: Path):
         pass


-def zip_images(
-    metadatas: T.List[types.ImageMetadata],
-    zip_dir: Path,
-) -> None:
-    _validate_metadatas(metadatas)
-    sequences = types.group_and_sort_images(metadatas)
-    os.makedirs(zip_dir, exist_ok=True)
-    for sequence_uuid, sequence in sequences.items():
-        for metadata in sequence:
-            metadata.update_md5sum()
-        upload_md5sum = types.sequence_md5sum(sequence)
-        timestamp = int(time.time())
-        wip_zip_filename = zip_dir.joinpath(
-            f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{timestamp}"
-        )
-        zip_filename = zip_dir.joinpath(f"mly_tools_{upload_md5sum}.zip")
-        with wip_file_context(wip_zip_filename, zip_filename) as wip_dir:
-            with wip_dir.open("wb") as fp:
-                _zip_sequence_fp(sequence, fp, upload_md5sum)
-
-
-def _zip_sequence_fp(
-    sequence: T.Sequence[types.ImageMetadata],
-    fp: T.IO[bytes],
-    upload_md5sum: str,
-) -> None:
-    arcname_idx = 0
-    arcnames = set()
-    with zipfile.ZipFile(fp, "w", zipfile.ZIP_DEFLATED) as ziph:
-        for metadata in sequence:
-            edit = exif_write.ExifEdit(metadata.filename)
-            # The cast is to fix the type checker error
-            edit.add_image_description(
-                T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
-            )
-            image_bytes = edit.dump_image_bytes()
-            arcname: str = metadata.filename.name
-            # make sure the arcname is unique, otherwise zipfile.extractAll will eliminate duplicated ones
-            while arcname in arcnames:
-                arcname_idx += 1
-                arcname = (
-                    f"{metadata.filename.stem}_{arcname_idx}{metadata.filename.suffix}"
-                )
-            arcnames.add(arcname)
-            zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            ziph.writestr(zipinfo, image_bytes)
-        ziph.comment = json.dumps({"upload_md5sum": upload_md5sum}).encode("utf-8")
-        assert len(sequence) == len(set(ziph.namelist()))
-
-
-def _extract_upload_md5sum(fp: T.IO[bytes]) -> T.Optional[str]:
-    with zipfile.ZipFile(fp, "r", zipfile.ZIP_DEFLATED) as ziph:
-        comment = ziph.comment
-    if not comment:
-        return None
-    try:
-        upload_md5sum = json.loads(comment.decode("utf-8")).get("upload_md5sum")
-    except Exception:
-        return None
-    if not upload_md5sum:
-        return None
-    return str(upload_md5sum)
-
-
 def _is_immediate_retry(ex: Exception):
     if (
         isinstance(ex, requests.HTTPError)
@@ -351,89 +568,14 @@ def _is_retriable_exception(ex: Exception):
     return False


-
-
-
-
-
-            emitter.emit("upload_progress", mutable_payload)
+_SUFFIX_MAP: dict[upload_api_v4.ClusterFileType, str] = {
+    upload_api_v4.ClusterFileType.ZIP: ".zip",
+    upload_api_v4.ClusterFileType.CAMM: ".mp4",
+    upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
+}

-    return _callback

-
-
-    upload_service: upload_api_v4.UploadService,
-    fp: T.IO[bytes],
-    event_payload: T.Optional[Progress] = None,
-    emitter: T.Optional[EventEmitter] = None,
+def _session_key(
+    upload_md5sum: str, cluster_filetype: upload_api_v4.ClusterFileType
 ) -> str:
-
-
-    if event_payload is None:
-        event_payload = {}
-
-    mutable_payload = T.cast(Progress, {**event_payload})
-
-    # when it progresses, we reset retries
-    def _reset_retries(_, __):
-        nonlocal retries
-        retries = 0
-
-    if emitter:
-        emitter.emit("upload_start", mutable_payload)
-
-    while True:
-        fp.seek(0, io.SEEK_SET)
-        begin_offset: T.Optional[int] = None
-        try:
-            begin_offset = upload_service.fetch_offset()
-            upload_service.callbacks = [_reset_retries]
-            if emitter:
-                mutable_payload["offset"] = begin_offset
-                mutable_payload["retries"] = retries
-                emitter.emit("upload_fetch_offset", mutable_payload)
-            upload_service.callbacks.append(
-                _setup_callback(emitter, mutable_payload)
-            )
-            file_handle = upload_service.upload(fp, offset=begin_offset)
-        except Exception as ex:
-            if retries < constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-                if emitter:
-                    emitter.emit("upload_interrupted", mutable_payload)
-                LOG.warning(
-                    # use %s instead of %d because offset could be None
-                    "Error uploading chunk_size %d at begin_offset %s: %s: %s",
-                    upload_service.chunk_size,
-                    begin_offset,
-                    ex.__class__.__name__,
-                    str(ex),
-                )
-                retries += 1
-                if _is_immediate_retry(ex):
-                    sleep_for = 0
-                else:
-                    sleep_for = min(2**retries, 16)
-                LOG.info(
-                    "Retrying in %d seconds (%d/%d)",
-                    sleep_for,
-                    retries,
-                    constants.MAX_UPLOAD_RETRIES,
-                )
-                if sleep_for:
-                    time.sleep(sleep_for)
-                else:
-                    raise ex
-            else:
-                break
-
-    if emitter:
-        emitter.emit("upload_end", mutable_payload)
-
-    # TODO: retry here
-    cluster_id = upload_service.finish(file_handle)
-
-    if emitter:
-        mutable_payload["cluster_id"] = cluster_id
-        emitter.emit("upload_finished", mutable_payload)
-
-    return cluster_id
+    return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[cluster_filetype]}"