mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +1 -0
- mapillary_tools/authenticate.py +9 -9
- mapillary_tools/blackvue_parser.py +79 -22
- mapillary_tools/config.py +38 -17
- mapillary_tools/constants.py +2 -0
- mapillary_tools/exiftool_read_video.py +52 -15
- mapillary_tools/exiftool_runner.py +4 -24
- mapillary_tools/ffmpeg.py +406 -232
- mapillary_tools/geotag/__init__.py +0 -0
- mapillary_tools/geotag/base.py +2 -2
- mapillary_tools/geotag/factory.py +97 -88
- mapillary_tools/geotag/geotag_images_from_exiftool.py +26 -19
- mapillary_tools/geotag/geotag_images_from_gpx.py +13 -6
- mapillary_tools/geotag/geotag_images_from_video.py +35 -0
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +39 -13
- mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
- mapillary_tools/geotag/options.py +25 -3
- mapillary_tools/geotag/video_extractors/base.py +1 -1
- mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
- mapillary_tools/geotag/video_extractors/gpx.py +60 -70
- mapillary_tools/geotag/video_extractors/native.py +9 -31
- mapillary_tools/history.py +4 -1
- mapillary_tools/process_geotag_properties.py +16 -8
- mapillary_tools/process_sequence_properties.py +9 -11
- mapillary_tools/sample_video.py +7 -6
- mapillary_tools/serializer/description.py +587 -0
- mapillary_tools/serializer/gpx.py +132 -0
- mapillary_tools/types.py +44 -610
- mapillary_tools/upload.py +176 -197
- mapillary_tools/upload_api_v4.py +94 -51
- mapillary_tools/uploader.py +284 -138
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/METADATA +87 -31
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/RECORD +38 -35
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/WHEEL +1 -1
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/licenses/LICENSE +0 -0
- {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.0b1.dist-info}/top_level.txt +0 -0
mapillary_tools/uploader.py
CHANGED
```diff
@@ -1,12 +1,16 @@
 from __future__ import annotations
 
+import concurrent.futures
+
 import dataclasses
 import io
 import json
 import logging
 import os
 import struct
+import sys
 import tempfile
+import threading
 import time
 import typing as T
 import uuid
@@ -14,9 +18,19 @@ import zipfile
 from contextlib import contextmanager
 from pathlib import Path
 
+if sys.version_info >= (3, 11):
+    from typing import Required
+else:
+    from typing_extensions import Required
+
 import requests
 
-from . import api_v4, constants, exif_write, types, upload_api_v4
+from . import api_v4, config, constants, exif_write, types, upload_api_v4, utils
+from .serializer.description import (
+    desc_file_to_exif,
+    DescriptionJSONSerializer,
+    validate_image_desc,
+)
 
 
 LOG = logging.getLogger(__name__)
```
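Editorial note on the new `Required` import above: `Required` (PEP 655, in `typing` since Python 3.11, otherwise from `typing_extensions`) marks individual keys as mandatory inside a `TypedDict` declared with `total=False`. A minimal sketch of the pattern, using a hypothetical `Progress` type for illustration:

```python
import sys
import typing as T

if sys.version_info >= (3, 11):
    from typing import Required
else:
    from typing_extensions import Required

class Progress(T.TypedDict, total=False):
    # Mandatory even though total=False, because it is wrapped in Required
    file_type: Required[str]
    # Optional, the default for total=False keys
    sequence_image_count: int

ok: Progress = {"file_type": "zip"}  # type-checks without the optional key
```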
```diff
@@ -56,17 +70,21 @@ class UploaderProgress(T.TypedDict, total=True):
 class SequenceProgress(T.TypedDict, total=False):
     """Progress data at sequence level"""
 
-    #
-
+    # Used to check if it is uploaded or not
+    sequence_md5sum: Required[str]
+
+    # Used to resume from the previous upload,
+    # so it has to an unique identifier (hash) of the upload content
+    upload_md5sum: str
 
     # File type
-    file_type: str
+    file_type: Required[str]
 
     # How many sequences in total. It's always 1 when uploading Zipfile/BlackVue/CAMM
-    total_sequence_count: int
+    total_sequence_count: Required[int]
 
     # 0-based nth sequence. It is always 0 when uploading Zipfile/BlackVue/CAMM
-    sequence_idx: int
+    sequence_idx: Required[int]
 
     # How many images in the sequence. It's available only when uploading directories/Zipfiles
     sequence_image_count: int
@@ -121,6 +139,7 @@ class EventEmitter:
     def on(self, event: EventName):
         def _wrap(callback):
             self.events.setdefault(event, []).append(callback)
+            return callback
 
         return _wrap
 
```
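The one-line change to `EventEmitter.on` matters for decorator use: `_wrap` now returns the callback, so decorating a function no longer rebinds its name to `None`. An illustrative call site, assuming `EventEmitter()` needs no constructor arguments (the event name is one of the progress events used elsewhere in this file):

```python
emitter = EventEmitter()

@emitter.on("upload_progress")
def log_progress(payload):
    print(payload)

# Before this fix, log_progress would be None here, because a
# decorator's return value replaces the decorated function.
assert log_progress is not None
```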
```diff
@@ -148,30 +167,50 @@ class ZipImageSequence:
 
         for sequence_uuid, sequence in sequences.items():
             _validate_metadatas(sequence)
-            upload_md5sum = types.update_sequence_md5sum(sequence)
-
             # For atomicity we write into a WIP file and then rename to the final file
             wip_zip_filename = zip_dir.joinpath(
                 f".mly_zip_{uuid.uuid4()}_{sequence_uuid}_{os.getpid()}_{int(time.time())}"
             )
-
-            zip_filename = zip_dir.joinpath(filename)
-            with wip_file_context(wip_zip_filename, zip_filename) as wip_path:
+            with cls._wip_file_context(wip_zip_filename) as wip_path:
                 with wip_path.open("wb") as wip_fp:
-
-
+                    cls.zip_sequence_fp(sequence, wip_fp)
+
+    @classmethod
+    @contextmanager
+    def _wip_file_context(cls, wip_path: Path):
+        try:
+            os.remove(wip_path)
+        except FileNotFoundError:
+            pass
+        try:
+            yield wip_path
+
+            with wip_path.open("rb") as fp:
+                upload_md5sum = utils.md5sum_fp(fp).hexdigest()
+
+            done_path = wip_path.parent.joinpath(
+                _session_key(upload_md5sum, api_v4.ClusterFileType.ZIP)
+            )
+
+            try:
+                os.remove(done_path)
+            except FileNotFoundError:
+                pass
+            wip_path.rename(done_path)
+        finally:
+            try:
+                os.remove(wip_path)
+            except FileNotFoundError:
+                pass
 
     @classmethod
-    def
+    def zip_sequence_fp(
         cls,
         sequence: T.Sequence[types.ImageMetadata],
         zip_fp: T.IO[bytes],
     ) -> str:
         """
-        Write a sequence of ImageMetadata into the zipfile handle.
-        that the same sequence always produces the same zipfile, because the
-        sequence md5sum will be used to upload the zipfile or resume the upload.
-
+        Write a sequence of ImageMetadata into the zipfile handle.
         The sequence has to be one sequence and sorted.
         """
 
```
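`_wip_file_context` above follows the write-to-temp-then-rename pattern: content is written to a uniquely named WIP path and only renamed into place on success, so a crash never leaves a half-written zip under the final name. A standalone sketch of the same idea (names are illustrative; the real method derives the final name from the content md5):

```python
import os
from contextlib import contextmanager
from pathlib import Path

@contextmanager
def atomic_write(wip_path: Path, done_path: Path):
    wip_path.unlink(missing_ok=True)  # clear stale leftovers from a crash
    try:
        yield wip_path
        # Success: move into place; rename is atomic on POSIX when both
        # paths live on the same filesystem
        done_path.unlink(missing_ok=True)
        wip_path.rename(done_path)
    finally:
        # On failure the WIP file is cleaned up; after a successful
        # rename it no longer exists, so this is a no-op
        wip_path.unlink(missing_ok=True)

with atomic_write(Path("out.zip.wip"), Path("out.zip")) as p:
    p.write_bytes(b"zip content here")
```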
```diff
@@ -180,21 +219,23 @@ class ZipImageSequence:
                 f"Only one sequence is allowed but got {len(sequence_groups)}: {list(sequence_groups.keys())}"
             )
 
-
+        sequence_md5sum = types.update_sequence_md5sum(sequence)
 
         with zipfile.ZipFile(zip_fp, "w", zipfile.ZIP_DEFLATED) as zipf:
             for idx, metadata in enumerate(sequence):
-                #
-
-
-
+                # Arcname should be unique, the name does not matter
+                arcname = f"{idx}.jpg"
+                zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
+                zipf.writestr(zipinfo, cls._dump_image_bytes(metadata))
             assert len(sequence) == len(set(zipf.namelist()))
-            zipf.comment = json.dumps({"
+            zipf.comment = json.dumps({"sequence_md5sum": sequence_md5sum}).encode(
+                "utf-8"
+            )
 
-        return
+        return sequence_md5sum
 
     @classmethod
-    def
+    def extract_sequence_md5sum(cls, zip_fp: T.IO[bytes]) -> str:
         with zipfile.ZipFile(zip_fp, "r", zipfile.ZIP_DEFLATED) as ziph:
             comment = ziph.comment
 
```
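Pinning every `ZipInfo` timestamp to the DOS epoch `(1980, 1, 1, 0, 0, 0)` is what makes `zip_sequence_fp` reproducible: the same sorted sequence yields byte-identical archives, so a hash of the zip can identify and resume an upload. A minimal sketch of the idea with plain byte entries:

```python
import hashlib
import io
import zipfile

def deterministic_zip(entries: list[bytes]) -> bytes:
    buf = io.BytesIO()
    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zipf:
        for idx, data in enumerate(entries):
            # Fixed timestamp: no "current time" leaks into the archive bytes
            info = zipfile.ZipInfo(f"{idx}.jpg", date_time=(1980, 1, 1, 0, 0, 0))
            zipf.writestr(info, data)
    return buf.getvalue()

# Same input -> same bytes -> same md5, a stable upload identifier
a = hashlib.md5(deterministic_zip([b"x", b"y"])).hexdigest()
b = hashlib.md5(deterministic_zip([b"x", b"y"])).hexdigest()
assert a == b
```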
```diff
@@ -209,17 +250,15 @@ class ZipImageSequence:
         except json.JSONDecodeError as ex:
             raise InvalidMapillaryZipFileError(str(ex)) from ex
 
-
+        sequence_md5sum = zip_metadata.get("sequence_md5sum")
 
-        if not
-            raise InvalidMapillaryZipFileError("No
+        if not sequence_md5sum and not isinstance(sequence_md5sum, str):
+            raise InvalidMapillaryZipFileError("No sequence_md5sum found")
 
-        return
+        return sequence_md5sum
 
     @classmethod
-    def
-        cls, zipf: zipfile.ZipFile, metadata: types.ImageMetadata, arcname: str
-    ):
+    def _dump_image_bytes(cls, metadata: types.ImageMetadata) -> bytes:
         try:
             edit = exif_write.ExifEdit(metadata.filename)
         except struct.error as ex:
@@ -227,24 +266,24 @@ class ZipImageSequence:
 
         # The cast is to fix the type checker error
         edit.add_image_description(
-            T.cast(
+            T.cast(
+                T.Dict,
+                desc_file_to_exif(DescriptionJSONSerializer.as_desc(metadata)),
+            )
         )
 
         try:
-
+            return edit.dump_image_bytes()
         except struct.error as ex:
             raise ExifError(
                 f"Failed to dump EXIF bytes: {ex}", metadata.filename
             ) from ex
 
-        zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-        zipf.writestr(zipinfo, image_bytes)
-
     @classmethod
-    def
+    def upload_zipfile(
         cls,
-        zip_path: Path,
         uploader: Uploader,
+        zip_path: Path,
         progress: dict[str, T.Any] | None = None,
     ) -> str:
         if progress is None:
@@ -256,30 +295,34 @@ class ZipImageSequence:
             raise InvalidMapillaryZipFileError("Zipfile has no files")
 
         with zip_path.open("rb") as zip_fp:
-
+            sequence_md5sum = cls.extract_sequence_md5sum(zip_fp)
 
-
+        # Send the copy of the input progress to each upload session, to avoid modifying the original one
+        mutable_progress: SequenceProgress = {
+            **T.cast(SequenceProgress, progress),
             "sequence_image_count": len(namelist),
+            "sequence_md5sum": sequence_md5sum,
             "file_type": types.FileType.ZIP.value,
-            "md5sum": upload_md5sum,
         }
 
-        session_key = _session_key(upload_md5sum, upload_api_v4.ClusterFileType.ZIP)
-
         with zip_path.open("rb") as zip_fp:
-
-                zip_fp,
-                upload_api_v4.ClusterFileType.ZIP,
-                session_key,
-                # Send the copy of the input progress to each upload session, to avoid modifying the original one
-                progress=T.cast(T.Dict[str, T.Any], {**progress, **sequence_progress}),
+            file_handle = uploader.upload_stream(
+                zip_fp, progress=T.cast(T.Dict[str, T.Any], mutable_progress)
             )
 
+        cluster_id = uploader.finish_upload(
+            file_handle,
+            api_v4.ClusterFileType.ZIP,
+            progress=T.cast(T.Dict[str, T.Any], mutable_progress),
+        )
+
+        return cluster_id
+
     @classmethod
-    def
+    def zip_images_and_upload(
         cls,
-        image_metadatas: T.Sequence[types.ImageMetadata],
         uploader: Uploader,
+        image_metadatas: T.Sequence[types.ImageMetadata],
         progress: dict[str, T.Any] | None = None,
     ) -> T.Generator[tuple[str, UploadResult], None, None]:
         if progress is None:
@@ -288,14 +331,6 @@ class ZipImageSequence:
         sequences = types.group_and_sort_images(image_metadatas)
 
         for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
-            sequence_progress: SequenceProgress = {
-                "sequence_idx": sequence_idx,
-                "total_sequence_count": len(sequences),
-                "sequence_image_count": len(sequence),
-                "sequence_uuid": sequence_uuid,
-                "file_type": types.FileType.IMAGE.value,
-            }
-
             try:
                 _validate_metadatas(sequence)
             except Exception as ex:
@@ -304,25 +339,28 @@ class ZipImageSequence:
 
             with tempfile.NamedTemporaryFile() as fp:
                 try:
-
+                    sequence_md5sum = cls.zip_sequence_fp(sequence, fp)
                 except Exception as ex:
                     yield sequence_uuid, UploadResult(error=ex)
                     continue
 
-                sequence_progress
+                sequence_progress: SequenceProgress = {
+                    "sequence_idx": sequence_idx,
+                    "total_sequence_count": len(sequences),
+                    "sequence_image_count": len(sequence),
+                    "sequence_uuid": sequence_uuid,
+                    "file_type": types.FileType.ZIP.value,
+                    "sequence_md5sum": sequence_md5sum,
+                }
 
-
-                    upload_md5sum, upload_api_v4.ClusterFileType.ZIP
-                )
+                mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
 
                 try:
-
-
-
-
-                    progress=
-                        T.Dict[str, T.Any], {**progress, **sequence_progress}
-                    ),
+                    file_handle = uploader.upload_stream(fp, progress=mutable_progress)
+                    cluster_id = uploader.finish_upload(
+                        file_handle,
+                        api_v4.ClusterFileType.ZIP,
+                        progress=mutable_progress,
                     )
                 except Exception as ex:
                     yield sequence_uuid, UploadResult(error=ex)
@@ -330,11 +368,115 @@ class ZipImageSequence:
 
             yield sequence_uuid, UploadResult(result=cluster_id)
 
+    @classmethod
+    def _upload_sequence(
+        cls,
+        uploader: Uploader,
+        sequence: T.Sequence[types.ImageMetadata],
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        if progress is None:
+            progress = {}
+
+        # FIXME: This is a hack to disable the event emitter inside the uploader
+        uploader_without_emitter = uploader.copy_uploader_without_emitter()
+
+        lock = threading.Lock()
+
+        def _upload_image(image_metadata: types.ImageMetadata) -> str:
+            mutable_progress = {
+                **(progress or {}),
+                "filename": str(image_metadata.filename),
+            }
+
+            bytes = cls._dump_image_bytes(image_metadata)
+            file_handle = uploader_without_emitter.upload_stream(
+                io.BytesIO(bytes), progress=mutable_progress
+            )
+
+            mutable_progress["chunk_size"] = image_metadata.filesize
+
+            with lock:
+                uploader.emitter.emit("upload_progress", mutable_progress)
+
+            return file_handle
+
+        _validate_metadatas(sequence)
+
+        progress["entity_size"] = sum(m.filesize or 0 for m in sequence)
+
+        # TODO: assert sequence is sorted
+
+        uploader.emitter.emit("upload_start", progress)
+
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=constants.MAX_IMAGE_UPLOAD_WORKERS
+        ) as executor:
+            image_file_handles = list(executor.map(_upload_image, sequence))
+
+        manifest = {
+            "version": "1",
+            "upload_type": "images",
+            "image_handles": image_file_handles,
+        }
+
+        with io.BytesIO() as manifest_fp:
+            manifest_fp.write(json.dumps(manifest).encode("utf-8"))
+            manifest_fp.seek(0, io.SEEK_SET)
+            manifest_file_handle = uploader_without_emitter.upload_stream(
+                manifest_fp, session_key=f"{uuid.uuid4().hex}.json"
+            )
+
+        uploader.emitter.emit("upload_end", progress)
+
+        cluster_id = uploader.finish_upload(
+            manifest_file_handle,
+            api_v4.ClusterFileType.MLY_BUNDLE_MANIFEST,
+            progress=progress,
+        )
+
+        return cluster_id
+
+    @classmethod
+    def upload_images(
+        cls,
+        uploader: Uploader,
+        image_metadatas: T.Sequence[types.ImageMetadata],
+        progress: dict[str, T.Any] | None = None,
+    ) -> T.Generator[tuple[str, UploadResult], None, None]:
+        if progress is None:
+            progress = {}
+
+        sequences = types.group_and_sort_images(image_metadatas)
+
+        for sequence_idx, (sequence_uuid, sequence) in enumerate(sequences.items()):
+            sequence_md5sum = types.update_sequence_md5sum(sequence)
+
+            sequence_progress: SequenceProgress = {
+                "sequence_idx": sequence_idx,
+                "total_sequence_count": len(sequences),
+                "sequence_image_count": len(sequence),
+                "sequence_uuid": sequence_uuid,
+                "file_type": types.FileType.IMAGE.value,
+                "sequence_md5sum": sequence_md5sum,
+            }
+
+            mutable_progress: dict[str, T.Any] = {**progress, **sequence_progress}
+
+            try:
+                cluster_id = cls._upload_sequence(
+                    uploader, sequence, progress=mutable_progress
+                )
+            except Exception as ex:
+                yield sequence_uuid, UploadResult(error=ex)
+            else:
+                yield sequence_uuid, UploadResult(result=cluster_id)
+
 
 class Uploader:
     def __init__(
         self,
-        user_items:
+        user_items: config.UserItem,
         emitter: EventEmitter | None = None,
         chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
         dry_run=False,
```
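In the new `_upload_sequence`, `executor.map` returns results in input order, so `image_handles` stay aligned with the sorted sequence even though uploads finish out of order, and a `threading.Lock` serializes the emitter callbacks, which are not assumed to be thread-safe. A reduced sketch of that pattern (the worker body is a stand-in for the real chunked upload):

```python
import concurrent.futures
import threading

lock = threading.Lock()

def upload_one(name: str) -> str:
    handle = f"handle:{name}"  # stand-in for upload_stream()
    with lock:
        # Progress callbacks run one at a time across worker threads
        print("uploaded", name)
    return handle

names = ["0.jpg", "1.jpg", "2.jpg"]
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    # map() preserves input order regardless of completion order
    handles = list(executor.map(upload_one, names))

assert handles == ["handle:0.jpg", "handle:1.jpg", "handle:2.jpg"]
```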
```diff
@@ -351,17 +493,25 @@ class Uploader:
     def upload_stream(
         self,
         fp: T.IO[bytes],
-
-        session_key: str,
+        session_key: str | None = None,
         progress: dict[str, T.Any] | None = None,
     ) -> str:
         if progress is None:
             progress = {}
 
+        if session_key is None:
+            fp.seek(0, io.SEEK_SET)
+            md5sum = utils.md5sum_fp(fp).hexdigest()
+            filetype = progress.get("file_type")
+            if filetype is not None:
+                session_key = _session_key(md5sum, types.FileType(filetype))
+            else:
+                session_key = md5sum
+
         fp.seek(0, io.SEEK_END)
         entity_size = fp.tell()
 
-        upload_service = self._create_upload_service(session_key
+        upload_service = self._create_upload_service(session_key)
 
         progress["entity_size"] = entity_size
         progress["chunk_size"] = self.chunk_size
```
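`upload_stream` now derives the session key from the md5 of the stream when none is given, so re-running the same payload resumes the same upload session instead of starting a new one. A minimal sketch of that derivation (`utils.md5sum_fp` is internal, so the hashing is inlined here):

```python
import hashlib
import io
import typing as T

def content_session_key(fp: T.IO[bytes], suffix: str) -> str:
    fp.seek(0, io.SEEK_SET)
    digest = hashlib.md5()
    for chunk in iter(lambda: fp.read(1024 * 1024), b""):
        digest.update(chunk)
    # Identical content always maps to the same key, which is what
    # makes retries land on the same resumable session
    return f"mly_tools_{digest.hexdigest()}{suffix}"

key1 = content_session_key(io.BytesIO(b"payload"), ".zip")
key2 = content_session_key(io.BytesIO(b"payload"), ".zip")
assert key1 == key2
```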
```diff
@@ -384,30 +534,64 @@ class Uploader:
 
         self.emitter.emit("upload_end", progress)
 
-
-
-
+        return file_handle
+
+    def finish_upload(
+        self,
+        file_handle: str,
+        cluster_filetype: api_v4.ClusterFileType,
+        progress: dict[str, T.Any] | None = None,
+    ) -> str:
+        """Finish upload with safe retries guraranteed"""
+        if progress is None:
+            progress = {}
 
+        if self.dry_run:
+            cluster_id = "0"
+        else:
+            resp = api_v4.finish_upload(
+                self.user_items["user_upload_token"],
+                file_handle,
+                cluster_filetype,
+                organization_id=self.user_items.get("MAPOrganizationKey"),
+            )
+
+            data = resp.json()
+            cluster_id = data.get("cluster_id")
+
+            # TODO: validate cluster_id
+
+        progress["cluster_id"] = cluster_id
         self.emitter.emit("upload_finished", progress)
 
         return cluster_id
 
-    def
-
-
+    def copy_uploader_without_emitter(self) -> Uploader:
+        return Uploader(
+            self.user_items,
+            emitter=None,
+            chunk_size=self.chunk_size,
+            dry_run=self.dry_run,
+        )
+
+    def _create_upload_service(self, session_key: str) -> upload_api_v4.UploadService:
         upload_service: upload_api_v4.UploadService
 
         if self.dry_run:
+            upload_path = os.getenv("MAPILLARY_UPLOAD_ENDPOINT")
             upload_service = upload_api_v4.FakeUploadService(
                 user_access_token=self.user_items["user_upload_token"],
                 session_key=session_key,
-
+                upload_path=Path(upload_path) if upload_path is not None else None,
+            )
+            LOG.info(
+                "Dry run mode enabled. Data will be uploaded to %s",
+                upload_service.upload_path.joinpath(session_key),
             )
         else:
             upload_service = upload_api_v4.UploadService(
                 user_access_token=self.user_items["user_upload_token"],
                 session_key=session_key,
-                cluster_filetype=cluster_filetype,
             )
 
         return upload_service
```
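With `dry_run=True`, `_create_upload_service` now also honors a `MAPILLARY_UPLOAD_ENDPOINT` environment variable to control where `FakeUploadService` writes. A hedged usage sketch (the path and token are placeholders; `user_items` keys follow `config.UserItem`):

```python
import os

os.environ["MAPILLARY_UPLOAD_ENDPOINT"] = "/tmp/mly_uploads"  # placeholder path

user_items = {"user_upload_token": "TEST-TOKEN"}  # placeholder token

uploader = Uploader(user_items, dry_run=True)
# upload_stream() would write chunks under /tmp/mly_uploads instead of
# hitting the API, and finish_upload() returns the placeholder cluster id "0".
```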
```diff
@@ -484,57 +668,14 @@ class Uploader:
 
         return upload_service.upload_shifted_chunks(shifted_chunks, begin_offset)
 
-    def _finish_upload_retryable(
-        self, upload_service: upload_api_v4.UploadService, file_handle: str
-    ) -> str:
-        """Finish upload with safe retries guraranteed"""
-
-        if self.dry_run:
-            cluster_id = "0"
-        else:
-            resp = api_v4.finish_upload(
-                self.user_items["user_upload_token"],
-                file_handle,
-                upload_service.cluster_filetype,
-                organization_id=self.user_items.get("MAPOrganizationKey"),
-            )
-
-            data = resp.json()
-            cluster_id = data.get("cluster_id")
-
-            # TODO: validate cluster_id
-
-        return cluster_id
-
 
 def _validate_metadatas(metadatas: T.Sequence[types.ImageMetadata]):
     for metadata in metadatas:
-
+        validate_image_desc(DescriptionJSONSerializer.as_desc(metadata))
         if not metadata.filename.is_file():
             raise FileNotFoundError(f"No such file {metadata.filename}")
 
 
-@contextmanager
-def wip_file_context(wip_path: Path, done_path: Path):
-    assert wip_path != done_path, "should not be the same file"
-    try:
-        os.remove(wip_path)
-    except FileNotFoundError:
-        pass
-    try:
-        yield wip_path
-        try:
-            os.remove(done_path)
-        except FileNotFoundError:
-            pass
-        wip_path.rename(done_path)
-    finally:
-        try:
-            os.remove(wip_path)
-        except FileNotFoundError:
-            pass
-
-
 def _is_immediate_retry(ex: Exception):
     if (
         isinstance(ex, requests.HTTPError)
@@ -568,14 +709,19 @@ def _is_retriable_exception(ex: Exception):
     return False
 
 
-_SUFFIX_MAP: dict[upload_api_v4.ClusterFileType, str] = {
-    upload_api_v4.ClusterFileType.ZIP: ".zip",
-    upload_api_v4.ClusterFileType.CAMM: ".mp4",
-    upload_api_v4.ClusterFileType.BLACKVUE: ".mp4",
-}
-
-
 def _session_key(
-    upload_md5sum: str,
+    upload_md5sum: str, filetype: api_v4.ClusterFileType | types.FileType
 ) -> str:
-
+    _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
+        api_v4.ClusterFileType.ZIP: ".zip",
+        api_v4.ClusterFileType.CAMM: ".mp4",
+        api_v4.ClusterFileType.BLACKVUE: ".mp4",
+        types.FileType.IMAGE: ".jpg",
+        types.FileType.ZIP: ".zip",
+        types.FileType.BLACKVUE: ".mp4",
+        types.FileType.CAMM: ".mp4",
+        types.FileType.GOPRO: ".mp4",
+        types.FileType.VIDEO: ".mp4",
+    }
+
+    return f"mly_tools_{upload_md5sum}{_SUFFIX_MAP[filetype]}"
```