mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0a1__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to a supported public registry, and is provided for informational purposes only.
Files changed (64)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +106 -7
  3. mapillary_tools/authenticate.py +325 -64
  4. mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +425 -177
  7. mapillary_tools/commands/__main__.py +2 -0
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +18 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +18 -9
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +28 -12
  15. mapillary_tools/constants.py +46 -4
  16. mapillary_tools/exceptions.py +34 -35
  17. mapillary_tools/exif_read.py +158 -53
  18. mapillary_tools/exiftool_read.py +19 -5
  19. mapillary_tools/exiftool_read_video.py +12 -1
  20. mapillary_tools/exiftool_runner.py +77 -0
  21. mapillary_tools/geo.py +148 -107
  22. mapillary_tools/geotag/factory.py +298 -0
  23. mapillary_tools/geotag/geotag_from_generic.py +152 -11
  24. mapillary_tools/geotag/geotag_images_from_exif.py +43 -124
  25. mapillary_tools/geotag/geotag_images_from_exiftool.py +66 -70
  26. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +32 -48
  27. mapillary_tools/geotag/geotag_images_from_gpx.py +41 -116
  28. mapillary_tools/geotag/geotag_images_from_gpx_file.py +15 -96
  29. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -2
  30. mapillary_tools/geotag/geotag_images_from_video.py +46 -46
  31. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +98 -92
  32. mapillary_tools/geotag/geotag_videos_from_gpx.py +140 -0
  33. mapillary_tools/geotag/geotag_videos_from_video.py +149 -181
  34. mapillary_tools/geotag/options.py +159 -0
  35. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +194 -171
  36. mapillary_tools/history.py +3 -11
  37. mapillary_tools/mp4/io_utils.py +0 -1
  38. mapillary_tools/mp4/mp4_sample_parser.py +11 -3
  39. mapillary_tools/mp4/simple_mp4_parser.py +0 -10
  40. mapillary_tools/process_geotag_properties.py +151 -386
  41. mapillary_tools/process_sequence_properties.py +554 -202
  42. mapillary_tools/sample_video.py +8 -15
  43. mapillary_tools/telemetry.py +24 -12
  44. mapillary_tools/types.py +80 -22
  45. mapillary_tools/upload.py +311 -261
  46. mapillary_tools/upload_api_v4.py +55 -95
  47. mapillary_tools/uploader.py +396 -254
  48. mapillary_tools/utils.py +26 -0
  49. mapillary_tools/video_data_extraction/extract_video_data.py +17 -36
  50. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +34 -19
  51. mapillary_tools/video_data_extraction/extractors/camm_parser.py +41 -17
  52. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +4 -1
  53. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +1 -2
  54. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +37 -22
  55. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/METADATA +3 -2
  56. mapillary_tools-0.14.0a1.dist-info/RECORD +78 -0
  57. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/WHEEL +1 -1
  58. mapillary_tools/geotag/utils.py +0 -26
  59. mapillary_tools-0.13.3.dist-info/RECORD +0 -75
  60. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  61. /mapillary_tools/{geotag → gpmf}/gps_filter.py +0 -0
  62. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/entry_points.txt +0 -0
  63. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info/licenses}/LICENSE +0 -0
  64. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0a1.dist-info}/top_level.txt +0 -0
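Note the renames in the list above: the GoPro GPMF modules moved out of geotag/ into a new gpmf/ package (entries 35, 60, and 61), and blackvue_parser.py moved from geotag/ to the package root (entry 4). A minimal sketch of how import paths change for code that imported these internal modules directly (hypothetical consumer code; only the module locations are taken from the rename entries):

# Before, against 0.13.3 (old internal layout):
# from mapillary_tools.geotag import gpmf_parser, blackvue_parser

# After, against 0.14.0a1 (new locations per the renames above):
from mapillary_tools.gpmf import gpmf_parser
from mapillary_tools import blackvue_parser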
mapillary_tools/upload.py CHANGED
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import json
 import logging
 import os
@@ -7,15 +9,15 @@ import typing as T
 import uuid
 from pathlib import Path
 
+import jsonschema
 import requests
 from tqdm import tqdm
 
 from . import (
     api_v4,
-    authenticate,
-    config,
     constants,
     exceptions,
+    geo,
     history,
     ipc,
     telemetry,
@@ -25,31 +27,23 @@ from . import (
     utils,
     VERSION,
 )
-from .camm import camm_builder
-from .geotag import gpmf_parser
+from .camm import camm_builder, camm_parser
+from .gpmf import gpmf_parser
 from .mp4 import simple_mp4_builder
 from .types import FileType
 
 JSONDict = T.Dict[str, T.Union[str, int, float, None]]
 
 LOG = logging.getLogger(__name__)
-MAPILLARY_DISABLE_API_LOGGING = os.getenv("MAPILLARY_DISABLE_API_LOGGING")
-MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN = os.getenv(
-    "MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN"
-)
-MAPILLARY__EXPERIMENTAL_ENABLE_IMU = os.getenv("MAPILLARY__EXPERIMENTAL_ENABLE_IMU")
-CAMM_CONVERTABLES = {FileType.CAMM, FileType.BLACKVUE, FileType.GOPRO}
 
 
-class UploadError(Exception):
-    def __init__(self, inner_ex) -> None:
-        self.inner_ex = inner_ex
-        super().__init__(str(inner_ex))
+class UploadedAlreadyError(uploader.SequenceError):
+    pass
 
 
 def _load_validate_metadatas_from_desc_path(
-    desc_path: T.Optional[str], import_paths: T.Sequence[Path]
-) -> T.List[types.Metadata]:
+    desc_path: str | None, import_paths: T.Sequence[Path]
+) -> list[types.Metadata]:
     is_default_desc_path = False
     if desc_path is None:
         is_default_desc_path = True
@@ -67,7 +61,7 @@ def _load_validate_metadatas_from_desc_path(
             "The description path must be specified (with --desc_path) when uploading a single file",
         )
 
-    descs: T.List[types.DescriptionOrError] = []
+    descs: list[types.DescriptionOrError] = []
 
     if desc_path == "-":
         try:
@@ -75,7 +69,7 @@
         except json.JSONDecodeError as ex:
             raise exceptions.MapillaryInvalidDescriptionFile(
                 f"Invalid JSON stream from stdin: {ex}"
-            )
+            ) from ex
     else:
         if not os.path.isfile(desc_path):
            if is_default_desc_path:
@@ -92,7 +86,7 @@
         except json.JSONDecodeError as ex:
             raise exceptions.MapillaryInvalidDescriptionFile(
                 f"Invalid JSON file {desc_path}: {ex}"
-            )
+            ) from ex
 
     # the descs load from stdin or json file may contain invalid entries
     validated_descs = [
@@ -120,7 +114,7 @@
 def zip_images(
     import_path: Path,
     zip_dir: Path,
-    desc_path: T.Optional[str] = None,
+    desc_path: str | None = None,
 ):
     if not import_path.is_dir():
         raise exceptions.MapillaryFileNotFoundError(
@@ -137,43 +131,19 @@ def zip_images(
         metadata for metadata in metadatas if isinstance(metadata, types.ImageMetadata)
     ]
 
-    uploader.zip_images(image_metadatas, zip_dir)
+    uploader.ZipImageSequence.zip_images(image_metadatas, zip_dir)
 
 
-def fetch_user_items(
-    user_name: T.Optional[str] = None, organization_key: T.Optional[str] = None
-) -> types.UserItem:
-    if user_name is None:
-        all_user_items = config.list_all_users()
-        if not all_user_items:
-            raise exceptions.MapillaryBadParameterError(
-                "No Mapillary account found. Add one with --user_name"
-            )
-        if len(all_user_items) == 1:
-            user_items = all_user_items[0]
-        else:
-            raise exceptions.MapillaryBadParameterError(
-                "Found multiple Mapillary accounts. Please specify one with --user_name"
-            )
-    else:
-        user_items = authenticate.authenticate_user(user_name)
-
-    if organization_key is not None:
-        resp = api_v4.fetch_organization(
-            user_items["user_upload_token"], organization_key
-        )
-        org = resp.json()
-        LOG.info("Uploading to organization: %s", json.dumps(org))
-        user_items = T.cast(
-            types.UserItem, {**user_items, "MAPOrganizationKey": organization_key}
-        )
-    return user_items
-
-
-def _setup_cancel_due_to_duplication(emitter: uploader.EventEmitter) -> None:
+def _setup_history(
+    emitter: uploader.EventEmitter,
+    upload_run_params: JSONDict,
+    metadatas: list[types.Metadata],
+) -> None:
     @emitter.on("upload_start")
-    def upload_start(payload: uploader.Progress):
-        md5sum = payload["md5sum"]
+    def check_duplication(payload: uploader.Progress):
+        md5sum = payload.get("md5sum")
+        assert md5sum is not None, f"md5sum has to be set for {payload}"
+
         if history.is_uploaded(md5sum):
             sequence_uuid = payload.get("sequence_uuid")
             if sequence_uuid is None:
@@ -189,19 +159,15 @@ def _setup_cancel_due_to_duplication(emitter: uploader.EventEmitter) -> None:
                 sequence_uuid,
                 history.history_desc_path(md5sum),
             )
-            raise uploader.UploadCancelled()
+            raise UploadedAlreadyError()
 
-
-def _setup_write_upload_history(
-    emitter: uploader.EventEmitter,
-    params: JSONDict,
-    metadatas: T.Optional[T.List[types.Metadata]] = None,
-) -> None:
     @emitter.on("upload_finished")
-    def upload_finished(payload: uploader.Progress):
+    def write_history(payload: uploader.Progress):
         sequence_uuid = payload.get("sequence_uuid")
-        md5sum = payload["md5sum"]
-        if sequence_uuid is None or metadatas is None:
+        md5sum = payload.get("md5sum")
+        assert md5sum is not None, f"md5sum has to be set for {payload}"
+
+        if sequence_uuid is None:
             sequence = None
         else:
             sequence = [
@@ -211,10 +177,11 @@ def _setup_write_upload_history(
                 and metadata.MAPSequenceUUID == sequence_uuid
             ]
             sequence.sort(key=lambda metadata: metadata.sort_key())
+
         try:
             history.write_history(
                 md5sum,
-                params,
+                upload_run_params,
                 T.cast(JSONDict, payload),
                 sequence,
             )
@@ -223,7 +190,7 @@ def _setup_write_upload_history(
 
 
 def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
-    upload_pbar: T.Optional[tqdm] = None
+    upload_pbar: tqdm | None = None
 
     @emitter.on("upload_fetch_offset")
     def upload_fetch_offset(payload: uploader.Progress) -> None:
@@ -234,7 +201,7 @@ def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
 
         nth = payload["sequence_idx"] + 1
         total = payload["total_sequence_count"]
-        import_path: T.Optional[str] = payload.get("import_path")
+        import_path: str | None = payload.get("import_path")
         filetype = payload.get("file_type", "unknown").upper()
         if import_path is None:
             _desc = f"Uploading {filetype} ({nth}/{total})"
@@ -269,25 +236,40 @@ def _setup_ipc(emitter: uploader.EventEmitter):
     @emitter.on("upload_start")
     def upload_start(payload: uploader.Progress):
         type: uploader.EventName = "upload_start"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+        LOG.debug("IPC %s: %s", type.upper(), payload)
         ipc.send(type, payload)
 
     @emitter.on("upload_fetch_offset")
     def upload_fetch_offset(payload: uploader.Progress) -> None:
         type: uploader.EventName = "upload_fetch_offset"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+        LOG.debug("IPC %s: %s", type.upper(), payload)
         ipc.send(type, payload)
 
     @emitter.on("upload_progress")
     def upload_progress(payload: uploader.Progress):
         type: uploader.EventName = "upload_progress"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+
+        if LOG.getEffectiveLevel() <= logging.DEBUG:
+            # In debug mode, we want to see the progress every 30 seconds
+            # instead of every chunk (which is too verbose)
+            INTERVAL_SECONDS = 30
+            now = time.time()
+            last_upload_progress_debug_at: float | None = T.cast(T.Dict, payload).get(
+                "_last_upload_progress_debug_at"
+            )
+            if (
+                last_upload_progress_debug_at is None
+                or last_upload_progress_debug_at + INTERVAL_SECONDS < now
+            ):
+                LOG.debug("IPC %s: %s", type.upper(), payload)
+                T.cast(T.Dict, payload)["_last_upload_progress_debug_at"] = now
+
         ipc.send(type, payload)
 
     @emitter.on("upload_end")
     def upload_end(payload: uploader.Progress) -> None:
         type: uploader.EventName = "upload_end"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+        LOG.debug("IPC %s: %s", type.upper(), payload)
         ipc.send(type, payload)
 
 
@@ -309,7 +291,7 @@ class _APIStats(uploader.Progress, total=False):
 
 
 def _setup_api_stats(emitter: uploader.EventEmitter):
-    all_stats: T.List[_APIStats] = []
+    all_stats: list[_APIStats] = []
 
     @emitter.on("upload_start")
     def collect_start_time(payload: _APIStats) -> None:
@@ -337,15 +319,18 @@ def _setup_api_stats(emitter: uploader.EventEmitter):
         now = time.time()
         payload["upload_end_time"] = now
         payload["upload_total_time"] += now - payload["upload_last_restart_time"]
+
+    @emitter.on("upload_finished")
+    def append_stats(payload: _APIStats) -> None:
         all_stats.append(payload)
 
     return all_stats
 
 
-def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
+def _summarize(stats: T.Sequence[_APIStats]) -> dict:
     total_image_count = sum(s.get("sequence_image_count", 0) for s in stats)
     total_uploaded_sequence_count = len(stats)
-    # note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count
+    # Note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count
 
     total_uploaded_size = sum(
         s["entity_size"] - s.get("upload_first_offset", 0) for s in stats
@@ -363,6 +348,7 @@ def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
 
     upload_summary = {
         "images": total_image_count,
+        # TODO: rename sequences to total uploads
         "sequences": total_uploaded_sequence_count,
         "size": round(total_entity_size_mb, 4),
         "uploaded_size": round(total_uploaded_size_mb, 4),
@@ -373,37 +359,34 @@ def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
     return upload_summary
 
 
-def _show_upload_summary(stats: T.Sequence[_APIStats]):
-    grouped: T.Dict[str, T.List[_APIStats]] = {}
-    for stat in stats:
-        grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)
+def _show_upload_summary(stats: T.Sequence[_APIStats], errors: T.Sequence[Exception]):
+    if not stats:
+        LOG.info("Nothing uploaded. Bye.")
+    else:
+        grouped: dict[str, list[_APIStats]] = {}
+        for stat in stats:
+            grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)
+
+        for file_type, typed_stats in grouped.items():
+            if file_type == FileType.IMAGE.value:
+                LOG.info("%8d image sequences uploaded", len(typed_stats))
+            else:
+                LOG.info("%8d %s videos uploaded", len(typed_stats), file_type.upper())
 
-    for file_type, typed_stats in grouped.items():
-        if file_type == FileType.IMAGE.value:
-            LOG.info(
-                "%8d %s sequences uploaded",
-                len(typed_stats),
-                file_type.upper(),
-            )
-        else:
-            LOG.info(
-                "%8d %s files uploaded",
-                len(typed_stats),
-                file_type.upper(),
-            )
+        summary = _summarize(stats)
+        LOG.info("%8.1fM data in total", summary["size"])
+        LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
+        LOG.info("%8.1fs upload time", summary["time"])
 
-    summary = _summarize(stats)
-    LOG.info("%8.1fM data in total", summary["size"])
-    LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
-    LOG.info("%8.1fs upload time", summary["time"])
+    for error in errors:
+        LOG.error("Upload error: %s: %s", error.__class__.__name__, error)
 
 
-def _api_logging_finished(summary: T.Dict):
-    if MAPILLARY_DISABLE_API_LOGGING:
+def _api_logging_finished(summary: dict):
+    if constants.MAPILLARY_DISABLE_API_LOGGING:
         return
 
     action: api_v4.ActionType = "upload_finished_upload"
-    LOG.debug("API Logging for action %s: %s", action, summary)
     try:
         api_v4.log_event(action, summary)
     except requests.HTTPError as exc:
@@ -416,13 +399,12 @@ def _api_logging_finished(summary: T.Dict):
         LOG.warning("Error from API Logging for action %s", action, exc_info=True)
 
 
-def _api_logging_failed(payload: T.Dict, exc: Exception):
-    if MAPILLARY_DISABLE_API_LOGGING:
+def _api_logging_failed(payload: dict, exc: Exception):
+    if constants.MAPILLARY_DISABLE_API_LOGGING:
        return
 
     payload_with_reason = {**payload, "reason": exc.__class__.__name__}
     action: api_v4.ActionType = "upload_failed_upload"
-    LOG.debug("API Logging for action %s: %s", action, payload)
     try:
         api_v4.log_event(action, payload_with_reason)
     except requests.HTTPError as exc:
@@ -436,18 +418,14 @@ def _api_logging_failed(payload: T.Dict, exc: Exception):
 
 
 def _load_descs(
-    _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]],
-    desc_path: T.Optional[str],
+    _metadatas_from_process: T.Sequence[types.MetadataOrError] | None,
+    desc_path: str | None,
     import_paths: T.Sequence[Path],
-) -> T.List[types.Metadata]:
-    metadatas: T.List[types.Metadata]
+) -> list[types.Metadata]:
+    metadatas: list[types.Metadata]
 
     if _metadatas_from_process is not None:
-        metadatas = [
-            metadata
-            for metadata in _metadatas_from_process
-            if not isinstance(metadata, types.ErrorMetadata)
-        ]
+        metadatas, _ = types.separate_errors(_metadatas_from_process)
     else:
         metadatas = _load_validate_metadatas_from_desc_path(desc_path, import_paths)
 
@@ -471,31 +449,139 @@ _M = T.TypeVar("_M", bound=types.Metadata)
 
 
 def _find_metadata_with_filename_existed_in(
-    metadatas: T.Sequence[_M], paths: T.Sequence[Path]
-) -> T.List[_M]:
+    metadatas: T.Iterable[_M], paths: T.Iterable[Path]
+) -> list[_M]:
     resolved_image_paths = set(p.resolve() for p in paths)
     return [d for d in metadatas if d.filename.resolve() in resolved_image_paths]
 
 
-def upload(
-    import_path: T.Union[Path, T.Sequence[Path]],
-    desc_path: T.Optional[str] = None,
-    _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]] = None,
-    user_name: T.Optional[str] = None,
-    organization_key: T.Optional[str] = None,
-    dry_run=False,
-    skip_subfolders=False,
-) -> None:
-    import_paths: T.Sequence[Path]
+def _gen_upload_everything(
+    mly_uploader: uploader.Uploader,
+    metadatas: T.Sequence[types.Metadata],
+    import_paths: T.Sequence[Path],
+    skip_subfolders: bool,
+):
+    # Upload images
+    image_metadatas = _find_metadata_with_filename_existed_in(
+        (m for m in metadatas if isinstance(m, types.ImageMetadata)),
+        utils.find_images(import_paths, skip_subfolders=skip_subfolders),
+    )
+    for image_result in uploader.ZipImageSequence.prepare_images_and_upload(
+        image_metadatas,
+        mly_uploader,
+    ):
+        yield image_result
+
+    # Upload videos
+    video_metadatas = _find_metadata_with_filename_existed_in(
+        (m for m in metadatas if isinstance(m, types.VideoMetadata)),
+        utils.find_videos(import_paths, skip_subfolders=skip_subfolders),
+    )
+    for video_result in _gen_upload_videos(mly_uploader, video_metadatas):
+        yield video_result
+
+    # Upload zip files
+    zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
+    for zip_result in _gen_upload_zipfiles(mly_uploader, zip_paths):
+        yield zip_result
+
+
+def _gen_upload_videos(
+    mly_uploader: uploader.Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
+) -> T.Generator[tuple[types.VideoMetadata, uploader.UploadResult], None, None]:
+    for idx, video_metadata in enumerate(video_metadatas):
+        try:
+            video_metadata.update_md5sum()
+        except Exception as ex:
+            yield video_metadata, uploader.UploadResult(error=ex)
+            continue
+
+        assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
+
+        # Convert video metadata to CAMMInfo
+        camm_info = _prepare_camm_info(video_metadata)
+
+        # Create the CAMM sample generator
+        camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
+
+        progress: uploader.SequenceProgress = {
+            "total_sequence_count": len(video_metadatas),
+            "sequence_idx": idx,
+            "file_type": video_metadata.filetype.value,
+            "import_path": str(video_metadata.filename),
+            "md5sum": video_metadata.md5sum,
+        }
+
+        session_key = uploader._session_key(
+            video_metadata.md5sum, upload_api_v4.ClusterFileType.CAMM
+        )
+
+        try:
+            with video_metadata.filename.open("rb") as src_fp:
+                # Build the mp4 stream with the CAMM samples
+                camm_fp = simple_mp4_builder.transform_mp4(
+                    src_fp, camm_sample_generator
+                )
+
+                # Upload the mp4 stream
+                cluster_id = mly_uploader.upload_stream(
+                    T.cast(T.IO[bytes], camm_fp),
+                    upload_api_v4.ClusterFileType.CAMM,
+                    session_key,
+                    progress=T.cast(T.Dict[str, T.Any], progress),
+                )
+        except Exception as ex:
+            yield video_metadata, uploader.UploadResult(error=ex)
+        else:
+            yield video_metadata, uploader.UploadResult(result=cluster_id)
+
+
+def _prepare_camm_info(video_metadata: types.VideoMetadata) -> camm_parser.CAMMInfo:
+    camm_info = camm_parser.CAMMInfo(
+        make=video_metadata.make or "", model=video_metadata.model or ""
+    )
+
+    for point in video_metadata.points:
+        if isinstance(point, telemetry.CAMMGPSPoint):
+            if camm_info.gps is None:
+                camm_info.gps = []
+            camm_info.gps.append(point)
+
+        elif isinstance(point, telemetry.GPSPoint):
+            # There is no proper CAMM entry for GoPro GPS
+            if camm_info.mini_gps is None:
+                camm_info.mini_gps = []
+            camm_info.mini_gps.append(point)
+
+        elif isinstance(point, geo.Point):
+            if camm_info.mini_gps is None:
+                camm_info.mini_gps = []
+            camm_info.mini_gps.append(point)
+        else:
+            raise ValueError(f"Unknown point type: {point}")
+
+    if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
+        if video_metadata.filetype is FileType.GOPRO:
+            with video_metadata.filename.open("rb") as fp:
+                gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
+            if gopro_info is not None:
+                camm_info.accl = gopro_info.accl or []
+                camm_info.gyro = gopro_info.gyro or []
+                camm_info.magn = gopro_info.magn or []
+
+    return camm_info
+
+
+def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> list[Path]:
+    import_paths: list[Path]
+
     if isinstance(import_path, Path):
         import_paths = [import_path]
     else:
         assert isinstance(import_path, list)
         import_paths = import_path
-    import_paths = list(utils.deduplicate_paths(import_paths))
 
-    if not import_paths:
-        return
+    import_paths = list(utils.deduplicate_paths(import_paths))
 
     # Check and fail early
     for path in import_paths:
@@ -504,176 +590,140 @@ def upload(
                 f"Import file or directory not found: {path}"
             )
 
+    return import_paths
+
+
+def _continue_or_fail(ex: Exception) -> Exception:
+    """
+    Wrap the exception, or re-raise if it is a fatal error (i.e. there is no point to continue)
+    """
+
+    if isinstance(ex, uploader.SequenceError):
+        return ex
+
+    # Certain files not found or no permission
+    if isinstance(ex, OSError):
+        return ex
+
+    # Certain metadatas are not valid
+    if isinstance(ex, exceptions.MapillaryMetadataValidationError):
+        return ex
+
+    # Fatal error: this is thrown after all retries
+    if isinstance(ex, requests.ConnectionError):
+        raise exceptions.MapillaryUploadConnectionError(str(ex)) from ex
+
+    # Fatal error: this is thrown after all retries
+    if isinstance(ex, requests.Timeout):
+        raise exceptions.MapillaryUploadTimeoutError(str(ex)) from ex
+
+    # Fatal error:
+    if isinstance(ex, requests.HTTPError) and isinstance(
+        ex.response, requests.Response
+    ):
+        if api_v4.is_auth_error(ex.response):
+            raise exceptions.MapillaryUploadUnauthorizedError(
+                api_v4.extract_auth_error_message(ex.response)
+            ) from ex
+        raise ex
+
+    raise ex
+
+
+def upload(
+    import_path: Path | T.Sequence[Path],
+    user_items: types.UserItem,
+    desc_path: str | None = None,
+    _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
+    dry_run=False,
+    skip_subfolders=False,
+) -> None:
+    import_paths = _normalize_import_paths(import_path)
+
     metadatas = _load_descs(_metadatas_from_process, desc_path, import_paths)
 
-    user_items = fetch_user_items(user_name, organization_key)
+    jsonschema.validate(instance=user_items, schema=types.UserItemSchema)
 
     # Setup the emitter -- the order matters here
 
     emitter = uploader.EventEmitter()
 
-    enable_history = history.MAPILLARY_UPLOAD_HISTORY_PATH and (
-        not dry_run or MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN == "YES"
+    # When dry_run mode is on, we disable history by default.
+    # But we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
+    # and when it is on, we enable history regardless of dry_run
+    enable_history = constants.MAPILLARY_UPLOAD_HISTORY_PATH and (
+        not dry_run or constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
     )
 
-    # Put it first one to cancel early
+    # Put it first one to check duplications first
     if enable_history:
-        _setup_cancel_due_to_duplication(emitter)
+        upload_run_params: JSONDict = {
+            # Null if multiple paths provided
+            "import_path": str(import_path) if isinstance(import_path, Path) else None,
+            "organization_key": user_items.get("MAPOrganizationKey"),
+            "user_key": user_items.get("MAPSettingsUserKey"),
+            "version": VERSION,
+            "run_at": time.time(),
+        }
+        _setup_history(emitter, upload_run_params, metadatas)
 
-    # This one set up tdqm
+    # Set up tdqm
     _setup_tdqm(emitter)
 
-    # Now stats is empty but it will collect during upload
+    # Now stats is empty but it will collect during ALL uploads
     stats = _setup_api_stats(emitter)
 
-    # Send the progress as well as the log stats collected above
+    # Send the progress via IPC, and log the progress in debug mode
    _setup_ipc(emitter)
 
-    params: JSONDict = {
-        # null if multiple paths provided
-        "import_path": str(import_path) if isinstance(import_path, Path) else None,
-        "organization_key": user_items.get("MAPOrganizationKey"),
-        "user_key": user_items.get("MAPSettingsUserKey"),
-        "version": VERSION,
-    }
-
-    if enable_history:
-        _setup_write_upload_history(emitter, params, metadatas)
+    mly_uploader = uploader.Uploader(user_items, emitter=emitter, dry_run=dry_run)
 
-    mly_uploader = uploader.Uploader(
-        user_items,
-        emitter=emitter,
-        dry_run=dry_run,
-        chunk_size=int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
+    results = _gen_upload_everything(
+        mly_uploader, metadatas, import_paths, skip_subfolders
     )
 
-    try:
-        image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
-        # find descs that match the image paths from the import paths
-        image_metadatas = [
-            metadata
-            for metadata in (metadatas or [])
-            if isinstance(metadata, types.ImageMetadata)
-        ]
-        specified_image_metadatas = _find_metadata_with_filename_existed_in(
-            image_metadatas, image_paths
-        )
-        if specified_image_metadatas:
-            try:
-                clusters = mly_uploader.upload_images(
-                    specified_image_metadatas,
-                    event_payload={"file_type": FileType.IMAGE.value},
-                )
-            except Exception as ex:
-                raise UploadError(ex) from ex
-
-            if clusters:
-                LOG.debug("Uploaded to cluster: %s", clusters)
-
-        video_paths = utils.find_videos(import_paths, skip_subfolders=skip_subfolders)
-        video_metadatas = [
-            metadata
-            for metadata in (metadatas or [])
-            if isinstance(metadata, types.VideoMetadata)
-        ]
-        specified_video_metadatas = _find_metadata_with_filename_existed_in(
-            video_metadatas, video_paths
-        )
-        for idx, video_metadata in enumerate(specified_video_metadatas):
-            video_metadata.update_md5sum()
-            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
-
-            # extract telemetry measurements from GoPro videos
-            telemetry_measurements: T.List[telemetry.TelemetryMeasurement] = []
-            if MAPILLARY__EXPERIMENTAL_ENABLE_IMU == "YES":
-                if video_metadata.filetype is FileType.GOPRO:
-                    with video_metadata.filename.open("rb") as fp:
-                        telemetry_data = gpmf_parser.extract_telemetry_data(fp)
-                    if telemetry_data:
-                        telemetry_measurements.extend(telemetry_data.accl)
-                        telemetry_measurements.extend(telemetry_data.gyro)
-                        telemetry_measurements.extend(telemetry_data.magn)
-                        telemetry_measurements.sort(key=lambda m: m.time)
-
-            generator = camm_builder.camm_sample_generator2(
-                video_metadata, telemetry_measurements=telemetry_measurements
-            )
+    upload_successes = 0
+    upload_errors: list[Exception] = []
 
-            with video_metadata.filename.open("rb") as src_fp:
-                camm_fp = simple_mp4_builder.transform_mp4(src_fp, generator)
-                event_payload: uploader.Progress = {
-                    "total_sequence_count": len(specified_video_metadatas),
-                    "sequence_idx": idx,
-                    "file_type": video_metadata.filetype.value,
-                    "import_path": str(video_metadata.filename),
-                }
-                try:
-                    cluster_id = mly_uploader.upload_stream(
-                        T.cast(T.BinaryIO, camm_fp),
-                        upload_api_v4.ClusterFileType.CAMM,
-                        video_metadata.md5sum,
-                        event_payload=event_payload,
-                    )
-                except Exception as ex:
-                    raise UploadError(ex) from ex
-                LOG.debug("Uploaded to cluster: %s", cluster_id)
-
-        zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
-        _upload_zipfiles(mly_uploader, zip_paths)
-
-    except UploadError as ex:
-        inner_ex = ex.inner_ex
+    # The real upload happens sequentially here
+    try:
+        for _, result in results:
+            if result.error is not None:
+                upload_errors.append(_continue_or_fail(result.error))
+            else:
+                upload_successes += 1
 
+    except Exception as ex:
+        # Fatal error: log and raise
         if not dry_run:
-            _api_logging_failed(_summarize(stats), inner_ex)
-
-        if isinstance(inner_ex, requests.ConnectionError):
-            raise exceptions.MapillaryUploadConnectionError(str(inner_ex)) from inner_ex
-
-        if isinstance(inner_ex, requests.Timeout):
-            raise exceptions.MapillaryUploadTimeoutError(str(inner_ex)) from inner_ex
-
-        if isinstance(inner_ex, requests.HTTPError) and isinstance(
-            inner_ex.response, requests.Response
-        ):
-            if inner_ex.response.status_code in [400, 401]:
-                try:
-                    error_body = inner_ex.response.json()
-                except Exception:
-                    error_body = {}
-                debug_info = error_body.get("debug_info", {})
-                if debug_info.get("type") in ["NotAuthorizedError"]:
-                    raise exceptions.MapillaryUploadUnauthorizedError(
-                        debug_info.get("message")
-                    ) from inner_ex
-                raise inner_ex
-
-        raise inner_ex
-
-    if stats:
+            _api_logging_failed(_summarize(stats), ex)
+        raise ex
+
+    else:
         if not dry_run:
             _api_logging_finished(_summarize(stats))
-        _show_upload_summary(stats)
-    else:
-        LOG.info("Nothing uploaded. Bye.")
+
+    finally:
+        # We collected stats after every upload is finished
+        assert upload_successes == len(stats)
+        _show_upload_summary(stats, upload_errors)
 
 
-def _upload_zipfiles(
+def _gen_upload_zipfiles(
     mly_uploader: uploader.Uploader,
     zip_paths: T.Sequence[Path],
-) -> None:
+) -> T.Generator[tuple[Path, uploader.UploadResult], None, None]:
     for idx, zip_path in enumerate(zip_paths):
-        event_payload: uploader.Progress = {
+        progress: uploader.SequenceProgress = {
            "total_sequence_count": len(zip_paths),
             "sequence_idx": idx,
-            "file_type": FileType.ZIP.value,
             "import_path": str(zip_path),
         }
         try:
-            cluster_id = mly_uploader.upload_zipfile(
-                zip_path, event_payload=event_payload
+            cluster_id = uploader.ZipImageSequence.prepare_zipfile_and_upload(
+                zip_path, mly_uploader, progress=T.cast(T.Dict[str, T.Any], progress)
             )
         except Exception as ex:
-            raise UploadError(ex) from ex
-
-        LOG.debug("Uploaded to cluster: %s", cluster_id)
+            yield zip_path, uploader.UploadResult(error=ex)
+        else:
+            yield zip_path, uploader.UploadResult(result=cluster_id)
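
Taken together, the hunks above replace the old monolithic upload(), which resolved credentials itself via the now-removed fetch_user_items() and aborted on the first UploadError, with a generator pipeline: user_items is supplied by the caller and validated against types.UserItemSchema, each file yields an uploader.UploadResult, and _continue_or_fail() decides whether an error is collected for the final summary (SequenceError, OSError, metadata validation errors) or escalated as a connection, timeout, or authorization exception. A minimal sketch of driving the new entry point, assuming a valid upload token; the dict values are placeholders and the exact set of required keys is defined by types.UserItemSchema:

from pathlib import Path

from mapillary_tools import upload as upload_module

# Keys mirror those read in the diff (user_upload_token, MAPSettingsUserKey,
# MAPOrganizationKey); the values here are placeholders, not real credentials
user_items = {
    "user_upload_token": "<token>",
    "MAPSettingsUserKey": "<user key>",
}

# user_items is now passed in explicitly; per the diff, dry_run=True disables
# upload history and API logging unless overridden via constants
upload_module.upload(
    [Path("captures")],
    user_items,
    dry_run=True,
)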