mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +237 -16
  3. mapillary_tools/authenticate.py +325 -64
  4. mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +12 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +18 -9
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +31 -13
  15. mapillary_tools/constants.py +47 -6
  16. mapillary_tools/exceptions.py +34 -35
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +7 -7
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +46 -33
  21. mapillary_tools/exiftool_runner.py +77 -0
  22. mapillary_tools/ffmpeg.py +24 -23
  23. mapillary_tools/geo.py +144 -120
  24. mapillary_tools/geotag/base.py +147 -0
  25. mapillary_tools/geotag/factory.py +291 -0
  26. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  27. mapillary_tools/geotag/geotag_images_from_exiftool.py +126 -82
  28. mapillary_tools/geotag/geotag_images_from_gpx.py +53 -118
  29. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  30. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  31. mapillary_tools/geotag/geotag_images_from_video.py +53 -51
  32. mapillary_tools/geotag/geotag_videos_from_exiftool.py +97 -0
  33. mapillary_tools/geotag/geotag_videos_from_gpx.py +39 -0
  34. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  35. mapillary_tools/geotag/image_extractors/base.py +18 -0
  36. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  37. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  38. mapillary_tools/geotag/options.py +160 -0
  39. mapillary_tools/geotag/utils.py +52 -16
  40. mapillary_tools/geotag/video_extractors/base.py +18 -0
  41. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  42. mapillary_tools/{video_data_extraction/extractors/gpx_parser.py → geotag/video_extractors/gpx.py} +57 -39
  43. mapillary_tools/geotag/video_extractors/native.py +157 -0
  44. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  45. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  46. mapillary_tools/history.py +7 -13
  47. mapillary_tools/mp4/construct_mp4_parser.py +9 -8
  48. mapillary_tools/mp4/io_utils.py +0 -1
  49. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  50. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  51. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  52. mapillary_tools/process_geotag_properties.py +155 -392
  53. mapillary_tools/process_sequence_properties.py +562 -208
  54. mapillary_tools/sample_video.py +13 -20
  55. mapillary_tools/telemetry.py +26 -13
  56. mapillary_tools/types.py +111 -58
  57. mapillary_tools/upload.py +316 -298
  58. mapillary_tools/upload_api_v4.py +55 -122
  59. mapillary_tools/uploader.py +396 -254
  60. mapillary_tools/utils.py +42 -18
  61. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/METADATA +3 -2
  62. mapillary_tools-0.14.0a2.dist-info/RECORD +72 -0
  63. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/WHEEL +1 -1
  64. mapillary_tools/geotag/__init__.py +0 -1
  65. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  66. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  67. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  68. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  69. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  70. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  71. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  72. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  73. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  74. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  75. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  76. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  77. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  78. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  79. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  80. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  81. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/entry_points.txt +0 -0
  82. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info/licenses}/LICENSE +0 -0
  83. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/top_level.txt +0 -0
mapillary_tools/upload.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import json
2
4
  import logging
3
5
  import os
@@ -7,15 +9,15 @@ import typing as T
7
9
  import uuid
8
10
  from pathlib import Path
9
11
 
12
+ import jsonschema
10
13
  import requests
11
14
  from tqdm import tqdm
12
15
 
13
16
  from . import (
14
17
  api_v4,
15
- authenticate,
16
- config,
17
18
  constants,
18
19
  exceptions,
20
+ geo,
19
21
  history,
20
22
  ipc,
21
23
  telemetry,
@@ -25,50 +27,23 @@ from . import (
25
27
  utils,
26
28
  VERSION,
27
29
  )
28
- from .camm import camm_builder
29
- from .geotag import gpmf_parser
30
+ from .camm import camm_builder, camm_parser
31
+ from .gpmf import gpmf_parser
30
32
  from .mp4 import simple_mp4_builder
31
33
  from .types import FileType
32
34
 
33
35
  JSONDict = T.Dict[str, T.Union[str, int, float, None]]
34
36
 
35
37
  LOG = logging.getLogger(__name__)
36
- MAPILLARY_DISABLE_API_LOGGING = os.getenv("MAPILLARY_DISABLE_API_LOGGING")
37
- MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN = os.getenv(
38
- "MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN"
39
- )
40
- MAPILLARY__EXPERIMENTAL_ENABLE_IMU = os.getenv("MAPILLARY__EXPERIMENTAL_ENABLE_IMU")
41
- CAMM_CONVERTABLES = {FileType.CAMM, FileType.BLACKVUE, FileType.GOPRO}
42
-
43
-
44
- class UploadError(Exception):
45
- def __init__(self, inner_ex) -> None:
46
- self.inner_ex = inner_ex
47
- super().__init__(str(inner_ex))
48
38
 
49
39
 
50
- class UploadHTTPError(Exception):
40
+ class UploadedAlreadyError(uploader.SequenceError):
51
41
  pass
52
42
 
53
43
 
54
- def wrap_http_exception(ex: requests.HTTPError):
55
- req = ex.request
56
- resp = ex.response
57
- if isinstance(resp, requests.Response) and isinstance(req, requests.Request):
58
- lines = [
59
- f"{req.method} {resp.url}",
60
- f"> HTTP Status: {resp.status_code}",
61
- str(resp.content),
62
- ]
63
- else:
64
- lines = []
65
-
66
- return UploadHTTPError("\n".join(lines))
67
-
68
-
69
44
  def _load_validate_metadatas_from_desc_path(
70
- desc_path: T.Optional[str], import_paths: T.Sequence[Path]
71
- ) -> T.List[types.Metadata]:
45
+ desc_path: str | None, import_paths: T.Sequence[Path]
46
+ ) -> list[types.Metadata]:
72
47
  is_default_desc_path = False
73
48
  if desc_path is None:
74
49
  is_default_desc_path = True
@@ -86,7 +61,7 @@ def _load_validate_metadatas_from_desc_path(
86
61
  "The description path must be specified (with --desc_path) when uploading a single file",
87
62
  )
88
63
 
89
- descs: T.List[types.DescriptionOrError] = []
64
+ descs: list[types.DescriptionOrError] = []
90
65
 
91
66
  if desc_path == "-":
92
67
  try:
@@ -94,7 +69,7 @@ def _load_validate_metadatas_from_desc_path(
94
69
  except json.JSONDecodeError as ex:
95
70
  raise exceptions.MapillaryInvalidDescriptionFile(
96
71
  f"Invalid JSON stream from stdin: {ex}"
97
- )
72
+ ) from ex
98
73
  else:
99
74
  if not os.path.isfile(desc_path):
100
75
  if is_default_desc_path:
@@ -111,7 +86,7 @@ def _load_validate_metadatas_from_desc_path(
111
86
  except json.JSONDecodeError as ex:
112
87
  raise exceptions.MapillaryInvalidDescriptionFile(
113
88
  f"Invalid JSON file {desc_path}: {ex}"
114
- )
89
+ ) from ex
115
90
 
116
91
  # the descs load from stdin or json file may contain invalid entries
117
92
  validated_descs = [
@@ -139,7 +114,7 @@ def _load_validate_metadatas_from_desc_path(
139
114
  def zip_images(
140
115
  import_path: Path,
141
116
  zip_dir: Path,
142
- desc_path: T.Optional[str] = None,
117
+ desc_path: str | None = None,
143
118
  ):
144
119
  if not import_path.is_dir():
145
120
  raise exceptions.MapillaryFileNotFoundError(
@@ -156,49 +131,19 @@ def zip_images(
156
131
  metadata for metadata in metadatas if isinstance(metadata, types.ImageMetadata)
157
132
  ]
158
133
 
159
- uploader.zip_images(image_metadatas, zip_dir)
160
-
161
-
162
- def fetch_user_items(
163
- user_name: T.Optional[str] = None, organization_key: T.Optional[str] = None
164
- ) -> types.UserItem:
165
- if user_name is None:
166
- all_user_items = config.list_all_users()
167
- if not all_user_items:
168
- raise exceptions.MapillaryBadParameterError(
169
- "No Mapillary account found. Add one with --user_name"
170
- )
171
- if len(all_user_items) == 1:
172
- user_items = all_user_items[0]
173
- else:
174
- raise exceptions.MapillaryBadParameterError(
175
- "Found multiple Mapillary accounts. Please specify one with --user_name"
176
- )
177
- else:
178
- try:
179
- user_items = authenticate.authenticate_user(user_name)
180
- except requests.HTTPError as exc:
181
- raise wrap_http_exception(exc) from exc
182
-
183
- if organization_key is not None:
184
- try:
185
- resp = api_v4.fetch_organization(
186
- user_items["user_upload_token"], organization_key
187
- )
188
- except requests.HTTPError as ex:
189
- raise wrap_http_exception(ex) from ex
190
- org = resp.json()
191
- LOG.info("Uploading to organization: %s", json.dumps(org))
192
- user_items = T.cast(
193
- types.UserItem, {**user_items, "MAPOrganizationKey": organization_key}
194
- )
195
- return user_items
134
+ uploader.ZipImageSequence.zip_images(image_metadatas, zip_dir)
196
135
 
197
136
 
198
- def _setup_cancel_due_to_duplication(emitter: uploader.EventEmitter) -> None:
137
+ def _setup_history(
138
+ emitter: uploader.EventEmitter,
139
+ upload_run_params: JSONDict,
140
+ metadatas: list[types.Metadata],
141
+ ) -> None:
199
142
  @emitter.on("upload_start")
200
- def upload_start(payload: uploader.Progress):
201
- md5sum = payload["md5sum"]
143
+ def check_duplication(payload: uploader.Progress):
144
+ md5sum = payload.get("md5sum")
145
+ assert md5sum is not None, f"md5sum has to be set for {payload}"
146
+
202
147
  if history.is_uploaded(md5sum):
203
148
  sequence_uuid = payload.get("sequence_uuid")
204
149
  if sequence_uuid is None:
@@ -214,19 +159,15 @@ def _setup_cancel_due_to_duplication(emitter: uploader.EventEmitter) -> None:
214
159
  sequence_uuid,
215
160
  history.history_desc_path(md5sum),
216
161
  )
217
- raise uploader.UploadCancelled()
218
-
162
+ raise UploadedAlreadyError()
219
163
 
220
- def _setup_write_upload_history(
221
- emitter: uploader.EventEmitter,
222
- params: JSONDict,
223
- metadatas: T.Optional[T.List[types.Metadata]] = None,
224
- ) -> None:
225
164
  @emitter.on("upload_finished")
226
- def upload_finished(payload: uploader.Progress):
165
+ def write_history(payload: uploader.Progress):
227
166
  sequence_uuid = payload.get("sequence_uuid")
228
- md5sum = payload["md5sum"]
229
- if sequence_uuid is None or metadatas is None:
167
+ md5sum = payload.get("md5sum")
168
+ assert md5sum is not None, f"md5sum has to be set for {payload}"
169
+
170
+ if sequence_uuid is None:
230
171
  sequence = None
231
172
  else:
232
173
  sequence = [
@@ -236,10 +177,11 @@ def _setup_write_upload_history(
236
177
  and metadata.MAPSequenceUUID == sequence_uuid
237
178
  ]
238
179
  sequence.sort(key=lambda metadata: metadata.sort_key())
180
+
239
181
  try:
240
182
  history.write_history(
241
183
  md5sum,
242
- params,
184
+ upload_run_params,
243
185
  T.cast(JSONDict, payload),
244
186
  sequence,
245
187
  )
@@ -248,7 +190,7 @@ def _setup_write_upload_history(
248
190
 
249
191
 
250
192
  def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
251
- upload_pbar: T.Optional[tqdm] = None
193
+ upload_pbar: tqdm | None = None
252
194
 
253
195
  @emitter.on("upload_fetch_offset")
254
196
  def upload_fetch_offset(payload: uploader.Progress) -> None:
@@ -259,7 +201,7 @@ def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
259
201
 
260
202
  nth = payload["sequence_idx"] + 1
261
203
  total = payload["total_sequence_count"]
262
- import_path: T.Optional[str] = payload.get("import_path")
204
+ import_path: str | None = payload.get("import_path")
263
205
  filetype = payload.get("file_type", "unknown").upper()
264
206
  if import_path is None:
265
207
  _desc = f"Uploading {filetype} ({nth}/{total})"
@@ -294,25 +236,40 @@ def _setup_ipc(emitter: uploader.EventEmitter):
294
236
  @emitter.on("upload_start")
295
237
  def upload_start(payload: uploader.Progress):
296
238
  type: uploader.EventName = "upload_start"
297
- LOG.debug("Sending %s via IPC: %s", type, payload)
239
+ LOG.debug("IPC %s: %s", type.upper(), payload)
298
240
  ipc.send(type, payload)
299
241
 
300
242
  @emitter.on("upload_fetch_offset")
301
243
  def upload_fetch_offset(payload: uploader.Progress) -> None:
302
244
  type: uploader.EventName = "upload_fetch_offset"
303
- LOG.debug("Sending %s via IPC: %s", type, payload)
245
+ LOG.debug("IPC %s: %s", type.upper(), payload)
304
246
  ipc.send(type, payload)
305
247
 
306
248
  @emitter.on("upload_progress")
307
249
  def upload_progress(payload: uploader.Progress):
308
250
  type: uploader.EventName = "upload_progress"
309
- LOG.debug("Sending %s via IPC: %s", type, payload)
251
+
252
+ if LOG.getEffectiveLevel() <= logging.DEBUG:
253
+ # In debug mode, we want to see the progress every 30 seconds
254
+ # instead of every chunk (which is too verbose)
255
+ INTERVAL_SECONDS = 30
256
+ now = time.time()
257
+ last_upload_progress_debug_at: float | None = T.cast(T.Dict, payload).get(
258
+ "_last_upload_progress_debug_at"
259
+ )
260
+ if (
261
+ last_upload_progress_debug_at is None
262
+ or last_upload_progress_debug_at + INTERVAL_SECONDS < now
263
+ ):
264
+ LOG.debug("IPC %s: %s", type.upper(), payload)
265
+ T.cast(T.Dict, payload)["_last_upload_progress_debug_at"] = now
266
+
310
267
  ipc.send(type, payload)
311
268
 
312
269
  @emitter.on("upload_end")
313
270
  def upload_end(payload: uploader.Progress) -> None:
314
271
  type: uploader.EventName = "upload_end"
315
- LOG.debug("Sending %s via IPC: %s", type, payload)
272
+ LOG.debug("IPC %s: %s", type.upper(), payload)
316
273
  ipc.send(type, payload)
317
274
 
318
275
 
@@ -334,7 +291,7 @@ class _APIStats(uploader.Progress, total=False):
334
291
 
335
292
 
336
293
  def _setup_api_stats(emitter: uploader.EventEmitter):
337
- all_stats: T.List[_APIStats] = []
294
+ all_stats: list[_APIStats] = []
338
295
 
339
296
  @emitter.on("upload_start")
340
297
  def collect_start_time(payload: _APIStats) -> None:
@@ -362,15 +319,18 @@ def _setup_api_stats(emitter: uploader.EventEmitter):
362
319
  now = time.time()
363
320
  payload["upload_end_time"] = now
364
321
  payload["upload_total_time"] += now - payload["upload_last_restart_time"]
322
+
323
+ @emitter.on("upload_finished")
324
+ def append_stats(payload: _APIStats) -> None:
365
325
  all_stats.append(payload)
366
326
 
367
327
  return all_stats
368
328
 
369
329
 
370
- def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
330
+ def _summarize(stats: T.Sequence[_APIStats]) -> dict:
371
331
  total_image_count = sum(s.get("sequence_image_count", 0) for s in stats)
372
332
  total_uploaded_sequence_count = len(stats)
373
- # note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count
333
+ # Note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count
374
334
 
375
335
  total_uploaded_size = sum(
376
336
  s["entity_size"] - s.get("upload_first_offset", 0) for s in stats
@@ -388,6 +348,7 @@ def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
388
348
 
389
349
  upload_summary = {
390
350
  "images": total_image_count,
351
+ # TODO: rename sequences to total uploads
391
352
  "sequences": total_uploaded_sequence_count,
392
353
  "size": round(total_entity_size_mb, 4),
393
354
  "uploaded_size": round(total_uploaded_size_mb, 4),
@@ -398,88 +359,73 @@ def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
398
359
  return upload_summary
399
360
 
400
361
 
401
- def _show_upload_summary(stats: T.Sequence[_APIStats]):
402
- grouped: T.Dict[str, T.List[_APIStats]] = {}
403
- for stat in stats:
404
- grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)
362
+ def _show_upload_summary(stats: T.Sequence[_APIStats], errors: T.Sequence[Exception]):
363
+ if not stats:
364
+ LOG.info("Nothing uploaded. Bye.")
365
+ else:
366
+ grouped: dict[str, list[_APIStats]] = {}
367
+ for stat in stats:
368
+ grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)
405
369
 
406
- for file_type, typed_stats in grouped.items():
407
- if file_type == FileType.IMAGE.value:
408
- LOG.info(
409
- "%8d %s sequences uploaded",
410
- len(typed_stats),
411
- file_type.upper(),
412
- )
413
- else:
414
- LOG.info(
415
- "%8d %s files uploaded",
416
- len(typed_stats),
417
- file_type.upper(),
418
- )
370
+ for file_type, typed_stats in grouped.items():
371
+ if file_type == FileType.IMAGE.value:
372
+ LOG.info("%8d image sequences uploaded", len(typed_stats))
373
+ else:
374
+ LOG.info("%8d %s videos uploaded", len(typed_stats), file_type.upper())
375
+
376
+ summary = _summarize(stats)
377
+ LOG.info("%8.1fM data in total", summary["size"])
378
+ LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
379
+ LOG.info("%8.1fs upload time", summary["time"])
419
380
 
420
- summary = _summarize(stats)
421
- LOG.info("%8.1fM data in total", summary["size"])
422
- LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
423
- LOG.info("%8.1fs upload time", summary["time"])
381
+ for error in errors:
382
+ LOG.error("Upload error: %s: %s", error.__class__.__name__, error)
424
383
 
425
384
 
426
- def _api_logging_finished(summary: T.Dict):
427
- if MAPILLARY_DISABLE_API_LOGGING:
385
+ def _api_logging_finished(summary: dict):
386
+ if constants.MAPILLARY_DISABLE_API_LOGGING:
428
387
  return
429
388
 
430
389
  action: api_v4.ActionType = "upload_finished_upload"
431
- LOG.debug("API Logging for action %s: %s", action, summary)
432
390
  try:
433
- api_v4.log_event(
434
- action,
435
- summary,
436
- )
391
+ api_v4.log_event(action, summary)
437
392
  except requests.HTTPError as exc:
438
393
  LOG.warning(
439
- "Error from API Logging for action %s",
394
+ "HTTPError from API Logging for action %s: %s",
440
395
  action,
441
- exc_info=wrap_http_exception(exc),
396
+ api_v4.readable_http_error(exc),
442
397
  )
443
398
  except Exception:
444
399
  LOG.warning("Error from API Logging for action %s", action, exc_info=True)
445
400
 
446
401
 
447
- def _api_logging_failed(payload: T.Dict, exc: Exception):
448
- if MAPILLARY_DISABLE_API_LOGGING:
402
+ def _api_logging_failed(payload: dict, exc: Exception):
403
+ if constants.MAPILLARY_DISABLE_API_LOGGING:
449
404
  return
450
405
 
451
406
  payload_with_reason = {**payload, "reason": exc.__class__.__name__}
452
407
  action: api_v4.ActionType = "upload_failed_upload"
453
- LOG.debug("API Logging for action %s: %s", action, payload)
454
408
  try:
455
- api_v4.log_event(
456
- action,
457
- payload_with_reason,
458
- )
409
+ api_v4.log_event(action, payload_with_reason)
459
410
  except requests.HTTPError as exc:
460
- wrapped_exc = wrap_http_exception(exc)
461
411
  LOG.warning(
462
- "Error from API Logging for action %s",
412
+ "HTTPError from API Logging for action %s: %s",
463
413
  action,
464
- exc_info=wrapped_exc,
414
+ api_v4.readable_http_error(exc),
465
415
  )
466
416
  except Exception:
467
417
  LOG.warning("Error from API Logging for action %s", action, exc_info=True)
468
418
 
469
419
 
470
420
  def _load_descs(
471
- _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]],
472
- desc_path: T.Optional[str],
421
+ _metadatas_from_process: T.Sequence[types.MetadataOrError] | None,
422
+ desc_path: str | None,
473
423
  import_paths: T.Sequence[Path],
474
- ) -> T.List[types.Metadata]:
475
- metadatas: T.List[types.Metadata]
424
+ ) -> list[types.Metadata]:
425
+ metadatas: list[types.Metadata]
476
426
 
477
427
  if _metadatas_from_process is not None:
478
- metadatas = [
479
- metadata
480
- for metadata in _metadatas_from_process
481
- if not isinstance(metadata, types.ErrorMetadata)
482
- ]
428
+ metadatas, _ = types.separate_errors(_metadatas_from_process)
483
429
  else:
484
430
  metadatas = _load_validate_metadatas_from_desc_path(desc_path, import_paths)
485
431
 
@@ -503,31 +449,139 @@ _M = T.TypeVar("_M", bound=types.Metadata)
503
449
 
504
450
 
505
451
  def _find_metadata_with_filename_existed_in(
506
- metadatas: T.Sequence[_M], paths: T.Sequence[Path]
507
- ) -> T.List[_M]:
452
+ metadatas: T.Iterable[_M], paths: T.Iterable[Path]
453
+ ) -> list[_M]:
508
454
  resolved_image_paths = set(p.resolve() for p in paths)
509
455
  return [d for d in metadatas if d.filename.resolve() in resolved_image_paths]
510
456
 
511
457
 
512
- def upload(
513
- import_path: T.Union[Path, T.Sequence[Path]],
514
- desc_path: T.Optional[str] = None,
515
- _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]] = None,
516
- user_name: T.Optional[str] = None,
517
- organization_key: T.Optional[str] = None,
518
- dry_run=False,
519
- skip_subfolders=False,
520
- ) -> None:
521
- import_paths: T.Sequence[Path]
458
+ def _gen_upload_everything(
459
+ mly_uploader: uploader.Uploader,
460
+ metadatas: T.Sequence[types.Metadata],
461
+ import_paths: T.Sequence[Path],
462
+ skip_subfolders: bool,
463
+ ):
464
+ # Upload images
465
+ image_metadatas = _find_metadata_with_filename_existed_in(
466
+ (m for m in metadatas if isinstance(m, types.ImageMetadata)),
467
+ utils.find_images(import_paths, skip_subfolders=skip_subfolders),
468
+ )
469
+ for image_result in uploader.ZipImageSequence.prepare_images_and_upload(
470
+ image_metadatas,
471
+ mly_uploader,
472
+ ):
473
+ yield image_result
474
+
475
+ # Upload videos
476
+ video_metadatas = _find_metadata_with_filename_existed_in(
477
+ (m for m in metadatas if isinstance(m, types.VideoMetadata)),
478
+ utils.find_videos(import_paths, skip_subfolders=skip_subfolders),
479
+ )
480
+ for video_result in _gen_upload_videos(mly_uploader, video_metadatas):
481
+ yield video_result
482
+
483
+ # Upload zip files
484
+ zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
485
+ for zip_result in _gen_upload_zipfiles(mly_uploader, zip_paths):
486
+ yield zip_result
487
+
488
+
489
+ def _gen_upload_videos(
490
+ mly_uploader: uploader.Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
491
+ ) -> T.Generator[tuple[types.VideoMetadata, uploader.UploadResult], None, None]:
492
+ for idx, video_metadata in enumerate(video_metadatas):
493
+ try:
494
+ video_metadata.update_md5sum()
495
+ except Exception as ex:
496
+ yield video_metadata, uploader.UploadResult(error=ex)
497
+ continue
498
+
499
+ assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
500
+
501
+ # Convert video metadata to CAMMInfo
502
+ camm_info = _prepare_camm_info(video_metadata)
503
+
504
+ # Create the CAMM sample generator
505
+ camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
506
+
507
+ progress: uploader.SequenceProgress = {
508
+ "total_sequence_count": len(video_metadatas),
509
+ "sequence_idx": idx,
510
+ "file_type": video_metadata.filetype.value,
511
+ "import_path": str(video_metadata.filename),
512
+ "md5sum": video_metadata.md5sum,
513
+ }
514
+
515
+ session_key = uploader._session_key(
516
+ video_metadata.md5sum, upload_api_v4.ClusterFileType.CAMM
517
+ )
518
+
519
+ try:
520
+ with video_metadata.filename.open("rb") as src_fp:
521
+ # Build the mp4 stream with the CAMM samples
522
+ camm_fp = simple_mp4_builder.transform_mp4(
523
+ src_fp, camm_sample_generator
524
+ )
525
+
526
+ # Upload the mp4 stream
527
+ cluster_id = mly_uploader.upload_stream(
528
+ T.cast(T.IO[bytes], camm_fp),
529
+ upload_api_v4.ClusterFileType.CAMM,
530
+ session_key,
531
+ progress=T.cast(T.Dict[str, T.Any], progress),
532
+ )
533
+ except Exception as ex:
534
+ yield video_metadata, uploader.UploadResult(error=ex)
535
+ else:
536
+ yield video_metadata, uploader.UploadResult(result=cluster_id)
537
+
538
+
539
+ def _prepare_camm_info(video_metadata: types.VideoMetadata) -> camm_parser.CAMMInfo:
540
+ camm_info = camm_parser.CAMMInfo(
541
+ make=video_metadata.make or "", model=video_metadata.model or ""
542
+ )
543
+
544
+ for point in video_metadata.points:
545
+ if isinstance(point, telemetry.CAMMGPSPoint):
546
+ if camm_info.gps is None:
547
+ camm_info.gps = []
548
+ camm_info.gps.append(point)
549
+
550
+ elif isinstance(point, telemetry.GPSPoint):
551
+ # There is no proper CAMM entry for GoPro GPS
552
+ if camm_info.mini_gps is None:
553
+ camm_info.mini_gps = []
554
+ camm_info.mini_gps.append(point)
555
+
556
+ elif isinstance(point, geo.Point):
557
+ if camm_info.mini_gps is None:
558
+ camm_info.mini_gps = []
559
+ camm_info.mini_gps.append(point)
560
+ else:
561
+ raise ValueError(f"Unknown point type: {point}")
562
+
563
+ if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
564
+ if video_metadata.filetype is FileType.GOPRO:
565
+ with video_metadata.filename.open("rb") as fp:
566
+ gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
567
+ if gopro_info is not None:
568
+ camm_info.accl = gopro_info.accl or []
569
+ camm_info.gyro = gopro_info.gyro or []
570
+ camm_info.magn = gopro_info.magn or []
571
+
572
+ return camm_info
573
+
574
+
575
+ def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> list[Path]:
576
+ import_paths: list[Path]
577
+
522
578
  if isinstance(import_path, Path):
523
579
  import_paths = [import_path]
524
580
  else:
525
581
  assert isinstance(import_path, list)
526
582
  import_paths = import_path
527
- import_paths = list(utils.deduplicate_paths(import_paths))
528
583
 
529
- if not import_paths:
530
- return
584
+ import_paths = list(utils.deduplicate_paths(import_paths))
531
585
 
532
586
  # Check and fail early
533
587
  for path in import_paths:
@@ -536,176 +590,140 @@ def upload(
536
590
  f"Import file or directory not found: {path}"
537
591
  )
538
592
 
593
+ return import_paths
594
+
595
+
596
def _continue_or_fail(ex: Exception) -> Exception:
    """
    Wrap the exception, or re-raise if it is a fatal error (i.e. there is no point to continue)
    """

    # Recoverable: a single bad sequence should not abort the whole run
    if isinstance(ex, uploader.SequenceError):
        return ex

    # Recoverable: certain metadatas are not valid
    if isinstance(ex, exceptions.MapillaryMetadataValidationError):
        return ex

    # BUGFIX: requests exceptions subclass OSError (RequestException inherits
    # from IOError), so they MUST be checked before the generic OSError case
    # below -- otherwise these fatal branches are dead code and network
    # failures would be silently treated as recoverable.

    # Fatal error: this is thrown after all retries
    if isinstance(ex, requests.ConnectionError):
        raise exceptions.MapillaryUploadConnectionError(str(ex)) from ex

    # Fatal error: this is thrown after all retries
    if isinstance(ex, requests.Timeout):
        raise exceptions.MapillaryUploadTimeoutError(str(ex)) from ex

    # Fatal error: auth failures cannot be fixed by continuing
    if isinstance(ex, requests.HTTPError) and isinstance(
        ex.response, requests.Response
    ):
        if api_v4.is_auth_error(ex.response):
            raise exceptions.MapillaryUploadUnauthorizedError(
                api_v4.extract_auth_error_message(ex.response)
            ) from ex
        raise ex

    # Recoverable: certain files not found or no permission
    if isinstance(ex, OSError):
        return ex

    raise ex
631
+
632
+
633
def upload(
    import_path: Path | T.Sequence[Path],
    user_items: types.UserItem,
    desc_path: str | None = None,
    _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
    dry_run=False,
    skip_subfolders=False,
) -> None:
    """Upload all images, videos and zipfiles found under *import_path*.

    Args:
        import_path: a single path or a sequence of paths to scan.
        user_items: authenticated user credentials (validated against
            ``types.UserItemSchema``).
        desc_path: optional path to a metadata description file.
        _metadatas_from_process: metadatas handed over directly by the
            process command, bypassing ``desc_path``.
        dry_run: when True, skip the actual network upload (history is
            disabled unless MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN).
        skip_subfolders: when True, do not recurse into subdirectories.

    Raises:
        Fatal upload errors re-raised by ``_continue_or_fail`` (connection,
        timeout, authorization), plus any unexpected exception.
    """
    import_paths = _normalize_import_paths(import_path)

    metadatas = _load_descs(_metadatas_from_process, desc_path, import_paths)

    jsonschema.validate(instance=user_items, schema=types.UserItemSchema)

    # Setup the emitter -- the order matters here
    emitter = uploader.EventEmitter()

    # When dry_run mode is on, we disable history by default.
    # But we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
    # and when it is on, we enable history regardless of dry_run
    enable_history = constants.MAPILLARY_UPLOAD_HISTORY_PATH and (
        not dry_run or constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
    )

    # Put it first one to check duplications first
    if enable_history:
        upload_run_params: JSONDict = {
            # Null if multiple paths provided
            "import_path": str(import_path) if isinstance(import_path, Path) else None,
            "organization_key": user_items.get("MAPOrganizationKey"),
            "user_key": user_items.get("MAPSettingsUserKey"),
            "version": VERSION,
            "run_at": time.time(),
        }
        _setup_history(emitter, upload_run_params, metadatas)

    # Set up tdqm
    _setup_tdqm(emitter)

    # Now stats is empty but it will collect during ALL uploads
    stats = _setup_api_stats(emitter)

    # Send the progress via IPC, and log the progress in debug mode
    _setup_ipc(emitter)

    mly_uploader = uploader.Uploader(user_items, emitter=emitter, dry_run=dry_run)

    results = _gen_upload_everything(
        mly_uploader, metadatas, import_paths, skip_subfolders
    )

    upload_successes = 0
    upload_errors: list[Exception] = []

    # The real upload happens sequentially here
    try:
        for _, result in results:
            if result.error is not None:
                # _continue_or_fail re-raises fatal errors; recoverable ones
                # are returned and reported in the summary below
                upload_errors.append(_continue_or_fail(result.error))
            else:
                upload_successes += 1

    except Exception as ex:
        # Fatal error: log and re-raise (bare raise preserves the traceback)
        if not dry_run:
            _api_logging_failed(_summarize(stats), ex)
        raise

    else:
        if not dry_run:
            _api_logging_finished(_summarize(stats))

    finally:
        # Show the summary FIRST so that a failing sanity check below cannot
        # suppress the report or mask an in-flight fatal exception with an
        # AssertionError.
        _show_upload_summary(stats, upload_errors)
        # We collected stats after every upload is finished
        assert upload_successes == len(stats), (
            f"expected {upload_successes} finished upload(s) in stats, got {len(stats)}"
        )
+ _show_upload_summary(stats, upload_errors)
692
710
 
693
def _gen_upload_zipfiles(
    mly_uploader: uploader.Uploader,
    zip_paths: T.Sequence[Path],
) -> T.Generator[tuple[Path, uploader.UploadResult], None, None]:
    """Upload each zipfile in turn, yielding (path, result) pairs.

    Per-file exceptions are captured into the yielded UploadResult rather
    than propagated, so one bad zipfile does not stop the others.
    """
    total = len(zip_paths)
    for idx, zip_path in enumerate(zip_paths):
        progress: uploader.SequenceProgress = {
            "total_sequence_count": total,
            "sequence_idx": idx,
            "import_path": str(zip_path),
        }
        try:
            cluster_id = uploader.ZipImageSequence.prepare_zipfile_and_upload(
                zip_path, mly_uploader, progress=T.cast(T.Dict[str, T.Any], progress)
            )
        except Exception as ex:
            yield zip_path, uploader.UploadResult(error=ex)
        else:
            yield zip_path, uploader.UploadResult(result=cluster_id)