mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (49)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -262
  3. mapillary_tools/authenticate.py +54 -46
  4. mapillary_tools/blackvue_parser.py +79 -22
  5. mapillary_tools/commands/__main__.py +15 -16
  6. mapillary_tools/commands/upload.py +33 -4
  7. mapillary_tools/config.py +38 -17
  8. mapillary_tools/constants.py +127 -43
  9. mapillary_tools/exceptions.py +4 -0
  10. mapillary_tools/exif_read.py +2 -1
  11. mapillary_tools/exif_write.py +3 -1
  12. mapillary_tools/exiftool_read_video.py +52 -15
  13. mapillary_tools/exiftool_runner.py +4 -24
  14. mapillary_tools/ffmpeg.py +406 -232
  15. mapillary_tools/geo.py +16 -0
  16. mapillary_tools/geotag/__init__.py +0 -0
  17. mapillary_tools/geotag/base.py +8 -4
  18. mapillary_tools/geotag/factory.py +106 -89
  19. mapillary_tools/geotag/geotag_images_from_exiftool.py +27 -20
  20. mapillary_tools/geotag/geotag_images_from_gpx.py +7 -6
  21. mapillary_tools/geotag/geotag_images_from_video.py +35 -0
  22. mapillary_tools/geotag/geotag_videos_from_exiftool.py +61 -14
  23. mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
  24. mapillary_tools/geotag/options.py +25 -3
  25. mapillary_tools/geotag/utils.py +9 -12
  26. mapillary_tools/geotag/video_extractors/base.py +1 -1
  27. mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
  28. mapillary_tools/geotag/video_extractors/gpx.py +61 -70
  29. mapillary_tools/geotag/video_extractors/native.py +34 -31
  30. mapillary_tools/history.py +128 -8
  31. mapillary_tools/http.py +211 -0
  32. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  33. mapillary_tools/process_geotag_properties.py +47 -35
  34. mapillary_tools/process_sequence_properties.py +340 -325
  35. mapillary_tools/sample_video.py +8 -8
  36. mapillary_tools/serializer/description.py +587 -0
  37. mapillary_tools/serializer/gpx.py +132 -0
  38. mapillary_tools/types.py +44 -610
  39. mapillary_tools/upload.py +327 -352
  40. mapillary_tools/upload_api_v4.py +125 -72
  41. mapillary_tools/uploader.py +797 -216
  42. mapillary_tools/utils.py +57 -5
  43. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +91 -34
  44. mapillary_tools-0.14.1.dist-info/RECORD +76 -0
  45. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +1 -1
  46. mapillary_tools-0.14.0a2.dist-info/RECORD +0 -72
  47. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
  48. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
  49. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
mapillary_tools/upload.py CHANGED
@@ -9,27 +9,25 @@ import typing as T
  import uuid
  from pathlib import Path

+ import humanize
  import jsonschema
  import requests
  from tqdm import tqdm

  from . import (
  api_v4,
+ config,
  constants,
  exceptions,
- geo,
  history,
+ http,
  ipc,
- telemetry,
  types,
- upload_api_v4,
  uploader,
  utils,
  VERSION,
  )
- from .camm import camm_builder, camm_parser
- from .gpmf import gpmf_parser
- from .mp4 import simple_mp4_builder
+ from .serializer.description import DescriptionJSONSerializer
  from .types import FileType

  JSONDict = T.Dict[str, T.Union[str, int, float, None]]
@@ -37,91 +35,110 @@ JSONDict = T.Dict[str, T.Union[str, int, float, None]]
  LOG = logging.getLogger(__name__)


- class UploadedAlreadyError(uploader.SequenceError):
+ class UploadedAlready(uploader.SequenceError):
  pass


- def _load_validate_metadatas_from_desc_path(
- desc_path: str | None, import_paths: T.Sequence[Path]
- ) -> list[types.Metadata]:
- is_default_desc_path = False
- if desc_path is None:
- is_default_desc_path = True
- if len(import_paths) == 1 and import_paths[0].is_dir():
- desc_path = str(
- import_paths[0].joinpath(constants.IMAGE_DESCRIPTION_FILENAME)
- )
- else:
- if 1 < len(import_paths):
- raise exceptions.MapillaryBadParameterError(
- "The description path must be specified (with --desc_path) when uploading multiple paths",
- )
- else:
- raise exceptions.MapillaryBadParameterError(
- "The description path must be specified (with --desc_path) when uploading a single file",
- )
+ def upload(
+ import_path: Path | T.Sequence[Path],
+ user_items: config.UserItem,
+ num_upload_workers: int,
+ desc_path: str | None = None,
+ _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
+ reupload: bool = False,
+ dry_run: bool = False,
+ nofinish: bool = False,
+ noresume: bool = False,
+ skip_subfolders: bool = False,
+ ) -> None:
+ LOG.info("==> Uploading...")

- descs: list[types.DescriptionOrError] = []
+ import_paths = _normalize_import_paths(import_path)

- if desc_path == "-":
- try:
- descs = json.load(sys.stdin)
- except json.JSONDecodeError as ex:
- raise exceptions.MapillaryInvalidDescriptionFile(
- f"Invalid JSON stream from stdin: {ex}"
- ) from ex
- else:
- if not os.path.isfile(desc_path):
- if is_default_desc_path:
- raise exceptions.MapillaryFileNotFoundError(
- f"Description file {desc_path} not found. Has the directory been processed yet?"
- )
- else:
- raise exceptions.MapillaryFileNotFoundError(
- f"Description file {desc_path} not found"
- )
- with open(desc_path) as fp:
- try:
- descs = json.load(fp)
- except json.JSONDecodeError as ex:
- raise exceptions.MapillaryInvalidDescriptionFile(
- f"Invalid JSON file {desc_path}: {ex}"
- ) from ex
+ metadatas = _load_descs(_metadatas_from_process, import_paths, desc_path)

- # the descs load from stdin or json file may contain invalid entries
- validated_descs = [
- types.validate_and_fail_desc(desc)
- for desc in descs
- # skip error descriptions
- if "error" not in desc
- ]
+ jsonschema.validate(instance=user_items, schema=config.UserItemSchema)
+
+ # Setup the emitter -- the order matters here
+
+ emitter = uploader.EventEmitter()

- # throw if we found any invalid descs
- invalid_descs = [desc for desc in validated_descs if "error" in desc]
- if invalid_descs:
- for desc in invalid_descs:
- LOG.error("Invalid description entry: %s", json.dumps(desc))
- raise exceptions.MapillaryInvalidDescriptionFile(
- f"Found {len(invalid_descs)} invalid descriptions"
+ # Check duplications first
+ if not _is_history_disabled(dry_run):
+ upload_run_params: JSONDict = {
+ # Null if multiple paths provided
+ "import_path": str(import_path) if isinstance(import_path, Path) else None,
+ "organization_key": user_items.get("MAPOrganizationKey"),
+ "user_key": user_items.get("MAPSettingsUserKey"),
+ "version": VERSION,
+ "run_at": time.time(),
+ }
+ _setup_history(
+ emitter, upload_run_params, metadatas, reupload=reupload, nofinish=nofinish
  )

- # validated_descs should contain no errors
- return [
- types.from_desc(T.cast(types.Description, desc)) for desc in validated_descs
- ]
+ # Set up tdqm
+ _setup_tdqm(emitter)

+ # Now stats is empty but it will collect during ALL uploads
+ stats = _setup_api_stats(emitter)

- def zip_images(
- import_path: Path,
- zip_dir: Path,
- desc_path: str | None = None,
- ):
+ # Send the progress via IPC, and log the progress in debug mode
+ _setup_ipc(emitter)
+
+ try:
+ upload_options = uploader.UploadOptions(
+ user_items,
+ dry_run=dry_run,
+ nofinish=nofinish,
+ noresume=noresume,
+ num_upload_workers=num_upload_workers,
+ )
+ except ValueError as ex:
+ raise exceptions.MapillaryBadParameterError(str(ex)) from ex
+
+ mly_uploader = uploader.Uploader(upload_options, emitter=emitter)
+
+ results = _gen_upload_everything(
+ mly_uploader, metadatas, import_paths, skip_subfolders
+ )
+
+ upload_successes = 0
+ upload_errors: list[Exception] = []
+
+ # The real uploading happens sequentially here
+ try:
+ for _, result in results:
+ if result.error is not None:
+ upload_error = _continue_or_fail(result.error)
+ log_exception(upload_error)
+ upload_errors.append(upload_error)
+ else:
+ upload_successes += 1
+
+ except Exception as ex:
+ # Fatal error: log and raise
+ _api_logging_failed(_summarize(stats), ex, dry_run=dry_run)
+ raise ex
+
+ else:
+ _api_logging_finished(_summarize(stats), dry_run=dry_run)
+
+ finally:
+ # We collected stats after every upload is finished
+ assert upload_successes == len(stats), (
+ f"Expect {upload_successes} success but got {stats}"
+ )
+ _show_upload_summary(stats, upload_errors)
+
+
+ def zip_images(import_path: Path, zip_dir: Path, desc_path: str | None = None):
  if not import_path.is_dir():
  raise exceptions.MapillaryFileNotFoundError(
  f"Import directory not found: {import_path}"
  )

- metadatas = _load_validate_metadatas_from_desc_path(desc_path, [import_path])
+ metadatas = _load_valid_metadatas_from_desc_path([import_path], desc_path)

  if not metadatas:
  LOG.warning("No images or videos found in %s", desc_path)
@@ -131,40 +148,90 @@ def zip_images(
  metadata for metadata in metadatas if isinstance(metadata, types.ImageMetadata)
  ]

- uploader.ZipImageSequence.zip_images(image_metadatas, zip_dir)
+ uploader.ZipUploader.zip_images(image_metadatas, zip_dir)
+
+
+ def log_exception(ex: Exception) -> None:
+ if LOG.isEnabledFor(logging.DEBUG):
+ exc_info = ex
+ else:
+ exc_info = None
+
+ exc_name = ex.__class__.__name__
+
+ if isinstance(ex, UploadedAlready):
+ LOG.info(f"{exc_name}: {ex}")
+ elif isinstance(ex, requests.HTTPError):
+ LOG.error(f"{exc_name}: {http.readable_http_error(ex)}", exc_info=exc_info)
+ elif isinstance(ex, api_v4.HTTPContentError):
+ LOG.error(
+ f"{exc_name}: {ex}: {http.readable_http_response(ex.response)}",
+ exc_info=exc_info,
+ )
+ else:
+ LOG.error(f"{exc_name}: {ex}", exc_info=exc_info)
+
+
+ def _is_history_disabled(dry_run: bool) -> bool:
+ # There is no way to read/write history if the path is not set
+ if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
+ return True
+
+ if dry_run:
+ # When dry_run mode is on, we disable history by default
+ # However, we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
+ # and when it is on, we enable history regardless of dry_run
+ if constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN:
+ return False
+ else:
+ return True
+
+ return False


  def _setup_history(
  emitter: uploader.EventEmitter,
  upload_run_params: JSONDict,
  metadatas: list[types.Metadata],
+ reupload: bool,
+ nofinish: bool,
  ) -> None:
  @emitter.on("upload_start")
  def check_duplication(payload: uploader.Progress):
- md5sum = payload.get("md5sum")
+ md5sum = payload.get("sequence_md5sum")
  assert md5sum is not None, f"md5sum has to be set for {payload}"

- if history.is_uploaded(md5sum):
- sequence_uuid = payload.get("sequence_uuid")
- if sequence_uuid is None:
- basename = os.path.basename(payload.get("import_path", ""))
- LOG.info(
- "File %s has been uploaded already. Check the upload history at %s",
- basename,
- history.history_desc_path(md5sum),
- )
+ record = history.read_history_record(md5sum)
+
+ if record is not None:
+ history_desc_path = history.history_desc_path(md5sum)
+ uploaded_at = record.get("summary", {}).get("upload_end_time", None)
+
+ upload_name = uploader.Uploader._upload_name(payload)
+
+ if reupload:
+ if uploaded_at is not None:
+ LOG.info(
+ f"Reuploading {upload_name}, despite being uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+ )
+ else:
+ LOG.info(
+ f"Reuploading {upload_name}, despite already being uploaded (see {history_desc_path})"
+ )
  else:
- LOG.info(
- "Sequence %s has been uploaded already. Check the upload history at %s",
- sequence_uuid,
- history.history_desc_path(md5sum),
- )
- raise UploadedAlreadyError()
+ if uploaded_at is not None:
+ msg = f"Skipping {upload_name}, already uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+ else:
+ msg = f"Skipping {upload_name}, already uploaded (see {history_desc_path})"
+ raise UploadedAlready(msg)

  @emitter.on("upload_finished")
  def write_history(payload: uploader.Progress):
+ if nofinish:
+ return
+
  sequence_uuid = payload.get("sequence_uuid")
- md5sum = payload.get("md5sum")
+ md5sum = payload.get("sequence_md5sum")
  assert md5sum is not None, f"md5sum has to be set for {payload}"

  if sequence_uuid is None:
@@ -180,10 +247,7 @@ def _setup_history(

  try:
  history.write_history(
- md5sum,
- upload_run_params,
- T.cast(JSONDict, payload),
- sequence,
+ md5sum, upload_run_params, T.cast(JSONDict, payload), sequence
  )
  except OSError:
  LOG.warning("Error writing upload history %s", md5sum, exc_info=True)
@@ -192,8 +256,8 @@
  def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
  upload_pbar: tqdm | None = None

- @emitter.on("upload_fetch_offset")
- def upload_fetch_offset(payload: uploader.Progress) -> None:
+ @emitter.on("upload_start")
+ def upload_start(payload: uploader.Progress) -> None:
  nonlocal upload_pbar

  if upload_pbar is not None:
@@ -204,27 +268,52 @@ def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
  import_path: str | None = payload.get("import_path")
  filetype = payload.get("file_type", "unknown").upper()
  if import_path is None:
- _desc = f"Uploading {filetype} ({nth}/{total})"
+ desc = f"Uploading {filetype} ({nth}/{total})"
  else:
- _desc = (
+ desc = (
  f"Uploading {filetype} {os.path.basename(import_path)} ({nth}/{total})"
  )
  upload_pbar = tqdm(
  total=payload["entity_size"],
- desc=_desc,
+ desc=desc,
  unit="B",
  unit_scale=True,
  unit_divisor=1024,
- initial=payload["offset"],
- disable=LOG.getEffectiveLevel() <= logging.DEBUG,
+ initial=payload.get("offset", 0),
+ disable=LOG.isEnabledFor(logging.DEBUG),
+ )
+
+ @emitter.on("upload_fetch_offset")
+ def upload_fetch_offset(payload: uploader.Progress) -> None:
+ assert upload_pbar is not None, (
+ "progress_bar must be initialized in upload_start"
  )
+ begin_offset = payload.get("begin_offset", 0)
+ if begin_offset is not None and begin_offset > 0:
+ if upload_pbar.total is not None:
+ progress_percent = (begin_offset / upload_pbar.total) * 100
+ upload_pbar.write(
+ f"Resuming upload at {begin_offset=} ({progress_percent:3.0f}%)",
+ file=sys.stderr,
+ )
+ else:
+ upload_pbar.write(
+ f"Resuming upload at {begin_offset=}", file=sys.stderr
+ )
+ upload_pbar.reset()
+ upload_pbar.update(begin_offset)
+ upload_pbar.refresh()

  @emitter.on("upload_progress")
  def upload_progress(payload: uploader.Progress) -> None:
- assert upload_pbar is not None, "progress_bar must be initialized"
+ assert upload_pbar is not None, (
+ "progress_bar must be initialized in upload_start"
+ )
  upload_pbar.update(payload["chunk_size"])
+ upload_pbar.refresh()

  @emitter.on("upload_end")
+ @emitter.on("upload_failed")
  def upload_end(_: uploader.Progress) -> None:
  nonlocal upload_pbar
  if upload_pbar:
@@ -236,20 +325,20 @@ def _setup_ipc(emitter: uploader.EventEmitter):
  @emitter.on("upload_start")
  def upload_start(payload: uploader.Progress):
  type: uploader.EventName = "upload_start"
- LOG.debug("IPC %s: %s", type.upper(), payload)
+ LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
  ipc.send(type, payload)

  @emitter.on("upload_fetch_offset")
  def upload_fetch_offset(payload: uploader.Progress) -> None:
  type: uploader.EventName = "upload_fetch_offset"
- LOG.debug("IPC %s: %s", type.upper(), payload)
+ LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
  ipc.send(type, payload)

  @emitter.on("upload_progress")
  def upload_progress(payload: uploader.Progress):
  type: uploader.EventName = "upload_progress"

- if LOG.getEffectiveLevel() <= logging.DEBUG:
+ if LOG.isEnabledFor(logging.DEBUG):
  # In debug mode, we want to see the progress every 30 seconds
  # instead of every chunk (which is too verbose)
  INTERVAL_SECONDS = 30
@@ -261,7 +350,7 @@ def _setup_ipc(emitter: uploader.EventEmitter):
  last_upload_progress_debug_at is None
  or last_upload_progress_debug_at + INTERVAL_SECONDS < now
  ):
- LOG.debug("IPC %s: %s", type.upper(), payload)
+ LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
  T.cast(T.Dict, payload)["_last_upload_progress_debug_at"] = now

  ipc.send(type, payload)
@@ -269,7 +358,13 @@ def _setup_ipc(emitter: uploader.EventEmitter):
  @emitter.on("upload_end")
  def upload_end(payload: uploader.Progress) -> None:
  type: uploader.EventName = "upload_end"
- LOG.debug("IPC %s: %s", type.upper(), payload)
+ LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
+ ipc.send(type, payload)
+
+ @emitter.on("upload_failed")
+ def upload_failed(payload: uploader.Progress) -> None:
+ type: uploader.EventName = "upload_failed"
+ LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
  ipc.send(type, payload)


@@ -295,8 +390,13 @@ def _setup_api_stats(emitter: uploader.EventEmitter):

  @emitter.on("upload_start")
  def collect_start_time(payload: _APIStats) -> None:
- payload["upload_start_time"] = time.time()
+ now = time.time()
+ payload["upload_start_time"] = now
  payload["upload_total_time"] = 0
+ # These filed should be initialized in upload events like "upload_fetch_offset"
+ # but since we disabled them for uploading images, so we initialize them here
+ payload["upload_last_restart_time"] = now
+ payload["upload_first_offset"] = 0

  @emitter.on("upload_fetch_offset")
  def collect_restart_time(payload: _APIStats) -> None:
@@ -305,8 +405,8 @@ def _setup_api_stats(emitter: uploader.EventEmitter):
  payload["offset"], payload.get("upload_first_offset", payload["offset"])
  )

- @emitter.on("upload_interrupted")
- def collect_interrupted(payload: _APIStats):
+ @emitter.on("upload_retrying")
+ def collect_retrying(payload: _APIStats):
  # could be None if it failed to fetch offset
  restart_time = payload.get("upload_last_restart_time")
  if restart_time is not None:
@@ -360,89 +460,80 @@ def _summarize(stats: T.Sequence[_APIStats]) -> dict:


  def _show_upload_summary(stats: T.Sequence[_APIStats], errors: T.Sequence[Exception]):
- if not stats:
- LOG.info("Nothing uploaded. Bye.")
- else:
+ LOG.info("==> Upload summary")
+
+ errors_by_type: dict[type[Exception], list[Exception]] = {}
+ for error in errors:
+ errors_by_type.setdefault(type(error), []).append(error)
+
+ for error_type, error_list in errors_by_type.items():
+ if error_type is UploadedAlready:
+ LOG.info(
+ f"Skipped {len(error_list)} already uploaded sequences (use --reupload to force re-upload)",
+ )
+ else:
+ LOG.info(f"{len(error_list)} uploads failed due to {error_type.__name__}")
+
+ if stats:
  grouped: dict[str, list[_APIStats]] = {}
  for stat in stats:
  grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)

  for file_type, typed_stats in grouped.items():
  if file_type == FileType.IMAGE.value:
- LOG.info("%8d image sequences uploaded", len(typed_stats))
+ LOG.info(f"{len(typed_stats)} sequences uploaded")
  else:
- LOG.info("%8d %s videos uploaded", len(typed_stats), file_type.upper())
+ LOG.info(f"{len(typed_stats)} {file_type} uploaded")

  summary = _summarize(stats)
- LOG.info("%8.1fM data in total", summary["size"])
- LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
- LOG.info("%8.1fs upload time", summary["time"])
+ LOG.info(f"{humanize.naturalsize(summary['size'] * 1024 * 1024)} read in total")
+ LOG.info(
+ f"{humanize.naturalsize(summary['uploaded_size'] * 1024 * 1024)} uploaded"
+ )
+ LOG.info(f"{summary['time']:.3f} seconds upload time")
+ else:
+ LOG.info("Nothing uploaded. Bye.")

- for error in errors:
- LOG.error("Upload error: %s: %s", error.__class__.__name__, error)

+ def _api_logging_finished(summary: dict, dry_run: bool = False):
+ if dry_run:
+ return

- def _api_logging_finished(summary: dict):
  if constants.MAPILLARY_DISABLE_API_LOGGING:
  return

  action: api_v4.ActionType = "upload_finished_upload"
- try:
- api_v4.log_event(action, summary)
- except requests.HTTPError as exc:
- LOG.warning(
- "HTTPError from API Logging for action %s: %s",
- action,
- api_v4.readable_http_error(exc),
- )
- except Exception:
- LOG.warning("Error from API Logging for action %s", action, exc_info=True)

+ with api_v4.create_client_session(disable_logging=True) as client_session:
+ try:
+ api_v4.log_event(client_session, action, summary)
+ except requests.HTTPError as exc:
+ LOG.warning(
+ f"HTTPError from logging action {action}: {http.readable_http_error(exc)}"
+ )
+ except Exception:
+ LOG.warning(f"Error from logging action {action}", exc_info=True)
+
+
+ def _api_logging_failed(payload: dict, exc: Exception, dry_run: bool = False):
+ if dry_run:
+ return

- def _api_logging_failed(payload: dict, exc: Exception):
  if constants.MAPILLARY_DISABLE_API_LOGGING:
  return

  payload_with_reason = {**payload, "reason": exc.__class__.__name__}
  action: api_v4.ActionType = "upload_failed_upload"
- try:
- api_v4.log_event(action, payload_with_reason)
- except requests.HTTPError as exc:
- LOG.warning(
- "HTTPError from API Logging for action %s: %s",
- action,
- api_v4.readable_http_error(exc),
- )
- except Exception:
- LOG.warning("Error from API Logging for action %s", action, exc_info=True)

-
- def _load_descs(
- _metadatas_from_process: T.Sequence[types.MetadataOrError] | None,
- desc_path: str | None,
- import_paths: T.Sequence[Path],
- ) -> list[types.Metadata]:
- metadatas: list[types.Metadata]
-
- if _metadatas_from_process is not None:
- metadatas, _ = types.separate_errors(_metadatas_from_process)
- else:
- metadatas = _load_validate_metadatas_from_desc_path(desc_path, import_paths)
-
- # Make sure all metadatas have sequence uuid assigned
- # It is used to find the right sequence when writing upload history
- missing_sequence_uuid = str(uuid.uuid4())
- for metadata in metadatas:
- if isinstance(metadata, types.ImageMetadata):
- if metadata.MAPSequenceUUID is None:
- metadata.MAPSequenceUUID = missing_sequence_uuid
-
- for metadata in metadatas:
- assert isinstance(metadata, (types.ImageMetadata, types.VideoMetadata))
- if isinstance(metadata, types.ImageMetadata):
- assert metadata.MAPSequenceUUID is not None
-
- return metadatas
+ with api_v4.create_client_session(disable_logging=True) as client_session:
+ try:
+ api_v4.log_event(client_session, action, payload_with_reason)
+ except requests.HTTPError as exc:
+ LOG.warning(
+ f"HTTPError from logging action {action}: {http.readable_http_error(exc)}"
+ )
+ except Exception:
+ LOG.warning(f"Error from logging action {action}", exc_info=True)


  _M = T.TypeVar("_M", bound=types.Metadata)
@@ -466,110 +557,21 @@ def _gen_upload_everything(
  (m for m in metadatas if isinstance(m, types.ImageMetadata)),
  utils.find_images(import_paths, skip_subfolders=skip_subfolders),
  )
- for image_result in uploader.ZipImageSequence.prepare_images_and_upload(
- image_metadatas,
- mly_uploader,
- ):
- yield image_result
+ image_uploader = uploader.ImageSequenceUploader(
+ mly_uploader.upload_options, emitter=mly_uploader.emitter
+ )
+ yield from image_uploader.upload_images(image_metadatas)

  # Upload videos
  video_metadatas = _find_metadata_with_filename_existed_in(
  (m for m in metadatas if isinstance(m, types.VideoMetadata)),
  utils.find_videos(import_paths, skip_subfolders=skip_subfolders),
  )
- for video_result in _gen_upload_videos(mly_uploader, video_metadatas):
- yield video_result
+ yield from uploader.VideoUploader.upload_videos(mly_uploader, video_metadatas)

  # Upload zip files
  zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
- for zip_result in _gen_upload_zipfiles(mly_uploader, zip_paths):
- yield zip_result
-
-
- def _gen_upload_videos(
- mly_uploader: uploader.Uploader, video_metadatas: T.Sequence[types.VideoMetadata]
- ) -> T.Generator[tuple[types.VideoMetadata, uploader.UploadResult], None, None]:
- for idx, video_metadata in enumerate(video_metadatas):
- try:
- video_metadata.update_md5sum()
- except Exception as ex:
- yield video_metadata, uploader.UploadResult(error=ex)
- continue
-
- assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
-
- # Convert video metadata to CAMMInfo
- camm_info = _prepare_camm_info(video_metadata)
-
- # Create the CAMM sample generator
- camm_sample_generator = camm_builder.camm_sample_generator2(camm_info)
-
- progress: uploader.SequenceProgress = {
- "total_sequence_count": len(video_metadatas),
- "sequence_idx": idx,
- "file_type": video_metadata.filetype.value,
- "import_path": str(video_metadata.filename),
- "md5sum": video_metadata.md5sum,
- }
-
- session_key = uploader._session_key(
- video_metadata.md5sum, upload_api_v4.ClusterFileType.CAMM
- )
-
- try:
- with video_metadata.filename.open("rb") as src_fp:
- # Build the mp4 stream with the CAMM samples
- camm_fp = simple_mp4_builder.transform_mp4(
- src_fp, camm_sample_generator
- )
-
- # Upload the mp4 stream
- cluster_id = mly_uploader.upload_stream(
- T.cast(T.IO[bytes], camm_fp),
- upload_api_v4.ClusterFileType.CAMM,
- session_key,
- progress=T.cast(T.Dict[str, T.Any], progress),
- )
- except Exception as ex:
- yield video_metadata, uploader.UploadResult(error=ex)
- else:
- yield video_metadata, uploader.UploadResult(result=cluster_id)
-
-
- def _prepare_camm_info(video_metadata: types.VideoMetadata) -> camm_parser.CAMMInfo:
- camm_info = camm_parser.CAMMInfo(
- make=video_metadata.make or "", model=video_metadata.model or ""
- )
-
- for point in video_metadata.points:
- if isinstance(point, telemetry.CAMMGPSPoint):
- if camm_info.gps is None:
- camm_info.gps = []
- camm_info.gps.append(point)
-
- elif isinstance(point, telemetry.GPSPoint):
- # There is no proper CAMM entry for GoPro GPS
- if camm_info.mini_gps is None:
- camm_info.mini_gps = []
- camm_info.mini_gps.append(point)
-
- elif isinstance(point, geo.Point):
- if camm_info.mini_gps is None:
- camm_info.mini_gps = []
- camm_info.mini_gps.append(point)
- else:
- raise ValueError(f"Unknown point type: {point}")
-
- if constants.MAPILLARY__EXPERIMENTAL_ENABLE_IMU:
- if video_metadata.filetype is FileType.GOPRO:
- with video_metadata.filename.open("rb") as fp:
- gopro_info = gpmf_parser.extract_gopro_info(fp, telemetry_only=True)
- if gopro_info is not None:
- camm_info.accl = gopro_info.accl or []
- camm_info.gyro = gopro_info.gyro or []
- camm_info.magn = gopro_info.magn or []
-
- return camm_info
+ yield from uploader.ZipUploader.upload_zipfiles(mly_uploader, zip_paths)


  def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> list[Path]:
@@ -602,7 +604,7 @@ def _continue_or_fail(ex: Exception) -> Exception:
  return ex

  # Certain files not found or no permission
- if isinstance(ex, OSError):
+ if isinstance(ex, (FileNotFoundError, PermissionError)):
  return ex

  # Certain metadatas are not valid
@@ -630,100 +632,73 @@ def _continue_or_fail(ex: Exception) -> Exception:
  raise ex


- def upload(
- import_path: Path | T.Sequence[Path],
- user_items: types.UserItem,
- desc_path: str | None = None,
- _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
- dry_run=False,
- skip_subfolders=False,
- ) -> None:
- import_paths = _normalize_import_paths(import_path)
-
- metadatas = _load_descs(_metadatas_from_process, desc_path, import_paths)
-
- jsonschema.validate(instance=user_items, schema=types.UserItemSchema)
-
- # Setup the emitter -- the order matters here
+ def _load_descs(
+ _metadatas_from_process: T.Sequence[types.MetadataOrError] | None,
+ import_paths: T.Sequence[Path],
+ desc_path: str | None,
+ ) -> list[types.Metadata]:
+ metadatas: list[types.Metadata]

- emitter = uploader.EventEmitter()
+ if _metadatas_from_process is not None:
+ metadatas, _ = types.separate_errors(_metadatas_from_process)
+ else:
+ metadatas = _load_valid_metadatas_from_desc_path(import_paths, desc_path)

- # When dry_run mode is on, we disable history by default.
- # But we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
- # and when it is on, we enable history regardless of dry_run
- enable_history = constants.MAPILLARY_UPLOAD_HISTORY_PATH and (
- not dry_run or constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
- )
+ # Make sure all metadatas have sequence uuid assigned
+ # It is used to find the right sequence when writing upload history
+ missing_sequence_uuid = str(uuid.uuid4())
+ for metadata in metadatas:
+ if isinstance(metadata, types.ImageMetadata):
+ if metadata.MAPSequenceUUID is None:
+ metadata.MAPSequenceUUID = missing_sequence_uuid

- # Put it first one to check duplications first
- if enable_history:
- upload_run_params: JSONDict = {
- # Null if multiple paths provided
- "import_path": str(import_path) if isinstance(import_path, Path) else None,
- "organization_key": user_items.get("MAPOrganizationKey"),
- "user_key": user_items.get("MAPSettingsUserKey"),
- "version": VERSION,
- "run_at": time.time(),
- }
- _setup_history(emitter, upload_run_params, metadatas)
+ for metadata in metadatas:
+ assert isinstance(metadata, (types.ImageMetadata, types.VideoMetadata))
+ if isinstance(metadata, types.ImageMetadata):
+ assert metadata.MAPSequenceUUID is not None

- # Set up tdqm
- _setup_tdqm(emitter)
+ return metadatas

- # Now stats is empty but it will collect during ALL uploads
- stats = _setup_api_stats(emitter)

- # Send the progress via IPC, and log the progress in debug mode
- _setup_ipc(emitter)
+ def _load_valid_metadatas_from_desc_path(
+ import_paths: T.Sequence[Path], desc_path: str | None
+ ) -> list[types.Metadata]:
+ if desc_path is None:
+ desc_path = _find_desc_path(import_paths)

- mly_uploader = uploader.Uploader(user_items, emitter=emitter, dry_run=dry_run)
+ if desc_path == "-":
+ try:
+ metadatas = DescriptionJSONSerializer.deserialize_stream(sys.stdin.buffer)
+ except json.JSONDecodeError as ex:
+ raise exceptions.MapillaryInvalidDescriptionFile(
+ f"Invalid JSON stream from {desc_path}: {ex}"
+ ) from ex

- results = _gen_upload_everything(
- mly_uploader, metadatas, import_paths, skip_subfolders
- )
+ else:
+ if not os.path.isfile(desc_path):
+ raise exceptions.MapillaryFileNotFoundError(
+ f"Description file not found: {desc_path}"
+ )
+ with open(desc_path, "rb") as fp:
+ try:
+ metadatas = DescriptionJSONSerializer.deserialize_stream(fp)
+ except json.JSONDecodeError as ex:
+ raise exceptions.MapillaryInvalidDescriptionFile(
+ f"Invalid JSON stream from {desc_path}: {ex}"
+ ) from ex

- upload_successes = 0
- upload_errors: list[Exception] = []
+ return metadatas

- # The real upload happens sequentially here
- try:
- for _, result in results:
- if result.error is not None:
- upload_errors.append(_continue_or_fail(result.error))
- else:
- upload_successes += 1

- except Exception as ex:
- # Fatal error: log and raise
- if not dry_run:
- _api_logging_failed(_summarize(stats), ex)
- raise ex
+ def _find_desc_path(import_paths: T.Sequence[Path]) -> str:
+ if len(import_paths) == 1 and import_paths[0].is_dir():
+ return str(import_paths[0].joinpath(constants.IMAGE_DESCRIPTION_FILENAME))

+ if 1 < len(import_paths):
+ raise exceptions.MapillaryBadParameterError(
+ "The description path must be specified (with --desc_path) when uploading multiple paths"
+ )
  else:
- if not dry_run:
- _api_logging_finished(_summarize(stats))
-
- finally:
- # We collected stats after every upload is finished
- assert upload_successes == len(stats)
- _show_upload_summary(stats, upload_errors)
-
-
- def _gen_upload_zipfiles(
- mly_uploader: uploader.Uploader,
- zip_paths: T.Sequence[Path],
- ) -> T.Generator[tuple[Path, uploader.UploadResult], None, None]:
- for idx, zip_path in enumerate(zip_paths):
- progress: uploader.SequenceProgress = {
- "total_sequence_count": len(zip_paths),
- "sequence_idx": idx,
- "import_path": str(zip_path),
- }
- try:
- cluster_id = uploader.ZipImageSequence.prepare_zipfile_and_upload(
- zip_path, mly_uploader, progress=T.cast(T.Dict[str, T.Any], progress)
- )
- except Exception as ex:
- yield zip_path, uploader.UploadResult(error=ex)
- else:
- yield zip_path, uploader.UploadResult(result=cluster_id)
+ raise exceptions.MapillaryBadParameterError(
+ "The description path must be specified (with --desc_path) when uploading a single file"
+ )
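
For orientation only (not part of the diff): a minimal sketch of how the reworked upload() entry point in 0.14.1 might be called programmatically, based solely on the new signature shown above. The user_items dict and the import path are placeholders; in practice the credentials come from the CLI's saved authentication config (see mapillary_tools/config.py) and are validated against config.UserItemSchema inside upload().

# Hypothetical usage sketch, assuming the 0.14.1 signature shown in the diff above.
from pathlib import Path

from mapillary_tools import upload

# Placeholder credentials -- real values are produced by the authenticate flow,
# not written by hand; only the two keys visible in the diff are shown here.
user_items = {
    "MAPSettingsUserKey": "<user key>",
    "MAPOrganizationKey": "<organization key>",
}

upload.upload(
    Path("captures/2024-06-01"),  # a directory already processed (description file inside)
    user_items,
    num_upload_workers=4,         # new required argument in 0.14.1
    dry_run=True,                 # exercise the pipeline without uploading
    reupload=False,               # keep skipping sequences found in the upload history
)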