mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +198 -55
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +10 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +18 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +411 -387
  61. mapillary_tools/upload_api_v4.py +167 -142
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
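Of the 87 files, only mapillary_tools/upload.py is diffed in full below. The most visible API change there is the reworked upload() entry point: it no longer accepts user_name/organization_key and no longer resolves the account itself (fetch_user_items is removed from this module); callers instead pass an already-authenticated config.UserItem, which upload() validates against config.UserItemSchema, along with the new reupload, nofinish, and noresume flags. A minimal before/after sketch of a call site, assuming a user_items mapping obtained from the authentication flow (the placeholder values are hypothetical):

    from pathlib import Path

    from mapillary_tools import upload as upload_module

    # 0.13.3: upload() looked up the account itself
    # upload_module.upload(Path("captures"), user_name="alice", dry_run=True)

    # 0.14.0: the caller supplies the authenticated user_items mapping;
    # the two keys below appear in the diff, but the full schema is
    # defined by config.UserItemSchema (not shown here)
    user_items = {
        "user_upload_token": "<token>",      # hypothetical placeholder
        "MAPSettingsUserKey": "<user-key>",  # hypothetical placeholder
    }
    upload_module.upload(Path("captures"), user_items, dry_run=True)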
mapillary_tools/upload.py CHANGED
@@ -1,3 +1,5 @@
+ from __future__ import annotations
+
  import json
  import logging
  import os
@@ -7,127 +9,134 @@ import typing as T
  import uuid
  from pathlib import Path

+ import humanize
+ import jsonschema
  import requests
  from tqdm import tqdm

  from . import (
      api_v4,
-     authenticate,
      config,
      constants,
      exceptions,
      history,
      ipc,
-     telemetry,
      types,
-     upload_api_v4,
      uploader,
      utils,
      VERSION,
  )
- from .camm import camm_builder
- from .geotag import gpmf_parser
- from .mp4 import simple_mp4_builder
+ from .serializer.description import DescriptionJSONSerializer
  from .types import FileType

  JSONDict = T.Dict[str, T.Union[str, int, float, None]]

  LOG = logging.getLogger(__name__)
- MAPILLARY_DISABLE_API_LOGGING = os.getenv("MAPILLARY_DISABLE_API_LOGGING")
- MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN = os.getenv(
-     "MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN"
- )
- MAPILLARY__EXPERIMENTAL_ENABLE_IMU = os.getenv("MAPILLARY__EXPERIMENTAL_ENABLE_IMU")
- CAMM_CONVERTABLES = {FileType.CAMM, FileType.BLACKVUE, FileType.GOPRO}


- class UploadError(Exception):
-     def __init__(self, inner_ex) -> None:
-         self.inner_ex = inner_ex
-         super().__init__(str(inner_ex))
+ class UploadedAlready(uploader.SequenceError):
+     pass


- def _load_validate_metadatas_from_desc_path(
-     desc_path: T.Optional[str], import_paths: T.Sequence[Path]
- ) -> T.List[types.Metadata]:
-     is_default_desc_path = False
-     if desc_path is None:
-         is_default_desc_path = True
-         if len(import_paths) == 1 and import_paths[0].is_dir():
-             desc_path = str(
-                 import_paths[0].joinpath(constants.IMAGE_DESCRIPTION_FILENAME)
-             )
-         else:
-             if 1 < len(import_paths):
-                 raise exceptions.MapillaryBadParameterError(
-                     "The description path must be specified (with --desc_path) when uploading multiple paths",
-                 )
-             else:
-                 raise exceptions.MapillaryBadParameterError(
-                     "The description path must be specified (with --desc_path) when uploading a single file",
-                 )
+ def upload(
+     import_path: Path | T.Sequence[Path],
+     user_items: config.UserItem,
+     desc_path: str | None = None,
+     _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
+     reupload: bool = False,
+     dry_run: bool = False,
+     nofinish: bool = False,
+     noresume: bool = False,
+     skip_subfolders: bool = False,
+ ) -> None:
+     LOG.info("==> Uploading...")

-     descs: T.List[types.DescriptionOrError] = []
+     import_paths = _normalize_import_paths(import_path)

-     if desc_path == "-":
-         try:
-             descs = json.load(sys.stdin)
-         except json.JSONDecodeError as ex:
-             raise exceptions.MapillaryInvalidDescriptionFile(
-                 f"Invalid JSON stream from stdin: {ex}"
-             )
-     else:
-         if not os.path.isfile(desc_path):
-             if is_default_desc_path:
-                 raise exceptions.MapillaryFileNotFoundError(
-                     f"Description file {desc_path} not found. Has the directory been processed yet?"
-                 )
-             else:
-                 raise exceptions.MapillaryFileNotFoundError(
-                     f"Description file {desc_path} not found"
-                 )
-         with open(desc_path) as fp:
-             try:
-                 descs = json.load(fp)
-             except json.JSONDecodeError as ex:
-                 raise exceptions.MapillaryInvalidDescriptionFile(
-                     f"Invalid JSON file {desc_path}: {ex}"
-                 )
+     metadatas = _load_descs(_metadatas_from_process, import_paths, desc_path)

-     # the descs load from stdin or json file may contain invalid entries
-     validated_descs = [
-         types.validate_and_fail_desc(desc)
-         for desc in descs
-         # skip error descriptions
-         if "error" not in desc
-     ]
+     jsonschema.validate(instance=user_items, schema=config.UserItemSchema)
+
+     # Setup the emitter -- the order matters here

-     # throw if we found any invalid descs
-     invalid_descs = [desc for desc in validated_descs if "error" in desc]
-     if invalid_descs:
-         for desc in invalid_descs:
-             LOG.error("Invalid description entry: %s", json.dumps(desc))
-         raise exceptions.MapillaryInvalidDescriptionFile(
-             f"Found {len(invalid_descs)} invalid descriptions"
+     emitter = uploader.EventEmitter()
+
+     # Check duplications first
+     if not _is_history_disabled(dry_run):
+         upload_run_params: JSONDict = {
+             # Null if multiple paths provided
+             "import_path": str(import_path) if isinstance(import_path, Path) else None,
+             "organization_key": user_items.get("MAPOrganizationKey"),
+             "user_key": user_items.get("MAPSettingsUserKey"),
+             "version": VERSION,
+             "run_at": time.time(),
+         }
+         _setup_history(
+             emitter, upload_run_params, metadatas, reupload=reupload, nofinish=nofinish
          )

-     # validated_descs should contain no errors
-     return [
-         types.from_desc(T.cast(types.Description, desc)) for desc in validated_descs
-     ]
+     # Set up tdqm
+     _setup_tdqm(emitter)
+
+     # Now stats is empty but it will collect during ALL uploads
+     stats = _setup_api_stats(emitter)

- def zip_images(
-     import_path: Path,
-     zip_dir: Path,
-     desc_path: T.Optional[str] = None,
- ):
+     # Send the progress via IPC, and log the progress in debug mode
+     _setup_ipc(emitter)
+
+     mly_uploader = uploader.Uploader(
+         uploader.UploadOptions(
+             user_items,
+             dry_run=dry_run,
+             nofinish=nofinish,
+             noresume=noresume,
+         ),
+         emitter=emitter,
+     )
+
+     results = _gen_upload_everything(
+         mly_uploader, metadatas, import_paths, skip_subfolders
+     )
+
+     upload_successes = 0
+     upload_errors: list[Exception] = []
+
+     # The real uploading happens sequentially here
+     try:
+         for _, result in results:
+             if result.error is not None:
+                 upload_error = _continue_or_fail(result.error)
+                 log_exception(upload_error)
+                 upload_errors.append(upload_error)
+             else:
+                 upload_successes += 1
+
+     except Exception as ex:
+         # Fatal error: log and raise
+         _api_logging_failed(_summarize(stats), ex, dry_run=dry_run)
+         raise ex
+
+     except KeyboardInterrupt:
+         LOG.info("Upload interrupted by user...")
+
+     else:
+         _api_logging_finished(_summarize(stats), dry_run=dry_run)
+
+     finally:
+         # We collected stats after every upload is finished
+         assert upload_successes == len(stats), (
+             f"Expect {upload_successes} success but got {stats}"
+         )
+         _show_upload_summary(stats, upload_errors)
+
+
+ def zip_images(import_path: Path, zip_dir: Path, desc_path: str | None = None):
      if not import_path.is_dir():
          raise exceptions.MapillaryFileNotFoundError(
              f"Import directory not found: {import_path}"
          )

-     metadatas = _load_validate_metadatas_from_desc_path(desc_path, [import_path])
+     metadatas = _load_valid_metadatas_from_desc_path([import_path], desc_path)

      if not metadatas:
          LOG.warning("No images or videos found in %s", desc_path)
@@ -137,71 +146,99 @@ def zip_images(
          metadata for metadata in metadatas if isinstance(metadata, types.ImageMetadata)
      ]

-     uploader.zip_images(image_metadatas, zip_dir)
+     uploader.ZipUploader.zip_images(image_metadatas, zip_dir)


- def fetch_user_items(
-     user_name: T.Optional[str] = None, organization_key: T.Optional[str] = None
- ) -> types.UserItem:
-     if user_name is None:
-         all_user_items = config.list_all_users()
-         if not all_user_items:
-             raise exceptions.MapillaryBadParameterError(
-                 "No Mapillary account found. Add one with --user_name"
-             )
-         if len(all_user_items) == 1:
-             user_items = all_user_items[0]
-         else:
-             raise exceptions.MapillaryBadParameterError(
-                 "Found multiple Mapillary accounts. Please specify one with --user_name"
-             )
+ def log_exception(ex: Exception) -> None:
+     if LOG.getEffectiveLevel() <= logging.DEBUG:
+         exc_info = ex
      else:
-         user_items = authenticate.authenticate_user(user_name)
-
-     if organization_key is not None:
-         resp = api_v4.fetch_organization(
-             user_items["user_upload_token"], organization_key
+         exc_info = None
+
+     exc_name = ex.__class__.__name__
+
+     if isinstance(ex, UploadedAlready):
+         LOG.info(f"{exc_name}: {ex}")
+     elif isinstance(ex, requests.HTTPError):
+         LOG.error(f"{exc_name}: {api_v4.readable_http_error(ex)}", exc_info=exc_info)
+     elif isinstance(ex, api_v4.HTTPContentError):
+         LOG.error(
+             f"{exc_name}: {ex}: {api_v4.readable_http_response(ex.response)}",
+             exc_info=exc_info,
          )
-         org = resp.json()
-         LOG.info("Uploading to organization: %s", json.dumps(org))
-         user_items = T.cast(
-             types.UserItem, {**user_items, "MAPOrganizationKey": organization_key}
-         )
-     return user_items
+     else:
+         LOG.error(f"{exc_name}: {ex}", exc_info=exc_info)
+

+ def _is_history_disabled(dry_run: bool) -> bool:
+     # There is no way to read/write history if the path is not set
+     if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
+         return True

- def _setup_cancel_due_to_duplication(emitter: uploader.EventEmitter) -> None:
+     if dry_run:
+         # When dry_run mode is on, we disable history by default
+         # However, we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
+         # and when it is on, we enable history regardless of dry_run
+         if constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN:
+             return False
+         else:
+             return True
+
+     return False
+
+
+ def _setup_history(
+     emitter: uploader.EventEmitter,
+     upload_run_params: JSONDict,
+     metadatas: list[types.Metadata],
+     reupload: bool,
+     nofinish: bool,
+ ) -> None:
      @emitter.on("upload_start")
-     def upload_start(payload: uploader.Progress):
-         md5sum = payload["md5sum"]
-         if history.is_uploaded(md5sum):
+     def check_duplication(payload: uploader.Progress):
+         md5sum = payload.get("sequence_md5sum")
+         assert md5sum is not None, f"md5sum has to be set for {payload}"
+
+         record = history.read_history_record(md5sum)
+
+         if record is not None:
              sequence_uuid = payload.get("sequence_uuid")
+             history_desc_path = history.history_desc_path(md5sum)
+             uploaded_at = record.get("summary", {}).get("upload_end_time", None)
+
              if sequence_uuid is None:
                  basename = os.path.basename(payload.get("import_path", ""))
-                 LOG.info(
-                     "File %s has been uploaded already. Check the upload history at %s",
-                     basename,
-                     history.history_desc_path(md5sum),
-                 )
+                 name = f"file {basename}"
+
              else:
-                 LOG.info(
-                     "Sequence %s has been uploaded already. Check the upload history at %s",
-                     sequence_uuid,
-                     history.history_desc_path(md5sum),
-                 )
-         raise uploader.UploadCancelled()
+                 name = f"sequence {sequence_uuid}"

+             if reupload:
+                 if uploaded_at is not None:
+                     LOG.info(
+                         f"Reuploading {name}: previously uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                     )
+                 else:
+                     LOG.info(
+                         f"Reuploading {name}: already uploaded, see {history_desc_path}"
+                     )
+             else:
+                 if uploaded_at is not None:
+                     msg = f"Skipping {name}: previously uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                 else:
+                     msg = f"Skipping {name}: already uploaded, see {history_desc_path}"
+                 raise UploadedAlready(msg)

- def _setup_write_upload_history(
-     emitter: uploader.EventEmitter,
-     params: JSONDict,
-     metadatas: T.Optional[T.List[types.Metadata]] = None,
- ) -> None:
      @emitter.on("upload_finished")
-     def upload_finished(payload: uploader.Progress):
+     def write_history(payload: uploader.Progress):
+         if nofinish:
+             return
+
          sequence_uuid = payload.get("sequence_uuid")
-         md5sum = payload["md5sum"]
-         if sequence_uuid is None or metadatas is None:
+         md5sum = payload.get("sequence_md5sum")
+         assert md5sum is not None, f"md5sum has to be set for {payload}"
+
+         if sequence_uuid is None:
              sequence = None
          else:
              sequence = [
@@ -211,22 +248,20 @@ def _setup_write_upload_history(
                  and metadata.MAPSequenceUUID == sequence_uuid
              ]
              sequence.sort(key=lambda metadata: metadata.sort_key())
+
          try:
              history.write_history(
-                 md5sum,
-                 params,
-                 T.cast(JSONDict, payload),
-                 sequence,
+                 md5sum, upload_run_params, T.cast(JSONDict, payload), sequence
              )
          except OSError:
              LOG.warning("Error writing upload history %s", md5sum, exc_info=True)


  def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
-     upload_pbar: T.Optional[tqdm] = None
+     upload_pbar: tqdm | None = None

-     @emitter.on("upload_fetch_offset")
-     def upload_fetch_offset(payload: uploader.Progress) -> None:
+     @emitter.on("upload_start")
+     def upload_start(payload: uploader.Progress) -> None:
          nonlocal upload_pbar

          if upload_pbar is not None:
@@ -234,30 +269,55 @@ def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
          nth = payload["sequence_idx"] + 1
          total = payload["total_sequence_count"]
-         import_path: T.Optional[str] = payload.get("import_path")
+         import_path: str | None = payload.get("import_path")
          filetype = payload.get("file_type", "unknown").upper()
          if import_path is None:
-             _desc = f"Uploading {filetype} ({nth}/{total})"
+             desc = f"Uploading {filetype} ({nth}/{total})"
          else:
-             _desc = (
+             desc = (
                  f"Uploading {filetype} {os.path.basename(import_path)} ({nth}/{total})"
              )
          upload_pbar = tqdm(
              total=payload["entity_size"],
-             desc=_desc,
+             desc=desc,
              unit="B",
              unit_scale=True,
              unit_divisor=1024,
-             initial=payload["offset"],
+             initial=payload.get("offset", 0),
              disable=LOG.getEffectiveLevel() <= logging.DEBUG,
          )

+     @emitter.on("upload_fetch_offset")
+     def upload_fetch_offset(payload: uploader.Progress) -> None:
+         assert upload_pbar is not None, (
+             "progress_bar must be initialized in upload_start"
+         )
+         begin_offset = payload.get("begin_offset", 0)
+         if begin_offset is not None and begin_offset > 0:
+             if upload_pbar.total is not None:
+                 progress_percent = (begin_offset / upload_pbar.total) * 100
+                 upload_pbar.write(
+                     f"Resuming upload at {begin_offset=} ({progress_percent:3.0f}%)",
+                     file=sys.stderr,
+                 )
+             else:
+                 upload_pbar.write(
+                     f"Resuming upload at {begin_offset=}", file=sys.stderr
+                 )
+             upload_pbar.reset()
+             upload_pbar.update(begin_offset)
+             upload_pbar.refresh()
+
      @emitter.on("upload_progress")
      def upload_progress(payload: uploader.Progress) -> None:
-         assert upload_pbar is not None, "progress_bar must be initialized"
+         assert upload_pbar is not None, (
+             "progress_bar must be initialized in upload_start"
+         )
          upload_pbar.update(payload["chunk_size"])
+         upload_pbar.refresh()

      @emitter.on("upload_end")
+     @emitter.on("upload_failed")
      def upload_end(_: uploader.Progress) -> None:
          nonlocal upload_pbar
          if upload_pbar:
@@ -269,25 +329,46 @@ def _setup_ipc(emitter: uploader.EventEmitter):
      @emitter.on("upload_start")
      def upload_start(payload: uploader.Progress):
          type: uploader.EventName = "upload_start"
-         LOG.debug("Sending %s via IPC: %s", type, payload)
+         LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
          ipc.send(type, payload)

      @emitter.on("upload_fetch_offset")
      def upload_fetch_offset(payload: uploader.Progress) -> None:
          type: uploader.EventName = "upload_fetch_offset"
-         LOG.debug("Sending %s via IPC: %s", type, payload)
+         LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
          ipc.send(type, payload)

      @emitter.on("upload_progress")
      def upload_progress(payload: uploader.Progress):
          type: uploader.EventName = "upload_progress"
-         LOG.debug("Sending %s via IPC: %s", type, payload)
+
+         if LOG.getEffectiveLevel() <= logging.DEBUG:
+             # In debug mode, we want to see the progress every 30 seconds
+             # instead of every chunk (which is too verbose)
+             INTERVAL_SECONDS = 30
+             now = time.time()
+             last_upload_progress_debug_at: float | None = T.cast(T.Dict, payload).get(
+                 "_last_upload_progress_debug_at"
+             )
+             if (
+                 last_upload_progress_debug_at is None
+                 or last_upload_progress_debug_at + INTERVAL_SECONDS < now
+             ):
+                 LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
+                 T.cast(T.Dict, payload)["_last_upload_progress_debug_at"] = now
+
          ipc.send(type, payload)

      @emitter.on("upload_end")
      def upload_end(payload: uploader.Progress) -> None:
          type: uploader.EventName = "upload_end"
-         LOG.debug("Sending %s via IPC: %s", type, payload)
+         LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
+         ipc.send(type, payload)
+
+     @emitter.on("upload_failed")
+     def upload_failed(payload: uploader.Progress) -> None:
+         type: uploader.EventName = "upload_failed"
+         LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
          ipc.send(type, payload)


@@ -309,12 +390,17 @@ class _APIStats(uploader.Progress, total=False):


  def _setup_api_stats(emitter: uploader.EventEmitter):
-     all_stats: T.List[_APIStats] = []
+     all_stats: list[_APIStats] = []

      @emitter.on("upload_start")
      def collect_start_time(payload: _APIStats) -> None:
-         payload["upload_start_time"] = time.time()
+         now = time.time()
+         payload["upload_start_time"] = now
          payload["upload_total_time"] = 0
+         # These filed should be initialized in upload events like "upload_fetch_offset"
+         # but since we disabled them for uploading images, so we initialize them here
+         payload["upload_last_restart_time"] = now
+         payload["upload_first_offset"] = 0

      @emitter.on("upload_fetch_offset")
      def collect_restart_time(payload: _APIStats) -> None:
@@ -337,15 +423,18 @@ def _setup_api_stats(emitter: uploader.EventEmitter):
          now = time.time()
          payload["upload_end_time"] = now
          payload["upload_total_time"] += now - payload["upload_last_restart_time"]
+
+     @emitter.on("upload_finished")
+     def append_stats(payload: _APIStats) -> None:
          all_stats.append(payload)

      return all_stats


- def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
+ def _summarize(stats: T.Sequence[_APIStats]) -> dict:
      total_image_count = sum(s.get("sequence_image_count", 0) for s in stats)
      total_uploaded_sequence_count = len(stats)
-     # note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count
+     # Note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count

      total_uploaded_size = sum(
          s["entity_size"] - s.get("upload_first_offset", 0) for s in stats
@@ -363,6 +452,7 @@ def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:

      upload_summary = {
          "images": total_image_count,
+         # TODO: rename sequences to total uploads
          "sequences": total_uploaded_sequence_count,
          "size": round(total_entity_size_mb, 4),
          "uploaded_size": round(total_uploaded_size_mb, 4),
@@ -373,129 +463,126 @@ def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
      return upload_summary


- def _show_upload_summary(stats: T.Sequence[_APIStats]):
-     grouped: T.Dict[str, T.List[_APIStats]] = {}
-     for stat in stats:
-         grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)
+ def _show_upload_summary(stats: T.Sequence[_APIStats], errors: T.Sequence[Exception]):
+     LOG.info("==> Upload summary")
+
+     errors_by_type: dict[type[Exception], list[Exception]] = {}
+     for error in errors:
+         errors_by_type.setdefault(type(error), []).append(error)

-     for file_type, typed_stats in grouped.items():
-         if file_type == FileType.IMAGE.value:
+     for error_type, error_list in errors_by_type.items():
+         if error_type is UploadedAlready:
              LOG.info(
-                 "%8d %s sequences uploaded",
-                 len(typed_stats),
-                 file_type.upper(),
+                 f"Skipped {len(error_list)} already uploaded sequences (use --reupload to force re-upload)",
              )
          else:
-             LOG.info(
-                 "%8d %s files uploaded",
-                 len(typed_stats),
-                 file_type.upper(),
-             )
+             LOG.info(f"{len(error_list)} uploads failed due to {error_type.__name__}")

-     summary = _summarize(stats)
-     LOG.info("%8.1fM data in total", summary["size"])
-     LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
-     LOG.info("%8.1fs upload time", summary["time"])
+     if stats:
+         grouped: dict[str, list[_APIStats]] = {}
+         for stat in stats:
+             grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)

+         for file_type, typed_stats in grouped.items():
+             if file_type == FileType.IMAGE.value:
+                 LOG.info(f"{len(typed_stats)} sequences uploaded")
+             else:
+                 LOG.info(f"{len(typed_stats)} {file_type} uploaded")

- def _api_logging_finished(summary: T.Dict):
-     if MAPILLARY_DISABLE_API_LOGGING:
+         summary = _summarize(stats)
+         LOG.info(f"{humanize.naturalsize(summary['size'] * 1024 * 1024)} read in total")
+         LOG.info(
+             f"{humanize.naturalsize(summary['uploaded_size'] * 1024 * 1024)} uploaded"
+         )
+         LOG.info(f"{summary['time']} upload time")
+     else:
+         LOG.info("Nothing uploaded. Bye.")
+
+
+ def _api_logging_finished(summary: dict, dry_run: bool = False):
+     if dry_run:
+         return
+
+     if constants.MAPILLARY_DISABLE_API_LOGGING:
          return

      action: api_v4.ActionType = "upload_finished_upload"
-     LOG.debug("API Logging for action %s: %s", action, summary)
      try:
          api_v4.log_event(action, summary)
      except requests.HTTPError as exc:
          LOG.warning(
-             "HTTPError from API Logging for action %s: %s",
-             action,
-             api_v4.readable_http_error(exc),
+             f"HTTPError from logging action {action}: {api_v4.readable_http_error(exc)}"
          )
      except Exception:
-         LOG.warning("Error from API Logging for action %s", action, exc_info=True)
+         LOG.warning(f"Error from logging action {action}", exc_info=True)


- def _api_logging_failed(payload: T.Dict, exc: Exception):
-     if MAPILLARY_DISABLE_API_LOGGING:
+ def _api_logging_failed(payload: dict, exc: Exception, dry_run: bool = False):
+     if dry_run:
+         return
+
+     if constants.MAPILLARY_DISABLE_API_LOGGING:
          return

      payload_with_reason = {**payload, "reason": exc.__class__.__name__}
      action: api_v4.ActionType = "upload_failed_upload"
-     LOG.debug("API Logging for action %s: %s", action, payload)
      try:
          api_v4.log_event(action, payload_with_reason)
      except requests.HTTPError as exc:
          LOG.warning(
-             "HTTPError from API Logging for action %s: %s",
-             action,
-             api_v4.readable_http_error(exc),
+             f"HTTPError from logging action {action}: {api_v4.readable_http_error(exc)}"
          )
      except Exception:
-         LOG.warning("Error from API Logging for action %s", action, exc_info=True)
-
+         LOG.warning(f"Error from logging action {action}", exc_info=True)

- def _load_descs(
-     _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]],
-     desc_path: T.Optional[str],
-     import_paths: T.Sequence[Path],
- ) -> T.List[types.Metadata]:
-     metadatas: T.List[types.Metadata]
-
-     if _metadatas_from_process is not None:
-         metadatas = [
-             metadata
-             for metadata in _metadatas_from_process
-             if not isinstance(metadata, types.ErrorMetadata)
-         ]
-     else:
-         metadatas = _load_validate_metadatas_from_desc_path(desc_path, import_paths)
+ _M = T.TypeVar("_M", bound=types.Metadata)

-     # Make sure all metadatas have sequence uuid assigned
-     # It is used to find the right sequence when writing upload history
-     missing_sequence_uuid = str(uuid.uuid4())
-     for metadata in metadatas:
-         if isinstance(metadata, types.ImageMetadata):
-             if metadata.MAPSequenceUUID is None:
-                 metadata.MAPSequenceUUID = missing_sequence_uuid

-     for metadata in metadatas:
-         assert isinstance(metadata, (types.ImageMetadata, types.VideoMetadata))
-         if isinstance(metadata, types.ImageMetadata):
-             assert metadata.MAPSequenceUUID is not None
+ def _find_metadata_with_filename_existed_in(
+     metadatas: T.Iterable[_M], paths: T.Iterable[Path]
+ ) -> list[_M]:
+     resolved_image_paths = set(p.resolve() for p in paths)
+     return [d for d in metadatas if d.filename.resolve() in resolved_image_paths]

-     return metadatas

+ def _gen_upload_everything(
+     mly_uploader: uploader.Uploader,
+     metadatas: T.Sequence[types.Metadata],
+     import_paths: T.Sequence[Path],
+     skip_subfolders: bool,
+ ):
+     # Upload images
+     image_metadatas = _find_metadata_with_filename_existed_in(
+         (m for m in metadatas if isinstance(m, types.ImageMetadata)),
+         utils.find_images(import_paths, skip_subfolders=skip_subfolders),
+     )
+     yield from uploader.ImageSequenceUploader.upload_images(
+         mly_uploader, image_metadatas
+     )

- _M = T.TypeVar("_M", bound=types.Metadata)
+     # Upload videos
+     video_metadatas = _find_metadata_with_filename_existed_in(
+         (m for m in metadatas if isinstance(m, types.VideoMetadata)),
+         utils.find_videos(import_paths, skip_subfolders=skip_subfolders),
+     )
+     yield from uploader.VideoUploader.upload_videos(mly_uploader, video_metadatas)

+     # Upload zip files
+     zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
+     yield from uploader.ZipUploader.upload_zipfiles(mly_uploader, zip_paths)

- def _find_metadata_with_filename_existed_in(
-     metadatas: T.Sequence[_M], paths: T.Sequence[Path]
- ) -> T.List[_M]:
-     resolved_image_paths = set(p.resolve() for p in paths)
-     return [d for d in metadatas if d.filename.resolve() in resolved_image_paths]

+ def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> list[Path]:
+     import_paths: list[Path]

- def upload(
-     import_path: T.Union[Path, T.Sequence[Path]],
-     desc_path: T.Optional[str] = None,
-     _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]] = None,
-     user_name: T.Optional[str] = None,
-     organization_key: T.Optional[str] = None,
-     dry_run=False,
-     skip_subfolders=False,
- ) -> None:
-     import_paths: T.Sequence[Path]
      if isinstance(import_path, Path):
          import_paths = [import_path]
      else:
          assert isinstance(import_path, list)
          import_paths = import_path
-     import_paths = list(utils.deduplicate_paths(import_paths))

-     if not import_paths:
-         return
+     import_paths = list(utils.deduplicate_paths(import_paths))

      # Check and fail early
      for path in import_paths:
@@ -504,176 +591,113 @@ def upload(
              f"Import file or directory not found: {path}"
          )

-     metadatas = _load_descs(_metadatas_from_process, desc_path, import_paths)
+     return import_paths

-     user_items = fetch_user_items(user_name, organization_key)

-     # Setup the emitter -- the order matters here
+ def _continue_or_fail(ex: Exception) -> Exception:
+     """
+     Wrap the exception, or re-raise if it is a fatal error (i.e. there is no point to continue)
+     """

-     emitter = uploader.EventEmitter()
+     if isinstance(ex, uploader.SequenceError):
+         return ex

-     enable_history = history.MAPILLARY_UPLOAD_HISTORY_PATH and (
-         not dry_run or MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN == "YES"
-     )
+     # Certain files not found or no permission
+     if isinstance(ex, (FileNotFoundError, PermissionError)):
+         return ex

-     # Put it first one to cancel early
-     if enable_history:
-         _setup_cancel_due_to_duplication(emitter)
+     # Certain metadatas are not valid
+     if isinstance(ex, exceptions.MapillaryMetadataValidationError):
+         return ex

-     # This one set up tdqm
-     _setup_tdqm(emitter)
+     # Fatal error: this is thrown after all retries
+     if isinstance(ex, requests.ConnectionError):
+         raise exceptions.MapillaryUploadConnectionError(str(ex)) from ex

-     # Now stats is empty but it will collect during upload
-     stats = _setup_api_stats(emitter)
+     # Fatal error: this is thrown after all retries
+     if isinstance(ex, requests.Timeout):
+         raise exceptions.MapillaryUploadTimeoutError(str(ex)) from ex

-     # Send the progress as well as the log stats collected above
-     _setup_ipc(emitter)
+     # Fatal error:
+     if isinstance(ex, requests.HTTPError) and isinstance(
+         ex.response, requests.Response
+     ):
+         if api_v4.is_auth_error(ex.response):
+             raise exceptions.MapillaryUploadUnauthorizedError(
+                 api_v4.extract_auth_error_message(ex.response)
+             ) from ex
+         raise ex

-     params: JSONDict = {
-         # null if multiple paths provided
-         "import_path": str(import_path) if isinstance(import_path, Path) else None,
-         "organization_key": user_items.get("MAPOrganizationKey"),
-         "user_key": user_items.get("MAPSettingsUserKey"),
-         "version": VERSION,
-     }
+     raise ex

-     if enable_history:
-         _setup_write_upload_history(emitter, params, metadatas)

-     mly_uploader = uploader.Uploader(
-         user_items,
-         emitter=emitter,
-         dry_run=dry_run,
-         chunk_size=int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-     )
+ def _load_descs(
+     _metadatas_from_process: T.Sequence[types.MetadataOrError] | None,
+     import_paths: T.Sequence[Path],
+     desc_path: str | None,
+ ) -> list[types.Metadata]:
+     metadatas: list[types.Metadata]

-     try:
-         image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
-         # find descs that match the image paths from the import paths
-         image_metadatas = [
-             metadata
-             for metadata in (metadatas or [])
-             if isinstance(metadata, types.ImageMetadata)
-         ]
-         specified_image_metadatas = _find_metadata_with_filename_existed_in(
-             image_metadatas, image_paths
-         )
-         if specified_image_metadatas:
-             try:
-                 clusters = mly_uploader.upload_images(
-                     specified_image_metadatas,
-                     event_payload={"file_type": FileType.IMAGE.value},
-                 )
-             except Exception as ex:
-                 raise UploadError(ex) from ex
-
-             if clusters:
-                 LOG.debug("Uploaded to cluster: %s", clusters)
-
-         video_paths = utils.find_videos(import_paths, skip_subfolders=skip_subfolders)
-         video_metadatas = [
-             metadata
-             for metadata in (metadatas or [])
-             if isinstance(metadata, types.VideoMetadata)
-         ]
-         specified_video_metadatas = _find_metadata_with_filename_existed_in(
-             video_metadatas, video_paths
-         )
-         for idx, video_metadata in enumerate(specified_video_metadatas):
-             video_metadata.update_md5sum()
-             assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
-
-             # extract telemetry measurements from GoPro videos
-             telemetry_measurements: T.List[telemetry.TelemetryMeasurement] = []
-             if MAPILLARY__EXPERIMENTAL_ENABLE_IMU == "YES":
-                 if video_metadata.filetype is FileType.GOPRO:
-                     with video_metadata.filename.open("rb") as fp:
-                         telemetry_data = gpmf_parser.extract_telemetry_data(fp)
-                     if telemetry_data:
-                         telemetry_measurements.extend(telemetry_data.accl)
-                         telemetry_measurements.extend(telemetry_data.gyro)
-                         telemetry_measurements.extend(telemetry_data.magn)
-                     telemetry_measurements.sort(key=lambda m: m.time)
-
-             generator = camm_builder.camm_sample_generator2(
-                 video_metadata, telemetry_measurements=telemetry_measurements
-             )
+     if _metadatas_from_process is not None:
+         metadatas, _ = types.separate_errors(_metadatas_from_process)
+     else:
+         metadatas = _load_valid_metadatas_from_desc_path(import_paths, desc_path)

-             with video_metadata.filename.open("rb") as src_fp:
-                 camm_fp = simple_mp4_builder.transform_mp4(src_fp, generator)
-                 event_payload: uploader.Progress = {
-                     "total_sequence_count": len(specified_video_metadatas),
-                     "sequence_idx": idx,
-                     "file_type": video_metadata.filetype.value,
-                     "import_path": str(video_metadata.filename),
-                 }
-                 try:
-                     cluster_id = mly_uploader.upload_stream(
-                         T.cast(T.BinaryIO, camm_fp),
-                         upload_api_v4.ClusterFileType.CAMM,
-                         video_metadata.md5sum,
-                         event_payload=event_payload,
-                     )
-                 except Exception as ex:
-                     raise UploadError(ex) from ex
-                 LOG.debug("Uploaded to cluster: %s", cluster_id)
-
-         zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
-         _upload_zipfiles(mly_uploader, zip_paths)
-
-     except UploadError as ex:
-         inner_ex = ex.inner_ex
-
-         if not dry_run:
-             _api_logging_failed(_summarize(stats), inner_ex)
-
-         if isinstance(inner_ex, requests.ConnectionError):
-             raise exceptions.MapillaryUploadConnectionError(str(inner_ex)) from inner_ex
-
-         if isinstance(inner_ex, requests.Timeout):
-             raise exceptions.MapillaryUploadTimeoutError(str(inner_ex)) from inner_ex
-
-         if isinstance(inner_ex, requests.HTTPError) and isinstance(
-             inner_ex.response, requests.Response
-         ):
-             if inner_ex.response.status_code in [400, 401]:
-                 try:
-                     error_body = inner_ex.response.json()
-                 except Exception:
-                     error_body = {}
-                 debug_info = error_body.get("debug_info", {})
-                 if debug_info.get("type") in ["NotAuthorizedError"]:
-                     raise exceptions.MapillaryUploadUnauthorizedError(
-                         debug_info.get("message")
-                     ) from inner_ex
-             raise inner_ex
-
-         raise inner_ex
+     # Make sure all metadatas have sequence uuid assigned
+     # It is used to find the right sequence when writing upload history
+     missing_sequence_uuid = str(uuid.uuid4())
+     for metadata in metadatas:
+         if isinstance(metadata, types.ImageMetadata):
+             if metadata.MAPSequenceUUID is None:
+                 metadata.MAPSequenceUUID = missing_sequence_uuid

-     if stats:
-         if not dry_run:
-             _api_logging_finished(_summarize(stats))
-         _show_upload_summary(stats)
-     else:
-         LOG.info("Nothing uploaded. Bye.")
+     for metadata in metadatas:
+         assert isinstance(metadata, (types.ImageMetadata, types.VideoMetadata))
+         if isinstance(metadata, types.ImageMetadata):
+             assert metadata.MAPSequenceUUID is not None

+     return metadatas

- def _upload_zipfiles(
-     mly_uploader: uploader.Uploader,
-     zip_paths: T.Sequence[Path],
- ) -> None:
-     for idx, zip_path in enumerate(zip_paths):
-         event_payload: uploader.Progress = {
-             "total_sequence_count": len(zip_paths),
-             "sequence_idx": idx,
-             "file_type": FileType.ZIP.value,
-             "import_path": str(zip_path),
-         }
+
+ def _load_valid_metadatas_from_desc_path(
+     import_paths: T.Sequence[Path], desc_path: str | None
+ ) -> list[types.Metadata]:
+     if desc_path is None:
+         desc_path = _find_desc_path(import_paths)
+
+     if desc_path == "-":
          try:
-             cluster_id = mly_uploader.upload_zipfile(
-                 zip_path, event_payload=event_payload
+             metadatas = DescriptionJSONSerializer.deserialize_stream(sys.stdin.buffer)
+         except json.JSONDecodeError as ex:
+             raise exceptions.MapillaryInvalidDescriptionFile(
+                 f"Invalid JSON stream from {desc_path}: {ex}"
+             ) from ex
+
+     else:
+         if not os.path.isfile(desc_path):
+             raise exceptions.MapillaryFileNotFoundError(
+                 f"Description file not found: {desc_path}"
              )
-         except Exception as ex:
-             raise UploadError(ex) from ex
+         with open(desc_path, "rb") as fp:
+             try:
+                 metadatas = DescriptionJSONSerializer.deserialize_stream(fp)
+             except json.JSONDecodeError as ex:
+                 raise exceptions.MapillaryInvalidDescriptionFile(
+                     f"Invalid JSON stream from {desc_path}: {ex}"
+                 ) from ex

-         LOG.debug("Uploaded to cluster: %s", cluster_id)
+     return metadatas
+
+
+ def _find_desc_path(import_paths: T.Sequence[Path]) -> str:
+     if len(import_paths) == 1 and import_paths[0].is_dir():
+         return str(import_paths[0].joinpath(constants.IMAGE_DESCRIPTION_FILENAME))
+
+     if 1 < len(import_paths):
+         raise exceptions.MapillaryBadParameterError(
+             "The description path must be specified (with --desc_path) when uploading multiple paths"
+         )
+     else:
+         raise exceptions.MapillaryBadParameterError(
+             "The description path must be specified (with --desc_path) when uploading a single file"
+         )