mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +287 -22
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +17 -8
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +408 -416
  61. mapillary_tools/upload_api_v4.py +172 -174
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
mapillary_tools/upload.py CHANGED
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import json
 import logging
 import os
@@ -7,146 +9,134 @@ import typing as T
 import uuid
 from pathlib import Path

+import humanize
+import jsonschema
 import requests
 from tqdm import tqdm

 from . import (
     api_v4,
-    authenticate,
     config,
     constants,
     exceptions,
     history,
     ipc,
-    telemetry,
     types,
-    upload_api_v4,
     uploader,
     utils,
     VERSION,
 )
-from .camm import camm_builder
-from .geotag import gpmf_parser
-from .mp4 import simple_mp4_builder
+from .serializer.description import DescriptionJSONSerializer
 from .types import FileType

 JSONDict = T.Dict[str, T.Union[str, int, float, None]]

 LOG = logging.getLogger(__name__)
-MAPILLARY_DISABLE_API_LOGGING = os.getenv("MAPILLARY_DISABLE_API_LOGGING")
-MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN = os.getenv(
-    "MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN"
-)
-MAPILLARY__EXPERIMENTAL_ENABLE_IMU = os.getenv("MAPILLARY__EXPERIMENTAL_ENABLE_IMU")
-CAMM_CONVERTABLES = {FileType.CAMM, FileType.BLACKVUE, FileType.GOPRO}


-class UploadError(Exception):
-    def __init__(self, inner_ex) -> None:
-        self.inner_ex = inner_ex
-        super().__init__(str(inner_ex))
+class UploadedAlready(uploader.SequenceError):
+    pass


-class UploadHTTPError(Exception):
-    pass
+def upload(
+    import_path: Path | T.Sequence[Path],
+    user_items: config.UserItem,
+    desc_path: str | None = None,
+    _metadatas_from_process: T.Sequence[types.MetadataOrError] | None = None,
+    reupload: bool = False,
+    dry_run: bool = False,
+    nofinish: bool = False,
+    noresume: bool = False,
+    skip_subfolders: bool = False,
+) -> None:
+    LOG.info("==> Uploading...")

+    import_paths = _normalize_import_paths(import_path)

-def wrap_http_exception(ex: requests.HTTPError):
-    req = ex.request
-    resp = ex.response
-    if isinstance(resp, requests.Response) and isinstance(req, requests.Request):
-        lines = [
-            f"{req.method} {resp.url}",
-            f"> HTTP Status: {resp.status_code}",
-            str(resp.content),
-        ]
-    else:
-        lines = []
+    metadatas = _load_descs(_metadatas_from_process, import_paths, desc_path)

-    return UploadHTTPError("\n".join(lines))
+    jsonschema.validate(instance=user_items, schema=config.UserItemSchema)

+    # Setup the emitter -- the order matters here

-def _load_validate_metadatas_from_desc_path(
-    desc_path: T.Optional[str], import_paths: T.Sequence[Path]
-) -> T.List[types.Metadata]:
-    is_default_desc_path = False
-    if desc_path is None:
-        is_default_desc_path = True
-        if len(import_paths) == 1 and import_paths[0].is_dir():
-            desc_path = str(
-                import_paths[0].joinpath(constants.IMAGE_DESCRIPTION_FILENAME)
-            )
-        else:
-            if 1 < len(import_paths):
-                raise exceptions.MapillaryBadParameterError(
-                    "The description path must be specified (with --desc_path) when uploading multiple paths",
-                )
-            else:
-                raise exceptions.MapillaryBadParameterError(
-                    "The description path must be specified (with --desc_path) when uploading a single file",
-                )
+    emitter = uploader.EventEmitter()

-    descs: T.List[types.DescriptionOrError] = []
+    # Check duplications first
+    if not _is_history_disabled(dry_run):
+        upload_run_params: JSONDict = {
+            # Null if multiple paths provided
+            "import_path": str(import_path) if isinstance(import_path, Path) else None,
+            "organization_key": user_items.get("MAPOrganizationKey"),
+            "user_key": user_items.get("MAPSettingsUserKey"),
+            "version": VERSION,
+            "run_at": time.time(),
+        }
+        _setup_history(
+            emitter, upload_run_params, metadatas, reupload=reupload, nofinish=nofinish
+        )

-    if desc_path == "-":
-        try:
-            descs = json.load(sys.stdin)
-        except json.JSONDecodeError as ex:
-            raise exceptions.MapillaryInvalidDescriptionFile(
-                f"Invalid JSON stream from stdin: {ex}"
-            )
-    else:
-        if not os.path.isfile(desc_path):
-            if is_default_desc_path:
-                raise exceptions.MapillaryFileNotFoundError(
-                    f"Description file {desc_path} not found. Has the directory been processed yet?"
-                )
+    # Set up tdqm
+    _setup_tdqm(emitter)
+
+    # Now stats is empty but it will collect during ALL uploads
+    stats = _setup_api_stats(emitter)
+
+    # Send the progress via IPC, and log the progress in debug mode
+    _setup_ipc(emitter)
+
+    mly_uploader = uploader.Uploader(
+        uploader.UploadOptions(
+            user_items,
+            dry_run=dry_run,
+            nofinish=nofinish,
+            noresume=noresume,
+        ),
+        emitter=emitter,
+    )
+
+    results = _gen_upload_everything(
+        mly_uploader, metadatas, import_paths, skip_subfolders
+    )
+
+    upload_successes = 0
+    upload_errors: list[Exception] = []
+
+    # The real uploading happens sequentially here
+    try:
+        for _, result in results:
+            if result.error is not None:
+                upload_error = _continue_or_fail(result.error)
+                log_exception(upload_error)
+                upload_errors.append(upload_error)
             else:
-                raise exceptions.MapillaryFileNotFoundError(
-                    f"Description file {desc_path} not found"
-                )
-        with open(desc_path) as fp:
-            try:
-                descs = json.load(fp)
-            except json.JSONDecodeError as ex:
-                raise exceptions.MapillaryInvalidDescriptionFile(
-                    f"Invalid JSON file {desc_path}: {ex}"
-                )
+                upload_successes += 1

-    # the descs load from stdin or json file may contain invalid entries
-    validated_descs = [
-        types.validate_and_fail_desc(desc)
-        for desc in descs
-        # skip error descriptions
-        if "error" not in desc
-    ]
+    except Exception as ex:
+        # Fatal error: log and raise
+        _api_logging_failed(_summarize(stats), ex, dry_run=dry_run)
+        raise ex

-    # throw if we found any invalid descs
-    invalid_descs = [desc for desc in validated_descs if "error" in desc]
-    if invalid_descs:
-        for desc in invalid_descs:
-            LOG.error("Invalid description entry: %s", json.dumps(desc))
-        raise exceptions.MapillaryInvalidDescriptionFile(
-            f"Found {len(invalid_descs)} invalid descriptions"
-        )
+    except KeyboardInterrupt:
+        LOG.info("Upload interrupted by user...")

-    # validated_descs should contain no errors
-    return [
-        types.from_desc(T.cast(types.Description, desc)) for desc in validated_descs
-    ]
+    else:
+        _api_logging_finished(_summarize(stats), dry_run=dry_run)

+    finally:
+        # We collected stats after every upload is finished
+        assert upload_successes == len(stats), (
+            f"Expect {upload_successes} success but got {stats}"
+        )
+        _show_upload_summary(stats, upload_errors)

-def zip_images(
-    import_path: Path,
-    zip_dir: Path,
-    desc_path: T.Optional[str] = None,
-):
+
+def zip_images(import_path: Path, zip_dir: Path, desc_path: str | None = None):
     if not import_path.is_dir():
         raise exceptions.MapillaryFileNotFoundError(
             f"Import directory not found: {import_path}"
         )

-    metadatas = _load_validate_metadatas_from_desc_path(desc_path, [import_path])
+    metadatas = _load_valid_metadatas_from_desc_path([import_path], desc_path)

     if not metadatas:
         LOG.warning("No images or videos found in %s", desc_path)
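
The rewritten upload() above wires every concern (duplicate checks, progress bars, stats, IPC) as handlers on one event emitter before any bytes move, and registration order decides handler order. A minimal sketch of that pub/sub shape — this EventEmitter is a stand-in inferred from the @emitter.on(...) usage in this diff, not the real class in uploader.py:

    # Illustrative only: a tiny emitter matching the @emitter.on(...) calls
    # seen in this diff. The real implementation lives in uploader.py.
    import typing as T

    class EventEmitter:
        def __init__(self) -> None:
            self._handlers: dict[str, list[T.Callable]] = {}

        def on(self, event: str):
            def decorator(fn: T.Callable):
                self._handlers.setdefault(event, []).append(fn)
                return fn
            return decorator

        def emit(self, event: str, payload: dict) -> None:
            # Handlers fire in registration order, which is why upload()
            # registers the duplicate check before the progress bar.
            for fn in self._handlers.get(event, []):
                fn(payload)

    emitter = EventEmitter()

    @emitter.on("upload_start")
    def announce(payload: dict) -> None:
        print("starting", payload["import_path"])

    emitter.emit("upload_start", {"import_path": "a.mp4"})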
@@ -156,77 +146,99 @@ def zip_images
         metadata for metadata in metadatas if isinstance(metadata, types.ImageMetadata)
     ]

-    uploader.zip_images(image_metadatas, zip_dir)
+    uploader.ZipUploader.zip_images(image_metadatas, zip_dir)


-def fetch_user_items(
-    user_name: T.Optional[str] = None, organization_key: T.Optional[str] = None
-) -> types.UserItem:
-    if user_name is None:
-        all_user_items = config.list_all_users()
-        if not all_user_items:
-            raise exceptions.MapillaryBadParameterError(
-                "No Mapillary account found. Add one with --user_name"
-            )
-        if len(all_user_items) == 1:
-            user_items = all_user_items[0]
-        else:
-            raise exceptions.MapillaryBadParameterError(
-                "Found multiple Mapillary accounts. Please specify one with --user_name"
-            )
+def log_exception(ex: Exception) -> None:
+    if LOG.getEffectiveLevel() <= logging.DEBUG:
+        exc_info = ex
     else:
-        try:
-            user_items = authenticate.authenticate_user(user_name)
-        except requests.HTTPError as exc:
-            raise wrap_http_exception(exc) from exc
-
-    if organization_key is not None:
-        try:
-            resp = api_v4.fetch_organization(
-                user_items["user_upload_token"], organization_key
-            )
-        except requests.HTTPError as ex:
-            raise wrap_http_exception(ex) from ex
-        org = resp.json()
-        LOG.info("Uploading to organization: %s", json.dumps(org))
-        user_items = T.cast(
-            types.UserItem, {**user_items, "MAPOrganizationKey": organization_key}
+        exc_info = None
+
+    exc_name = ex.__class__.__name__
+
+    if isinstance(ex, UploadedAlready):
+        LOG.info(f"{exc_name}: {ex}")
+    elif isinstance(ex, requests.HTTPError):
+        LOG.error(f"{exc_name}: {api_v4.readable_http_error(ex)}", exc_info=exc_info)
+    elif isinstance(ex, api_v4.HTTPContentError):
+        LOG.error(
+            f"{exc_name}: {ex}: {api_v4.readable_http_response(ex.response)}",
+            exc_info=exc_info,
         )
-    return user_items
+    else:
+        LOG.error(f"{exc_name}: {ex}", exc_info=exc_info)
+

+def _is_history_disabled(dry_run: bool) -> bool:
+    # There is no way to read/write history if the path is not set
+    if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
+        return True

-def _setup_cancel_due_to_duplication(emitter: uploader.EventEmitter) -> None:
+    if dry_run:
+        # When dry_run mode is on, we disable history by default
+        # However, we need dry_run for tests, so we added MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
+        # and when it is on, we enable history regardless of dry_run
+        if constants.MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN:
+            return False
+        else:
+            return True
+
+    return False
+
+
+def _setup_history(
+    emitter: uploader.EventEmitter,
+    upload_run_params: JSONDict,
+    metadatas: list[types.Metadata],
+    reupload: bool,
+    nofinish: bool,
+) -> None:
     @emitter.on("upload_start")
-    def upload_start(payload: uploader.Progress):
-        md5sum = payload["md5sum"]
-        if history.is_uploaded(md5sum):
+    def check_duplication(payload: uploader.Progress):
+        md5sum = payload.get("sequence_md5sum")
+        assert md5sum is not None, f"md5sum has to be set for {payload}"
+
+        record = history.read_history_record(md5sum)
+
+        if record is not None:
             sequence_uuid = payload.get("sequence_uuid")
+            history_desc_path = history.history_desc_path(md5sum)
+            uploaded_at = record.get("summary", {}).get("upload_end_time", None)
+
             if sequence_uuid is None:
                 basename = os.path.basename(payload.get("import_path", ""))
-                LOG.info(
-                    "File %s has been uploaded already. Check the upload history at %s",
-                    basename,
-                    history.history_desc_path(md5sum),
-                )
+                name = f"file {basename}"
+
             else:
-                LOG.info(
-                    "Sequence %s has been uploaded already. Check the upload history at %s",
-                    sequence_uuid,
-                    history.history_desc_path(md5sum),
-                )
-            raise uploader.UploadCancelled()
+                name = f"sequence {sequence_uuid}"

+            if reupload:
+                if uploaded_at is not None:
+                    LOG.info(
+                        f"Reuploading {name}: previously uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                    )
+                else:
+                    LOG.info(
+                        f"Reuploading {name}: already uploaded, see {history_desc_path}"
+                    )
+            else:
+                if uploaded_at is not None:
+                    msg = f"Skipping {name}: previously uploaded {humanize.naturaldelta(time.time() - uploaded_at)} ago ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(uploaded_at))})"
+                else:
+                    msg = f"Skipping {name}: already uploaded, see {history_desc_path}"
+                raise UploadedAlready(msg)

-def _setup_write_upload_history(
-    emitter: uploader.EventEmitter,
-    params: JSONDict,
-    metadatas: T.Optional[T.List[types.Metadata]] = None,
-) -> None:
     @emitter.on("upload_finished")
-    def upload_finished(payload: uploader.Progress):
+    def write_history(payload: uploader.Progress):
+        if nofinish:
+            return
+
         sequence_uuid = payload.get("sequence_uuid")
-        md5sum = payload["md5sum"]
-        if sequence_uuid is None or metadatas is None:
+        md5sum = payload.get("sequence_md5sum")
+        assert md5sum is not None, f"md5sum has to be set for {payload}"
+
+        if sequence_uuid is None:
             sequence = None
         else:
             sequence = [
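
The _setup_history handlers above key everything off a per-sequence MD5 checksum: skip, reupload, or record. A small sketch of the same gating decision from _is_history_disabled, with the two constants stubbed as plain variables (in mapillary_tools they come from constants.py; the values here are assumptions):

    # Sketch of the _is_history_disabled() decision shown above.
    MAPILLARY_UPLOAD_HISTORY_PATH = "/tmp/history"  # assumed non-empty
    MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN = False

    def is_history_disabled(dry_run: bool) -> bool:
        if not MAPILLARY_UPLOAD_HISTORY_PATH:
            return True  # nowhere to read or write history
        if dry_run:
            # dry runs skip history unless the test-only override is set
            return not MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN
        return False

    assert is_history_disabled(dry_run=False) is False
    assert is_history_disabled(dry_run=True) is True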
@@ -236,22 +248,20 @@ def _setup_write_upload_history
                 and metadata.MAPSequenceUUID == sequence_uuid
             ]
             sequence.sort(key=lambda metadata: metadata.sort_key())
+
         try:
             history.write_history(
-                md5sum,
-                params,
-                T.cast(JSONDict, payload),
-                sequence,
+                md5sum, upload_run_params, T.cast(JSONDict, payload), sequence
             )
         except OSError:
             LOG.warning("Error writing upload history %s", md5sum, exc_info=True)


 def _setup_tdqm(emitter: uploader.EventEmitter) -> None:
-    upload_pbar: T.Optional[tqdm] = None
+    upload_pbar: tqdm | None = None

-    @emitter.on("upload_fetch_offset")
-    def upload_fetch_offset(payload: uploader.Progress) -> None:
+    @emitter.on("upload_start")
+    def upload_start(payload: uploader.Progress) -> None:
         nonlocal upload_pbar

         if upload_pbar is not None:
@@ -259,30 +269,55 @@ def _setup_tdqm(emitter: uploader.EventEmitter) -> None:

         nth = payload["sequence_idx"] + 1
         total = payload["total_sequence_count"]
-        import_path: T.Optional[str] = payload.get("import_path")
+        import_path: str | None = payload.get("import_path")
         filetype = payload.get("file_type", "unknown").upper()
         if import_path is None:
-            _desc = f"Uploading {filetype} ({nth}/{total})"
+            desc = f"Uploading {filetype} ({nth}/{total})"
         else:
-            _desc = (
+            desc = (
                 f"Uploading {filetype} {os.path.basename(import_path)} ({nth}/{total})"
             )
         upload_pbar = tqdm(
             total=payload["entity_size"],
-            desc=_desc,
+            desc=desc,
             unit="B",
             unit_scale=True,
             unit_divisor=1024,
-            initial=payload["offset"],
+            initial=payload.get("offset", 0),
             disable=LOG.getEffectiveLevel() <= logging.DEBUG,
         )

+    @emitter.on("upload_fetch_offset")
+    def upload_fetch_offset(payload: uploader.Progress) -> None:
+        assert upload_pbar is not None, (
+            "progress_bar must be initialized in upload_start"
+        )
+        begin_offset = payload.get("begin_offset", 0)
+        if begin_offset is not None and begin_offset > 0:
+            if upload_pbar.total is not None:
+                progress_percent = (begin_offset / upload_pbar.total) * 100
+                upload_pbar.write(
+                    f"Resuming upload at {begin_offset=} ({progress_percent:3.0f}%)",
+                    file=sys.stderr,
+                )
+            else:
+                upload_pbar.write(
+                    f"Resuming upload at {begin_offset=}", file=sys.stderr
+                )
+        upload_pbar.reset()
+        upload_pbar.update(begin_offset)
+        upload_pbar.refresh()
+
     @emitter.on("upload_progress")
     def upload_progress(payload: uploader.Progress) -> None:
-        assert upload_pbar is not None, "progress_bar must be initialized"
+        assert upload_pbar is not None, (
+            "progress_bar must be initialized in upload_start"
+        )
         upload_pbar.update(payload["chunk_size"])
+        upload_pbar.refresh()

     @emitter.on("upload_end")
+    @emitter.on("upload_failed")
     def upload_end(_: uploader.Progress) -> None:
         nonlocal upload_pbar
         if upload_pbar:
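
The upload_fetch_offset handler above resumes an interrupted upload by resetting the bar and fast-forwarding it to the server-reported offset. The same tqdm trick in isolation (the byte counts here are invented):

    # Sketch: fast-forward a tqdm bar to a resume offset, as the
    # upload_fetch_offset handler above does.
    from tqdm import tqdm

    total = 10 * 1024 * 1024
    begin_offset = 3 * 1024 * 1024

    pbar = tqdm(total=total, unit="B", unit_scale=True, unit_divisor=1024)
    pbar.reset()               # clear any progress shown so far
    pbar.update(begin_offset)  # jump straight to the resumed position
    pbar.refresh()             # force a redraw so the jump is visible
    pbar.close()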
@@ -294,25 +329,46 @@ def _setup_ipc(emitter: uploader.EventEmitter):
     @emitter.on("upload_start")
     def upload_start(payload: uploader.Progress):
         type: uploader.EventName = "upload_start"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+        LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
         ipc.send(type, payload)

     @emitter.on("upload_fetch_offset")
     def upload_fetch_offset(payload: uploader.Progress) -> None:
         type: uploader.EventName = "upload_fetch_offset"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+        LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
         ipc.send(type, payload)

     @emitter.on("upload_progress")
     def upload_progress(payload: uploader.Progress):
         type: uploader.EventName = "upload_progress"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+
+        if LOG.getEffectiveLevel() <= logging.DEBUG:
+            # In debug mode, we want to see the progress every 30 seconds
+            # instead of every chunk (which is too verbose)
+            INTERVAL_SECONDS = 30
+            now = time.time()
+            last_upload_progress_debug_at: float | None = T.cast(T.Dict, payload).get(
+                "_last_upload_progress_debug_at"
+            )
+            if (
+                last_upload_progress_debug_at is None
+                or last_upload_progress_debug_at + INTERVAL_SECONDS < now
+            ):
+                LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
+                T.cast(T.Dict, payload)["_last_upload_progress_debug_at"] = now
+
         ipc.send(type, payload)

     @emitter.on("upload_end")
     def upload_end(payload: uploader.Progress) -> None:
         type: uploader.EventName = "upload_end"
-        LOG.debug("Sending %s via IPC: %s", type, payload)
+        LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
+        ipc.send(type, payload)
+
+    @emitter.on("upload_failed")
+    def upload_failed(payload: uploader.Progress) -> None:
+        type: uploader.EventName = "upload_failed"
+        LOG.debug(f"{type.upper()}: {json.dumps(payload)}")
         ipc.send(type, payload)

@@ -334,12 +390,17 @@ class _APIStats(uploader.Progress, total=False):


 def _setup_api_stats(emitter: uploader.EventEmitter):
-    all_stats: T.List[_APIStats] = []
+    all_stats: list[_APIStats] = []

     @emitter.on("upload_start")
     def collect_start_time(payload: _APIStats) -> None:
-        payload["upload_start_time"] = time.time()
+        now = time.time()
+        payload["upload_start_time"] = now
         payload["upload_total_time"] = 0
+        # These fields should be initialized in upload events like "upload_fetch_offset",
+        # but since we disabled them for uploading images, we initialize them here
+        payload["upload_last_restart_time"] = now
+        payload["upload_first_offset"] = 0

     @emitter.on("upload_fetch_offset")
     def collect_restart_time(payload: _APIStats) -> None:
@@ -362,15 +423,18 @@ def _setup_api_stats(emitter: uploader.EventEmitter):
         now = time.time()
         payload["upload_end_time"] = now
         payload["upload_total_time"] += now - payload["upload_last_restart_time"]
+
+    @emitter.on("upload_finished")
+    def append_stats(payload: _APIStats) -> None:
         all_stats.append(payload)

     return all_stats


-def _summarize(stats: T.Sequence[_APIStats]) -> T.Dict:
+def _summarize(stats: T.Sequence[_APIStats]) -> dict:
     total_image_count = sum(s.get("sequence_image_count", 0) for s in stats)
     total_uploaded_sequence_count = len(stats)
-    # note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count
+    # Note that stats[0]["total_sequence_count"] not always same as total_uploaded_sequence_count

     total_uploaded_size = sum(
         s["entity_size"] - s.get("upload_first_offset", 0) for s in stats
@@ -388,6 +452,7 @@ def _summarize(stats: T.Sequence[_APIStats]) -> dict:

     upload_summary = {
         "images": total_image_count,
+        # TODO: rename sequences to total uploads
         "sequences": total_uploaded_sequence_count,
         "size": round(total_entity_size_mb, 4),
         "uploaded_size": round(total_uploaded_size_mb, 4),
@@ -398,136 +463,126 @@ def _summarize(stats: T.Sequence[_APIStats]) -> dict:
     return upload_summary


-def _show_upload_summary(stats: T.Sequence[_APIStats]):
-    grouped: T.Dict[str, T.List[_APIStats]] = {}
-    for stat in stats:
-        grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)
+def _show_upload_summary(stats: T.Sequence[_APIStats], errors: T.Sequence[Exception]):
+    LOG.info("==> Upload summary")

-    for file_type, typed_stats in grouped.items():
-        if file_type == FileType.IMAGE.value:
+    errors_by_type: dict[type[Exception], list[Exception]] = {}
+    for error in errors:
+        errors_by_type.setdefault(type(error), []).append(error)
+
+    for error_type, error_list in errors_by_type.items():
+        if error_type is UploadedAlready:
             LOG.info(
-                "%8d %s sequences uploaded",
-                len(typed_stats),
-                file_type.upper(),
+                f"Skipped {len(error_list)} already uploaded sequences (use --reupload to force re-upload)",
             )
         else:
-            LOG.info(
-                "%8d %s files uploaded",
-                len(typed_stats),
-                file_type.upper(),
-            )
+            LOG.info(f"{len(error_list)} uploads failed due to {error_type.__name__}")

-    summary = _summarize(stats)
-    LOG.info("%8.1fM data in total", summary["size"])
-    LOG.info("%8.1fM data uploaded", summary["uploaded_size"])
-    LOG.info("%8.1fs upload time", summary["time"])
+    if stats:
+        grouped: dict[str, list[_APIStats]] = {}
+        for stat in stats:
+            grouped.setdefault(stat.get("file_type", "unknown"), []).append(stat)

+        for file_type, typed_stats in grouped.items():
+            if file_type == FileType.IMAGE.value:
+                LOG.info(f"{len(typed_stats)} sequences uploaded")
+            else:
+                LOG.info(f"{len(typed_stats)} {file_type} uploaded")

-def _api_logging_finished(summary: T.Dict):
-    if MAPILLARY_DISABLE_API_LOGGING:
+        summary = _summarize(stats)
+        LOG.info(f"{humanize.naturalsize(summary['size'] * 1024 * 1024)} read in total")
+        LOG.info(
+            f"{humanize.naturalsize(summary['uploaded_size'] * 1024 * 1024)} uploaded"
+        )
+        LOG.info(f"{summary['time']} upload time")
+    else:
+        LOG.info("Nothing uploaded. Bye.")
+
+
+def _api_logging_finished(summary: dict, dry_run: bool = False):
+    if dry_run:
+        return
+
+    if constants.MAPILLARY_DISABLE_API_LOGGING:
         return

     action: api_v4.ActionType = "upload_finished_upload"
-    LOG.debug("API Logging for action %s: %s", action, summary)
     try:
-        api_v4.log_event(
-            action,
-            summary,
-        )
+        api_v4.log_event(action, summary)
     except requests.HTTPError as exc:
         LOG.warning(
-            "Error from API Logging for action %s",
-            action,
-            exc_info=wrap_http_exception(exc),
+            f"HTTPError from logging action {action}: {api_v4.readable_http_error(exc)}"
         )
     except Exception:
-        LOG.warning("Error from API Logging for action %s", action, exc_info=True)
+        LOG.warning(f"Error from logging action {action}", exc_info=True)


-def _api_logging_failed(payload: T.Dict, exc: Exception):
-    if MAPILLARY_DISABLE_API_LOGGING:
+def _api_logging_failed(payload: dict, exc: Exception, dry_run: bool = False):
+    if dry_run:
+        return
+
+    if constants.MAPILLARY_DISABLE_API_LOGGING:
         return

     payload_with_reason = {**payload, "reason": exc.__class__.__name__}
     action: api_v4.ActionType = "upload_failed_upload"
-    LOG.debug("API Logging for action %s: %s", action, payload)
     try:
-        api_v4.log_event(
-            action,
-            payload_with_reason,
-        )
+        api_v4.log_event(action, payload_with_reason)
     except requests.HTTPError as exc:
-        wrapped_exc = wrap_http_exception(exc)
         LOG.warning(
-            "Error from API Logging for action %s",
-            action,
-            exc_info=wrapped_exc,
+            f"HTTPError from logging action {action}: {api_v4.readable_http_error(exc)}"
         )
     except Exception:
-        LOG.warning("Error from API Logging for action %s", action, exc_info=True)
+        LOG.warning(f"Error from logging action {action}", exc_info=True)


-def _load_descs(
-    _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]],
-    desc_path: T.Optional[str],
-    import_paths: T.Sequence[Path],
-) -> T.List[types.Metadata]:
-    metadatas: T.List[types.Metadata]
-
-    if _metadatas_from_process is not None:
-        metadatas = [
-            metadata
-            for metadata in _metadatas_from_process
-            if not isinstance(metadata, types.ErrorMetadata)
-        ]
-    else:
-        metadatas = _load_validate_metadatas_from_desc_path(desc_path, import_paths)
+_M = T.TypeVar("_M", bound=types.Metadata)

-    # Make sure all metadatas have sequence uuid assigned
-    # It is used to find the right sequence when writing upload history
-    missing_sequence_uuid = str(uuid.uuid4())
-    for metadata in metadatas:
-        if isinstance(metadata, types.ImageMetadata):
-            if metadata.MAPSequenceUUID is None:
-                metadata.MAPSequenceUUID = missing_sequence_uuid

-    for metadata in metadatas:
-        assert isinstance(metadata, (types.ImageMetadata, types.VideoMetadata))
-        if isinstance(metadata, types.ImageMetadata):
-            assert metadata.MAPSequenceUUID is not None
+def _find_metadata_with_filename_existed_in(
+    metadatas: T.Iterable[_M], paths: T.Iterable[Path]
+) -> list[_M]:
+    resolved_image_paths = set(p.resolve() for p in paths)
+    return [d for d in metadatas if d.filename.resolve() in resolved_image_paths]

-    return metadatas

+def _gen_upload_everything(
+    mly_uploader: uploader.Uploader,
+    metadatas: T.Sequence[types.Metadata],
+    import_paths: T.Sequence[Path],
+    skip_subfolders: bool,
+):
+    # Upload images
+    image_metadatas = _find_metadata_with_filename_existed_in(
+        (m for m in metadatas if isinstance(m, types.ImageMetadata)),
+        utils.find_images(import_paths, skip_subfolders=skip_subfolders),
+    )
+    yield from uploader.ImageSequenceUploader.upload_images(
+        mly_uploader, image_metadatas
+    )

-_M = T.TypeVar("_M", bound=types.Metadata)
+    # Upload videos
+    video_metadatas = _find_metadata_with_filename_existed_in(
+        (m for m in metadatas if isinstance(m, types.VideoMetadata)),
+        utils.find_videos(import_paths, skip_subfolders=skip_subfolders),
+    )
+    yield from uploader.VideoUploader.upload_videos(mly_uploader, video_metadatas)

+    # Upload zip files
+    zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
+    yield from uploader.ZipUploader.upload_zipfiles(mly_uploader, zip_paths)

-def _find_metadata_with_filename_existed_in(
-    metadatas: T.Sequence[_M], paths: T.Sequence[Path]
-) -> T.List[_M]:
-    resolved_image_paths = set(p.resolve() for p in paths)
-    return [d for d in metadatas if d.filename.resolve() in resolved_image_paths]

+def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> list[Path]:
+    import_paths: list[Path]

-def upload(
-    import_path: T.Union[Path, T.Sequence[Path]],
-    desc_path: T.Optional[str] = None,
-    _metadatas_from_process: T.Optional[T.Sequence[types.MetadataOrError]] = None,
-    user_name: T.Optional[str] = None,
-    organization_key: T.Optional[str] = None,
-    dry_run=False,
-    skip_subfolders=False,
-) -> None:
-    import_paths: T.Sequence[Path]
     if isinstance(import_path, Path):
         import_paths = [import_path]
     else:
         assert isinstance(import_path, list)
         import_paths = import_path
-    import_paths = list(utils.deduplicate_paths(import_paths))

-    if not import_paths:
-        return
+    import_paths = list(utils.deduplicate_paths(import_paths))

     # Check and fail early
     for path in import_paths:
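
_continue_or_fail (introduced in the next hunk) encodes the whole retry policy as "return the error to keep going, raise to abort the run". A condensed sketch of that triage shape, with the mapillary_tools exception hierarchy reduced to builtins:

    # Sketch of the return-vs-raise triage in _continue_or_fail below.
    def continue_or_fail(ex: Exception) -> Exception:
        # Per-sequence problems: hand the error back so the loop continues
        if isinstance(ex, (FileNotFoundError, PermissionError, ValueError)):
            return ex
        # Anything else is fatal: abort the whole upload run
        raise ex

    survivable: list[Exception] = []
    for ex in [FileNotFoundError("gone.jpg"), ValueError("bad metadata")]:
        survivable.append(continue_or_fail(ex))

    assert len(survivable) == 2  # a fatal error would have raised instead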
@@ -536,176 +591,113 @@ def upload(
                 f"Import file or directory not found: {path}"
             )

-    metadatas = _load_descs(_metadatas_from_process, desc_path, import_paths)
+    return import_paths

-    user_items = fetch_user_items(user_name, organization_key)

-    # Setup the emitter -- the order matters here
+def _continue_or_fail(ex: Exception) -> Exception:
+    """
+    Wrap the exception, or re-raise if it is a fatal error (i.e. there is no point to continue)
+    """

-    emitter = uploader.EventEmitter()
+    if isinstance(ex, uploader.SequenceError):
+        return ex

-    enable_history = history.MAPILLARY_UPLOAD_HISTORY_PATH and (
-        not dry_run or MAPILLARY__ENABLE_UPLOAD_HISTORY_FOR_DRY_RUN == "YES"
-    )
+    # Certain files not found or no permission
+    if isinstance(ex, (FileNotFoundError, PermissionError)):
+        return ex

-    # Put it first one to cancel early
-    if enable_history:
-        _setup_cancel_due_to_duplication(emitter)
+    # Certain metadatas are not valid
+    if isinstance(ex, exceptions.MapillaryMetadataValidationError):
+        return ex

-    # This one set up tdqm
-    _setup_tdqm(emitter)
+    # Fatal error: this is thrown after all retries
+    if isinstance(ex, requests.ConnectionError):
+        raise exceptions.MapillaryUploadConnectionError(str(ex)) from ex

-    # Now stats is empty but it will collect during upload
-    stats = _setup_api_stats(emitter)
+    # Fatal error: this is thrown after all retries
+    if isinstance(ex, requests.Timeout):
+        raise exceptions.MapillaryUploadTimeoutError(str(ex)) from ex

-    # Send the progress as well as the log stats collected above
-    _setup_ipc(emitter)
+    # Fatal error:
+    if isinstance(ex, requests.HTTPError) and isinstance(
+        ex.response, requests.Response
+    ):
+        if api_v4.is_auth_error(ex.response):
+            raise exceptions.MapillaryUploadUnauthorizedError(
+                api_v4.extract_auth_error_message(ex.response)
+            ) from ex
+        raise ex

-    params: JSONDict = {
-        # null if multiple paths provided
-        "import_path": str(import_path) if isinstance(import_path, Path) else None,
-        "organization_key": user_items.get("MAPOrganizationKey"),
-        "user_key": user_items.get("MAPSettingsUserKey"),
-        "version": VERSION,
-    }
+    raise ex

-    if enable_history:
-        _setup_write_upload_history(emitter, params, metadatas)

-    mly_uploader = uploader.Uploader(
-        user_items,
-        emitter=emitter,
-        dry_run=dry_run,
-        chunk_size=int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024),
-    )
+def _load_descs(
+    _metadatas_from_process: T.Sequence[types.MetadataOrError] | None,
+    import_paths: T.Sequence[Path],
+    desc_path: str | None,
+) -> list[types.Metadata]:
+    metadatas: list[types.Metadata]

-    try:
-        image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
-        # find descs that match the image paths from the import paths
-        image_metadatas = [
-            metadata
-            for metadata in (metadatas or [])
-            if isinstance(metadata, types.ImageMetadata)
-        ]
-        specified_image_metadatas = _find_metadata_with_filename_existed_in(
-            image_metadatas, image_paths
-        )
-        if specified_image_metadatas:
-            try:
-                clusters = mly_uploader.upload_images(
-                    specified_image_metadatas,
-                    event_payload={"file_type": FileType.IMAGE.value},
-                )
-            except Exception as ex:
-                raise UploadError(ex) from ex
-
-            if clusters:
-                LOG.debug("Uploaded to cluster: %s", clusters)
-
-        video_paths = utils.find_videos(import_paths, skip_subfolders=skip_subfolders)
-        video_metadatas = [
-            metadata
-            for metadata in (metadatas or [])
-            if isinstance(metadata, types.VideoMetadata)
-        ]
-        specified_video_metadatas = _find_metadata_with_filename_existed_in(
-            video_metadatas, video_paths
-        )
-        for idx, video_metadata in enumerate(specified_video_metadatas):
-            video_metadata.update_md5sum()
-            assert isinstance(video_metadata.md5sum, str), "md5sum should be updated"
-
-            # extract telemetry measurements from GoPro videos
-            telemetry_measurements: T.List[telemetry.TelemetryMeasurement] = []
-            if MAPILLARY__EXPERIMENTAL_ENABLE_IMU == "YES":
-                if video_metadata.filetype is FileType.GOPRO:
-                    with video_metadata.filename.open("rb") as fp:
-                        telemetry_data = gpmf_parser.extract_telemetry_data(fp)
-                    if telemetry_data:
-                        telemetry_measurements.extend(telemetry_data.accl)
-                        telemetry_measurements.extend(telemetry_data.gyro)
-                        telemetry_measurements.extend(telemetry_data.magn)
-                    telemetry_measurements.sort(key=lambda m: m.time)
-
-            generator = camm_builder.camm_sample_generator2(
-                video_metadata, telemetry_measurements=telemetry_measurements
-            )
+    if _metadatas_from_process is not None:
+        metadatas, _ = types.separate_errors(_metadatas_from_process)
+    else:
+        metadatas = _load_valid_metadatas_from_desc_path(import_paths, desc_path)

-            with video_metadata.filename.open("rb") as src_fp:
-                camm_fp = simple_mp4_builder.transform_mp4(src_fp, generator)
-                event_payload: uploader.Progress = {
-                    "total_sequence_count": len(specified_video_metadatas),
-                    "sequence_idx": idx,
-                    "file_type": video_metadata.filetype.value,
-                    "import_path": str(video_metadata.filename),
-                }
-                try:
-                    cluster_id = mly_uploader.upload_stream(
-                        T.cast(T.BinaryIO, camm_fp),
-                        upload_api_v4.ClusterFileType.CAMM,
-                        video_metadata.md5sum,
-                        event_payload=event_payload,
-                    )
-                except Exception as ex:
-                    raise UploadError(ex) from ex
-                LOG.debug("Uploaded to cluster: %s", cluster_id)
-
-        zip_paths = utils.find_zipfiles(import_paths, skip_subfolders=skip_subfolders)
-        _upload_zipfiles(mly_uploader, zip_paths)
-
-    except UploadError as ex:
-        inner_ex = ex.inner_ex
-
-        if not dry_run:
-            _api_logging_failed(_summarize(stats), inner_ex)
-
-        if isinstance(inner_ex, requests.ConnectionError):
-            raise exceptions.MapillaryUploadConnectionError(str(inner_ex)) from inner_ex
-
-        if isinstance(inner_ex, requests.Timeout):
-            raise exceptions.MapillaryUploadTimeoutError(str(inner_ex)) from inner_ex
-
-        if isinstance(inner_ex, requests.HTTPError) and isinstance(
-            inner_ex.response, requests.Response
-        ):
-            if inner_ex.response.status_code in [400, 401]:
-                try:
-                    error_body = inner_ex.response.json()
-                except Exception:
-                    error_body = {}
-                debug_info = error_body.get("debug_info", {})
-                if debug_info.get("type") in ["NotAuthorizedError"]:
-                    raise exceptions.MapillaryUploadUnauthorizedError(
-                        debug_info.get("message")
-                    ) from inner_ex
-            raise wrap_http_exception(inner_ex) from inner_ex
-
-        raise inner_ex
+    # Make sure all metadatas have sequence uuid assigned
+    # It is used to find the right sequence when writing upload history
+    missing_sequence_uuid = str(uuid.uuid4())
+    for metadata in metadatas:
+        if isinstance(metadata, types.ImageMetadata):
+            if metadata.MAPSequenceUUID is None:
+                metadata.MAPSequenceUUID = missing_sequence_uuid

-    if stats:
-        if not dry_run:
-            _api_logging_finished(_summarize(stats))
-        _show_upload_summary(stats)
-    else:
-        LOG.info("Nothing uploaded. Bye.")
+    for metadata in metadatas:
+        assert isinstance(metadata, (types.ImageMetadata, types.VideoMetadata))
+        if isinstance(metadata, types.ImageMetadata):
+            assert metadata.MAPSequenceUUID is not None

+    return metadatas

-def _upload_zipfiles(
-    mly_uploader: uploader.Uploader,
-    zip_paths: T.Sequence[Path],
-) -> None:
-    for idx, zip_path in enumerate(zip_paths):
-        event_payload: uploader.Progress = {
-            "total_sequence_count": len(zip_paths),
-            "sequence_idx": idx,
-            "file_type": FileType.ZIP.value,
-            "import_path": str(zip_path),
-        }
+
+def _load_valid_metadatas_from_desc_path(
+    import_paths: T.Sequence[Path], desc_path: str | None
+) -> list[types.Metadata]:
+    if desc_path is None:
+        desc_path = _find_desc_path(import_paths)
+
+    if desc_path == "-":
         try:
-            cluster_id = mly_uploader.upload_zipfile(
-                zip_path, event_payload=event_payload
+            metadatas = DescriptionJSONSerializer.deserialize_stream(sys.stdin.buffer)
+        except json.JSONDecodeError as ex:
+            raise exceptions.MapillaryInvalidDescriptionFile(
+                f"Invalid JSON stream from {desc_path}: {ex}"
+            ) from ex
+
+    else:
+        if not os.path.isfile(desc_path):
+            raise exceptions.MapillaryFileNotFoundError(
+                f"Description file not found: {desc_path}"
             )
-        except Exception as ex:
-            raise UploadError(ex) from ex
+        with open(desc_path, "rb") as fp:
+            try:
+                metadatas = DescriptionJSONSerializer.deserialize_stream(fp)
+            except json.JSONDecodeError as ex:
+                raise exceptions.MapillaryInvalidDescriptionFile(
+                    f"Invalid JSON stream from {desc_path}: {ex}"
+                ) from ex

-        LOG.debug("Uploaded to cluster: %s", cluster_id)
+    return metadatas
+
+
+def _find_desc_path(import_paths: T.Sequence[Path]) -> str:
+    if len(import_paths) == 1 and import_paths[0].is_dir():
+        return str(import_paths[0].joinpath(constants.IMAGE_DESCRIPTION_FILENAME))
+
+    if 1 < len(import_paths):
+        raise exceptions.MapillaryBadParameterError(
+            "The description path must be specified (with --desc_path) when uploading multiple paths"
+        )
+    else:
+        raise exceptions.MapillaryBadParameterError(
+            "The description path must be specified (with --desc_path) when uploading a single file"
+        )
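
The loader above accepts "-" to read descriptions from stdin and otherwise falls back to a default description file inside a single import directory. A sketch of that resolution order; the default filename here is an assumption standing in for constants.IMAGE_DESCRIPTION_FILENAME:

    # Sketch of the desc-path resolution implemented by _find_desc_path above.
    from pathlib import Path

    IMAGE_DESCRIPTION_FILENAME = "mapillary_image_description.json"  # assumed

    def find_desc_path(import_paths: list[Path]) -> str:
        if len(import_paths) == 1 and import_paths[0].is_dir():
            return str(import_paths[0] / IMAGE_DESCRIPTION_FILENAME)
        raise ValueError("--desc_path is required for files or multiple paths")

    print(find_desc_path([Path(".")]))  # mapillary_image_description.json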