mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +287 -22
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +17 -8
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +408 -416
  61. mapillary_tools/upload_api_v4.py +172 -174
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
@@ -1,200 +1,95 @@
1
- import collections
1
+ from __future__ import annotations
2
+
2
3
  import datetime
3
- import itertools
4
- import json
5
4
  import logging
6
5
  import typing as T
7
- from multiprocessing import Pool
8
6
  from pathlib import Path
9
7
 
8
+ import humanize
10
9
  from tqdm import tqdm
11
10
 
12
- from . import constants, exceptions, exif_write, history, types, utils
13
- from .geotag import (
14
- geotag_from_generic,
15
- geotag_images_from_exif,
16
- geotag_images_from_exiftool_both_image_and_video,
17
- geotag_images_from_gpx_file,
18
- geotag_images_from_nmea_file,
19
- geotag_images_from_video,
20
- geotag_videos_from_exiftool_video,
21
- geotag_videos_from_video,
11
+ from . import constants, exceptions, exif_write, types, utils
12
+ from .geotag.factory import parse_source_option, process
13
+ from .geotag.options import (
14
+ InterpolationOption,
15
+ SourceOption,
16
+ SourcePathOption,
17
+ SourceType,
22
18
  )
23
- from .types import FileType, VideoMetadataOrError
24
-
25
- from .video_data_extraction.cli_options import CliOptions, CliParserOptions
26
- from .video_data_extraction.extract_video_data import VideoDataExtractor
27
-
19
+ from .serializer.description import (
20
+ DescriptionJSONSerializer,
21
+ validate_and_fail_metadata,
22
+ )
23
+ from .serializer.gpx import GPXSerializer
28
24
 
29
25
  LOG = logging.getLogger(__name__)
30
-
31
-
32
- GeotagSource = T.Literal[
33
- "gopro_videos", "blackvue_videos", "camm", "exif", "gpx", "nmea", "exiftool"
34
- ]
35
-
36
- VideoGeotagSource = T.Literal[
37
- "video",
38
- "camm",
39
- "gopro",
40
- "blackvue",
41
- "gpx",
42
- "nmea",
43
- "exiftool_xml",
44
- "exiftool_runtime",
26
+ DEFAULT_GEOTAG_SOURCE_OPTIONS = [
27
+ SourceType.NATIVE.value,
28
+ SourceType.EXIFTOOL_RUNTIME.value,
45
29
  ]
46
30
 
47
31
 
48
- def _process_images(
49
- image_paths: T.Sequence[Path],
50
- geotag_source: GeotagSource,
51
- geotag_source_path: T.Optional[Path] = None,
52
- video_import_path: T.Optional[Path] = None,
53
- interpolation_use_gpx_start_time: bool = False,
54
- interpolation_offset_time: float = 0.0,
55
- num_processes: T.Optional[int] = None,
56
- skip_subfolders=False,
57
- ) -> T.Sequence[types.ImageMetadataOrError]:
58
- geotag: geotag_from_generic.GeotagImagesFromGeneric
59
-
60
- if video_import_path is not None:
61
- # commands that trigger this branch:
62
- # video_process video_import_path image_paths --geotag_source gpx --geotag_source_path <gpx_file> --skip_subfolders
63
- image_paths = list(
64
- utils.filter_video_samples(
65
- image_paths, video_import_path, skip_subfolders=skip_subfolders
66
- )
67
- )
68
-
69
- if geotag_source == "exif":
70
- geotag = geotag_images_from_exif.GeotagImagesFromEXIF(
71
- image_paths, num_processes=num_processes
72
- )
73
-
32
+ def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> T.Sequence[Path]:
33
+ import_paths: T.Sequence[Path]
34
+ if isinstance(import_path, Path):
35
+ import_paths = [import_path]
74
36
  else:
75
- if geotag_source_path is None:
76
- geotag_source_path = video_import_path
77
- if geotag_source_path is None:
78
- raise exceptions.MapillaryFileNotFoundError(
79
- "Geotag source path (--geotag_source_path) is required"
80
- )
81
- if geotag_source == "exiftool":
82
- if not geotag_source_path.exists():
83
- raise exceptions.MapillaryFileNotFoundError(
84
- f"Geotag source file not found: {geotag_source_path}"
85
- )
86
- else:
87
- if not geotag_source_path.is_file():
88
- raise exceptions.MapillaryFileNotFoundError(
89
- f"Geotag source file not found: {geotag_source_path}"
90
- )
37
+ import_paths = import_path
38
+ import_paths = list(utils.deduplicate_paths(import_paths))
39
+ return import_paths
91
40
 
92
- if geotag_source == "gpx":
93
- geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile(
94
- image_paths,
95
- geotag_source_path,
96
- use_gpx_start_time=interpolation_use_gpx_start_time,
97
- offset_time=interpolation_offset_time,
98
- num_processes=num_processes,
99
- )
100
- elif geotag_source == "nmea":
101
- geotag = geotag_images_from_nmea_file.GeotagImagesFromNMEAFile(
102
- image_paths,
103
- geotag_source_path,
104
- use_gpx_start_time=interpolation_use_gpx_start_time,
105
- offset_time=interpolation_offset_time,
106
- num_processes=num_processes,
107
- )
108
- elif geotag_source in ["gopro_videos", "blackvue_videos", "camm"]:
109
- map_geotag_source_to_filetype: T.Dict[GeotagSource, FileType] = {
110
- "gopro_videos": FileType.GOPRO,
111
- "blackvue_videos": FileType.BLACKVUE,
112
- "camm": FileType.CAMM,
113
- }
114
- video_paths = utils.find_videos([geotag_source_path])
115
- image_samples_by_video_path = utils.find_all_image_samples(
116
- image_paths, video_paths
117
- )
118
- video_paths_with_image_samples = list(image_samples_by_video_path.keys())
119
- video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo(
120
- video_paths_with_image_samples,
121
- filetypes={map_geotag_source_to_filetype[geotag_source]},
122
- num_processes=num_processes,
123
- ).to_description()
124
- geotag = geotag_images_from_video.GeotagImagesFromVideo(
125
- image_paths,
126
- video_metadatas,
127
- offset_time=interpolation_offset_time,
128
- num_processes=num_processes,
129
- )
130
- elif geotag_source == "exiftool":
131
- geotag = geotag_images_from_exiftool_both_image_and_video.GeotagImagesFromExifToolBothImageAndVideo(
132
- image_paths,
133
- geotag_source_path,
134
- )
135
- else:
136
- raise RuntimeError(f"Invalid geotag source {geotag_source}")
137
41
 
138
- return geotag.to_description()
42
+ def _parse_source_options(
43
+ geotag_source: list[str],
44
+ video_geotag_source: list[str],
45
+ geotag_source_path: Path | None,
46
+ ) -> list[SourceOption]:
47
+ parsed_options: list[SourceOption] = []
139
48
 
49
+ for s in geotag_source:
50
+ parsed_options.extend(parse_source_option(s))
140
51
 
141
- def _process_videos(
142
- geotag_source: str,
143
- geotag_source_path: T.Optional[Path],
144
- video_paths: T.Sequence[Path],
145
- num_processes: T.Optional[int],
146
- filetypes: T.Optional[T.Set[FileType]],
147
- ) -> T.Sequence[VideoMetadataOrError]:
148
- geotag: geotag_from_generic.GeotagVideosFromGeneric
149
- if geotag_source == "exiftool":
150
- if geotag_source_path is None:
151
- raise exceptions.MapillaryFileNotFoundError(
152
- "Geotag source path (--geotag_source_path) is required"
52
+ for s in video_geotag_source:
53
+ for video_option in parse_source_option(s):
54
+ video_option.filetypes = types.combine_filetype_filters(
55
+ video_option.filetypes, {types.FileType.VIDEO}
153
56
  )
154
- if not geotag_source_path.exists():
155
- raise exceptions.MapillaryFileNotFoundError(
156
- f"Geotag source file not found: {geotag_source_path}"
157
- )
158
- geotag = geotag_videos_from_exiftool_video.GeotagVideosFromExifToolVideo(
159
- video_paths,
160
- geotag_source_path,
161
- num_processes=num_processes,
162
- )
163
- else:
164
- geotag = geotag_videos_from_video.GeotagVideosFromVideo(
165
- video_paths,
166
- filetypes=filetypes,
167
- num_processes=num_processes,
168
- )
169
- return geotag.to_description()
57
+ parsed_options.append(video_option)
170
58
 
59
+ if geotag_source_path is not None:
60
+ for parsed_option in parsed_options:
61
+ if parsed_option.source_path is None:
62
+ parsed_option.source_path = SourcePathOption(
63
+ source_path=Path(geotag_source_path)
64
+ )
65
+ else:
66
+ source_path_option = parsed_option.source_path
67
+ if source_path_option.source_path is None:
68
+ source_path_option.source_path = Path(geotag_source_path)
69
+ else:
70
+ LOG.warning(
71
+ "The option --geotag_source_path is ignored for source %s",
72
+ parsed_option,
73
+ )
171
74
 
172
- def _normalize_import_paths(
173
- import_path: T.Union[Path, T.Sequence[Path]],
174
- ) -> T.Sequence[Path]:
175
- import_paths: T.Sequence[Path]
176
- if isinstance(import_path, Path):
177
- import_paths = [import_path]
178
- else:
179
- import_paths = import_path
180
- import_paths = list(utils.deduplicate_paths(import_paths))
181
- return import_paths
75
+ return parsed_options
182
76
 
183
77
 
184
78
  def process_geotag_properties(
185
- vars_args: T.Dict, # Hello, I'm a hack
186
- import_path: T.Union[Path, T.Sequence[Path]],
187
- filetypes: T.Set[FileType],
188
- geotag_source: GeotagSource,
189
- geotag_source_path: T.Optional[Path] = None,
79
+ import_path: Path | T.Sequence[Path],
80
+ filetypes: set[types.FileType] | None,
81
+ # Geotag options
82
+ geotag_source: list[str],
83
+ geotag_source_path: Path | None,
84
+ video_geotag_source: list[str],
85
+ # Global options
190
86
  # video_import_path comes from the command video_process
191
- video_import_path: T.Optional[Path] = None,
87
+ video_import_path: Path | None = None,
192
88
  interpolation_use_gpx_start_time: bool = False,
193
89
  interpolation_offset_time: float = 0.0,
90
+ num_processes: int | None = None,
194
91
  skip_subfolders=False,
195
- num_processes: T.Optional[int] = None,
196
- ) -> T.List[types.MetadataOrError]:
197
- filetypes = set(FileType(f) for f in filetypes)
92
+ ) -> list[types.MetadataOrError]:
198
93
  import_paths = _normalize_import_paths(import_path)
199
94
 
200
95
  # Check and fail early
@@ -204,84 +99,34 @@ def process_geotag_properties(
204
99
  f"Import file or directory not found: {path}"
205
100
  )
206
101
 
207
- metadatas: T.List[types.MetadataOrError] = []
208
-
209
- if FileType.IMAGE in filetypes:
210
- image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
211
- if image_paths:
212
- image_metadatas = _process_images(
213
- image_paths,
214
- geotag_source=geotag_source,
215
- geotag_source_path=geotag_source_path,
216
- video_import_path=video_import_path,
217
- interpolation_use_gpx_start_time=interpolation_use_gpx_start_time,
218
- interpolation_offset_time=interpolation_offset_time,
219
- num_processes=num_processes,
220
- skip_subfolders=skip_subfolders,
221
- )
222
- metadatas.extend(image_metadatas)
102
+ if geotag_source_path is None:
103
+ geotag_source_path = video_import_path
223
104
 
224
- # --video_geotag_source is still experimental, for videos execute it XOR the legacy code
225
- if vars_args["video_geotag_source"]:
226
- metadatas.extend(_process_videos_beta(vars_args))
227
- else:
228
- if (
229
- FileType.CAMM in filetypes
230
- or FileType.GOPRO in filetypes
231
- or FileType.BLACKVUE in filetypes
232
- or FileType.VIDEO in filetypes
233
- ):
234
- video_paths = utils.find_videos(
235
- import_paths, skip_subfolders=skip_subfolders
236
- )
237
- if video_paths:
238
- video_metadata = _process_videos(
239
- geotag_source,
240
- geotag_source_path,
241
- video_paths,
242
- num_processes,
243
- filetypes,
244
- )
245
- metadatas.extend(video_metadata)
105
+ if not geotag_source and not video_geotag_source:
106
+ geotag_source = [*DEFAULT_GEOTAG_SOURCE_OPTIONS]
246
107
 
247
- # filenames should be deduplicated in utils.find_images/utils.find_videos
248
- assert len(metadatas) == len(set(metadata.filename for metadata in metadatas)), (
249
- "duplicate filenames found"
108
+ options = _parse_source_options(
109
+ geotag_source=geotag_source or [],
110
+ video_geotag_source=video_geotag_source or [],
111
+ geotag_source_path=geotag_source_path,
250
112
  )
251
113
 
252
- return metadatas
253
-
254
-
255
- def _process_videos_beta(vars_args: T.Dict):
256
- geotag_sources = vars_args["video_geotag_source"]
257
- geotag_sources_opts: T.List[CliParserOptions] = []
258
- for source in geotag_sources:
259
- parsed_opts: CliParserOptions = {}
260
- try:
261
- parsed_opts = json.loads(source)
262
- except ValueError:
263
- if source not in T.get_args(VideoGeotagSource):
264
- raise exceptions.MapillaryBadParameterError(
265
- "Unknown beta source %s or invalid JSON", source
266
- )
267
- parsed_opts = {"source": source}
114
+ for option in options:
115
+ option.filetypes = types.combine_filetype_filters(option.filetypes, filetypes)
116
+ option.num_processes = num_processes
117
+ if option.interpolation is None:
118
+ option.interpolation = InterpolationOption(
119
+ offset_time=interpolation_offset_time,
120
+ use_gpx_start_time=interpolation_use_gpx_start_time,
121
+ )
268
122
 
269
- if "source" not in parsed_opts:
270
- raise exceptions.MapillaryBadParameterError("Missing beta source name")
123
+ # TODO: can find both in one pass
124
+ image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
125
+ video_paths = utils.find_videos(import_paths, skip_subfolders=skip_subfolders)
271
126
 
272
- geotag_sources_opts.append(parsed_opts)
127
+ metadata_or_errors = process(image_paths + video_paths, options)
273
128
 
274
- options: CliOptions = {
275
- "paths": vars_args["import_path"],
276
- "recursive": vars_args["skip_subfolders"] is False,
277
- "geotag_sources_options": geotag_sources_opts,
278
- "geotag_source_path": vars_args["geotag_source_path"],
279
- "num_processes": vars_args["num_processes"],
280
- "device_make": vars_args["device_make"],
281
- "device_model": vars_args["device_model"],
282
- }
283
- extractor = VideoDataExtractor(options)
284
- return extractor.process()
129
+ return metadata_or_errors
285
130
 
286
131
 
287
132
  def _apply_offsets(
@@ -324,7 +169,7 @@ def _overwrite_exif_tags(
324
169
  unit="images",
325
170
  disable=LOG.getEffectiveLevel() <= logging.DEBUG,
326
171
  ):
327
- dt = datetime.datetime.utcfromtimestamp(metadata.time)
172
+ dt = datetime.datetime.fromtimestamp(metadata.time, datetime.timezone.utc)
328
173
  dt = dt.replace(tzinfo=datetime.timezone.utc)
329
174
 
330
175
  try:
@@ -359,36 +204,40 @@ def _write_metadatas(
359
204
  desc_path: str,
360
205
  ) -> None:
361
206
  if desc_path == "-":
362
- descs = [types.as_desc(metadata) for metadata in metadatas]
363
- print(json.dumps(descs, indent=2))
207
+ descs = DescriptionJSONSerializer.serialize(metadatas)
208
+ print(descs.decode("utf-8"))
364
209
  else:
365
- descs = [types.as_desc(metadata) for metadata in metadatas]
366
- with open(desc_path, "w") as fp:
367
- json.dump(descs, fp)
210
+ normalized_suffix = Path(desc_path).suffix.strip().lower()
211
+ if normalized_suffix in [".gpx"]:
212
+ descs = GPXSerializer.serialize(metadatas)
213
+ else:
214
+ descs = DescriptionJSONSerializer.serialize(metadatas)
215
+ with open(desc_path, "wb") as fp:
216
+ fp.write(descs)
368
217
  LOG.info("Check the description file for details: %s", desc_path)
369
218
 
370
219
 
371
220
  def _is_error_skipped(
372
- error_type: str, skipped_process_errors: T.Set[T.Type[Exception]]
221
+ error_type: type[Exception], skipped_process_errors: set[type[Exception]]
373
222
  ):
374
- skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
375
- skip_all = Exception in skipped_process_errors
376
- return skip_all or error_type in skipped_process_error_names
223
+ return (Exception in skipped_process_errors) or (
224
+ error_type in skipped_process_errors
225
+ )
377
226
 
378
227
 
379
228
  def _show_stats(
380
229
  metadatas: T.Sequence[types.MetadataOrError],
381
- skipped_process_errors: T.Set[T.Type[Exception]],
230
+ skipped_process_errors: set[T.Type[Exception]],
382
231
  ) -> None:
383
- metadatas_by_filetype: T.Dict[FileType, T.List[types.MetadataOrError]] = {}
232
+ LOG.info("==> Process summary")
233
+
234
+ metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
384
235
  for metadata in metadatas:
385
- filetype: T.Optional[FileType]
386
236
  if isinstance(metadata, types.ImageMetadata):
387
- filetype = FileType.IMAGE
237
+ filetype = types.FileType.IMAGE
388
238
  else:
389
239
  filetype = metadata.filetype
390
- if filetype:
391
- metadatas_by_filetype.setdefault(FileType(filetype), []).append(metadata)
240
+ metadatas_by_filetype.setdefault(filetype, []).append(metadata)
392
241
 
393
242
  for filetype, group in metadatas_by_filetype.items():
394
243
  _show_stats_per_filetype(group, filetype, skipped_process_errors)
@@ -397,9 +246,7 @@ def _show_stats(
397
246
  metadata
398
247
  for metadata in metadatas
399
248
  if isinstance(metadata, types.ErrorMetadata)
400
- and not _is_error_skipped(
401
- metadata.error.__class__.__name__, skipped_process_errors
402
- )
249
+ and not _is_error_skipped(type(metadata.error), skipped_process_errors)
403
250
  ]
404
251
  if critical_error_metadatas:
405
252
  raise exceptions.MapillaryProcessError(
@@ -408,157 +255,75 @@ def _show_stats(
408
255
 
409
256
 
410
257
  def _show_stats_per_filetype(
411
- metadatas: T.Sequence[types.MetadataOrError],
412
- filetype: FileType,
413
- skipped_process_errors: T.Set[T.Type[Exception]],
258
+ metadatas: T.Collection[types.MetadataOrError],
259
+ filetype: types.FileType,
260
+ skipped_process_errors: set[T.Type[Exception]],
414
261
  ):
415
- good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = []
416
- filesize_to_upload = 0
417
- error_metadatas: T.List[types.ErrorMetadata] = []
418
- for metadata in metadatas:
419
- if isinstance(metadata, types.ErrorMetadata):
420
- error_metadatas.append(metadata)
421
- else:
422
- good_metadatas.append(metadata)
423
- filesize_to_upload += metadata.filesize or 0
262
+ good_metadatas: list[types.Metadata]
263
+ good_metadatas, error_metadatas = types.separate_errors(metadatas)
424
264
 
425
- LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
265
+ LOG.info(f"{len(metadatas)} {filetype.value} read in total")
426
266
  if good_metadatas:
267
+ total_filesize = sum(
268
+ [0 if m.filesize is None else m.filesize for m in good_metadatas]
269
+ )
427
270
  LOG.info(
428
- "\t %8d %s(s) (%s MB) are ready to be uploaded",
429
- len(good_metadatas),
430
- filetype.value,
431
- round(filesize_to_upload / 1024 / 1024, 1),
271
+ f"\t{len(good_metadatas)} ({humanize.naturalsize(total_filesize)}) ready"
432
272
  )
433
273
 
434
- error_counter = collections.Counter(
435
- metadata.error.__class__.__name__ for metadata in error_metadatas
436
- )
274
+ errors_by_type: dict[type[Exception], list[types.ErrorMetadata]] = {}
275
+ for metadata in error_metadatas:
276
+ errors_by_type.setdefault(type(metadata.error), []).append(metadata)
437
277
 
438
- for error_type, count in error_counter.items():
278
+ for error_type, errors in errors_by_type.items():
279
+ total_filesize = sum([utils.get_file_size_quietly(m.filename) for m in errors])
439
280
  if _is_error_skipped(error_type, skipped_process_errors):
440
281
  LOG.warning(
441
- "\t %8d %s(s) skipped due to %s", count, filetype.value, error_type
282
+ f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
442
283
  )
443
284
  else:
444
285
  LOG.error(
445
- "\t %8d %s(s) failed due to %s", count, filetype.value, error_type
286
+ f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
446
287
  )
447
288
 
448
289
 
449
- _IT = T.TypeVar("_IT")
450
-
451
-
452
- def split_if(
453
- it: T.Iterable[_IT], sep: T.Callable[[_IT], bool]
454
- ) -> T.Tuple[T.List[_IT], T.List[_IT]]:
455
- yes, no = [], []
456
- for e in it:
457
- if sep(e):
458
- yes.append(e)
459
- else:
460
- no.append(e)
461
- return yes, no
290
+ def _validate_metadatas(
291
+ metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
292
+ ) -> list[types.MetadataOrError]:
293
+ LOG.info(f"==> Validating {len(metadatas)} metadatas...")
462
294
 
295
+ # validating metadatas is slow, hence multiprocessing
463
296
 
464
- def _check_upload_status(
465
- metadatas: T.Sequence[types.MetadataOrError],
466
- ) -> T.List[types.MetadataOrError]:
467
- groups = types.group_and_sort_images(
468
- [
469
- metadata
470
- for metadata in metadatas
471
- if isinstance(metadata, types.ImageMetadata)
472
- ]
297
+ # Do not pass error metadatas where the error object can not be pickled for multiprocessing to work
298
+ # Otherwise we get:
299
+ # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time'
300
+ # See https://stackoverflow.com/a/61432070
301
+ good_metadatas, error_metadatas = types.separate_errors(metadatas)
302
+ map_results = utils.mp_map_maybe(
303
+ validate_and_fail_metadata,
304
+ T.cast(T.Iterable[types.Metadata], good_metadatas),
305
+ num_processes=num_processes,
473
306
  )
474
- uploaded_sequence_uuids = set()
475
- for sequence_uuid, group in groups.items():
476
- for m in group:
477
- m.update_md5sum()
478
- sequence_md5sum = types.sequence_md5sum(group)
479
- if history.is_uploaded(sequence_md5sum):
480
- uploaded_sequence_uuids.add(sequence_uuid)
481
-
482
- output: T.List[types.MetadataOrError] = []
483
- for metadata in metadatas:
484
- if isinstance(metadata, types.ImageMetadata):
485
- if metadata.MAPSequenceUUID in uploaded_sequence_uuids:
486
- output.append(
487
- types.describe_error_metadata(
488
- exceptions.MapillaryUploadedAlreadyError(
489
- "The image was already uploaded",
490
- types.as_desc(metadata),
491
- ),
492
- filename=metadata.filename,
493
- filetype=types.FileType.IMAGE,
494
- )
495
- )
496
- else:
497
- output.append(metadata)
498
- elif isinstance(metadata, types.VideoMetadata):
499
- metadata.update_md5sum()
500
- assert isinstance(metadata.md5sum, str)
501
- if history.is_uploaded(metadata.md5sum):
502
- output.append(
503
- types.describe_error_metadata(
504
- exceptions.MapillaryUploadedAlreadyError(
505
- "The video was already uploaded",
506
- types.as_desc(metadata),
507
- ),
508
- filename=metadata.filename,
509
- filetype=metadata.filetype,
510
- )
511
- )
512
- else:
513
- output.append(metadata)
514
- else:
515
- output.append(metadata)
516
- assert len(output) == len(metadatas), "length mismatch"
517
- return output
518
-
519
307
 
520
- def _validate_metadatas(
521
- metadatas: T.Sequence[types.MetadataOrError], num_processes: T.Optional[int]
522
- ) -> T.List[types.MetadataOrError]:
523
- # validating metadatas is slow, hence multiprocessing
524
- if num_processes is None:
525
- pool_num_processes = None
526
- disable_multiprocessing = False
527
- else:
528
- pool_num_processes = max(num_processes, 1)
529
- disable_multiprocessing = num_processes <= 0
530
- with Pool(processes=pool_num_processes) as pool:
531
- validated_metadatas_iter: T.Iterator[types.MetadataOrError]
532
- if disable_multiprocessing:
533
- validated_metadatas_iter = map(types.validate_and_fail_metadata, metadatas)
534
- else:
535
- # Do not pass error metadatas where the error object can not be pickled for multiprocessing to work
536
- # Otherwise we get:
537
- # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time'
538
- # See https://stackoverflow.com/a/61432070
539
- yes, no = split_if(metadatas, lambda m: isinstance(m, types.ErrorMetadata))
540
- no_iter = pool.imap(
541
- types.validate_and_fail_metadata,
542
- no,
543
- )
544
- validated_metadatas_iter = itertools.chain(yes, no_iter)
545
- return list(
546
- tqdm(
547
- validated_metadatas_iter,
548
- desc="Validating metadatas",
549
- unit="metadata",
550
- disable=LOG.getEffectiveLevel() <= logging.DEBUG,
551
- total=len(metadatas),
552
- )
308
+ validated_metadatas = list(
309
+ tqdm(
310
+ map_results,
311
+ desc="Validating metadatas",
312
+ unit="metadata",
313
+ disable=LOG.getEffectiveLevel() <= logging.DEBUG,
314
+ total=len(good_metadatas),
553
315
  )
316
+ )
317
+
318
+ return T.cast(list[types.MetadataOrError], validated_metadatas + error_metadatas)
554
319
 
555
320
 
556
321
  def process_finalize(
557
- import_path: T.Union[T.Sequence[Path], Path],
558
- metadatas: T.List[types.MetadataOrError],
322
+ import_path: T.Sequence[Path] | Path,
323
+ metadatas: list[types.MetadataOrError],
559
324
  skip_process_errors: bool = False,
560
- device_make: T.Optional[str] = None,
561
- device_model: T.Optional[str] = None,
325
+ device_make: str | None = None,
326
+ device_model: str | None = None,
562
327
  overwrite_all_EXIF_tags: bool = False,
563
328
  overwrite_EXIF_time_tag: bool = False,
564
329
  overwrite_EXIF_gps_tag: bool = False,
@@ -566,40 +331,48 @@ def process_finalize(
566
331
  overwrite_EXIF_orientation_tag: bool = False,
567
332
  offset_time: float = 0.0,
568
333
  offset_angle: float = 0.0,
569
- desc_path: T.Optional[str] = None,
570
- num_processes: T.Optional[int] = None,
571
- ) -> T.List[types.MetadataOrError]:
334
+ desc_path: str | None = None,
335
+ num_processes: int | None = None,
336
+ ) -> list[types.MetadataOrError]:
337
+ image_metadatas: list[types.ImageMetadata] = []
338
+ video_metadatas: list[types.VideoMetadata] = []
339
+
572
340
  for metadata in metadatas:
573
341
  if isinstance(metadata, types.VideoMetadata):
574
- if device_make is not None:
575
- metadata.make = device_make
576
- if device_model is not None:
577
- metadata.model = device_model
342
+ video_metadatas.append(metadata)
578
343
  elif isinstance(metadata, types.ImageMetadata):
579
- if device_make is not None:
580
- metadata.MAPDeviceMake = device_make
581
- if device_model is not None:
582
- metadata.MAPDeviceModel = device_model
344
+ image_metadatas.append(metadata)
345
+
346
+ for metadata in video_metadatas:
347
+ if device_make is not None:
348
+ metadata.make = device_make
349
+ if device_model is not None:
350
+ metadata.model = device_model
351
+
352
+ for metadata in image_metadatas:
353
+ if device_make is not None:
354
+ metadata.MAPDeviceMake = device_make
355
+ if device_model is not None:
356
+ metadata.MAPDeviceModel = device_model
357
+ # Add the basename
358
+ metadata.MAPFilename = metadata.filename.name
583
359
 
584
360
  # modified in place
585
361
  _apply_offsets(
586
- [
587
- metadata
588
- for metadata in metadatas
589
- if isinstance(metadata, types.ImageMetadata)
590
- ],
362
+ image_metadatas,
591
363
  offset_time=offset_time,
592
364
  offset_angle=offset_angle,
593
365
  )
594
366
 
595
- LOG.debug("Validating %d metadatas", len(metadatas))
596
- metadatas = _validate_metadatas(metadatas, num_processes)
367
+ metadatas = _validate_metadatas(metadatas, num_processes=num_processes)
597
368
 
598
- LOG.info("Checking upload status for %d metadatas", len(metadatas))
599
- metadatas = _check_upload_status(metadatas)
369
+ # image_metadatas and video_metadatas get stale after the validation,
370
+ # hence delete them to avoid confusion
371
+ del image_metadatas
372
+ del video_metadatas
600
373
 
601
374
  _overwrite_exif_tags(
602
- # search image metadatas again because some of them might have been failed
375
+ # Search image metadatas again because some of them might have been failed
603
376
  [
604
377
  metadata
605
378
  for metadata in metadatas
@@ -637,16 +410,13 @@ def process_finalize(
637
410
  # write descs first because _show_stats() may raise an exception
638
411
  _write_metadatas(metadatas, desc_path)
639
412
 
640
- # show stats
641
- skipped_process_errors: T.Set[T.Type[Exception]]
413
+ # Show stats
414
+ skipped_process_errors: set[T.Type[Exception]]
642
415
  if skip_process_errors:
643
- # skip all exceptions
416
+ # Skip all exceptions
644
417
  skipped_process_errors = {Exception}
645
418
  else:
646
- skipped_process_errors = {
647
- exceptions.MapillaryDuplicationError,
648
- exceptions.MapillaryUploadedAlreadyError,
649
- }
419
+ skipped_process_errors = {exceptions.MapillaryDuplicationError}
650
420
  _show_stats(metadatas, skipped_process_errors=skipped_process_errors)
651
421
 
652
422
  return metadatas