mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +237 -16
  3. mapillary_tools/authenticate.py +325 -64
  4. mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +12 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +18 -9
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +31 -13
  15. mapillary_tools/constants.py +47 -6
  16. mapillary_tools/exceptions.py +34 -35
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +7 -7
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +46 -33
  21. mapillary_tools/exiftool_runner.py +77 -0
  22. mapillary_tools/ffmpeg.py +24 -23
  23. mapillary_tools/geo.py +144 -120
  24. mapillary_tools/geotag/base.py +147 -0
  25. mapillary_tools/geotag/factory.py +291 -0
  26. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  27. mapillary_tools/geotag/geotag_images_from_exiftool.py +126 -82
  28. mapillary_tools/geotag/geotag_images_from_gpx.py +53 -118
  29. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  30. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  31. mapillary_tools/geotag/geotag_images_from_video.py +53 -51
  32. mapillary_tools/geotag/geotag_videos_from_exiftool.py +97 -0
  33. mapillary_tools/geotag/geotag_videos_from_gpx.py +39 -0
  34. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  35. mapillary_tools/geotag/image_extractors/base.py +18 -0
  36. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  37. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  38. mapillary_tools/geotag/options.py +160 -0
  39. mapillary_tools/geotag/utils.py +52 -16
  40. mapillary_tools/geotag/video_extractors/base.py +18 -0
  41. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  42. mapillary_tools/{video_data_extraction/extractors/gpx_parser.py → geotag/video_extractors/gpx.py} +57 -39
  43. mapillary_tools/geotag/video_extractors/native.py +157 -0
  44. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  45. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  46. mapillary_tools/history.py +7 -13
  47. mapillary_tools/mp4/construct_mp4_parser.py +9 -8
  48. mapillary_tools/mp4/io_utils.py +0 -1
  49. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  50. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  51. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  52. mapillary_tools/process_geotag_properties.py +155 -392
  53. mapillary_tools/process_sequence_properties.py +562 -208
  54. mapillary_tools/sample_video.py +13 -20
  55. mapillary_tools/telemetry.py +26 -13
  56. mapillary_tools/types.py +111 -58
  57. mapillary_tools/upload.py +316 -298
  58. mapillary_tools/upload_api_v4.py +55 -122
  59. mapillary_tools/uploader.py +396 -254
  60. mapillary_tools/utils.py +42 -18
  61. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/METADATA +3 -2
  62. mapillary_tools-0.14.0a2.dist-info/RECORD +72 -0
  63. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/WHEEL +1 -1
  64. mapillary_tools/geotag/__init__.py +0 -1
  65. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  66. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  67. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  68. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  69. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  70. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  71. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  72. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  73. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  74. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  75. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  76. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  77. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  78. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  79. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  80. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  81. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/entry_points.txt +0 -0
  82. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info/licenses}/LICENSE +0 -0
  83. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/top_level.txt +0 -0
@@ -1,200 +1,91 @@
1
+ from __future__ import annotations
2
+
1
3
  import collections
2
4
  import datetime
3
- import itertools
4
5
  import json
5
6
  import logging
6
7
  import typing as T
7
- from multiprocessing import Pool
8
8
  from pathlib import Path
9
9
 
10
10
  from tqdm import tqdm
11
11
 
12
- from . import constants, exceptions, exif_write, history, types, utils
13
- from .geotag import (
14
- geotag_from_generic,
15
- geotag_images_from_exif,
16
- geotag_images_from_exiftool_both_image_and_video,
17
- geotag_images_from_gpx_file,
18
- geotag_images_from_nmea_file,
19
- geotag_images_from_video,
20
- geotag_videos_from_exiftool_video,
21
- geotag_videos_from_video,
12
+ from . import constants, exceptions, exif_write, types, utils
13
+ from .geotag.factory import parse_source_option, process
14
+ from .geotag.options import (
15
+ InterpolationOption,
16
+ SourceOption,
17
+ SourcePathOption,
18
+ SourceType,
22
19
  )
23
- from .types import FileType, VideoMetadataOrError
24
-
25
- from .video_data_extraction.cli_options import CliOptions, CliParserOptions
26
- from .video_data_extraction.extract_video_data import VideoDataExtractor
27
-
28
20
 
29
21
  LOG = logging.getLogger(__name__)
30
-
31
-
32
- GeotagSource = T.Literal[
33
- "gopro_videos", "blackvue_videos", "camm", "exif", "gpx", "nmea", "exiftool"
34
- ]
35
-
36
- VideoGeotagSource = T.Literal[
37
- "video",
38
- "camm",
39
- "gopro",
40
- "blackvue",
41
- "gpx",
42
- "nmea",
43
- "exiftool_xml",
44
- "exiftool_runtime",
22
+ DEFAULT_GEOTAG_SOURCE_OPTIONS = [
23
+ SourceType.NATIVE.value,
24
+ SourceType.EXIFTOOL_RUNTIME.value,
45
25
  ]
46
26
 
47
27
 
48
- def _process_images(
49
- image_paths: T.Sequence[Path],
50
- geotag_source: GeotagSource,
51
- geotag_source_path: T.Optional[Path] = None,
52
- video_import_path: T.Optional[Path] = None,
53
- interpolation_use_gpx_start_time: bool = False,
54
- interpolation_offset_time: float = 0.0,
55
- num_processes: T.Optional[int] = None,
56
- skip_subfolders=False,
57
- ) -> T.Sequence[types.ImageMetadataOrError]:
58
- geotag: geotag_from_generic.GeotagImagesFromGeneric
59
-
60
- if video_import_path is not None:
61
- # commands that trigger this branch:
62
- # video_process video_import_path image_paths --geotag_source gpx --geotag_source_path <gpx_file> --skip_subfolders
63
- image_paths = list(
64
- utils.filter_video_samples(
65
- image_paths, video_import_path, skip_subfolders=skip_subfolders
66
- )
67
- )
68
-
69
- if geotag_source == "exif":
70
- geotag = geotag_images_from_exif.GeotagImagesFromEXIF(
71
- image_paths, num_processes=num_processes
72
- )
73
-
28
+ def _normalize_import_paths(import_path: Path | T.Sequence[Path]) -> T.Sequence[Path]:
29
+ import_paths: T.Sequence[Path]
30
+ if isinstance(import_path, Path):
31
+ import_paths = [import_path]
74
32
  else:
75
- if geotag_source_path is None:
76
- geotag_source_path = video_import_path
77
- if geotag_source_path is None:
78
- raise exceptions.MapillaryFileNotFoundError(
79
- "Geotag source path (--geotag_source_path) is required"
80
- )
81
- if geotag_source == "exiftool":
82
- if not geotag_source_path.exists():
83
- raise exceptions.MapillaryFileNotFoundError(
84
- f"Geotag source file not found: {geotag_source_path}"
85
- )
86
- else:
87
- if not geotag_source_path.is_file():
88
- raise exceptions.MapillaryFileNotFoundError(
89
- f"Geotag source file not found: {geotag_source_path}"
90
- )
33
+ import_paths = import_path
34
+ import_paths = list(utils.deduplicate_paths(import_paths))
35
+ return import_paths
91
36
 
92
- if geotag_source == "gpx":
93
- geotag = geotag_images_from_gpx_file.GeotagImagesFromGPXFile(
94
- image_paths,
95
- geotag_source_path,
96
- use_gpx_start_time=interpolation_use_gpx_start_time,
97
- offset_time=interpolation_offset_time,
98
- num_processes=num_processes,
99
- )
100
- elif geotag_source == "nmea":
101
- geotag = geotag_images_from_nmea_file.GeotagImagesFromNMEAFile(
102
- image_paths,
103
- geotag_source_path,
104
- use_gpx_start_time=interpolation_use_gpx_start_time,
105
- offset_time=interpolation_offset_time,
106
- num_processes=num_processes,
107
- )
108
- elif geotag_source in ["gopro_videos", "blackvue_videos", "camm"]:
109
- map_geotag_source_to_filetype: T.Dict[GeotagSource, FileType] = {
110
- "gopro_videos": FileType.GOPRO,
111
- "blackvue_videos": FileType.BLACKVUE,
112
- "camm": FileType.CAMM,
113
- }
114
- video_paths = utils.find_videos([geotag_source_path])
115
- image_samples_by_video_path = utils.find_all_image_samples(
116
- image_paths, video_paths
117
- )
118
- video_paths_with_image_samples = list(image_samples_by_video_path.keys())
119
- video_metadatas = geotag_videos_from_video.GeotagVideosFromVideo(
120
- video_paths_with_image_samples,
121
- filetypes={map_geotag_source_to_filetype[geotag_source]},
122
- num_processes=num_processes,
123
- ).to_description()
124
- geotag = geotag_images_from_video.GeotagImagesFromVideo(
125
- image_paths,
126
- video_metadatas,
127
- offset_time=interpolation_offset_time,
128
- num_processes=num_processes,
129
- )
130
- elif geotag_source == "exiftool":
131
- geotag = geotag_images_from_exiftool_both_image_and_video.GeotagImagesFromExifToolBothImageAndVideo(
132
- image_paths,
133
- geotag_source_path,
134
- )
135
- else:
136
- raise RuntimeError(f"Invalid geotag source {geotag_source}")
137
37
 
138
- return geotag.to_description()
38
+ def _parse_source_options(
39
+ geotag_source: list[str],
40
+ video_geotag_source: list[str],
41
+ geotag_source_path: Path | None,
42
+ ) -> list[SourceOption]:
43
+ parsed_options: list[SourceOption] = []
139
44
 
45
+ for s in geotag_source:
46
+ parsed_options.extend(parse_source_option(s))
140
47
 
141
- def _process_videos(
142
- geotag_source: str,
143
- geotag_source_path: T.Optional[Path],
144
- video_paths: T.Sequence[Path],
145
- num_processes: T.Optional[int],
146
- filetypes: T.Optional[T.Set[FileType]],
147
- ) -> T.Sequence[VideoMetadataOrError]:
148
- geotag: geotag_from_generic.GeotagVideosFromGeneric
149
- if geotag_source == "exiftool":
150
- if geotag_source_path is None:
151
- raise exceptions.MapillaryFileNotFoundError(
152
- "Geotag source path (--geotag_source_path) is required"
48
+ for s in video_geotag_source:
49
+ for video_option in parse_source_option(s):
50
+ video_option.filetypes = types.combine_filetype_filters(
51
+ video_option.filetypes, {types.FileType.VIDEO}
153
52
  )
154
- if not geotag_source_path.exists():
155
- raise exceptions.MapillaryFileNotFoundError(
156
- f"Geotag source file not found: {geotag_source_path}"
157
- )
158
- geotag = geotag_videos_from_exiftool_video.GeotagVideosFromExifToolVideo(
159
- video_paths,
160
- geotag_source_path,
161
- num_processes=num_processes,
162
- )
163
- else:
164
- geotag = geotag_videos_from_video.GeotagVideosFromVideo(
165
- video_paths,
166
- filetypes=filetypes,
167
- num_processes=num_processes,
168
- )
169
- return geotag.to_description()
53
+ parsed_options.append(video_option)
170
54
 
55
+ if geotag_source_path is not None:
56
+ for parsed_option in parsed_options:
57
+ if parsed_option.source_path is None:
58
+ parsed_option.source_path = SourcePathOption(
59
+ source_path=Path(geotag_source_path)
60
+ )
61
+ else:
62
+ source_path_option = parsed_option.source_path
63
+ if source_path_option.source_path is None:
64
+ source_path_option.source_path = Path(geotag_source_path)
65
+ else:
66
+ LOG.warning(
67
+ "The option --geotag_source_path is ignored for source %s",
68
+ parsed_option,
69
+ )
171
70
 
172
- def _normalize_import_paths(
173
- import_path: T.Union[Path, T.Sequence[Path]],
174
- ) -> T.Sequence[Path]:
175
- import_paths: T.Sequence[Path]
176
- if isinstance(import_path, Path):
177
- import_paths = [import_path]
178
- else:
179
- import_paths = import_path
180
- import_paths = list(utils.deduplicate_paths(import_paths))
181
- return import_paths
71
+ return parsed_options
182
72
 
183
73
 
184
74
  def process_geotag_properties(
185
- vars_args: T.Dict, # Hello, I'm a hack
186
- import_path: T.Union[Path, T.Sequence[Path]],
187
- filetypes: T.Set[FileType],
188
- geotag_source: GeotagSource,
189
- geotag_source_path: T.Optional[Path] = None,
75
+ import_path: Path | T.Sequence[Path],
76
+ filetypes: set[types.FileType] | None,
77
+ # Geotag options
78
+ geotag_source: list[str],
79
+ geotag_source_path: Path | None,
80
+ video_geotag_source: list[str],
81
+ # Global options
190
82
  # video_import_path comes from the command video_process
191
- video_import_path: T.Optional[Path] = None,
83
+ video_import_path: Path | None = None,
192
84
  interpolation_use_gpx_start_time: bool = False,
193
85
  interpolation_offset_time: float = 0.0,
86
+ num_processes: int | None = None,
194
87
  skip_subfolders=False,
195
- num_processes: T.Optional[int] = None,
196
- ) -> T.List[types.MetadataOrError]:
197
- filetypes = set(FileType(f) for f in filetypes)
88
+ ) -> list[types.MetadataOrError]:
198
89
  import_paths = _normalize_import_paths(import_path)
199
90
 
200
91
  # Check and fail early
@@ -204,84 +95,34 @@ def process_geotag_properties(
204
95
  f"Import file or directory not found: {path}"
205
96
  )
206
97
 
207
- metadatas: T.List[types.MetadataOrError] = []
208
-
209
- if FileType.IMAGE in filetypes:
210
- image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
211
- if image_paths:
212
- image_metadatas = _process_images(
213
- image_paths,
214
- geotag_source=geotag_source,
215
- geotag_source_path=geotag_source_path,
216
- video_import_path=video_import_path,
217
- interpolation_use_gpx_start_time=interpolation_use_gpx_start_time,
218
- interpolation_offset_time=interpolation_offset_time,
219
- num_processes=num_processes,
220
- skip_subfolders=skip_subfolders,
221
- )
222
- metadatas.extend(image_metadatas)
98
+ if geotag_source_path is None:
99
+ geotag_source_path = video_import_path
223
100
 
224
- # --video_geotag_source is still experimental, for videos execute it XOR the legacy code
225
- if vars_args["video_geotag_source"]:
226
- metadatas.extend(_process_videos_beta(vars_args))
227
- else:
228
- if (
229
- FileType.CAMM in filetypes
230
- or FileType.GOPRO in filetypes
231
- or FileType.BLACKVUE in filetypes
232
- or FileType.VIDEO in filetypes
233
- ):
234
- video_paths = utils.find_videos(
235
- import_paths, skip_subfolders=skip_subfolders
236
- )
237
- if video_paths:
238
- video_metadata = _process_videos(
239
- geotag_source,
240
- geotag_source_path,
241
- video_paths,
242
- num_processes,
243
- filetypes,
244
- )
245
- metadatas.extend(video_metadata)
101
+ if not geotag_source and not video_geotag_source:
102
+ geotag_source = [*DEFAULT_GEOTAG_SOURCE_OPTIONS]
246
103
 
247
- # filenames should be deduplicated in utils.find_images/utils.find_videos
248
- assert len(metadatas) == len(set(metadata.filename for metadata in metadatas)), (
249
- "duplicate filenames found"
104
+ options = _parse_source_options(
105
+ geotag_source=geotag_source or [],
106
+ video_geotag_source=video_geotag_source or [],
107
+ geotag_source_path=geotag_source_path,
250
108
  )
251
109
 
252
- return metadatas
253
-
254
-
255
- def _process_videos_beta(vars_args: T.Dict):
256
- geotag_sources = vars_args["video_geotag_source"]
257
- geotag_sources_opts: T.List[CliParserOptions] = []
258
- for source in geotag_sources:
259
- parsed_opts: CliParserOptions = {}
260
- try:
261
- parsed_opts = json.loads(source)
262
- except ValueError:
263
- if source not in T.get_args(VideoGeotagSource):
264
- raise exceptions.MapillaryBadParameterError(
265
- "Unknown beta source %s or invalid JSON", source
266
- )
267
- parsed_opts = {"source": source}
110
+ for option in options:
111
+ option.filetypes = types.combine_filetype_filters(option.filetypes, filetypes)
112
+ option.num_processes = num_processes
113
+ if option.interpolation is None:
114
+ option.interpolation = InterpolationOption(
115
+ offset_time=interpolation_offset_time,
116
+ use_gpx_start_time=interpolation_use_gpx_start_time,
117
+ )
268
118
 
269
- if "source" not in parsed_opts:
270
- raise exceptions.MapillaryBadParameterError("Missing beta source name")
119
+ # TODO: can find both in one pass
120
+ image_paths = utils.find_images(import_paths, skip_subfolders=skip_subfolders)
121
+ video_paths = utils.find_videos(import_paths, skip_subfolders=skip_subfolders)
271
122
 
272
- geotag_sources_opts.append(parsed_opts)
123
+ metadata_or_errors = process(image_paths + video_paths, options)
273
124
 
274
- options: CliOptions = {
275
- "paths": vars_args["import_path"],
276
- "recursive": vars_args["skip_subfolders"] is False,
277
- "geotag_sources_options": geotag_sources_opts,
278
- "geotag_source_path": vars_args["geotag_source_path"],
279
- "num_processes": vars_args["num_processes"],
280
- "device_make": vars_args["device_make"],
281
- "device_model": vars_args["device_model"],
282
- }
283
- extractor = VideoDataExtractor(options)
284
- return extractor.process()
125
+ return metadata_or_errors
285
126
 
286
127
 
287
128
  def _apply_offsets(
@@ -324,7 +165,7 @@ def _overwrite_exif_tags(
324
165
  unit="images",
325
166
  disable=LOG.getEffectiveLevel() <= logging.DEBUG,
326
167
  ):
327
- dt = datetime.datetime.utcfromtimestamp(metadata.time)
168
+ dt = datetime.datetime.fromtimestamp(metadata.time, datetime.timezone.utc)
328
169
  dt = dt.replace(tzinfo=datetime.timezone.utc)
329
170
 
330
171
  try:
@@ -368,9 +209,7 @@ def _write_metadatas(
368
209
  LOG.info("Check the description file for details: %s", desc_path)
369
210
 
370
211
 
371
- def _is_error_skipped(
372
- error_type: str, skipped_process_errors: T.Set[T.Type[Exception]]
373
- ):
212
+ def _is_error_skipped(error_type: str, skipped_process_errors: set[T.Type[Exception]]):
374
213
  skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
375
214
  skip_all = Exception in skipped_process_errors
376
215
  return skip_all or error_type in skipped_process_error_names
@@ -378,17 +217,15 @@ def _is_error_skipped(
378
217
 
379
218
  def _show_stats(
380
219
  metadatas: T.Sequence[types.MetadataOrError],
381
- skipped_process_errors: T.Set[T.Type[Exception]],
220
+ skipped_process_errors: set[T.Type[Exception]],
382
221
  ) -> None:
383
- metadatas_by_filetype: T.Dict[FileType, T.List[types.MetadataOrError]] = {}
222
+ metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
384
223
  for metadata in metadatas:
385
- filetype: T.Optional[FileType]
386
224
  if isinstance(metadata, types.ImageMetadata):
387
- filetype = FileType.IMAGE
225
+ filetype = types.FileType.IMAGE
388
226
  else:
389
227
  filetype = metadata.filetype
390
- if filetype:
391
- metadatas_by_filetype.setdefault(FileType(filetype), []).append(metadata)
228
+ metadatas_by_filetype.setdefault(filetype, []).append(metadata)
392
229
 
393
230
  for filetype, group in metadatas_by_filetype.items():
394
231
  _show_stats_per_filetype(group, filetype, skipped_process_errors)
@@ -408,19 +245,16 @@ def _show_stats(
408
245
 
409
246
 
410
247
  def _show_stats_per_filetype(
411
- metadatas: T.Sequence[types.MetadataOrError],
412
- filetype: FileType,
413
- skipped_process_errors: T.Set[T.Type[Exception]],
248
+ metadatas: T.Collection[types.MetadataOrError],
249
+ filetype: types.FileType,
250
+ skipped_process_errors: set[T.Type[Exception]],
414
251
  ):
415
- good_metadatas: T.List[T.Union[types.VideoMetadata, types.ImageMetadata]] = []
416
- filesize_to_upload = 0
417
- error_metadatas: T.List[types.ErrorMetadata] = []
418
- for metadata in metadatas:
419
- if isinstance(metadata, types.ErrorMetadata):
420
- error_metadatas.append(metadata)
421
- else:
422
- good_metadatas.append(metadata)
423
- filesize_to_upload += metadata.filesize or 0
252
+ good_metadatas: list[types.Metadata]
253
+ good_metadatas, error_metadatas = types.separate_errors(metadatas)
254
+
255
+ filesize_to_upload = sum(
256
+ [0 if m.filesize is None else m.filesize for m in good_metadatas]
257
+ )
424
258
 
425
259
  LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
426
260
  if good_metadatas:
@@ -446,119 +280,43 @@ def _show_stats_per_filetype(
446
280
  )
447
281
 
448
282
 
449
- _IT = T.TypeVar("_IT")
450
-
451
-
452
- def split_if(
453
- it: T.Iterable[_IT], sep: T.Callable[[_IT], bool]
454
- ) -> T.Tuple[T.List[_IT], T.List[_IT]]:
455
- yes, no = [], []
456
- for e in it:
457
- if sep(e):
458
- yes.append(e)
459
- else:
460
- no.append(e)
461
- return yes, no
283
+ def _validate_metadatas(
284
+ metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
285
+ ) -> list[types.MetadataOrError]:
286
+ LOG.debug("Validating %d metadatas", len(metadatas))
462
287
 
288
+ # validating metadatas is slow, hence multiprocessing
463
289
 
464
- def _check_upload_status(
465
- metadatas: T.Sequence[types.MetadataOrError],
466
- ) -> T.List[types.MetadataOrError]:
467
- groups = types.group_and_sort_images(
468
- [
469
- metadata
470
- for metadata in metadatas
471
- if isinstance(metadata, types.ImageMetadata)
472
- ]
290
+ # Do not pass error metadatas where the error object can not be pickled for multiprocessing to work
291
+ # Otherwise we get:
292
+ # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time'
293
+ # See https://stackoverflow.com/a/61432070
294
+ good_metadatas, error_metadatas = types.separate_errors(metadatas)
295
+ map_results = utils.mp_map_maybe(
296
+ types.validate_and_fail_metadata,
297
+ T.cast(T.Iterable[types.Metadata], good_metadatas),
298
+ num_processes=num_processes,
473
299
  )
474
- uploaded_sequence_uuids = set()
475
- for sequence_uuid, group in groups.items():
476
- for m in group:
477
- m.update_md5sum()
478
- sequence_md5sum = types.sequence_md5sum(group)
479
- if history.is_uploaded(sequence_md5sum):
480
- uploaded_sequence_uuids.add(sequence_uuid)
481
-
482
- output: T.List[types.MetadataOrError] = []
483
- for metadata in metadatas:
484
- if isinstance(metadata, types.ImageMetadata):
485
- if metadata.MAPSequenceUUID in uploaded_sequence_uuids:
486
- output.append(
487
- types.describe_error_metadata(
488
- exceptions.MapillaryUploadedAlreadyError(
489
- "The image was already uploaded",
490
- types.as_desc(metadata),
491
- ),
492
- filename=metadata.filename,
493
- filetype=types.FileType.IMAGE,
494
- )
495
- )
496
- else:
497
- output.append(metadata)
498
- elif isinstance(metadata, types.VideoMetadata):
499
- metadata.update_md5sum()
500
- assert isinstance(metadata.md5sum, str)
501
- if history.is_uploaded(metadata.md5sum):
502
- output.append(
503
- types.describe_error_metadata(
504
- exceptions.MapillaryUploadedAlreadyError(
505
- "The video was already uploaded",
506
- types.as_desc(metadata),
507
- ),
508
- filename=metadata.filename,
509
- filetype=metadata.filetype,
510
- )
511
- )
512
- else:
513
- output.append(metadata)
514
- else:
515
- output.append(metadata)
516
- assert len(output) == len(metadatas), "length mismatch"
517
- return output
518
300
 
519
-
520
- def _validate_metadatas(
521
- metadatas: T.Sequence[types.MetadataOrError], num_processes: T.Optional[int]
522
- ) -> T.List[types.MetadataOrError]:
523
- # validating metadatas is slow, hence multiprocessing
524
- if num_processes is None:
525
- pool_num_processes = None
526
- disable_multiprocessing = False
527
- else:
528
- pool_num_processes = max(num_processes, 1)
529
- disable_multiprocessing = num_processes <= 0
530
- with Pool(processes=pool_num_processes) as pool:
531
- validated_metadatas_iter: T.Iterator[types.MetadataOrError]
532
- if disable_multiprocessing:
533
- validated_metadatas_iter = map(types.validate_and_fail_metadata, metadatas)
534
- else:
535
- # Do not pass error metadatas where the error object can not be pickled for multiprocessing to work
536
- # Otherwise we get:
537
- # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time'
538
- # See https://stackoverflow.com/a/61432070
539
- yes, no = split_if(metadatas, lambda m: isinstance(m, types.ErrorMetadata))
540
- no_iter = pool.imap(
541
- types.validate_and_fail_metadata,
542
- no,
543
- )
544
- validated_metadatas_iter = itertools.chain(yes, no_iter)
545
- return list(
546
- tqdm(
547
- validated_metadatas_iter,
548
- desc="Validating metadatas",
549
- unit="metadata",
550
- disable=LOG.getEffectiveLevel() <= logging.DEBUG,
551
- total=len(metadatas),
552
- )
301
+ validated_metadatas = list(
302
+ tqdm(
303
+ map_results,
304
+ desc="Validating metadatas",
305
+ unit="metadata",
306
+ disable=LOG.getEffectiveLevel() <= logging.DEBUG,
307
+ total=len(good_metadatas),
553
308
  )
309
+ )
310
+
311
+ return validated_metadatas + error_metadatas
554
312
 
555
313
 
556
314
  def process_finalize(
557
- import_path: T.Union[T.Sequence[Path], Path],
558
- metadatas: T.List[types.MetadataOrError],
315
+ import_path: T.Sequence[Path] | Path,
316
+ metadatas: list[types.MetadataOrError],
559
317
  skip_process_errors: bool = False,
560
- device_make: T.Optional[str] = None,
561
- device_model: T.Optional[str] = None,
318
+ device_make: str | None = None,
319
+ device_model: str | None = None,
562
320
  overwrite_all_EXIF_tags: bool = False,
563
321
  overwrite_EXIF_time_tag: bool = False,
564
322
  overwrite_EXIF_gps_tag: bool = False,
@@ -566,40 +324,48 @@ def process_finalize(
566
324
  overwrite_EXIF_orientation_tag: bool = False,
567
325
  offset_time: float = 0.0,
568
326
  offset_angle: float = 0.0,
569
- desc_path: T.Optional[str] = None,
570
- num_processes: T.Optional[int] = None,
571
- ) -> T.List[types.MetadataOrError]:
327
+ desc_path: str | None = None,
328
+ num_processes: int | None = None,
329
+ ) -> list[types.MetadataOrError]:
330
+ image_metadatas: list[types.ImageMetadata] = []
331
+ video_metadatas: list[types.VideoMetadata] = []
332
+
572
333
  for metadata in metadatas:
573
334
  if isinstance(metadata, types.VideoMetadata):
574
- if device_make is not None:
575
- metadata.make = device_make
576
- if device_model is not None:
577
- metadata.model = device_model
335
+ video_metadatas.append(metadata)
578
336
  elif isinstance(metadata, types.ImageMetadata):
579
- if device_make is not None:
580
- metadata.MAPDeviceMake = device_make
581
- if device_model is not None:
582
- metadata.MAPDeviceModel = device_model
337
+ image_metadatas.append(metadata)
338
+
339
+ for metadata in video_metadatas:
340
+ if device_make is not None:
341
+ metadata.make = device_make
342
+ if device_model is not None:
343
+ metadata.model = device_model
344
+
345
+ for metadata in image_metadatas:
346
+ if device_make is not None:
347
+ metadata.MAPDeviceMake = device_make
348
+ if device_model is not None:
349
+ metadata.MAPDeviceModel = device_model
350
+ # Add the basename
351
+ metadata.MAPFilename = metadata.filename.name
583
352
 
584
353
  # modified in place
585
354
  _apply_offsets(
586
- [
587
- metadata
588
- for metadata in metadatas
589
- if isinstance(metadata, types.ImageMetadata)
590
- ],
355
+ image_metadatas,
591
356
  offset_time=offset_time,
592
357
  offset_angle=offset_angle,
593
358
  )
594
359
 
595
- LOG.debug("Validating %d metadatas", len(metadatas))
596
- metadatas = _validate_metadatas(metadatas, num_processes)
360
+ metadatas = _validate_metadatas(metadatas, num_processes=num_processes)
597
361
 
598
- LOG.info("Checking upload status for %d metadatas", len(metadatas))
599
- metadatas = _check_upload_status(metadatas)
362
+ # image_metadatas and video_metadatas get stale after the validation,
363
+ # hence delete them to avoid confusion
364
+ del image_metadatas
365
+ del video_metadatas
600
366
 
601
367
  _overwrite_exif_tags(
602
- # search image metadatas again because some of them might have been failed
368
+ # Search image metadatas again because some of them might have been failed
603
369
  [
604
370
  metadata
605
371
  for metadata in metadatas
@@ -637,16 +403,13 @@ def process_finalize(
637
403
  # write descs first because _show_stats() may raise an exception
638
404
  _write_metadatas(metadatas, desc_path)
639
405
 
640
- # show stats
641
- skipped_process_errors: T.Set[T.Type[Exception]]
406
+ # Show stats
407
+ skipped_process_errors: set[T.Type[Exception]]
642
408
  if skip_process_errors:
643
- # skip all exceptions
409
+ # Skip all exceptions
644
410
  skipped_process_errors = {Exception}
645
411
  else:
646
- skipped_process_errors = {
647
- exceptions.MapillaryDuplicationError,
648
- exceptions.MapillaryUploadedAlreadyError,
649
- }
412
+ skipped_process_errors = {exceptions.MapillaryDuplicationError}
650
413
  _show_stats(metadatas, skipped_process_errors=skipped_process_errors)
651
414
 
652
415
  return metadatas