mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +237 -16
  3. mapillary_tools/authenticate.py +325 -64
  4. mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +12 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +18 -9
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +31 -13
  15. mapillary_tools/constants.py +47 -6
  16. mapillary_tools/exceptions.py +34 -35
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +7 -7
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +46 -33
  21. mapillary_tools/exiftool_runner.py +77 -0
  22. mapillary_tools/ffmpeg.py +24 -23
  23. mapillary_tools/geo.py +144 -120
  24. mapillary_tools/geotag/base.py +147 -0
  25. mapillary_tools/geotag/factory.py +291 -0
  26. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  27. mapillary_tools/geotag/geotag_images_from_exiftool.py +126 -82
  28. mapillary_tools/geotag/geotag_images_from_gpx.py +53 -118
  29. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  30. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  31. mapillary_tools/geotag/geotag_images_from_video.py +53 -51
  32. mapillary_tools/geotag/geotag_videos_from_exiftool.py +97 -0
  33. mapillary_tools/geotag/geotag_videos_from_gpx.py +39 -0
  34. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  35. mapillary_tools/geotag/image_extractors/base.py +18 -0
  36. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  37. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  38. mapillary_tools/geotag/options.py +160 -0
  39. mapillary_tools/geotag/utils.py +52 -16
  40. mapillary_tools/geotag/video_extractors/base.py +18 -0
  41. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  42. mapillary_tools/{video_data_extraction/extractors/gpx_parser.py → geotag/video_extractors/gpx.py} +57 -39
  43. mapillary_tools/geotag/video_extractors/native.py +157 -0
  44. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  45. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  46. mapillary_tools/history.py +7 -13
  47. mapillary_tools/mp4/construct_mp4_parser.py +9 -8
  48. mapillary_tools/mp4/io_utils.py +0 -1
  49. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  50. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  51. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  52. mapillary_tools/process_geotag_properties.py +155 -392
  53. mapillary_tools/process_sequence_properties.py +562 -208
  54. mapillary_tools/sample_video.py +13 -20
  55. mapillary_tools/telemetry.py +26 -13
  56. mapillary_tools/types.py +111 -58
  57. mapillary_tools/upload.py +316 -298
  58. mapillary_tools/upload_api_v4.py +55 -122
  59. mapillary_tools/uploader.py +396 -254
  60. mapillary_tools/utils.py +42 -18
  61. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/METADATA +3 -2
  62. mapillary_tools-0.14.0a2.dist-info/RECORD +72 -0
  63. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/WHEEL +1 -1
  64. mapillary_tools/geotag/__init__.py +0 -1
  65. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  66. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  67. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  68. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  69. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  70. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  71. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  72. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  73. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  74. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  75. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  76. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  77. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  78. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  79. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  80. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  81. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/entry_points.txt +0 -0
  82. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info/licenses}/LICENSE +0 -0
  83. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/top_level.txt +0 -0
@@ -1,64 +1,96 @@
1
+ from __future__ import annotations
2
+
1
3
  import itertools
2
4
  import logging
3
5
  import math
4
6
  import os
5
7
  import typing as T
6
8
 
7
- from . import constants, geo, types
8
- from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError
9
+ from . import constants, exceptions, geo, types, utils
9
10
 
10
11
  LOG = logging.getLogger(__name__)
11
12
 
12
13
 
13
- Point = T.TypeVar("Point", bound=geo.Point)
14
- PointSequence = T.List[Point]
14
+ SeqItem = T.TypeVar("SeqItem")
15
+ PointSequence = T.List[geo.PointLike]
15
16
 
16
17
 
17
- def cut_sequence_by_time_distance(
18
- sequence: PointSequence,
19
- cutoff_distance: float,
20
- cutoff_time: float,
21
- ) -> T.List[PointSequence]:
22
- sequences: T.List[PointSequence] = []
18
+ def split_sequence_by(
19
+ sequence: T.Sequence[SeqItem],
20
+ should_split: T.Callable[[SeqItem, SeqItem], bool],
21
+ ) -> list[list[SeqItem]]:
22
+ """
23
+ Split a sequence into multiple sequences by should_split(prev, cur) => True
24
+ """
25
+ output_sequences: list[list[SeqItem]] = []
23
26
 
24
- if sequence:
25
- sequences.append([sequence[0]])
27
+ seq = iter(sequence)
28
+
29
+ prev = next(seq, None)
30
+ if prev is None:
31
+ return output_sequences
32
+
33
+ output_sequences.append([prev])
26
34
 
27
- for prev, cur in geo.pairwise(sequence):
35
+ for cur in seq:
28
36
  # invariant: prev is processed
29
- distance = geo.gps_distance(
30
- (prev.lat, prev.lon),
31
- (cur.lat, cur.lon),
32
- )
33
- if cutoff_distance <= distance:
34
- sequences.append([cur])
35
- continue
36
- time_diff = cur.time - prev.time
37
- assert 0 <= time_diff, "sequence must be sorted by capture times"
38
- if cutoff_time <= time_diff:
39
- sequences.append([cur])
40
- continue
41
- sequences[-1].append(cur)
37
+ if should_split(prev, cur):
38
+ output_sequences.append([cur])
39
+ else:
40
+ output_sequences[-1].append(cur)
41
+ prev = cur
42
42
  # invariant: cur is processed
43
43
 
44
- return sequences
44
+ assert sum(len(s) for s in output_sequences) == len(sequence)
45
+
46
+ return output_sequences
47
+
48
+
49
+ def split_sequence_by_agg(
50
+ sequence: T.Sequence[SeqItem],
51
+ should_split_with_sequence_state: T.Callable[[SeqItem, dict], bool],
52
+ ) -> list[list[SeqItem]]:
53
+ """
54
+ Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True
55
+ """
56
+ output_sequences: list[list[SeqItem]] = []
57
+ sequence_state: dict = {}
58
+
59
+ for cur in sequence:
60
+ start_new_sequence = should_split_with_sequence_state(cur, sequence_state)
61
+
62
+ if not output_sequences:
63
+ output_sequences.append([])
64
+
65
+ if start_new_sequence:
66
+ # DO NOT reset the state because it contains the information of current item
67
+ # sequence_state = {}
68
+ if output_sequences[-1]:
69
+ output_sequences.append([])
70
+
71
+ output_sequences[-1].append(cur)
72
+
73
+ assert sum(len(s) for s in output_sequences) == len(sequence)
74
+
75
+ return output_sequences
45
76
 
46
77
 
47
78
  def duplication_check(
48
79
  sequence: PointSequence,
49
- duplicate_distance: float,
50
- duplicate_angle: float,
51
- ) -> T.Tuple[PointSequence, T.List[types.ErrorMetadata]]:
80
+ max_duplicate_distance: float,
81
+ max_duplicate_angle: float,
82
+ ) -> tuple[PointSequence, list[types.ErrorMetadata]]:
52
83
  dedups: PointSequence = []
53
- dups: T.List[types.ErrorMetadata] = []
84
+ dups: list[types.ErrorMetadata] = []
54
85
 
55
- sequence_iter = iter(sequence)
56
- prev = next(sequence_iter)
86
+ it = iter(sequence)
87
+ prev = next(it)
57
88
  if prev is None:
58
89
  return dedups, dups
90
+
59
91
  dedups.append(prev)
60
92
 
61
- for cur in sequence_iter:
93
+ for cur in it:
62
94
  # invariant: prev is processed
63
95
  distance = geo.gps_distance(
64
96
  (prev.lat, prev.lon),
@@ -70,21 +102,21 @@ def duplication_check(
70
102
  else:
71
103
  angle_diff = None
72
104
 
73
- if distance <= duplicate_distance and (
74
- angle_diff is not None and angle_diff <= duplicate_angle
105
+ if distance <= max_duplicate_distance and (
106
+ angle_diff is None or angle_diff <= max_duplicate_angle
75
107
  ):
76
- dups.append(
77
- types.describe_error_metadata(
78
- MapillaryDuplicationError(
79
- f"Duplicate of its previous image in terms of distance <= {duplicate_distance} and angle <= {duplicate_angle}",
80
- types.as_desc(cur),
81
- distance=distance,
82
- angle_diff=angle_diff,
83
- ),
84
- cur.filename,
85
- filetype=types.FileType.IMAGE,
108
+ msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
109
+ dup = types.describe_error_metadata(
110
+ exceptions.MapillaryDuplicationError(
111
+ msg,
112
+ types.as_desc(cur),
113
+ distance=distance,
114
+ angle_diff=angle_diff,
86
115
  ),
116
+ cur.filename,
117
+ filetype=types.FileType.IMAGE,
87
118
  )
119
+ dups.append(dup)
88
120
  # prev does not change
89
121
  else:
90
122
  dedups.append(cur)
@@ -94,86 +126,14 @@ def duplication_check(
94
126
  return dedups, dups
95
127
 
96
128
 
97
- def cut_sequence(
98
- sequence: T.List[types.ImageMetadata],
99
- max_images: int,
100
- max_sequence_filesize: int,
101
- max_sequence_pixels: int,
102
- ) -> T.List[T.List[types.ImageMetadata]]:
103
- """
104
- Cut a sequence into multiple sequences by max_images or max filesize
105
- """
106
- sequences: T.List[T.List[types.ImageMetadata]] = []
107
- last_sequence_file_size = 0
108
- last_sequence_pixels = 0
109
-
110
- for image in sequence:
111
- # decent default values if width/height not available
112
- width = 1024 if image.width is None else image.width
113
- height = 1024 if image.height is None else image.height
114
-
115
- filesize = os.path.getsize(image.filename)
116
-
117
- if len(sequences) == 0:
118
- start_new_sequence = True
119
- else:
120
- if sequences[-1]:
121
- if max_images < len(sequences[-1]):
122
- LOG.debug(
123
- "Cut the sequence because the current sequence (%s) reaches the max number of images (%s)",
124
- len(sequences[-1]),
125
- max_images,
126
- )
127
- start_new_sequence = True
128
- elif max_sequence_filesize < last_sequence_file_size + filesize:
129
- LOG.debug(
130
- "Cut the sequence because the current sequence (%s) reaches the max filesize (%s)",
131
- last_sequence_file_size + filesize,
132
- max_sequence_filesize,
133
- )
134
- start_new_sequence = True
135
- elif max_sequence_pixels < last_sequence_pixels + width * height:
136
- LOG.debug(
137
- "Cut the sequence because the current sequence (%s) reaches the max pixels (%s)",
138
- last_sequence_pixels + width * height,
139
- max_sequence_pixels,
140
- )
141
- start_new_sequence = True
142
- else:
143
- start_new_sequence = False
144
- else:
145
- start_new_sequence = False
146
-
147
- if start_new_sequence:
148
- sequences.append([])
149
- last_sequence_file_size = 0
150
- last_sequence_pixels = 0
151
-
152
- sequences[-1].append(image)
153
- last_sequence_file_size += filesize
154
- last_sequence_pixels += width * height
155
-
156
- assert sum(len(s) for s in sequences) == len(sequence)
157
-
158
- return sequences
159
-
160
-
161
- def _group_sort_images_by_folder(
162
- image_metadatas: T.List[types.ImageMetadata],
163
- ) -> T.List[T.List[types.ImageMetadata]]:
164
- # group images by parent directory
165
- sequences_by_parent: T.Dict[str, T.List[types.ImageMetadata]] = {}
166
- for image_metadata in image_metadatas:
167
- filename = image_metadata.filename.resolve()
168
- sequences_by_parent.setdefault(str(filename.parent), []).append(image_metadata)
169
-
170
- sequences = list(sequences_by_parent.values())
171
- for sequence in sequences:
172
- sequence.sort(
173
- key=lambda metadata: metadata.sort_key(),
174
- )
175
-
176
- return sequences
129
+ def _group_by(
130
+ image_metadatas: T.Iterable[types.ImageMetadata],
131
+ group_key_func=T.Callable[[types.ImageMetadata], T.Hashable],
132
+ ) -> dict[T.Hashable, list[types.ImageMetadata]]:
133
+ grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
134
+ for metadata in image_metadatas:
135
+ grouped.setdefault(group_key_func(metadata), []).append(metadata)
136
+ return grouped
177
137
 
178
138
 
179
139
  def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
@@ -217,58 +177,426 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
217
177
  def _parse_filesize_in_bytes(filesize_str: str) -> int:
218
178
  filesize_str = filesize_str.strip().upper()
219
179
 
220
- if filesize_str.endswith("B"):
221
- return int(filesize_str[:-1])
222
- elif filesize_str.endswith("K"):
223
- return int(filesize_str[:-1]) * 1024
224
- elif filesize_str.endswith("M"):
225
- return int(filesize_str[:-1]) * 1024 * 1024
226
- elif filesize_str.endswith("G"):
227
- return int(filesize_str[:-1]) * 1024 * 1024 * 1024
228
- else:
229
- return int(filesize_str)
180
+ try:
181
+ if filesize_str.endswith("B"):
182
+ return int(filesize_str[:-1])
183
+ elif filesize_str.endswith("K"):
184
+ return int(filesize_str[:-1]) * 1024
185
+ elif filesize_str.endswith("M"):
186
+ return int(filesize_str[:-1]) * 1024 * 1024
187
+ elif filesize_str.endswith("G"):
188
+ return int(filesize_str[:-1]) * 1024 * 1024 * 1024
189
+ else:
190
+ return int(filesize_str)
191
+ except ValueError:
192
+ raise exceptions.MapillaryBadParameterError(
193
+ f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
194
+ )
230
195
 
231
196
 
232
197
  def _parse_pixels(pixels_str: str) -> int:
233
198
  pixels_str = pixels_str.strip().upper()
234
199
 
235
- if pixels_str.endswith("K"):
236
- return int(pixels_str[:-1]) * 1000
237
- elif pixels_str.endswith("M"):
238
- return int(pixels_str[:-1]) * 1000 * 1000
239
- elif pixels_str.endswith("G"):
240
- return int(pixels_str[:-1]) * 1000 * 1000 * 1000
200
+ try:
201
+ if pixels_str.endswith("K"):
202
+ return int(pixels_str[:-1]) * 1000
203
+ elif pixels_str.endswith("M"):
204
+ return int(pixels_str[:-1]) * 1000 * 1000
205
+ elif pixels_str.endswith("G"):
206
+ return int(pixels_str[:-1]) * 1000 * 1000 * 1000
207
+ else:
208
+ return int(pixels_str)
209
+ except ValueError:
210
+ raise exceptions.MapillaryBadParameterError(
211
+ f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
212
+ )
213
+
214
+
215
+ def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
216
+ total_distance = 0.0
217
+ for cur, nxt in geo.pairwise(sequence):
218
+ total_distance += geo.gps_distance(
219
+ (cur.lat, cur.lon),
220
+ (nxt.lat, nxt.lon),
221
+ )
222
+
223
+ if sequence:
224
+ time_diff = sequence[-1].time - sequence[0].time
241
225
  else:
242
- return int(pixels_str)
226
+ time_diff = 0.0
243
227
 
228
+ if time_diff == 0.0:
229
+ return float("inf")
244
230
 
245
- def process_sequence_properties(
246
- metadatas: T.Sequence[types.MetadataOrError],
247
- cutoff_distance=constants.CUTOFF_DISTANCE,
248
- cutoff_time=constants.CUTOFF_TIME,
249
- interpolate_directions=False,
250
- duplicate_distance=constants.DUPLICATE_DISTANCE,
251
- duplicate_angle=constants.DUPLICATE_ANGLE,
252
- ) -> T.List[types.MetadataOrError]:
253
- try:
254
- max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
255
- constants.MAX_SEQUENCE_FILESIZE
231
+ return total_distance / time_diff
232
+
233
+
234
+ def _is_video_stationary(
235
+ sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
236
+ ) -> bool:
237
+ if not sequence:
238
+ return 0.0 <= max_radius_in_meters
239
+
240
+ start = (sequence[0].lat, sequence[0].lon)
241
+ for p in sequence:
242
+ distance = geo.gps_distance(start, (p.lat, p.lon))
243
+ if distance > max_radius_in_meters:
244
+ return False
245
+
246
+ return True
247
+
248
+
249
+ def _check_video_limits(
250
+ video_metadatas: T.Iterable[types.VideoMetadata],
251
+ max_sequence_filesize_in_bytes: int,
252
+ max_avg_speed: float,
253
+ max_radius_for_stationary_check: float,
254
+ ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
255
+ output_video_metadatas: list[types.VideoMetadata] = []
256
+ error_metadatas: list[types.ErrorMetadata] = []
257
+
258
+ for video_metadata in video_metadatas:
259
+ try:
260
+ is_stationary = _is_video_stationary(
261
+ video_metadata.points,
262
+ max_radius_in_meters=max_radius_for_stationary_check,
263
+ )
264
+ if is_stationary:
265
+ raise exceptions.MapillaryStationaryVideoError("Stationary video")
266
+
267
+ video_filesize = (
268
+ utils.get_file_size(video_metadata.filename)
269
+ if video_metadata.filesize is None
270
+ else video_metadata.filesize
271
+ )
272
+ if video_filesize > max_sequence_filesize_in_bytes:
273
+ raise exceptions.MapillaryFileTooLargeError(
274
+ f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
275
+ )
276
+
277
+ contains_null_island = any(
278
+ p.lat == 0 and p.lon == 0 for p in video_metadata.points
279
+ )
280
+ if contains_null_island:
281
+ raise exceptions.MapillaryNullIslandError(
282
+ "Found GPS coordinates in Null Island (0, 0)",
283
+ )
284
+
285
+ too_fast = (
286
+ len(video_metadata.points) >= 2
287
+ and _avg_speed(video_metadata.points) > max_avg_speed
288
+ )
289
+ if too_fast:
290
+ raise exceptions.MapillaryCaptureSpeedTooFastError(
291
+ f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
292
+ )
293
+ except exceptions.MapillaryDescriptionError as ex:
294
+ error_metadatas.append(
295
+ types.describe_error_metadata(
296
+ exc=ex,
297
+ filename=video_metadata.filename,
298
+ filetype=video_metadata.filetype,
299
+ )
300
+ )
301
+ else:
302
+ output_video_metadatas.append(video_metadata)
303
+
304
+ LOG.info(
305
+ "Found %s videos and %s errors after video limit checks",
306
+ len(output_video_metadatas),
307
+ len(error_metadatas),
308
+ )
309
+
310
+ return output_video_metadatas, error_metadatas
311
+
312
+
313
+ def _check_sequences_by_limits(
314
+ input_sequences: T.Sequence[PointSequence],
315
+ max_sequence_filesize_in_bytes: int,
316
+ max_avg_speed: float,
317
+ ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
318
+ output_sequences: list[PointSequence] = []
319
+ output_errors: list[types.ErrorMetadata] = []
320
+
321
+ for sequence in input_sequences:
322
+ sequence_filesize = sum(
323
+ utils.get_file_size(image.filename)
324
+ if image.filesize is None
325
+ else image.filesize
326
+ for image in sequence
256
327
  )
257
- except ValueError:
258
- raise MapillaryBadParameterError(
259
- f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_FILESIZE to be a valid filesize that ends with B, K, M, or G, but got {constants.MAX_SEQUENCE_FILESIZE}"
328
+
329
+ try:
330
+ if sequence_filesize > max_sequence_filesize_in_bytes:
331
+ raise exceptions.MapillaryFileTooLargeError(
332
+ f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
333
+ )
334
+
335
+ contains_null_island = any(
336
+ image.lat == 0 and image.lon == 0 for image in sequence
337
+ )
338
+ if contains_null_island:
339
+ raise exceptions.MapillaryNullIslandError(
340
+ "Found GPS coordinates in Null Island (0, 0)",
341
+ )
342
+
343
+ too_fast = len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed
344
+ if too_fast:
345
+ raise exceptions.MapillaryCaptureSpeedTooFastError(
346
+ f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
347
+ )
348
+ except exceptions.MapillaryDescriptionError as ex:
349
+ for image in sequence:
350
+ output_errors.append(
351
+ types.describe_error_metadata(
352
+ exc=ex,
353
+ filename=image.filename,
354
+ filetype=types.FileType.IMAGE,
355
+ )
356
+ )
357
+
358
+ else:
359
+ output_sequences.append(sequence)
360
+
361
+ assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
362
+ len(s) for s in input_sequences
363
+ )
364
+
365
+ LOG.info(
366
+ "Found %s sequences and %s errors after sequence limit checks",
367
+ len(output_sequences),
368
+ len(output_errors),
369
+ )
370
+
371
+ return output_sequences, output_errors
372
+
373
+
374
+ def _group_by_folder_and_camera(
375
+ image_metadatas: list[types.ImageMetadata],
376
+ ) -> list[list[types.ImageMetadata]]:
377
+ grouped = _group_by(
378
+ image_metadatas,
379
+ lambda metadata: (
380
+ str(metadata.filename.parent),
381
+ metadata.MAPDeviceMake,
382
+ metadata.MAPDeviceModel,
383
+ metadata.width,
384
+ metadata.height,
385
+ ),
386
+ )
387
+ for key in grouped:
388
+ LOG.debug("Group sequences by %s: %s images", key, len(grouped[key]))
389
+ output_sequences = list(grouped.values())
390
+
391
+ LOG.info(
392
+ "Found %s sequences from different folders and cameras",
393
+ len(output_sequences),
394
+ )
395
+
396
+ return output_sequences
397
+
398
+
399
+ def _split_sequences_by_cutoff_time(
400
+ input_sequences: T.Sequence[PointSequence], cutoff_time: float
401
+ ) -> list[PointSequence]:
402
+ def _should_split_by_cutoff_time(
403
+ prev: types.ImageMetadata, cur: types.ImageMetadata
404
+ ) -> bool:
405
+ time_diff = cur.time - prev.time
406
+ assert 0 <= time_diff, "sequence must be sorted by capture times"
407
+ should = cutoff_time < time_diff
408
+ if should:
409
+ LOG.debug(
410
+ "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
411
+ round(time_diff, 2),
412
+ round(cutoff_time, 2),
413
+ prev.filename.parent,
414
+ prev.filename.name,
415
+ cur.filename.name,
416
+ )
417
+ return should
418
+
419
+ output_sequences = []
420
+ for sequence in input_sequences:
421
+ output_sequences.extend(
422
+ split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
260
423
  )
261
424
 
262
- try:
263
- max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
264
- except ValueError:
265
- raise MapillaryBadParameterError(
266
- f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_PIXELS to be a valid number of pixels that ends with K, M, or G, but got {constants.MAX_SEQUENCE_PIXELS}"
425
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
426
+
427
+ LOG.info(
428
+ "Found %s sequences after split by cutoff_time %d seconds",
429
+ len(output_sequences),
430
+ cutoff_time,
431
+ )
432
+
433
+ return output_sequences
434
+
435
+
436
+ def _split_sequences_by_cutoff_distance(
437
+ input_sequences: T.Sequence[PointSequence], cutoff_distance: float
438
+ ) -> list[PointSequence]:
439
+ def _should_split_by_cutoff_distance(
440
+ prev: types.ImageMetadata, cur: types.ImageMetadata
441
+ ) -> bool:
442
+ distance = geo.gps_distance(
443
+ (prev.lat, prev.lon),
444
+ (cur.lat, cur.lon),
445
+ )
446
+ should = cutoff_distance < distance
447
+ if should:
448
+ LOG.debug(
449
+ "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
450
+ round(distance, 2),
451
+ round(cutoff_distance, 2),
452
+ prev.filename.parent,
453
+ prev.filename.name,
454
+ cur.filename.name,
455
+ )
456
+ return should
457
+
458
+ output_sequences = []
459
+ for sequence in input_sequences:
460
+ output_sequences.extend(
461
+ split_sequence_by(sequence, _should_split_by_cutoff_distance)
462
+ )
463
+
464
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
465
+
466
+ LOG.info(
467
+ "Found %s sequences after split by cutoff_distance %d meters",
468
+ len(output_sequences),
469
+ cutoff_distance,
470
+ )
471
+
472
+ return output_sequences
473
+
474
+
475
+ def _check_sequences_duplication(
476
+ input_sequences: T.Sequence[PointSequence],
477
+ duplicate_distance: float,
478
+ duplicate_angle: float,
479
+ ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
480
+ output_sequences: list[PointSequence] = []
481
+ output_errors: list[types.ErrorMetadata] = []
482
+
483
+ for sequence in input_sequences:
484
+ output_sequence, errors = duplication_check(
485
+ sequence,
486
+ max_duplicate_distance=duplicate_distance,
487
+ max_duplicate_angle=duplicate_angle,
488
+ )
489
+ assert len(sequence) == len(output_sequence) + len(errors)
490
+ output_sequences.append(output_sequence)
491
+ output_errors.extend(errors)
492
+
493
+ assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
494
+ len(s) for s in input_sequences
495
+ )
496
+
497
+ LOG.info(
498
+ "Found %s sequences and %s errors after duplication check",
499
+ len(output_sequences),
500
+ len(output_errors),
501
+ )
502
+
503
+ return output_sequences, output_errors
504
+
505
+
506
+ def _split_sequences_by_limits(
507
+ input_sequences: T.Sequence[PointSequence],
508
+ max_sequence_filesize_in_bytes: float,
509
+ max_sequence_pixels: float,
510
+ ) -> list[PointSequence]:
511
+ max_sequence_images = constants.MAX_SEQUENCE_LENGTH
512
+ max_sequence_filesize = max_sequence_filesize_in_bytes
513
+
514
+ def _should_split(image: types.ImageMetadata, sequence_state: dict) -> bool:
515
+ last_sequence_images = sequence_state.get("last_sequence_images", 0)
516
+ last_sequence_file_size = sequence_state.get("last_sequence_file_size", 0)
517
+ last_sequence_pixels = sequence_state.get("last_sequence_pixels", 0)
518
+
519
+ # decent default values if width/height not available
520
+ width = 1024 if image.width is None else image.width
521
+ height = 1024 if image.height is None else image.height
522
+ pixels = width * height
523
+
524
+ if image.filesize is None:
525
+ filesize = os.path.getsize(image.filename)
526
+ else:
527
+ filesize = image.filesize
528
+
529
+ new_sequence_images = last_sequence_images + 1
530
+ new_sequence_file_size = last_sequence_file_size + filesize
531
+ new_sequence_pixels = last_sequence_pixels + pixels
532
+
533
+ if max_sequence_images < new_sequence_images:
534
+ LOG.debug(
535
+ "Split because the current sequence (%s) reaches the max number of images (%s)",
536
+ new_sequence_images,
537
+ max_sequence_images,
538
+ )
539
+ start_new_sequence = True
540
+ elif max_sequence_filesize < new_sequence_file_size:
541
+ LOG.debug(
542
+ "Split because the current sequence (%s) reaches the max filesize (%s)",
543
+ new_sequence_file_size,
544
+ max_sequence_filesize,
545
+ )
546
+ start_new_sequence = True
547
+ elif max_sequence_pixels < new_sequence_pixels:
548
+ LOG.debug(
549
+ "Split because the current sequence (%s) reaches the max pixels (%s)",
550
+ new_sequence_pixels,
551
+ max_sequence_pixels,
552
+ )
553
+ start_new_sequence = True
554
+ else:
555
+ start_new_sequence = False
556
+
557
+ if not start_new_sequence:
558
+ sequence_state["last_sequence_images"] = new_sequence_images
559
+ sequence_state["last_sequence_file_size"] = new_sequence_file_size
560
+ sequence_state["last_sequence_pixels"] = new_sequence_pixels
561
+ else:
562
+ sequence_state["last_sequence_images"] = 1
563
+ sequence_state["last_sequence_file_size"] = filesize
564
+ sequence_state["last_sequence_pixels"] = pixels
565
+
566
+ return start_new_sequence
567
+
568
+ output_sequences = []
569
+ for sequence in input_sequences:
570
+ output_sequences.extend(
571
+ split_sequence_by_agg(
572
+ sequence, should_split_with_sequence_state=_should_split
573
+ )
267
574
  )
268
575
 
269
- error_metadatas: T.List[types.ErrorMetadata] = []
270
- image_metadatas: T.List[types.ImageMetadata] = []
271
- video_metadatas: T.List[types.VideoMetadata] = []
576
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
577
+
578
+ LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))
579
+
580
+ return output_sequences
581
+
582
+
583
+ def process_sequence_properties(
584
+ metadatas: T.Sequence[types.MetadataOrError],
585
+ cutoff_distance: float = constants.CUTOFF_DISTANCE,
586
+ cutoff_time: float = constants.CUTOFF_TIME,
587
+ interpolate_directions: bool = False,
588
+ duplicate_distance: float = constants.DUPLICATE_DISTANCE,
589
+ duplicate_angle: float = constants.DUPLICATE_ANGLE,
590
+ max_avg_speed: float = constants.MAX_AVG_SPEED,
591
+ ) -> list[types.MetadataOrError]:
592
+ max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
593
+ constants.MAX_SEQUENCE_FILESIZE
594
+ )
595
+ max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
596
+
597
+ error_metadatas: list[types.ErrorMetadata] = []
598
+ image_metadatas: list[types.ImageMetadata] = []
599
+ video_metadatas: list[types.VideoMetadata] = []
272
600
 
273
601
  for metadata in metadatas:
274
602
  if isinstance(metadata, types.ErrorMetadata):
@@ -280,66 +608,92 @@ def process_sequence_properties(
280
608
  else:
281
609
  raise RuntimeError(f"invalid metadata type: {metadata}")
282
610
 
283
- sequences_by_folder = _group_sort_images_by_folder(image_metadatas)
284
- # make sure they are sorted
285
- for sequence in sequences_by_folder:
286
- for cur, nxt in geo.pairwise(sequence):
287
- assert cur.time <= nxt.time, "sequence must be sorted"
611
+ if video_metadatas:
612
+ # Check limits for videos
613
+ video_metadatas, video_error_metadatas = _check_video_limits(
614
+ video_metadatas,
615
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
616
+ max_avg_speed=max_avg_speed,
617
+ max_radius_for_stationary_check=10.0,
618
+ )
619
+ error_metadatas.extend(video_error_metadatas)
288
620
 
289
- for s in sequences_by_folder:
290
- _interpolate_subsecs_for_sorting(s)
621
+ if image_metadatas:
622
+ sequences: list[PointSequence]
291
623
 
292
- # cut sequences
293
- sequences_after_cut: T.List[PointSequence] = []
294
- for sequence in sequences_by_folder:
295
- cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time)
296
- sequences_after_cut.extend(cut)
297
- assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut)
624
+ # Group by folder and camera
625
+ sequences = _group_by_folder_and_camera(image_metadatas)
298
626
 
299
- # reuse imaeg_metadatas to store processed image metadatas
300
- image_metadatas = []
627
+ # Make sure each sequence is sorted (in-place update)
628
+ for sequence in sequences:
629
+ sequence.sort(
630
+ key=lambda metadata: metadata.sort_key(),
631
+ )
301
632
 
302
- sequence_idx = 0
633
+ # Interpolate subseconds for same timestamps (in-place update)
634
+ for sequence in sequences:
635
+ _interpolate_subsecs_for_sorting(sequence)
303
636
 
304
- for sequence in sequences_after_cut:
305
- # duplication check
306
- dedups, dups = duplication_check(
307
- sequence,
637
+ # Split sequences by cutoff time
638
+ # NOTE: Do not split by distance here because it affects the speed limit check
639
+ sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
640
+
641
+ # Duplication check
642
+ sequences, errors = _check_sequences_duplication(
643
+ sequences,
308
644
  duplicate_distance=duplicate_distance,
309
645
  duplicate_angle=duplicate_angle,
310
646
  )
311
- assert len(sequence) == len(dedups) + len(dups)
312
- error_metadatas.extend(dups)
313
-
314
- # interpolate angles
315
- if interpolate_directions:
316
- for p in dedups:
317
- p.angle = None
318
- geo.interpolate_directions_if_none(dedups)
319
-
320
- # cut sequence per MAX_SEQUENCE_LENGTH images
321
- cut = cut_sequence(
322
- dedups,
323
- constants.MAX_SEQUENCE_LENGTH,
324
- max_sequence_filesize_in_bytes,
325
- max_sequence_pixels,
647
+ error_metadatas.extend(errors)
648
+
649
+ # Interpolate angles (in-place update)
650
+ for sequence in sequences:
651
+ if interpolate_directions:
652
+ for image in sequence:
653
+ image.angle = None
654
+ geo.interpolate_directions_if_none(sequence)
655
+
656
+ # Split sequences by max number of images, max filesize, and max pixels
657
+ sequences = _split_sequences_by_limits(
658
+ sequences,
659
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
660
+ max_sequence_pixels=max_sequence_pixels,
661
+ )
662
+
663
+ # Check limits for sequences
664
+ sequences, errors = _check_sequences_by_limits(
665
+ sequences,
666
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
667
+ max_avg_speed=max_avg_speed,
668
+ )
669
+ error_metadatas.extend(errors)
670
+
671
+ # Split sequences by cutoff distance
672
+ # NOTE: The speed limit check probably rejects most of anomalies
673
+ sequences = _split_sequences_by_cutoff_distance(
674
+ sequences, cutoff_distance=cutoff_distance
326
675
  )
327
676
 
328
- # assign sequence UUIDs
329
- for c in cut:
330
- for p in c:
677
+ # Assign sequence UUIDs (in-place update)
678
+ sequence_idx = 0
679
+ for sequence in sequences:
680
+ for image in sequence:
331
681
  # using incremental id as shorter "uuid", so we can save some space for the desc file
332
- p.MAPSequenceUUID = str(sequence_idx)
333
- image_metadatas.append(p)
682
+ image.MAPSequenceUUID = str(sequence_idx)
334
683
  sequence_idx += 1
335
684
 
685
+ image_metadatas = []
686
+ for sequence in sequences:
687
+ image_metadatas.extend(sequence)
688
+
689
+ assert sequence_idx == len(
690
+ set(metadata.MAPSequenceUUID for metadata in image_metadatas)
691
+ )
692
+
336
693
  results = error_metadatas + image_metadatas + video_metadatas
337
694
 
338
695
  assert len(metadatas) == len(results), (
339
696
  f"expected {len(metadatas)} results but got {len(results)}"
340
697
  )
341
- assert sequence_idx == len(
342
- set(metadata.MAPSequenceUUID for metadata in image_metadatas)
343
- )
344
698
 
345
699
  return results