mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -262
  3. mapillary_tools/authenticate.py +54 -46
  4. mapillary_tools/blackvue_parser.py +79 -22
  5. mapillary_tools/commands/__main__.py +15 -16
  6. mapillary_tools/commands/upload.py +33 -4
  7. mapillary_tools/config.py +38 -17
  8. mapillary_tools/constants.py +127 -43
  9. mapillary_tools/exceptions.py +4 -0
  10. mapillary_tools/exif_read.py +2 -1
  11. mapillary_tools/exif_write.py +3 -1
  12. mapillary_tools/exiftool_read_video.py +52 -15
  13. mapillary_tools/exiftool_runner.py +4 -24
  14. mapillary_tools/ffmpeg.py +406 -232
  15. mapillary_tools/geo.py +16 -0
  16. mapillary_tools/geotag/__init__.py +0 -0
  17. mapillary_tools/geotag/base.py +8 -4
  18. mapillary_tools/geotag/factory.py +106 -89
  19. mapillary_tools/geotag/geotag_images_from_exiftool.py +27 -20
  20. mapillary_tools/geotag/geotag_images_from_gpx.py +7 -6
  21. mapillary_tools/geotag/geotag_images_from_video.py +35 -0
  22. mapillary_tools/geotag/geotag_videos_from_exiftool.py +61 -14
  23. mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
  24. mapillary_tools/geotag/options.py +25 -3
  25. mapillary_tools/geotag/utils.py +9 -12
  26. mapillary_tools/geotag/video_extractors/base.py +1 -1
  27. mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
  28. mapillary_tools/geotag/video_extractors/gpx.py +61 -70
  29. mapillary_tools/geotag/video_extractors/native.py +34 -31
  30. mapillary_tools/history.py +128 -8
  31. mapillary_tools/http.py +211 -0
  32. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  33. mapillary_tools/process_geotag_properties.py +47 -35
  34. mapillary_tools/process_sequence_properties.py +340 -325
  35. mapillary_tools/sample_video.py +8 -8
  36. mapillary_tools/serializer/description.py +587 -0
  37. mapillary_tools/serializer/gpx.py +132 -0
  38. mapillary_tools/types.py +44 -610
  39. mapillary_tools/upload.py +327 -352
  40. mapillary_tools/upload_api_v4.py +125 -72
  41. mapillary_tools/uploader.py +797 -216
  42. mapillary_tools/utils.py +57 -5
  43. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +91 -34
  44. mapillary_tools-0.14.1.dist-info/RECORD +76 -0
  45. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +1 -1
  46. mapillary_tools-0.14.0a2.dist-info/RECORD +0 -72
  47. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
  48. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
  49. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
@@ -1,90 +1,109 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import functools
3
4
  import itertools
4
5
  import logging
5
6
  import math
6
7
  import os
7
8
  import typing as T
8
9
 
10
+ import humanize
11
+
9
12
  from . import constants, exceptions, geo, types, utils
13
+ from .serializer.description import DescriptionJSONSerializer
10
14
 
11
15
  LOG = logging.getLogger(__name__)
12
16
 
13
17
 
14
- SeqItem = T.TypeVar("SeqItem")
18
+ S = T.TypeVar("S")
19
+ R = T.TypeVar("R")
15
20
  PointSequence = T.List[geo.PointLike]
16
21
 
17
22
 
18
23
  def split_sequence_by(
19
- sequence: T.Sequence[SeqItem],
20
- should_split: T.Callable[[SeqItem, SeqItem], bool],
21
- ) -> list[list[SeqItem]]:
22
- """
23
- Split a sequence into multiple sequences by should_split(prev, cur) => True
24
+ sequence: T.Iterable[S], reduce: T.Callable[[R, S], tuple[R, bool]], initial: R
25
+ ) -> list[list[S]]:
24
26
  """
25
- output_sequences: list[list[SeqItem]] = []
26
-
27
- seq = iter(sequence)
28
-
29
- prev = next(seq, None)
30
- if prev is None:
31
- return output_sequences
32
-
33
- output_sequences.append([prev])
34
-
35
- for cur in seq:
36
- # invariant: prev is processed
37
- if should_split(prev, cur):
38
- output_sequences.append([cur])
39
- else:
40
- output_sequences[-1].append(cur)
41
- prev = cur
42
- # invariant: cur is processed
27
+ Split a sequence into multiple subsequences based on a reduction function.
43
28
 
44
- assert sum(len(s) for s in output_sequences) == len(sequence)
29
+ The function processes each element through a reduce function that maintains
30
+ state and determines whether to split the sequence at that point. When a split
31
+ is triggered, a new subsequence starts with the current element.
45
32
 
46
- return output_sequences
33
+ Args:
34
+ sequence: An iterable of elements to split
35
+ reduce: A function that takes (accumulated_state, current_element) and
36
+ returns (new_state, should_split). If should_split is True,
37
+ a new subsequence starts with the current element.
38
+ initial: The initial state value passed to the reduce function
47
39
 
40
+ Returns:
41
+ A list of subsequences, where each subsequence is a list of elements
48
42
 
49
- def split_sequence_by_agg(
50
- sequence: T.Sequence[SeqItem],
51
- should_split_with_sequence_state: T.Callable[[SeqItem, dict], bool],
52
- ) -> list[list[SeqItem]]:
53
- """
54
- Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True
43
+ Examples:
44
+ >>> # Split on even numbers
45
+ >>> def split_on_even(count, x):
46
+ ... return count + 1, x % 2 == 0
47
+ >>> split_sequence_by([1, 3, 2, 4, 5, 6, 7], split_on_even, 0)
48
+ [[1, 3], [2], [4, 5], [6, 7]]
49
+
50
+ >>> # Split when sum exceeds threshold
51
+ >>> def split_when_sum_exceeds_5(total, x):
52
+ ... total += x
53
+ ... return (x, True) if total > 5 else (total, False)
54
+ >>> split_sequence_by([1, 2, 3, 4, 1, 2], split_when_sum_exceeds_5, 0)
55
+ [[1, 2], [3], [4, 1], [2]]
56
+
57
+ >>> # Split on specific values
58
+ >>> def split_on_zero(_, x):
59
+ ... return None, x == 0
60
+ >>> split_sequence_by([1, 2, 0, 3, 4, 0, 5], split_on_zero, None)
61
+ [[1, 2], [0, 3, 4], [0, 5]]
62
+
63
+ >>> # Empty sequence
64
+ >>> split_sequence_by([], lambda s, x: (s, False), 0)
65
+ []
66
+
67
+ >>> # Single element
68
+ >>> split_sequence_by([42], lambda s, x: (s, False), 0)
69
+ [[42]]
55
70
  """
56
- output_sequences: list[list[SeqItem]] = []
57
- sequence_state: dict = {}
58
71
 
59
- for cur in sequence:
60
- start_new_sequence = should_split_with_sequence_state(cur, sequence_state)
72
+ output_sequences: list[list[S]] = []
61
73
 
62
- if not output_sequences:
63
- output_sequences.append([])
74
+ value = initial
64
75
 
65
- if start_new_sequence:
66
- # DO NOT reset the state because it contains the information of current item
67
- # sequence_state = {}
68
- if output_sequences[-1]:
69
- output_sequences.append([])
76
+ for element in sequence:
77
+ value, should = reduce(value, element)
70
78
 
71
- output_sequences[-1].append(cur)
72
-
73
- assert sum(len(s) for s in output_sequences) == len(sequence)
79
+ if should:
80
+ output_sequences.append([element])
81
+ else:
82
+ if output_sequences:
83
+ output_sequences[-1].append(element)
84
+ else:
85
+ output_sequences.append([element])
74
86
 
75
87
  return output_sequences
76
88
 
77
89
 
78
90
  def duplication_check(
79
91
  sequence: PointSequence,
92
+ *,
80
93
  max_duplicate_distance: float,
81
94
  max_duplicate_angle: float,
82
95
  ) -> tuple[PointSequence, list[types.ErrorMetadata]]:
96
+ """
97
+ >>> duplication_check([], max_duplicate_distance=1, max_duplicate_angle=2)
98
+ ([], [])
99
+ """
100
+
83
101
  dedups: PointSequence = []
84
102
  dups: list[types.ErrorMetadata] = []
85
103
 
86
104
  it = iter(sequence)
87
- prev = next(it)
105
+ prev = next(it, None)
106
+
88
107
  if prev is None:
89
108
  return dedups, dups
90
109
 
@@ -92,10 +111,7 @@ def duplication_check(
92
111
 
93
112
  for cur in it:
94
113
  # invariant: prev is processed
95
- distance = geo.gps_distance(
96
- (prev.lat, prev.lon),
97
- (cur.lat, cur.lon),
98
- )
114
+ distance = geo.gps_distance((prev.lat, prev.lon), (cur.lat, cur.lon))
99
115
 
100
116
  if prev.angle is not None and cur.angle is not None:
101
117
  angle_diff = geo.diff_bearing(prev.angle, cur.angle)
@@ -106,15 +122,14 @@ def duplication_check(
106
122
  angle_diff is None or angle_diff <= max_duplicate_angle
107
123
  ):
108
124
  msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
125
+ ex = exceptions.MapillaryDuplicationError(
126
+ msg,
127
+ DescriptionJSONSerializer.as_desc(cur),
128
+ distance=distance,
129
+ angle_diff=angle_diff,
130
+ )
109
131
  dup = types.describe_error_metadata(
110
- exceptions.MapillaryDuplicationError(
111
- msg,
112
- types.as_desc(cur),
113
- distance=distance,
114
- angle_diff=angle_diff,
115
- ),
116
- cur.filename,
117
- filetype=types.FileType.IMAGE,
132
+ ex, cur.filename, filetype=types.FileType.IMAGE
118
133
  )
119
134
  dups.append(dup)
120
135
  # prev does not change
@@ -126,9 +141,9 @@ def duplication_check(
126
141
  return dedups, dups
127
142
 
128
143
 
129
- def _group_by(
144
+ def _group_images_by(
130
145
  image_metadatas: T.Iterable[types.ImageMetadata],
131
- group_key_func=T.Callable[[types.ImageMetadata], T.Hashable],
146
+ group_key_func: T.Callable[[types.ImageMetadata], T.Hashable],
132
147
  ) -> dict[T.Hashable, list[types.ImageMetadata]]:
133
148
  grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
134
149
  for metadata in image_metadatas:
@@ -138,11 +153,21 @@ def _group_by(
138
153
 
139
154
  def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
140
155
  """
141
- Update the timestamps make sure they are unique and sorted
156
+ Update the timestamps to make sure they are unique and sorted
142
157
  in the same order by interpolating subseconds
158
+
143
159
  Examples:
144
- - Input: 1, 1, 1, 1, 1, 2
145
- - Output: 1, 1.2, 1.4, 1.6, 1.8, 2
160
+ >>> def make_point(t):
161
+ ... return geo.Point(lat=0, lon=0, time=t, alt=None, angle=None)
162
+ >>> points = [make_point(t) for t in [1, 1, 1, 1, 1, 2]]
163
+ >>> _interpolate_subsecs_for_sorting(points)
164
+ >>> [p.time for p in points]
165
+ [1.0, 1.2, 1.4, 1.6, 1.8, 2]
166
+
167
+ >>> points = [make_point(t) for t in [1.1]]
168
+ >>> _interpolate_subsecs_for_sorting(points)
169
+ >>> [p.time for p in points]
170
+ [1.1]
146
171
  """
147
172
 
148
173
  gidx = 0
@@ -174,63 +199,6 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
174
199
  )
175
200
 
176
201
 
177
- def _parse_filesize_in_bytes(filesize_str: str) -> int:
178
- filesize_str = filesize_str.strip().upper()
179
-
180
- try:
181
- if filesize_str.endswith("B"):
182
- return int(filesize_str[:-1])
183
- elif filesize_str.endswith("K"):
184
- return int(filesize_str[:-1]) * 1024
185
- elif filesize_str.endswith("M"):
186
- return int(filesize_str[:-1]) * 1024 * 1024
187
- elif filesize_str.endswith("G"):
188
- return int(filesize_str[:-1]) * 1024 * 1024 * 1024
189
- else:
190
- return int(filesize_str)
191
- except ValueError:
192
- raise exceptions.MapillaryBadParameterError(
193
- f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
194
- )
195
-
196
-
197
- def _parse_pixels(pixels_str: str) -> int:
198
- pixels_str = pixels_str.strip().upper()
199
-
200
- try:
201
- if pixels_str.endswith("K"):
202
- return int(pixels_str[:-1]) * 1000
203
- elif pixels_str.endswith("M"):
204
- return int(pixels_str[:-1]) * 1000 * 1000
205
- elif pixels_str.endswith("G"):
206
- return int(pixels_str[:-1]) * 1000 * 1000 * 1000
207
- else:
208
- return int(pixels_str)
209
- except ValueError:
210
- raise exceptions.MapillaryBadParameterError(
211
- f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
212
- )
213
-
214
-
215
- def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
216
- total_distance = 0.0
217
- for cur, nxt in geo.pairwise(sequence):
218
- total_distance += geo.gps_distance(
219
- (cur.lat, cur.lon),
220
- (nxt.lat, nxt.lon),
221
- )
222
-
223
- if sequence:
224
- time_diff = sequence[-1].time - sequence[0].time
225
- else:
226
- time_diff = 0.0
227
-
228
- if time_diff == 0.0:
229
- return float("inf")
230
-
231
- return total_distance / time_diff
232
-
233
-
234
202
  def _is_video_stationary(
235
203
  sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
236
204
  ) -> bool:
@@ -248,8 +216,8 @@ def _is_video_stationary(
248
216
 
249
217
  def _check_video_limits(
250
218
  video_metadatas: T.Iterable[types.VideoMetadata],
251
- max_sequence_filesize_in_bytes: int,
252
- max_avg_speed: float,
219
+ max_sequence_filesize_in_bytes: int | None,
220
+ max_capture_speed_kmh: float,
253
221
  max_radius_for_stationary_check: float,
254
222
  ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
255
223
  output_video_metadatas: list[types.VideoMetadata] = []
@@ -264,33 +232,38 @@ def _check_video_limits(
264
232
  if is_stationary:
265
233
  raise exceptions.MapillaryStationaryVideoError("Stationary video")
266
234
 
267
- video_filesize = (
268
- utils.get_file_size(video_metadata.filename)
269
- if video_metadata.filesize is None
270
- else video_metadata.filesize
271
- )
272
- if video_filesize > max_sequence_filesize_in_bytes:
273
- raise exceptions.MapillaryFileTooLargeError(
274
- f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
235
+ if max_sequence_filesize_in_bytes is not None:
236
+ video_filesize = (
237
+ utils.get_file_size(video_metadata.filename)
238
+ if video_metadata.filesize is None
239
+ else video_metadata.filesize
275
240
  )
241
+ if video_filesize > max_sequence_filesize_in_bytes:
242
+ raise exceptions.MapillaryFileTooLargeError(
243
+ f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
244
+ )
276
245
 
277
246
  contains_null_island = any(
278
247
  p.lat == 0 and p.lon == 0 for p in video_metadata.points
279
248
  )
280
249
  if contains_null_island:
281
250
  raise exceptions.MapillaryNullIslandError(
282
- "Found GPS coordinates in Null Island (0, 0)",
251
+ "GPS coordinates in Null Island (0, 0)"
283
252
  )
284
253
 
254
+ avg_speed_kmh = (
255
+ geo.avg_speed(video_metadata.points) * 3.6
256
+ ) # Convert m/s to km/h
285
257
  too_fast = (
286
258
  len(video_metadata.points) >= 2
287
- and _avg_speed(video_metadata.points) > max_avg_speed
259
+ and avg_speed_kmh > max_capture_speed_kmh
288
260
  )
289
261
  if too_fast:
290
262
  raise exceptions.MapillaryCaptureSpeedTooFastError(
291
- f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
263
+ f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
292
264
  )
293
265
  except exceptions.MapillaryDescriptionError as ex:
266
+ LOG.error(f"{_video_name(video_metadata)}: {ex}")
294
267
  error_metadatas.append(
295
268
  types.describe_error_metadata(
296
269
  exc=ex,
@@ -301,57 +274,55 @@ def _check_video_limits(
301
274
  else:
302
275
  output_video_metadatas.append(video_metadata)
303
276
 
304
- LOG.info(
305
- "Found %s videos and %s errors after video limit checks",
306
- len(output_video_metadatas),
307
- len(error_metadatas),
308
- )
309
-
310
277
  return output_video_metadatas, error_metadatas
311
278
 
312
279
 
280
+ def _video_name(video_metadata: types.VideoMetadata) -> str:
281
+ return video_metadata.filename.name
282
+
283
+
313
284
  def _check_sequences_by_limits(
314
285
  input_sequences: T.Sequence[PointSequence],
315
- max_sequence_filesize_in_bytes: int,
316
- max_avg_speed: float,
286
+ max_sequence_filesize_in_bytes: int | None,
287
+ max_capture_speed_kmh: float,
317
288
  ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
318
289
  output_sequences: list[PointSequence] = []
319
290
  output_errors: list[types.ErrorMetadata] = []
320
291
 
321
292
  for sequence in input_sequences:
322
- sequence_filesize = sum(
323
- utils.get_file_size(image.filename)
324
- if image.filesize is None
325
- else image.filesize
326
- for image in sequence
327
- )
328
-
329
293
  try:
330
- if sequence_filesize > max_sequence_filesize_in_bytes:
331
- raise exceptions.MapillaryFileTooLargeError(
332
- f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
294
+ if max_sequence_filesize_in_bytes is not None:
295
+ sequence_filesize = sum(
296
+ utils.get_file_size(image.filename)
297
+ if image.filesize is None
298
+ else image.filesize
299
+ for image in sequence
333
300
  )
301
+ if sequence_filesize > max_sequence_filesize_in_bytes:
302
+ raise exceptions.MapillaryFileTooLargeError(
303
+ f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
304
+ )
334
305
 
335
306
  contains_null_island = any(
336
307
  image.lat == 0 and image.lon == 0 for image in sequence
337
308
  )
338
309
  if contains_null_island:
339
310
  raise exceptions.MapillaryNullIslandError(
340
- "Found GPS coordinates in Null Island (0, 0)",
311
+ "GPS coordinates in Null Island (0, 0)"
341
312
  )
342
313
 
343
- too_fast = len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed
314
+ avg_speed_kmh = geo.avg_speed(sequence) * 3.6 # Convert m/s to km/h
315
+ too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
344
316
  if too_fast:
345
317
  raise exceptions.MapillaryCaptureSpeedTooFastError(
346
- f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
318
+ f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
347
319
  )
348
320
  except exceptions.MapillaryDescriptionError as ex:
321
+ LOG.error(f"{_sequence_name(sequence)}: {ex}")
349
322
  for image in sequence:
350
323
  output_errors.append(
351
324
  types.describe_error_metadata(
352
- exc=ex,
353
- filename=image.filename,
354
- filetype=types.FileType.IMAGE,
325
+ exc=ex, filename=image.filename, filetype=types.FileType.IMAGE
355
326
  )
356
327
  )
357
328
 
@@ -362,19 +333,20 @@ def _check_sequences_by_limits(
362
333
  len(s) for s in input_sequences
363
334
  )
364
335
 
365
- LOG.info(
366
- "Found %s sequences and %s errors after sequence limit checks",
367
- len(output_sequences),
368
- len(output_errors),
369
- )
370
-
371
336
  return output_sequences, output_errors
372
337
 
373
338
 
339
+ def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
340
+ if not sequence:
341
+ return "N/A"
342
+ image = sequence[0]
343
+ return f"{image.filename.parent.name}/{image.filename.name}"
344
+
345
+
374
346
  def _group_by_folder_and_camera(
375
347
  image_metadatas: list[types.ImageMetadata],
376
348
  ) -> list[list[types.ImageMetadata]]:
377
- grouped = _group_by(
349
+ grouped = _group_images_by(
378
350
  image_metadatas,
379
351
  lambda metadata: (
380
352
  str(metadata.filename.parent),
@@ -385,89 +357,10 @@ def _group_by_folder_and_camera(
385
357
  ),
386
358
  )
387
359
  for key in grouped:
388
- LOG.debug("Group sequences by %s: %s images", key, len(grouped[key]))
360
+ LOG.debug(f"Grouped {len(grouped[key])} images by {key}")
389
361
  output_sequences = list(grouped.values())
390
362
 
391
- LOG.info(
392
- "Found %s sequences from different folders and cameras",
393
- len(output_sequences),
394
- )
395
-
396
- return output_sequences
397
-
398
-
399
- def _split_sequences_by_cutoff_time(
400
- input_sequences: T.Sequence[PointSequence], cutoff_time: float
401
- ) -> list[PointSequence]:
402
- def _should_split_by_cutoff_time(
403
- prev: types.ImageMetadata, cur: types.ImageMetadata
404
- ) -> bool:
405
- time_diff = cur.time - prev.time
406
- assert 0 <= time_diff, "sequence must be sorted by capture times"
407
- should = cutoff_time < time_diff
408
- if should:
409
- LOG.debug(
410
- "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
411
- round(time_diff, 2),
412
- round(cutoff_time, 2),
413
- prev.filename.parent,
414
- prev.filename.name,
415
- cur.filename.name,
416
- )
417
- return should
418
-
419
- output_sequences = []
420
- for sequence in input_sequences:
421
- output_sequences.extend(
422
- split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
423
- )
424
-
425
- assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
426
-
427
- LOG.info(
428
- "Found %s sequences after split by cutoff_time %d seconds",
429
- len(output_sequences),
430
- cutoff_time,
431
- )
432
-
433
- return output_sequences
434
-
435
-
436
- def _split_sequences_by_cutoff_distance(
437
- input_sequences: T.Sequence[PointSequence], cutoff_distance: float
438
- ) -> list[PointSequence]:
439
- def _should_split_by_cutoff_distance(
440
- prev: types.ImageMetadata, cur: types.ImageMetadata
441
- ) -> bool:
442
- distance = geo.gps_distance(
443
- (prev.lat, prev.lon),
444
- (cur.lat, cur.lon),
445
- )
446
- should = cutoff_distance < distance
447
- if should:
448
- LOG.debug(
449
- "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
450
- round(distance, 2),
451
- round(cutoff_distance, 2),
452
- prev.filename.parent,
453
- prev.filename.name,
454
- cur.filename.name,
455
- )
456
- return should
457
-
458
- output_sequences = []
459
- for sequence in input_sequences:
460
- output_sequences.extend(
461
- split_sequence_by(sequence, _should_split_by_cutoff_distance)
462
- )
463
-
464
- assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
465
-
466
- LOG.info(
467
- "Found %s sequences after split by cutoff_distance %d meters",
468
- len(output_sequences),
469
- cutoff_distance,
470
- )
363
+ LOG.info(f"Created {len(output_sequences)} sequences by folders and cameras")
471
364
 
472
365
  return output_sequences
473
366
 
@@ -487,95 +380,218 @@ def _check_sequences_duplication(
487
380
  max_duplicate_angle=duplicate_angle,
488
381
  )
489
382
  assert len(sequence) == len(output_sequence) + len(errors)
490
- output_sequences.append(output_sequence)
383
+ if output_sequence:
384
+ output_sequences.append(output_sequence)
491
385
  output_errors.extend(errors)
492
386
 
387
+ # All input images should be accounted for either in output sequences or errors
493
388
  assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
494
389
  len(s) for s in input_sequences
495
390
  )
496
391
 
497
- LOG.info(
498
- "Found %s sequences and %s errors after duplication check",
499
- len(output_sequences),
500
- len(output_errors),
501
- )
392
+ if output_errors:
393
+ LOG.info(
394
+ f"Duplication check: {len(output_errors)} image duplicates removed (with {duplicate_distance=} and {duplicate_angle=})"
395
+ )
502
396
 
503
397
  return output_sequences, output_errors
504
398
 
505
399
 
400
+ class SplitState(T.TypedDict, total=False):
401
+ sequence_images: int
402
+ sequence_file_size: int
403
+ sequence_pixels: int
404
+ image: types.ImageMetadata
405
+
406
+
407
+ def _should_split_by_max_sequence_images(
408
+ state: SplitState,
409
+ image: types.ImageMetadata,
410
+ max_sequence_images: int,
411
+ split: bool = False,
412
+ ) -> tuple[SplitState, bool]:
413
+ if not split:
414
+ new_sequence_images = state.get("sequence_images", 0) + 1
415
+ split = max_sequence_images < new_sequence_images
416
+ if split:
417
+ LOG.info(
418
+ f"Split sequence at {image.filename.name}: too many images ({new_sequence_images} > {max_sequence_images})"
419
+ )
420
+
421
+ if split:
422
+ new_sequence_images = 1
423
+
424
+ state["sequence_images"] = new_sequence_images
425
+
426
+ return state, split
427
+
428
+
429
+ def _should_split_by_cutoff_time(
430
+ state: SplitState,
431
+ image: types.ImageMetadata,
432
+ cutoff_time: float,
433
+ split: bool = False,
434
+ ) -> tuple[SplitState, bool]:
435
+ if not split:
436
+ last_image = state.get("image")
437
+ if last_image is not None:
438
+ diff = image.time - last_image.time
439
+ split = cutoff_time < diff
440
+ if split:
441
+ LOG.info(
442
+ f"Split sequence at {image.filename.name}: time gap too large ({diff:.6g} seconds > {cutoff_time:.6g} seconds)"
443
+ )
444
+
445
+ state["image"] = image
446
+
447
+ return state, split
448
+
449
+
450
+ def _should_split_by_cutoff_distance(
451
+ state: SplitState,
452
+ image: types.ImageMetadata,
453
+ cutoff_distance: float,
454
+ split: bool = False,
455
+ ) -> tuple[SplitState, bool]:
456
+ if not split:
457
+ last_image = state.get("image")
458
+ if last_image is not None:
459
+ diff = geo.gps_distance(
460
+ (last_image.lat, last_image.lon), (image.lat, image.lon)
461
+ )
462
+ split = cutoff_distance < diff
463
+ if split:
464
+ LOG.info(
465
+ f"Split sequence at {image.filename.name}: distance gap too large ({diff:.6g} meters > {cutoff_distance:.6g} meters)"
466
+ )
467
+
468
+ state["image"] = image
469
+
470
+ return state, split
471
+
472
+
473
+ def _should_split_by_max_sequence_filesize(
474
+ state: SplitState,
475
+ image: types.ImageMetadata,
476
+ max_sequence_filesize_in_bytes: int,
477
+ split: bool = False,
478
+ ) -> tuple[SplitState, bool]:
479
+ if image.filesize is None:
480
+ filesize = os.path.getsize(image.filename)
481
+ else:
482
+ filesize = image.filesize
483
+
484
+ if not split:
485
+ new_sequence_file_size = state.get("sequence_file_size", 0) + filesize
486
+ split = max_sequence_filesize_in_bytes < new_sequence_file_size
487
+ if split:
488
+ LOG.info(
489
+ f"Split sequence at {image.filename.name}: filesize too large ({new_sequence_file_size} > {max_sequence_filesize_in_bytes})"
490
+ )
491
+
492
+ if split:
493
+ new_sequence_file_size = filesize
494
+
495
+ state["sequence_file_size"] = new_sequence_file_size
496
+
497
+ return state, split
498
+
499
+
500
+ def _should_split_by_max_sequence_pixels(
501
+ state: SplitState,
502
+ image: types.ImageMetadata,
503
+ max_sequence_pixels: int,
504
+ split: bool = False,
505
+ ) -> tuple[SplitState, bool]:
506
+ # Default values if width/height not available
507
+ width = 1024 if image.width is None else image.width
508
+ height = 1024 if image.height is None else image.height
509
+ pixels = width * height
510
+
511
+ if not split:
512
+ new_sequence_pixels = state.get("sequence_pixels", 0) + pixels
513
+ split = max_sequence_pixels < new_sequence_pixels
514
+ if split:
515
+ LOG.info(
516
+ f"Split sequence at {image.filename.name}: pixels too large ({new_sequence_pixels} > {max_sequence_pixels})"
517
+ )
518
+
519
+ if split:
520
+ new_sequence_pixels = pixels
521
+
522
+ state["sequence_pixels"] = new_sequence_pixels
523
+
524
+ return state, split
525
+
526
+
506
527
  def _split_sequences_by_limits(
507
528
  input_sequences: T.Sequence[PointSequence],
508
- max_sequence_filesize_in_bytes: float,
509
- max_sequence_pixels: float,
529
+ max_sequence_filesize_in_bytes: int | None = None,
530
+ max_sequence_pixels: int | None = None,
531
+ max_sequence_images: int | None = None,
532
+ cutoff_time: float | None = None,
533
+ cutoff_distance: float | None = None,
510
534
  ) -> list[PointSequence]:
511
- max_sequence_images = constants.MAX_SEQUENCE_LENGTH
512
- max_sequence_filesize = max_sequence_filesize_in_bytes
535
+ should_splits = []
513
536
 
514
- def _should_split(image: types.ImageMetadata, sequence_state: dict) -> bool:
515
- last_sequence_images = sequence_state.get("last_sequence_images", 0)
516
- last_sequence_file_size = sequence_state.get("last_sequence_file_size", 0)
517
- last_sequence_pixels = sequence_state.get("last_sequence_pixels", 0)
518
-
519
- # decent default values if width/height not available
520
- width = 1024 if image.width is None else image.width
521
- height = 1024 if image.height is None else image.height
522
- pixels = width * height
523
-
524
- if image.filesize is None:
525
- filesize = os.path.getsize(image.filename)
526
- else:
527
- filesize = image.filesize
537
+ if max_sequence_images is not None:
538
+ should_splits.append(
539
+ functools.partial(
540
+ _should_split_by_max_sequence_images,
541
+ max_sequence_images=max_sequence_images,
542
+ )
543
+ )
528
544
 
529
- new_sequence_images = last_sequence_images + 1
530
- new_sequence_file_size = last_sequence_file_size + filesize
531
- new_sequence_pixels = last_sequence_pixels + pixels
545
+ if cutoff_time is not None:
546
+ should_splits.append(
547
+ functools.partial(_should_split_by_cutoff_time, cutoff_time=cutoff_time)
548
+ )
532
549
 
533
- if max_sequence_images < new_sequence_images:
534
- LOG.debug(
535
- "Split because the current sequence (%s) reaches the max number of images (%s)",
536
- new_sequence_images,
537
- max_sequence_images,
550
+ if cutoff_distance is not None:
551
+ should_splits.append(
552
+ functools.partial(
553
+ _should_split_by_cutoff_distance, cutoff_distance=cutoff_distance
538
554
  )
539
- start_new_sequence = True
540
- elif max_sequence_filesize < new_sequence_file_size:
541
- LOG.debug(
542
- "Split because the current sequence (%s) reaches the max filesize (%s)",
543
- new_sequence_file_size,
544
- max_sequence_filesize,
555
+ )
556
+
557
+ if max_sequence_filesize_in_bytes is not None:
558
+ should_splits.append(
559
+ functools.partial(
560
+ _should_split_by_max_sequence_filesize,
561
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
545
562
  )
546
- start_new_sequence = True
547
- elif max_sequence_pixels < new_sequence_pixels:
548
- LOG.debug(
549
- "Split because the current sequence (%s) reaches the max pixels (%s)",
550
- new_sequence_pixels,
551
- max_sequence_pixels,
563
+ )
564
+
565
+ if max_sequence_pixels is not None:
566
+ should_splits.append(
567
+ functools.partial(
568
+ _should_split_by_max_sequence_pixels,
569
+ max_sequence_pixels=max_sequence_pixels,
552
570
  )
553
- start_new_sequence = True
554
- else:
555
- start_new_sequence = False
571
+ )
556
572
 
557
- if not start_new_sequence:
558
- sequence_state["last_sequence_images"] = new_sequence_images
559
- sequence_state["last_sequence_file_size"] = new_sequence_file_size
560
- sequence_state["last_sequence_pixels"] = new_sequence_pixels
561
- else:
562
- sequence_state["last_sequence_images"] = 1
563
- sequence_state["last_sequence_file_size"] = filesize
564
- sequence_state["last_sequence_pixels"] = pixels
573
+ def _should_split_agg(
574
+ state: SplitState, image: types.ImageMetadata
575
+ ) -> tuple[SplitState, bool]:
576
+ split = False
577
+
578
+ for should_split in should_splits:
579
+ state, split = should_split(state, image, split=split)
565
580
 
566
- return start_new_sequence
581
+ return state, split
567
582
 
568
583
  output_sequences = []
569
584
  for sequence in input_sequences:
570
585
  output_sequences.extend(
571
- split_sequence_by_agg(
572
- sequence, should_split_with_sequence_state=_should_split
586
+ split_sequence_by(
587
+ sequence, _should_split_agg, initial=T.cast(SplitState, {})
573
588
  )
574
589
  )
575
590
 
576
591
  assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
577
592
 
578
- LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))
593
+ if len(input_sequences) != len(output_sequences):
594
+ LOG.info(f"Split sequences: {len(input_sequences)} -> {len(output_sequences)}")
579
595
 
580
596
  return output_sequences
581
597
 
@@ -587,12 +603,12 @@ def process_sequence_properties(
587
603
  interpolate_directions: bool = False,
588
604
  duplicate_distance: float = constants.DUPLICATE_DISTANCE,
589
605
  duplicate_angle: float = constants.DUPLICATE_ANGLE,
590
- max_avg_speed: float = constants.MAX_AVG_SPEED,
606
+ max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
591
607
  ) -> list[types.MetadataOrError]:
592
- max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
593
- constants.MAX_SEQUENCE_FILESIZE
594
- )
595
- max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
608
+ LOG.info("==> Processing sequences...")
609
+
610
+ max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
611
+ max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS
596
612
 
597
613
  error_metadatas: list[types.ErrorMetadata] = []
598
614
  image_metadatas: list[types.ImageMetadata] = []
@@ -606,14 +622,14 @@ def process_sequence_properties(
606
622
  elif isinstance(metadata, types.VideoMetadata):
607
623
  video_metadatas.append(metadata)
608
624
  else:
609
- raise RuntimeError(f"invalid metadata type: {metadata}")
625
+ raise ValueError(f"invalid metadata type: {metadata}")
610
626
 
611
627
  if video_metadatas:
612
628
  # Check limits for videos
613
629
  video_metadatas, video_error_metadatas = _check_video_limits(
614
630
  video_metadatas,
615
631
  max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
616
- max_avg_speed=max_avg_speed,
632
+ max_capture_speed_kmh=max_capture_speed_kmh,
617
633
  max_radius_for_stationary_check=10.0,
618
634
  )
619
635
  error_metadatas.extend(video_error_metadatas)
@@ -634,9 +650,15 @@ def process_sequence_properties(
634
650
  for sequence in sequences:
635
651
  _interpolate_subsecs_for_sorting(sequence)
636
652
 
637
- # Split sequences by cutoff time
653
+ # Split sequences by max number of images, max filesize, max pixels, and cutoff time
638
654
  # NOTE: Do not split by distance here because it affects the speed limit check
639
- sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
655
+ sequences = _split_sequences_by_limits(
656
+ sequences,
657
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
658
+ max_sequence_pixels=max_sequence_pixels,
659
+ max_sequence_images=constants.MAX_SEQUENCE_LENGTH,
660
+ cutoff_time=cutoff_time,
661
+ )
640
662
 
641
663
  # Duplication check
642
664
  sequences, errors = _check_sequences_duplication(
@@ -653,24 +675,17 @@ def process_sequence_properties(
653
675
  image.angle = None
654
676
  geo.interpolate_directions_if_none(sequence)
655
677
 
656
- # Split sequences by max number of images, max filesize, and max pixels
657
- sequences = _split_sequences_by_limits(
658
- sequences,
659
- max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
660
- max_sequence_pixels=max_sequence_pixels,
661
- )
662
-
663
678
  # Check limits for sequences
664
679
  sequences, errors = _check_sequences_by_limits(
665
680
  sequences,
666
681
  max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
667
- max_avg_speed=max_avg_speed,
682
+ max_capture_speed_kmh=max_capture_speed_kmh,
668
683
  )
669
684
  error_metadatas.extend(errors)
670
685
 
671
686
  # Split sequences by cutoff distance
672
- # NOTE: The speed limit check probably rejects most of anomalies
673
- sequences = _split_sequences_by_cutoff_distance(
687
+ # NOTE: The speed limit check probably rejects most anomalies
688
+ sequences = _split_sequences_by_limits(
674
689
  sequences, cutoff_distance=cutoff_distance
675
690
  )
676
691
 
@@ -693,7 +708,7 @@ def process_sequence_properties(
693
708
  results = error_metadatas + image_metadatas + video_metadatas
694
709
 
695
710
  assert len(metadatas) == len(results), (
696
- f"expected {len(metadatas)} results but got {len(results)}"
711
+ f"Expected {len(metadatas)} results but got {len(results)}"
697
712
  )
698
713
 
699
714
  return results