mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (38)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -263
  3. mapillary_tools/authenticate.py +47 -39
  4. mapillary_tools/commands/__main__.py +15 -16
  5. mapillary_tools/commands/upload.py +33 -4
  6. mapillary_tools/config.py +5 -0
  7. mapillary_tools/constants.py +127 -45
  8. mapillary_tools/exceptions.py +4 -0
  9. mapillary_tools/exif_read.py +2 -1
  10. mapillary_tools/exif_write.py +3 -1
  11. mapillary_tools/geo.py +16 -0
  12. mapillary_tools/geotag/base.py +6 -2
  13. mapillary_tools/geotag/factory.py +9 -1
  14. mapillary_tools/geotag/geotag_images_from_exiftool.py +1 -1
  15. mapillary_tools/geotag/geotag_images_from_gpx.py +0 -6
  16. mapillary_tools/geotag/geotag_videos_from_exiftool.py +30 -9
  17. mapillary_tools/geotag/options.py +4 -1
  18. mapillary_tools/geotag/utils.py +9 -12
  19. mapillary_tools/geotag/video_extractors/gpx.py +2 -1
  20. mapillary_tools/geotag/video_extractors/native.py +25 -0
  21. mapillary_tools/history.py +124 -7
  22. mapillary_tools/http.py +211 -0
  23. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  24. mapillary_tools/process_geotag_properties.py +35 -38
  25. mapillary_tools/process_sequence_properties.py +339 -322
  26. mapillary_tools/sample_video.py +1 -2
  27. mapillary_tools/serializer/description.py +68 -58
  28. mapillary_tools/serializer/gpx.py +1 -1
  29. mapillary_tools/upload.py +202 -207
  30. mapillary_tools/upload_api_v4.py +57 -47
  31. mapillary_tools/uploader.py +728 -285
  32. mapillary_tools/utils.py +57 -5
  33. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/METADATA +7 -6
  34. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/RECORD +38 -37
  35. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/WHEEL +0 -0
  36. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/entry_points.txt +0 -0
  37. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/licenses/LICENSE +0 -0
  38. {mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.2.dist-info}/top_level.txt +0 -0
@@ -1,88 +1,109 @@
 from __future__ import annotations

+import functools
 import itertools
 import logging
 import math
 import os
 import typing as T

+import humanize
+
 from . import constants, exceptions, geo, types, utils
 from .serializer.description import DescriptionJSONSerializer

 LOG = logging.getLogger(__name__)


-SeqItem = T.TypeVar("SeqItem")
+S = T.TypeVar("S")
+R = T.TypeVar("R")
 PointSequence = T.List[geo.PointLike]


 def split_sequence_by(
-    sequence: T.Sequence[SeqItem],
-    should_split: T.Callable[[SeqItem, SeqItem], bool],
-) -> list[list[SeqItem]]:
-    """
-    Split a sequence into multiple sequences by should_split(prev, cur) => True
+    sequence: T.Iterable[S], reduce: T.Callable[[R, S], tuple[R, bool]], initial: R
+) -> list[list[S]]:
     """
-    output_sequences: list[list[SeqItem]] = []
-
-    if sequence:
-        output_sequences.append([sequence[0]])
-
-    for prev, cur in geo.pairwise(sequence):
-        # invariant: prev is processed
-        if should_split(prev, cur):
-            output_sequences.append([cur])
-        else:
-            output_sequences[-1].append(cur)
-        # invariant: cur is processed
+    Split a sequence into multiple subsequences based on a reduction function.

-    assert sum(len(s) for s in output_sequences) == len(sequence), (
-        output_sequences,
-        sequence,
-    )
+    The function processes each element through a reduce function that maintains
+    state and determines whether to split the sequence at that point. When a split
+    is triggered, a new subsequence starts with the current element.

-    return output_sequences
+    Args:
+        sequence: An iterable of elements to split
+        reduce: A function that takes (accumulated_state, current_element) and
+            returns (new_state, should_split). If should_split is True,
+            a new subsequence starts with the current element.
+        initial: The initial state value passed to the reduce function

+    Returns:
+        A list of subsequences, where each subsequence is a list of elements

-def split_sequence_by_agg(
-    sequence: T.Sequence[SeqItem],
-    should_split_with_sequence_state: T.Callable[[SeqItem, dict], bool],
-) -> list[list[SeqItem]]:
-    """
-    Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True
+    Examples:
+        >>> # Split on even numbers
+        >>> def split_on_even(count, x):
+        ...     return count + 1, x % 2 == 0
+        >>> split_sequence_by([1, 3, 2, 4, 5, 6, 7], split_on_even, 0)
+        [[1, 3], [2], [4, 5], [6, 7]]
+
+        >>> # Split when sum exceeds threshold
+        >>> def split_when_sum_exceeds_5(total, x):
+        ...     total += x
+        ...     return (x, True) if total > 5 else (total, False)
+        >>> split_sequence_by([1, 2, 3, 4, 1, 2], split_when_sum_exceeds_5, 0)
+        [[1, 2], [3], [4, 1], [2]]
+
+        >>> # Split on specific values
+        >>> def split_on_zero(_, x):
+        ...     return None, x == 0
+        >>> split_sequence_by([1, 2, 0, 3, 4, 0, 5], split_on_zero, None)
+        [[1, 2], [0, 3, 4], [0, 5]]
+
+        >>> # Empty sequence
+        >>> split_sequence_by([], lambda s, x: (s, False), 0)
+        []
+
+        >>> # Single element
+        >>> split_sequence_by([42], lambda s, x: (s, False), 0)
+        [[42]]
     """

-    output_sequences: list[list[SeqItem]] = []
-    sequence_state: dict = {}
-
-    for cur in sequence:
-        start_new_sequence = should_split_with_sequence_state(cur, sequence_state)

-        if not output_sequences:
-            output_sequences.append([])
+    output_sequences: list[list[S]] = []

-        if start_new_sequence:
-            # DO NOT reset the state because it contains the information of current item
-            # sequence_state = {}
-            if output_sequences[-1]:
-                output_sequences.append([])
+    value = initial

-        output_sequences[-1].append(cur)
+    for element in sequence:
+        value, should = reduce(value, element)

-    assert sum(len(s) for s in output_sequences) == len(sequence)
+        if should:
+            output_sequences.append([element])
+        else:
+            if output_sequences:
+                output_sequences[-1].append(element)
+            else:
+                output_sequences.append([element])

     return output_sequences


 def duplication_check(
     sequence: PointSequence,
+    *,
     max_duplicate_distance: float,
     max_duplicate_angle: float,
 ) -> tuple[PointSequence, list[types.ErrorMetadata]]:
+    """
+    >>> duplication_check([], max_duplicate_distance=1, max_duplicate_angle=2)
+    ([], [])
+    """
+
     dedups: PointSequence = []
     dups: list[types.ErrorMetadata] = []

     it = iter(sequence)
-    prev = next(it)
+    prev = next(it, None)
+
     if prev is None:
         return dedups, dups

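The rewrite above replaces the old pairwise `should_split(prev, cur)` callback with a reducer that threads explicit state through the scan. For callers porting to the new API, here is a minimal, hypothetical adapter (not part of the package; the helper name is illustrative) that wraps an old-style pairwise predicate as a reducer for the new `split_sequence_by`:

```python
import typing as T

S = T.TypeVar("S")


def pairwise_to_reducer(
    should_split: T.Callable[[S, S], bool],
) -> T.Callable[[T.Optional[S], S], "tuple[T.Optional[S], bool]"]:
    """Adapt an old-style (prev, cur) predicate to the reducer signature.

    The reducer state is simply the previous element; it is None before the
    first element, which therefore never starts a new subsequence.
    """

    def reducer(prev: T.Optional[S], cur: S) -> "tuple[T.Optional[S], bool]":
        split = prev is not None and should_split(prev, cur)
        return cur, split  # cur becomes the new "prev" state

    return reducer


# Example: split whenever consecutive numbers differ by more than 2
# split_sequence_by([1, 2, 9, 10], pairwise_to_reducer(lambda a, b: b - a > 2), None)
# -> [[1, 2], [9, 10]]
```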
@@ -90,10 +111,7 @@ def duplication_check(

     for cur in it:
         # invariant: prev is processed
-        distance = geo.gps_distance(
-            (prev.lat, prev.lon),
-            (cur.lat, cur.lon),
-        )
+        distance = geo.gps_distance((prev.lat, prev.lon), (cur.lat, cur.lon))

         if prev.angle is not None and cur.angle is not None:
             angle_diff = geo.diff_bearing(prev.angle, cur.angle)
@@ -104,15 +122,14 @@
             angle_diff is None or angle_diff <= max_duplicate_angle
         ):
             msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
+            ex = exceptions.MapillaryDuplicationError(
+                msg,
+                DescriptionJSONSerializer.as_desc(cur),
+                distance=distance,
+                angle_diff=angle_diff,
+            )
             dup = types.describe_error_metadata(
-                exceptions.MapillaryDuplicationError(
-                    msg,
-                    DescriptionJSONSerializer.as_desc(cur),
-                    distance=distance,
-                    angle_diff=angle_diff,
-                ),
-                cur.filename,
-                filetype=types.FileType.IMAGE,
+                ex, cur.filename, filetype=types.FileType.IMAGE
             )
             dups.append(dup)
             # prev does not change
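The angle test above keeps using `geo.diff_bearing`. Assuming the usual wrap-around semantics for compass bearings (the real implementation lives in `mapillary_tools/geo.py`), it behaves like this sketch:

```python
def diff_bearing(b1: float, b2: float) -> float:
    """Smallest absolute difference between two compass bearings, in degrees.

    A sketch of the semantics the duplication check relies on; the actual
    implementation is geo.diff_bearing, which is assumed to behave this way.
    """
    d = abs(b1 - b2) % 360.0
    return 360.0 - d if d > 180.0 else d


# Wrap-around is handled, so headings on either side of north compare close:
# diff_bearing(350, 10) == 20.0
```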
@@ -124,9 +141,9 @@
     return dedups, dups


-def _group_by(
+def _group_images_by(
     image_metadatas: T.Iterable[types.ImageMetadata],
-    group_key_func=T.Callable[[types.ImageMetadata], T.Hashable],
+    group_key_func: T.Callable[[types.ImageMetadata], T.Hashable],
 ) -> dict[T.Hashable, list[types.ImageMetadata]]:
     grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
     for metadata in image_metadatas:
@@ -136,11 +153,21 @@ def _group_by(

 def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
     """
-    Update the timestamps make sure they are unique and sorted
+    Update the timestamps to make sure they are unique and sorted
     in the same order by interpolating subseconds
+
     Examples:
-    - Input: 1, 1, 1, 1, 1, 2
-    - Output: 1, 1.2, 1.4, 1.6, 1.8, 2
+        >>> def make_point(t):
+        ...     return geo.Point(lat=0, lon=0, time=t, alt=None, angle=None)
+        >>> points = [make_point(t) for t in [1, 1, 1, 1, 1, 2]]
+        >>> _interpolate_subsecs_for_sorting(points)
+        >>> [p.time for p in points]
+        [1.0, 1.2, 1.4, 1.6, 1.8, 2]
+
+        >>> points = [make_point(t) for t in [1.1]]
+        >>> _interpolate_subsecs_for_sorting(points)
+        >>> [p.time for p in points]
+        [1.1]
     """

     gidx = 0
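The new doctest documents the interpolation arithmetic: a run of k equal timestamps t followed by a distinct timestamp t_next is spread as t, t + d, t + 2d, ... with d = (t_next - t) / k. A simplified standalone sketch of that idea (the real function mutates `geo.Point` objects in place, and a trailing run with no later distinct timestamp is left untouched):

```python
def interpolate_subseconds(times: list[float]) -> list[float]:
    """Spread each run of equal timestamps evenly toward the next distinct one."""
    out = list(times)
    i = 0
    while i < len(out):
        j = i
        while j < len(out) and out[j] == out[i]:
            j += 1  # Find the end of the run of equal timestamps
        if j < len(out):  # A later distinct timestamp exists to spread toward
            step = (out[j] - out[i]) / (j - i)
            for k in range(i + 1, j):
                out[k] = out[i] + step * (k - i)
        i = j
    return out


# interpolate_subseconds([1, 1, 1, 1, 1, 2]) -> [1, 1.2, 1.4, 1.6, 1.8, 2]
```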
@@ -172,63 +199,6 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
     )


-def _parse_filesize_in_bytes(filesize_str: str) -> int:
-    filesize_str = filesize_str.strip().upper()
-
-    try:
-        if filesize_str.endswith("B"):
-            return int(filesize_str[:-1])
-        elif filesize_str.endswith("K"):
-            return int(filesize_str[:-1]) * 1024
-        elif filesize_str.endswith("M"):
-            return int(filesize_str[:-1]) * 1024 * 1024
-        elif filesize_str.endswith("G"):
-            return int(filesize_str[:-1]) * 1024 * 1024 * 1024
-        else:
-            return int(filesize_str)
-    except ValueError:
-        raise exceptions.MapillaryBadParameterError(
-            f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
-        )
-
-
-def _parse_pixels(pixels_str: str) -> int:
-    pixels_str = pixels_str.strip().upper()
-
-    try:
-        if pixels_str.endswith("K"):
-            return int(pixels_str[:-1]) * 1000
-        elif pixels_str.endswith("M"):
-            return int(pixels_str[:-1]) * 1000 * 1000
-        elif pixels_str.endswith("G"):
-            return int(pixels_str[:-1]) * 1000 * 1000 * 1000
-        else:
-            return int(pixels_str)
-    except ValueError:
-        raise exceptions.MapillaryBadParameterError(
-            f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
-        )
-
-
-def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
-    total_distance = 0.0
-    for cur, nxt in geo.pairwise(sequence):
-        total_distance += geo.gps_distance(
-            (cur.lat, cur.lon),
-            (nxt.lat, nxt.lon),
-        )
-
-    if sequence:
-        time_diff = sequence[-1].time - sequence[0].time
-    else:
-        time_diff = 0.0
-
-    if time_diff == 0.0:
-        return float("inf")
-
-    return total_distance / time_diff
-
-
 def _is_video_stationary(
     sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
 ) -> bool:
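The `_avg_speed` helper removed above has been promoted into the `geo` module (note the `geo.py +16 -0` entry in the file list); later hunks call `geo.avg_speed` instead. A sketch equivalent to the removed code, which `geo.avg_speed` is presumed to match:

```python
import typing as T

from mapillary_tools import geo


def avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
    """Average speed in m/s: total GPS track distance over elapsed time."""
    total_distance = 0.0
    for cur, nxt in geo.pairwise(sequence):
        total_distance += geo.gps_distance((cur.lat, cur.lon), (nxt.lat, nxt.lon))

    time_diff = sequence[-1].time - sequence[0].time if sequence else 0.0

    # A zero time span (including empty tracks) is treated as infinitely fast,
    # which guarantees such tracks fail the capture-speed check
    if time_diff == 0.0:
        return float("inf")

    return total_distance / time_diff
```

Callers below convert the result to km/h with `* 3.6` before comparing it against `max_capture_speed_kmh`.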
@@ -246,8 +216,8 @@ def _is_video_stationary(

 def _check_video_limits(
     video_metadatas: T.Iterable[types.VideoMetadata],
-    max_sequence_filesize_in_bytes: int,
-    max_avg_speed: float,
+    max_sequence_filesize_in_bytes: int | None,
+    max_capture_speed_kmh: float,
     max_radius_for_stationary_check: float,
 ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
     output_video_metadatas: list[types.VideoMetadata] = []
@@ -262,33 +232,38 @@
             if is_stationary:
                 raise exceptions.MapillaryStationaryVideoError("Stationary video")

-            video_filesize = (
-                utils.get_file_size(video_metadata.filename)
-                if video_metadata.filesize is None
-                else video_metadata.filesize
-            )
-            if video_filesize > max_sequence_filesize_in_bytes:
-                raise exceptions.MapillaryFileTooLargeError(
-                    f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+            if max_sequence_filesize_in_bytes is not None:
+                video_filesize = (
+                    utils.get_file_size(video_metadata.filename)
+                    if video_metadata.filesize is None
+                    else video_metadata.filesize
                 )
+                if video_filesize > max_sequence_filesize_in_bytes:
+                    raise exceptions.MapillaryFileTooLargeError(
+                        f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
+                    )

             contains_null_island = any(
                 p.lat == 0 and p.lon == 0 for p in video_metadata.points
             )
             if contains_null_island:
                 raise exceptions.MapillaryNullIslandError(
-                    "Found GPS coordinates in Null Island (0, 0)",
+                    "GPS coordinates in Null Island (0, 0)"
                 )

+            avg_speed_kmh = (
+                geo.avg_speed(video_metadata.points) * 3.6
+            )  # Convert m/s to km/h
             too_fast = (
                 len(video_metadata.points) >= 2
-                and _avg_speed(video_metadata.points) > max_avg_speed
+                and avg_speed_kmh > max_capture_speed_kmh
             )
             if too_fast:
                 raise exceptions.MapillaryCaptureSpeedTooFastError(
-                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                    f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
                 )
         except exceptions.MapillaryDescriptionError as ex:
+            LOG.error(f"{_video_name(video_metadata)}: {ex}")
             error_metadatas.append(
                 types.describe_error_metadata(
                     exc=ex,
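The friendlier error messages in this hunk rely on the newly imported `humanize` package: `humanize.naturalsize` formats byte counts in decimal units by default (pass `binary=True` for power-of-two units). A quick illustration:

```python
import humanize

humanize.naturalsize(52_428_800)               # '52.4 MB'  (decimal units)
humanize.naturalsize(52_428_800, binary=True)  # '50.0 MiB' (binary units)

# The m/s -> km/h conversion used in the speed messages is a plain factor:
# 1 m/s = 3.6 km/h
speed_kmh = 12.5 * 3.6  # 45.0 km/h
```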
@@ -299,57 +274,55 @@
         else:
             output_video_metadatas.append(video_metadata)

-    LOG.info(
-        "Found %s videos and %s errors after video limit checks",
-        len(output_video_metadatas),
-        len(error_metadatas),
-    )
-
     return output_video_metadatas, error_metadatas


+def _video_name(video_metadata: types.VideoMetadata) -> str:
+    return video_metadata.filename.name
+
+
 def _check_sequences_by_limits(
     input_sequences: T.Sequence[PointSequence],
-    max_sequence_filesize_in_bytes: int,
-    max_avg_speed: float,
+    max_sequence_filesize_in_bytes: int | None,
+    max_capture_speed_kmh: float,
 ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
     output_sequences: list[PointSequence] = []
     output_errors: list[types.ErrorMetadata] = []

     for sequence in input_sequences:
-        sequence_filesize = sum(
-            utils.get_file_size(image.filename)
-            if image.filesize is None
-            else image.filesize
-            for image in sequence
-        )
-
         try:
-            if sequence_filesize > max_sequence_filesize_in_bytes:
-                raise exceptions.MapillaryFileTooLargeError(
-                    f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+            if max_sequence_filesize_in_bytes is not None:
+                sequence_filesize = sum(
+                    utils.get_file_size(image.filename)
+                    if image.filesize is None
+                    else image.filesize
+                    for image in sequence
                 )
+                if sequence_filesize > max_sequence_filesize_in_bytes:
+                    raise exceptions.MapillaryFileTooLargeError(
+                        f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
+                    )

             contains_null_island = any(
                 image.lat == 0 and image.lon == 0 for image in sequence
             )
             if contains_null_island:
                 raise exceptions.MapillaryNullIslandError(
-                    "Found GPS coordinates in Null Island (0, 0)",
+                    "GPS coordinates in Null Island (0, 0)"
                 )

-            too_fast = len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed
+            avg_speed_kmh = geo.avg_speed(sequence) * 3.6  # Convert m/s to km/h
+            too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
             if too_fast:
                 raise exceptions.MapillaryCaptureSpeedTooFastError(
-                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                    f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
                 )
         except exceptions.MapillaryDescriptionError as ex:
+            LOG.error(f"{_sequence_name(sequence)}: {ex}")
             for image in sequence:
                 output_errors.append(
                     types.describe_error_metadata(
-                        exc=ex,
-                        filename=image.filename,
-                        filetype=types.FileType.IMAGE,
+                        exc=ex, filename=image.filename, filetype=types.FileType.IMAGE
                     )
                 )

@@ -360,19 +333,20 @@ def _check_sequences_by_limits(
         len(s) for s in input_sequences
     )

-    LOG.info(
-        "Found %s sequences and %s errors after sequence limit checks",
-        len(output_sequences),
-        len(output_errors),
-    )
-
    return output_sequences, output_errors


+def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
+    if not sequence:
+        return "N/A"
+    image = sequence[0]
+    return f"{image.filename.parent.name}/{image.filename.name}"
+
+
 def _group_by_folder_and_camera(
     image_metadatas: list[types.ImageMetadata],
 ) -> list[list[types.ImageMetadata]]:
-    grouped = _group_by(
+    grouped = _group_images_by(
         image_metadatas,
         lambda metadata: (
             str(metadata.filename.parent),
@@ -383,89 +357,10 @@ def _group_by_folder_and_camera(
         ),
     )
     for key in grouped:
-        LOG.debug("Group sequences by %s: %s images", key, len(grouped[key]))
+        LOG.debug(f"Grouped {len(grouped[key])} images by {key}")
     output_sequences = list(grouped.values())

-    LOG.info(
-        "Found %s sequences from different folders and cameras",
-        len(output_sequences),
-    )
-
-    return output_sequences
-
-
-def _split_sequences_by_cutoff_time(
-    input_sequences: T.Sequence[PointSequence], cutoff_time: float
-) -> list[PointSequence]:
-    def _should_split_by_cutoff_time(
-        prev: types.ImageMetadata, cur: types.ImageMetadata
-    ) -> bool:
-        time_diff = cur.time - prev.time
-        assert 0 <= time_diff, "sequence must be sorted by capture times"
-        should = cutoff_time < time_diff
-        if should:
-            LOG.debug(
-                "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
-                round(time_diff, 2),
-                round(cutoff_time, 2),
-                prev.filename.parent,
-                prev.filename.name,
-                cur.filename.name,
-            )
-        return should
-
-    output_sequences = []
-    for sequence in input_sequences:
-        output_sequences.extend(
-            split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
-        )
-
-    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
-
-    LOG.info(
-        "Found %s sequences after split by cutoff_time %d seconds",
-        len(output_sequences),
-        cutoff_time,
-    )
-
-    return output_sequences
-
-
-def _split_sequences_by_cutoff_distance(
-    input_sequences: T.Sequence[PointSequence], cutoff_distance: float
-) -> list[PointSequence]:
-    def _should_split_by_cutoff_distance(
-        prev: types.ImageMetadata, cur: types.ImageMetadata
-    ) -> bool:
-        distance = geo.gps_distance(
-            (prev.lat, prev.lon),
-            (cur.lat, cur.lon),
-        )
-        should = cutoff_distance < distance
-        if should:
-            LOG.debug(
-                "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
-                round(distance, 2),
-                round(cutoff_distance, 2),
-                prev.filename.parent,
-                prev.filename.name,
-                cur.filename.name,
-            )
-        return should
-
-    output_sequences = []
-    for sequence in input_sequences:
-        output_sequences.extend(
-            split_sequence_by(sequence, _should_split_by_cutoff_distance)
-        )
-
-    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
-
-    LOG.info(
-        "Found %s sequences after split by cutoff_distance %d meters",
-        len(output_sequences),
-        cutoff_distance,
-    )
+    LOG.info(f"Created {len(output_sequences)} sequences by folders and cameras")

     return output_sequences

@@ -485,95 +380,218 @@ def _check_sequences_duplication(
             max_duplicate_angle=duplicate_angle,
         )
         assert len(sequence) == len(output_sequence) + len(errors)
-        output_sequences.append(output_sequence)
+        if output_sequence:
+            output_sequences.append(output_sequence)
         output_errors.extend(errors)

+    # All input images should be accounted for either in output sequences or errors
     assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
         len(s) for s in input_sequences
     )

-    LOG.info(
-        "Found %s sequences and %s errors after duplication check",
-        len(output_sequences),
-        len(output_errors),
-    )
+    if output_errors:
+        LOG.info(
+            f"Duplication check: {len(output_errors)} image duplicates removed (with {duplicate_distance=} and {duplicate_angle=})"
+        )

     return output_sequences, output_errors


+class SplitState(T.TypedDict, total=False):
+    sequence_images: int
+    sequence_file_size: int
+    sequence_pixels: int
+    image: types.ImageMetadata
+
+
+def _should_split_by_max_sequence_images(
+    state: SplitState,
+    image: types.ImageMetadata,
+    max_sequence_images: int,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if not split:
+        new_sequence_images = state.get("sequence_images", 0) + 1
+        split = max_sequence_images < new_sequence_images
+        if split:
+            LOG.info(
+                f"Split sequence at {image.filename.name}: too many images ({new_sequence_images} > {max_sequence_images})"
+            )
+
+    if split:
+        new_sequence_images = 1
+
+    state["sequence_images"] = new_sequence_images
+
+    return state, split
+
+
+def _should_split_by_cutoff_time(
+    state: SplitState,
+    image: types.ImageMetadata,
+    cutoff_time: float,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if not split:
+        last_image = state.get("image")
+        if last_image is not None:
+            diff = image.time - last_image.time
+            split = cutoff_time < diff
+            if split:
+                LOG.info(
+                    f"Split sequence at {image.filename.name}: time gap too large ({diff:.6g} seconds > {cutoff_time:.6g} seconds)"
+                )
+
+    state["image"] = image
+
+    return state, split
+
+
+def _should_split_by_cutoff_distance(
+    state: SplitState,
+    image: types.ImageMetadata,
+    cutoff_distance: float,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if not split:
+        last_image = state.get("image")
+        if last_image is not None:
+            diff = geo.gps_distance(
+                (last_image.lat, last_image.lon), (image.lat, image.lon)
+            )
+            split = cutoff_distance < diff
+            if split:
+                LOG.info(
+                    f"Split sequence at {image.filename.name}: distance gap too large ({diff:.6g} meters > {cutoff_distance:.6g} meters)"
+                )
+
+    state["image"] = image
+
+    return state, split
+
+
+def _should_split_by_max_sequence_filesize(
+    state: SplitState,
+    image: types.ImageMetadata,
+    max_sequence_filesize_in_bytes: int,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if image.filesize is None:
+        filesize = os.path.getsize(image.filename)
+    else:
+        filesize = image.filesize
+
+    if not split:
+        new_sequence_file_size = state.get("sequence_file_size", 0) + filesize
+        split = max_sequence_filesize_in_bytes < new_sequence_file_size
+        if split:
+            LOG.info(
+                f"Split sequence at {image.filename.name}: filesize too large ({new_sequence_file_size} > {max_sequence_filesize_in_bytes})"
+            )
+
+    if split:
+        new_sequence_file_size = filesize
+
+    state["sequence_file_size"] = new_sequence_file_size
+
+    return state, split
+
+
+def _should_split_by_max_sequence_pixels(
+    state: SplitState,
+    image: types.ImageMetadata,
+    max_sequence_pixels: int,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    # Default values if width/height not available
+    width = 1024 if image.width is None else image.width
+    height = 1024 if image.height is None else image.height
+    pixels = width * height
+
+    if not split:
+        new_sequence_pixels = state.get("sequence_pixels", 0) + pixels
+        split = max_sequence_pixels < new_sequence_pixels
+        if split:
+            LOG.info(
+                f"Split sequence at {image.filename.name}: pixels too large ({new_sequence_pixels} > {max_sequence_pixels})"
+            )
+
+    if split:
+        new_sequence_pixels = pixels
+
+    state["sequence_pixels"] = new_sequence_pixels
+
+    return state, split
+
+
 def _split_sequences_by_limits(
     input_sequences: T.Sequence[PointSequence],
-    max_sequence_filesize_in_bytes: float,
-    max_sequence_pixels: float,
+    max_sequence_filesize_in_bytes: int | None = None,
+    max_sequence_pixels: int | None = None,
+    max_sequence_images: int | None = None,
+    cutoff_time: float | None = None,
+    cutoff_distance: float | None = None,
 ) -> list[PointSequence]:
-    max_sequence_images = constants.MAX_SEQUENCE_LENGTH
-    max_sequence_filesize = max_sequence_filesize_in_bytes
-
-    def _should_split(image: types.ImageMetadata, sequence_state: dict) -> bool:
-        last_sequence_images = sequence_state.get("last_sequence_images", 0)
-        last_sequence_file_size = sequence_state.get("last_sequence_file_size", 0)
-        last_sequence_pixels = sequence_state.get("last_sequence_pixels", 0)
+    should_splits = []

-        # decent default values if width/height not available
-        width = 1024 if image.width is None else image.width
-        height = 1024 if image.height is None else image.height
-        pixels = width * height
-
-        if image.filesize is None:
-            filesize = os.path.getsize(image.filename)
-        else:
-            filesize = image.filesize
+    if max_sequence_images is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_max_sequence_images,
+                max_sequence_images=max_sequence_images,
+            )
+        )

-        new_sequence_images = last_sequence_images + 1
-        new_sequence_file_size = last_sequence_file_size + filesize
-        new_sequence_pixels = last_sequence_pixels + pixels
+    if cutoff_time is not None:
+        should_splits.append(
+            functools.partial(_should_split_by_cutoff_time, cutoff_time=cutoff_time)
+        )

-        if max_sequence_images < new_sequence_images:
-            LOG.debug(
-                "Split because the current sequence (%s) reaches the max number of images (%s)",
-                new_sequence_images,
-                max_sequence_images,
+    if cutoff_distance is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_cutoff_distance, cutoff_distance=cutoff_distance
             )
-            start_new_sequence = True
-        elif max_sequence_filesize < new_sequence_file_size:
-            LOG.debug(
-                "Split because the current sequence (%s) reaches the max filesize (%s)",
-                new_sequence_file_size,
-                max_sequence_filesize,
+        )
+
+    if max_sequence_filesize_in_bytes is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_max_sequence_filesize,
+                max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
             )
-            start_new_sequence = True
-        elif max_sequence_pixels < new_sequence_pixels:
-            LOG.debug(
-                "Split because the current sequence (%s) reaches the max pixels (%s)",
-                new_sequence_pixels,
-                max_sequence_pixels,
+        )
+
+    if max_sequence_pixels is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_max_sequence_pixels,
+                max_sequence_pixels=max_sequence_pixels,
             )
-            start_new_sequence = True
-        else:
-            start_new_sequence = False
+        )

-        if not start_new_sequence:
-            sequence_state["last_sequence_images"] = new_sequence_images
-            sequence_state["last_sequence_file_size"] = new_sequence_file_size
-            sequence_state["last_sequence_pixels"] = new_sequence_pixels
-        else:
-            sequence_state["last_sequence_images"] = 1
-            sequence_state["last_sequence_file_size"] = filesize
-            sequence_state["last_sequence_pixels"] = pixels
+    def _should_split_agg(
+        state: SplitState, image: types.ImageMetadata
+    ) -> tuple[SplitState, bool]:
+        split = False

-        return start_new_sequence
+        for should_split in should_splits:
+            state, split = should_split(state, image, split=split)
+
+        return state, split

     output_sequences = []
     for sequence in input_sequences:
         output_sequences.extend(
-            split_sequence_by_agg(
-                sequence, should_split_with_sequence_state=_should_split
+            split_sequence_by(
+                sequence, _should_split_agg, initial=T.cast(SplitState, {})
             )
         )

     assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)

-    LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))
+    if len(input_sequences) != len(output_sequences):
+        LOG.info(f"Split sequences: {len(input_sequences)} -> {len(output_sequences)}")

     return output_sequences

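The new `_split_sequences_by_limits` composes independent checks with `functools.partial` and threads one shared `SplitState` through all of them via `split_sequence_by`. A self-contained sketch of the same pattern with simplified state and checks (all names here are illustrative, not part of the package):

```python
import functools
import typing as T

State = T.Dict[str, int]  # Simplified stand-in for the SplitState TypedDict


def by_max_count(
    state: State, item: int, max_count: int, split: bool = False
) -> T.Tuple[State, bool]:
    """Split once the current subsequence would exceed max_count items."""
    if not split:
        count = state.get("count", 0) + 1
        split = max_count < count
    if split:
        count = 1  # The current item starts a new subsequence
    state["count"] = count
    return state, split


def by_max_sum(
    state: State, item: int, max_sum: int, split: bool = False
) -> T.Tuple[State, bool]:
    """Split once the current subsequence's running sum would exceed max_sum."""
    if not split:
        total = state.get("sum", 0) + item
        split = max_sum < total
    if split:
        total = item
    state["sum"] = total
    return state, split


checks = [
    functools.partial(by_max_count, max_count=3),
    functools.partial(by_max_sum, max_sum=10),
]


def aggregate(state: State, item: int) -> T.Tuple[State, bool]:
    # Every check runs even after one votes to split, so later checks can
    # reset their accumulators for the subsequence that starts here
    split = False
    for check in checks:
        state, split = check(state, item, split=split)
    return state, split


# With split_sequence_by from the diff above:
# split_sequence_by(range(1, 8), aggregate, initial={})
# -> [[1, 2, 3], [4, 5], [6], [7]]
```

As in `_should_split_agg` above, the aggregate keeps calling the remaining checks after one has voted to split, which is how each check resets its own accumulator for the new subsequence.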
@@ -585,12 +603,12 @@ def process_sequence_properties(
     interpolate_directions: bool = False,
     duplicate_distance: float = constants.DUPLICATE_DISTANCE,
     duplicate_angle: float = constants.DUPLICATE_ANGLE,
-    max_avg_speed: float = constants.MAX_AVG_SPEED,
+    max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
 ) -> list[types.MetadataOrError]:
-    max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
-        constants.MAX_SEQUENCE_FILESIZE
-    )
-    max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
+    LOG.info("==> Processing sequences...")
+
+    max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
+    max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS

     error_metadatas: list[types.ErrorMetadata] = []
     image_metadatas: list[types.ImageMetadata] = []
@@ -604,14 +622,14 @@ def process_sequence_properties(
         elif isinstance(metadata, types.VideoMetadata):
             video_metadatas.append(metadata)
         else:
-            raise RuntimeError(f"invalid metadata type: {metadata}")
+            raise ValueError(f"invalid metadata type: {metadata}")

     if video_metadatas:
         # Check limits for videos
         video_metadatas, video_error_metadatas = _check_video_limits(
             video_metadatas,
             max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-            max_avg_speed=max_avg_speed,
+            max_capture_speed_kmh=max_capture_speed_kmh,
             max_radius_for_stationary_check=10.0,
         )
         error_metadatas.extend(video_error_metadatas)
@@ -632,9 +650,15 @@ def process_sequence_properties(
     for sequence in sequences:
         _interpolate_subsecs_for_sorting(sequence)

-    # Split sequences by cutoff time
+    # Split sequences by max number of images, max filesize, max pixels, and cutoff time
     # NOTE: Do not split by distance here because it affects the speed limit check
-    sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
+    sequences = _split_sequences_by_limits(
+        sequences,
+        max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
+        max_sequence_pixels=max_sequence_pixels,
+        max_sequence_images=constants.MAX_SEQUENCE_LENGTH,
+        cutoff_time=cutoff_time,
+    )

     # Duplication check
     sequences, errors = _check_sequences_duplication(
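The NOTE in the hunk above is worth unpacking: splitting on distance gaps before the speed check would hide exactly the GPS anomalies the check is meant to catch. A small worked example of the arithmetic:

```python
# A GPS glitch that "teleports" 1 km between two captures taken 1 s apart:
jump_m, gap_s = 1000.0, 1.0
speed_kmh = (jump_m / gap_s) * 3.6
assert speed_kmh == 3600.0  # Far above any plausible max_capture_speed_kmh

# If the sequence were split at the jump first, each half would look slow on
# its own, and the anomaly would sail through the capture-speed check.
```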
@@ -651,24 +675,17 @@ def process_sequence_properties(
             image.angle = None
         geo.interpolate_directions_if_none(sequence)

-    # Split sequences by max number of images, max filesize, and max pixels
-    sequences = _split_sequences_by_limits(
-        sequences,
-        max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-        max_sequence_pixels=max_sequence_pixels,
-    )
-
     # Check limits for sequences
     sequences, errors = _check_sequences_by_limits(
         sequences,
         max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-        max_avg_speed=max_avg_speed,
+        max_capture_speed_kmh=max_capture_speed_kmh,
     )
     error_metadatas.extend(errors)

     # Split sequences by cutoff distance
-    # NOTE: The speed limit check probably rejects most of anomalies
-    sequences = _split_sequences_by_cutoff_distance(
+    # NOTE: The speed limit check probably rejects most anomalies
+    sequences = _split_sequences_by_limits(
         sequences, cutoff_distance=cutoff_distance
     )

@@ -691,7 +708,7 @@ def process_sequence_properties(
     results = error_metadatas + image_metadatas + video_metadatas

     assert len(metadatas) == len(results), (
-        f"expected {len(metadatas)} results but got {len(results)}"
+        f"Expected {len(metadatas)} results but got {len(results)}"
     )

     return results
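Putting the renamed knob in context, here is a hypothetical invocation sketching the signature after this change. The first parameter name and the keyword form of `cutoff_time`/`cutoff_distance` are inferred from the hunks above rather than shown in full, and the threshold values are illustrative only; the defaults live in `constants.py`:

```python
from mapillary_tools import constants
from mapillary_tools.process_sequence_properties import process_sequence_properties

metadatas: list = []  # Normally ImageMetadata/VideoMetadata/ErrorMetadata from earlier steps

results = process_sequence_properties(
    metadatas,
    cutoff_time=60.0,       # Illustrative: split on >60 s capture gaps
    cutoff_distance=100.0,  # Illustrative: split on >100 m GPS gaps
    interpolate_directions=False,
    duplicate_distance=constants.DUPLICATE_DISTANCE,
    duplicate_angle=constants.DUPLICATE_ANGLE,
    max_capture_speed_kmh=constants.MAX_CAPTURE_SPEED_KMH,  # Renamed from max_avg_speed
)
```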