PyPI - mapillary-tools - Versions diffs - 0.14.0b1__py3-none-any.whl → 0.14.1__py3-none-any.whl - Mend

mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

mapillary_tools/__init__.py +1 -1
mapillary_tools/api_v4.py +66 -263
mapillary_tools/authenticate.py +46 -38
mapillary_tools/commands/__main__.py +15 -16
mapillary_tools/commands/upload.py +33 -4
mapillary_tools/constants.py +127 -45
mapillary_tools/exceptions.py +4 -0
mapillary_tools/exif_read.py +2 -1
mapillary_tools/exif_write.py +3 -1
mapillary_tools/geo.py +16 -0
mapillary_tools/geotag/base.py +6 -2
mapillary_tools/geotag/factory.py +9 -1
mapillary_tools/geotag/geotag_images_from_exiftool.py +1 -1
mapillary_tools/geotag/geotag_images_from_gpx.py +0 -6
mapillary_tools/geotag/geotag_videos_from_exiftool.py +30 -9
mapillary_tools/geotag/utils.py +9 -12
mapillary_tools/geotag/video_extractors/gpx.py +2 -1
mapillary_tools/geotag/video_extractors/native.py +25 -0
mapillary_tools/history.py +124 -7
mapillary_tools/http.py +211 -0
mapillary_tools/mp4/construct_mp4_parser.py +8 -2
mapillary_tools/process_geotag_properties.py +31 -27
mapillary_tools/process_sequence_properties.py +339 -322
mapillary_tools/sample_video.py +1 -2
mapillary_tools/serializer/description.py +56 -56
mapillary_tools/serializer/gpx.py +1 -1
mapillary_tools/upload.py +201 -205
mapillary_tools/upload_api_v4.py +57 -47
mapillary_tools/uploader.py +720 -285
mapillary_tools/utils.py +57 -5
{mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +7 -6
{mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/RECORD +36 -35
{mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +0 -0
{mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
{mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
{mapillary_tools-0.14.0b1.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0

mapillary_tools/process_sequence_properties.py CHANGED

@@ -1,88 +1,109 @@
 from __future__ import annotations
+import functools
 import itertools
 import logging
 import math
 import os
 import typing as T
+import humanize
 from . import constants, exceptions, geo, types, utils
 from .serializer.description import DescriptionJSONSerializer
 LOG = logging.getLogger(__name__)
-SeqItem = T.TypeVar("SeqItem")
+S = T.TypeVar("S")
+R = T.TypeVar("R")
 PointSequence = T.List[geo.PointLike]
 def split_sequence_by(
-    sequence: T.Sequence[SeqItem],
-    should_split: T.Callable[[SeqItem, SeqItem], bool],
-) -> list[list[SeqItem]]:
-    """
-    Split a sequence into multiple sequences by should_split(prev, cur) => True
+    sequence: T.Iterable[S], reduce: T.Callable[[R, S], tuple[R, bool]], initial: R
+) -> list[list[S]]:
     """
-    output_sequences: list[list[SeqItem]] = []
-    if sequence:
-        output_sequences.append([sequence[0]])
-    for prev, cur in geo.pairwise(sequence):
-        # invariant: prev is processed
-        if should_split(prev, cur):
-            output_sequences.append([cur])
-        else:
-            output_sequences[-1].append(cur)
-        # invariant: cur is processed
+    Split a sequence into multiple subsequences based on a reduction function.
-    assert sum(len(s) for s in output_sequences) == len(sequence), (
-        output_sequences,
-        sequence,
-    )
+    The function processes each element through a reduce function that maintains
+    state and determines whether to split the sequence at that point. When a split
+    is triggered, a new subsequence starts with the current element.
-    return output_sequences
+    Args:
+        sequence: An iterable of elements to split
+        reduce: A function that takes (accumulated_state, current_element) and
+               returns (new_state, should_split). If should_split is True,
+               a new subsequence starts with the current element.
+        initial: The initial state value passed to the reduce function
+    Returns:
+        A list of subsequences, where each subsequence is a list of elements
-def split_sequence_by_agg(
-    sequence: T.Sequence[SeqItem],
-    should_split_with_sequence_state: T.Callable[[SeqItem, dict], bool],
-) -> list[list[SeqItem]]:
-    """
-    Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True
+    Examples:
+        >>> # Split on even numbers
+        >>> def split_on_even(count, x):
+        ...     return count + 1, x % 2 == 0
+        >>> split_sequence_by([1, 3, 2, 4, 5, 6, 7], split_on_even, 0)
+        [[1, 3], [2], [4, 5], [6, 7]]
+        >>> # Split when sum exceeds threshold
+        >>> def split_when_sum_exceeds_5(total, x):
+        ...     total += x
+        ...     return (x, True) if total > 5 else (total, False)
+        >>> split_sequence_by([1, 2, 3, 4, 1, 2], split_when_sum_exceeds_5, 0)
+        [[1, 2], [3], [4, 1], [2]]
+        >>> # Split on specific values
+        >>> def split_on_zero(_, x):
+        ...     return None, x == 0
+        >>> split_sequence_by([1, 2, 0, 3, 4, 0, 5], split_on_zero, None)
+        [[1, 2], [0, 3, 4], [0, 5]]
+        >>> # Empty sequence
+        >>> split_sequence_by([], lambda s, x: (s, False), 0)
+        []
+        >>> # Single element
+        >>> split_sequence_by([42], lambda s, x: (s, False), 0)
+        [[42]]
     """
-    output_sequences: list[list[SeqItem]] = []
-    sequence_state: dict = {}
-    for cur in sequence:
-        start_new_sequence = should_split_with_sequence_state(cur, sequence_state)
-        if not output_sequences:
-            output_sequences.append([])
+    output_sequences: list[list[S]] = []
-        if start_new_sequence:
-            # DO NOT reset the state because it contains the information of current item
-            # sequence_state = {}
-            if output_sequences[-1]:
-                output_sequences.append([])
+    value = initial
-        output_sequences[-1].append(cur)
+    for element in sequence:
+        value, should = reduce(value, element)
-    assert sum(len(s) for s in output_sequences) == len(sequence)
+        if should:
+            output_sequences.append([element])
+        else:
+            if output_sequences:
+                output_sequences[-1].append(element)
+            else:
+                output_sequences.append([element])
     return output_sequences
 def duplication_check(
     sequence: PointSequence,
+    *,
     max_duplicate_distance: float,
     max_duplicate_angle: float,
 ) -> tuple[PointSequence, list[types.ErrorMetadata]]:
+    """
+    >>> duplication_check([], max_duplicate_distance=1, max_duplicate_angle=2)
+    ([], [])
+    """
     dedups: PointSequence = []
     dups: list[types.ErrorMetadata] = []
     it = iter(sequence)
-    prev = next(it)
+    prev = next(it, None)
     if prev is None:
         return dedups, dups
@@ -90,10 +111,7 @@ def duplication_check(
     for cur in it:
         # invariant: prev is processed
-        distance = geo.gps_distance(
-            (prev.lat, prev.lon),
-            (cur.lat, cur.lon),
-        )
+        distance = geo.gps_distance((prev.lat, prev.lon), (cur.lat, cur.lon))
         if prev.angle is not None and cur.angle is not None:
             angle_diff = geo.diff_bearing(prev.angle, cur.angle)
@@ -104,15 +122,14 @@ def duplication_check(
             angle_diff is None or angle_diff <= max_duplicate_angle
         ):
             msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
+            ex = exceptions.MapillaryDuplicationError(
+                msg,
+                DescriptionJSONSerializer.as_desc(cur),
+                distance=distance,
+                angle_diff=angle_diff,
+            )
             dup = types.describe_error_metadata(
-                exceptions.MapillaryDuplicationError(
-                    msg,
-                    DescriptionJSONSerializer.as_desc(cur),
-                    distance=distance,
-                    angle_diff=angle_diff,
-                ),
-                cur.filename,
-                filetype=types.FileType.IMAGE,
+                ex, cur.filename, filetype=types.FileType.IMAGE
             )
             dups.append(dup)
             # prev does not change
@@ -124,9 +141,9 @@ def duplication_check(
     return dedups, dups
-def _group_by(
+def _group_images_by(
     image_metadatas: T.Iterable[types.ImageMetadata],
-    group_key_func=T.Callable[[types.ImageMetadata], T.Hashable],
+    group_key_func: T.Callable[[types.ImageMetadata], T.Hashable],
 ) -> dict[T.Hashable, list[types.ImageMetadata]]:
     grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
     for metadata in image_metadatas:
@@ -136,11 +153,21 @@ def _group_by(
 def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
     """
-    Update the timestamps make sure they are unique and sorted
+    Update the timestamps to make sure they are unique and sorted
     in the same order by interpolating subseconds
     Examples:
-    - Input: 1, 1, 1, 1, 1, 2
-    - Output: 1, 1.2, 1.4, 1.6, 1.8, 2
+        >>> def make_point(t):
+        ...     return geo.Point(lat=0, lon=0, time=t, alt=None, angle=None)
+        >>> points = [make_point(t) for t in [1, 1, 1, 1, 1, 2]]
+        >>> _interpolate_subsecs_for_sorting(points)
+        >>> [p.time for p in points]
+        [1.0, 1.2, 1.4, 1.6, 1.8, 2]
+        >>> points = [make_point(t) for t in [1.1]]
+        >>> _interpolate_subsecs_for_sorting(points)
+        >>> [p.time for p in points]
+        [1.1]
     """
     gidx = 0
@@ -172,63 +199,6 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
         )
-def _parse_filesize_in_bytes(filesize_str: str) -> int:
-    filesize_str = filesize_str.strip().upper()
-    try:
-        if filesize_str.endswith("B"):
-            return int(filesize_str[:-1])
-        elif filesize_str.endswith("K"):
-            return int(filesize_str[:-1]) * 1024
-        elif filesize_str.endswith("M"):
-            return int(filesize_str[:-1]) * 1024 * 1024
-        elif filesize_str.endswith("G"):
-            return int(filesize_str[:-1]) * 1024 * 1024 * 1024
-        else:
-            return int(filesize_str)
-    except ValueError:
-        raise exceptions.MapillaryBadParameterError(
-            f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
-        )
-def _parse_pixels(pixels_str: str) -> int:
-    pixels_str = pixels_str.strip().upper()
-    try:
-        if pixels_str.endswith("K"):
-            return int(pixels_str[:-1]) * 1000
-        elif pixels_str.endswith("M"):
-            return int(pixels_str[:-1]) * 1000 * 1000
-        elif pixels_str.endswith("G"):
-            return int(pixels_str[:-1]) * 1000 * 1000 * 1000
-        else:
-            return int(pixels_str)
-    except ValueError:
-        raise exceptions.MapillaryBadParameterError(
-            f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
-        )
-def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
-    total_distance = 0.0
-    for cur, nxt in geo.pairwise(sequence):
-        total_distance += geo.gps_distance(
-            (cur.lat, cur.lon),
-            (nxt.lat, nxt.lon),
-        )
-    if sequence:
-        time_diff = sequence[-1].time - sequence[0].time
-    else:
-        time_diff = 0.0
-    if time_diff == 0.0:
-        return float("inf")
-    return total_distance / time_diff
 def _is_video_stationary(
     sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
 ) -> bool:
@@ -246,8 +216,8 @@ def _is_video_stationary(
 def _check_video_limits(
     video_metadatas: T.Iterable[types.VideoMetadata],
-    max_sequence_filesize_in_bytes: int,
-    max_avg_speed: float,
+    max_sequence_filesize_in_bytes: int | None,
+    max_capture_speed_kmh: float,
     max_radius_for_stationary_check: float,
 ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
     output_video_metadatas: list[types.VideoMetadata] = []
@@ -262,33 +232,38 @@ def _check_video_limits(
             if is_stationary:
                 raise exceptions.MapillaryStationaryVideoError("Stationary video")
-            video_filesize = (
-                utils.get_file_size(video_metadata.filename)
-                if video_metadata.filesize is None
-                else video_metadata.filesize
-            )
-            if video_filesize > max_sequence_filesize_in_bytes:
-                raise exceptions.MapillaryFileTooLargeError(
-                    f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+            if max_sequence_filesize_in_bytes is not None:
+                video_filesize = (
+                    utils.get_file_size(video_metadata.filename)
+                    if video_metadata.filesize is None
+                    else video_metadata.filesize
                 )
+                if video_filesize > max_sequence_filesize_in_bytes:
+                    raise exceptions.MapillaryFileTooLargeError(
+                        f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
+                    )
             contains_null_island = any(
                 p.lat == 0 and p.lon == 0 for p in video_metadata.points
             )
             if contains_null_island:
                 raise exceptions.MapillaryNullIslandError(
-                    "Found GPS coordinates in Null Island (0, 0)",
+                    "GPS coordinates in Null Island (0, 0)"
                 )
+            avg_speed_kmh = (
+                geo.avg_speed(video_metadata.points) * 3.6
+            )  # Convert m/s to km/h
             too_fast = (
                 len(video_metadata.points) >= 2
-                and _avg_speed(video_metadata.points) > max_avg_speed
+                and avg_speed_kmh > max_capture_speed_kmh
             )
             if too_fast:
                 raise exceptions.MapillaryCaptureSpeedTooFastError(
-                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                    f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
                 )
         except exceptions.MapillaryDescriptionError as ex:
+            LOG.error(f"{_video_name(video_metadata)}: {ex}")
             error_metadatas.append(
                 types.describe_error_metadata(
                     exc=ex,
@@ -299,57 +274,55 @@ def _check_video_limits(
         else:
             output_video_metadatas.append(video_metadata)
-    LOG.info(
-        "Found %s videos and %s errors after video limit checks",
-        len(output_video_metadatas),
-        len(error_metadatas),
-    )
     return output_video_metadatas, error_metadatas
+def _video_name(video_metadata: types.VideoMetadata) -> str:
+    return video_metadata.filename.name
 def _check_sequences_by_limits(
     input_sequences: T.Sequence[PointSequence],
-    max_sequence_filesize_in_bytes: int,
-    max_avg_speed: float,
+    max_sequence_filesize_in_bytes: int | None,
+    max_capture_speed_kmh: float,
 ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
     output_sequences: list[PointSequence] = []
     output_errors: list[types.ErrorMetadata] = []
     for sequence in input_sequences:
-        sequence_filesize = sum(
-            utils.get_file_size(image.filename)
-            if image.filesize is None
-            else image.filesize
-            for image in sequence
-        )
         try:
-            if sequence_filesize > max_sequence_filesize_in_bytes:
-                raise exceptions.MapillaryFileTooLargeError(
-                    f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
+            if max_sequence_filesize_in_bytes is not None:
+                sequence_filesize = sum(
+                    utils.get_file_size(image.filename)
+                    if image.filesize is None
+                    else image.filesize
+                    for image in sequence
                 )
+                if sequence_filesize > max_sequence_filesize_in_bytes:
+                    raise exceptions.MapillaryFileTooLargeError(
+                        f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
+                    )
             contains_null_island = any(
                 image.lat == 0 and image.lon == 0 for image in sequence
             )
             if contains_null_island:
                 raise exceptions.MapillaryNullIslandError(
-                    "Found GPS coordinates in Null Island (0, 0)",
+                    "GPS coordinates in Null Island (0, 0)"
                 )
-            too_fast = len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed
+            avg_speed_kmh = geo.avg_speed(sequence) * 3.6  # Convert m/s to km/h
+            too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
             if too_fast:
                 raise exceptions.MapillaryCaptureSpeedTooFastError(
-                    f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
+                    f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
                 )
         except exceptions.MapillaryDescriptionError as ex:
+            LOG.error(f"{_sequence_name(sequence)}: {ex}")
             for image in sequence:
                 output_errors.append(
                     types.describe_error_metadata(
-                        exc=ex,
-                        filename=image.filename,
-                        filetype=types.FileType.IMAGE,
+                        exc=ex, filename=image.filename, filetype=types.FileType.IMAGE
                     )
                 )
@@ -360,19 +333,20 @@ def _check_sequences_by_limits(
         len(s) for s in input_sequences
     )
-    LOG.info(
-        "Found %s sequences and %s errors after sequence limit checks",
-        len(output_sequences),
-        len(output_errors),
-    )
     return output_sequences, output_errors
+def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
+    if not sequence:
+        return "N/A"
+    image = sequence[0]
+    return f"{image.filename.parent.name}/{image.filename.name}"
 def _group_by_folder_and_camera(
     image_metadatas: list[types.ImageMetadata],
 ) -> list[list[types.ImageMetadata]]:
-    grouped = _group_by(
+    grouped = _group_images_by(
         image_metadatas,
         lambda metadata: (
             str(metadata.filename.parent),
@@ -383,89 +357,10 @@ def _group_by_folder_and_camera(
         ),
     )
     for key in grouped:
-        LOG.debug("Group sequences by %s: %s images", key, len(grouped[key]))
+        LOG.debug(f"Grouped {len(grouped[key])} images by {key}")
     output_sequences = list(grouped.values())
-    LOG.info(
-        "Found %s sequences from different folders and cameras",
-        len(output_sequences),
-    )
-    return output_sequences
-def _split_sequences_by_cutoff_time(
-    input_sequences: T.Sequence[PointSequence], cutoff_time: float
-) -> list[PointSequence]:
-    def _should_split_by_cutoff_time(
-        prev: types.ImageMetadata, cur: types.ImageMetadata
-    ) -> bool:
-        time_diff = cur.time - prev.time
-        assert 0 <= time_diff, "sequence must be sorted by capture times"
-        should = cutoff_time < time_diff
-        if should:
-            LOG.debug(
-                "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
-                round(time_diff, 2),
-                round(cutoff_time, 2),
-                prev.filename.parent,
-                prev.filename.name,
-                cur.filename.name,
-            )
-        return should
-    output_sequences = []
-    for sequence in input_sequences:
-        output_sequences.extend(
-            split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
-        )
-    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
-    LOG.info(
-        "Found %s sequences after split by cutoff_time %d seconds",
-        len(output_sequences),
-        cutoff_time,
-    )
-    return output_sequences
-def _split_sequences_by_cutoff_distance(
-    input_sequences: T.Sequence[PointSequence], cutoff_distance: float
-) -> list[PointSequence]:
-    def _should_split_by_cutoff_distance(
-        prev: types.ImageMetadata, cur: types.ImageMetadata
-    ) -> bool:
-        distance = geo.gps_distance(
-            (prev.lat, prev.lon),
-            (cur.lat, cur.lon),
-        )
-        should = cutoff_distance < distance
-        if should:
-            LOG.debug(
-                "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
-                round(distance, 2),
-                round(cutoff_distance, 2),
-                prev.filename.parent,
-                prev.filename.name,
-                cur.filename.name,
-            )
-        return should
-    output_sequences = []
-    for sequence in input_sequences:
-        output_sequences.extend(
-            split_sequence_by(sequence, _should_split_by_cutoff_distance)
-        )
-    assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
-    LOG.info(
-        "Found %s sequences after split by cutoff_distance %d meters",
-        len(output_sequences),
-        cutoff_distance,
-    )
+    LOG.info(f"Created {len(output_sequences)} sequences by folders and cameras")
     return output_sequences
@@ -485,95 +380,218 @@ def _check_sequences_duplication(
             max_duplicate_angle=duplicate_angle,
         )
         assert len(sequence) == len(output_sequence) + len(errors)
-        output_sequences.append(output_sequence)
+        if output_sequence:
+            output_sequences.append(output_sequence)
         output_errors.extend(errors)
+    # All input images should be accounted for either in output sequences or errors
     assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
         len(s) for s in input_sequences
     )
-    LOG.info(
-        "Found %s sequences and %s errors after duplication check",
-        len(output_sequences),
-        len(output_errors),
-    )
+    if output_errors:
+        LOG.info(
+            f"Duplication check: {len(output_errors)} image duplicates removed (with {duplicate_distance=} and {duplicate_angle=})"
+        )
     return output_sequences, output_errors
+class SplitState(T.TypedDict, total=False):
+    sequence_images: int
+    sequence_file_size: int
+    sequence_pixels: int
+    image: types.ImageMetadata
+def _should_split_by_max_sequence_images(
+    state: SplitState,
+    image: types.ImageMetadata,
+    max_sequence_images: int,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if not split:
+        new_sequence_images = state.get("sequence_images", 0) + 1
+        split = max_sequence_images < new_sequence_images
+        if split:
+            LOG.info(
+                f"Split sequence at {image.filename.name}: too many images ({new_sequence_images} > {max_sequence_images})"
+            )
+    if split:
+        new_sequence_images = 1
+    state["sequence_images"] = new_sequence_images
+    return state, split
+def _should_split_by_cutoff_time(
+    state: SplitState,
+    image: types.ImageMetadata,
+    cutoff_time: float,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if not split:
+        last_image = state.get("image")
+        if last_image is not None:
+            diff = image.time - last_image.time
+            split = cutoff_time < diff
+            if split:
+                LOG.info(
+                    f"Split sequence at {image.filename.name}: time gap too large ({diff:.6g} seconds > {cutoff_time:.6g} seconds)"
+                )
+    state["image"] = image
+    return state, split
+def _should_split_by_cutoff_distance(
+    state: SplitState,
+    image: types.ImageMetadata,
+    cutoff_distance: float,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if not split:
+        last_image = state.get("image")
+        if last_image is not None:
+            diff = geo.gps_distance(
+                (last_image.lat, last_image.lon), (image.lat, image.lon)
+            )
+            split = cutoff_distance < diff
+            if split:
+                LOG.info(
+                    f"Split sequence at {image.filename.name}: distance gap too large ({diff:.6g} meters > {cutoff_distance:.6g} meters)"
+                )
+    state["image"] = image
+    return state, split
+def _should_split_by_max_sequence_filesize(
+    state: SplitState,
+    image: types.ImageMetadata,
+    max_sequence_filesize_in_bytes: int,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    if image.filesize is None:
+        filesize = os.path.getsize(image.filename)
+    else:
+        filesize = image.filesize
+    if not split:
+        new_sequence_file_size = state.get("sequence_file_size", 0) + filesize
+        split = max_sequence_filesize_in_bytes < new_sequence_file_size
+        if split:
+            LOG.info(
+                f"Split sequence at {image.filename.name}: filesize too large ({new_sequence_file_size} > {max_sequence_filesize_in_bytes})"
+            )
+    if split:
+        new_sequence_file_size = filesize
+    state["sequence_file_size"] = new_sequence_file_size
+    return state, split
+def _should_split_by_max_sequence_pixels(
+    state: SplitState,
+    image: types.ImageMetadata,
+    max_sequence_pixels: int,
+    split: bool = False,
+) -> tuple[SplitState, bool]:
+    # Default values if width/height not available
+    width = 1024 if image.width is None else image.width
+    height = 1024 if image.height is None else image.height
+    pixels = width * height
+    if not split:
+        new_sequence_pixels = state.get("sequence_pixels", 0) + pixels
+        split = max_sequence_pixels < new_sequence_pixels
+        if split:
+            LOG.info(
+                f"Split sequence at {image.filename.name}: pixels too large ({new_sequence_pixels} > {max_sequence_pixels})"
+            )
+    if split:
+        new_sequence_pixels = pixels
+    state["sequence_pixels"] = new_sequence_pixels
+    return state, split
 def _split_sequences_by_limits(
     input_sequences: T.Sequence[PointSequence],
-    max_sequence_filesize_in_bytes: float,
-    max_sequence_pixels: float,
+    max_sequence_filesize_in_bytes: int | None = None,
+    max_sequence_pixels: int | None = None,
+    max_sequence_images: int | None = None,
+    cutoff_time: float | None = None,
+    cutoff_distance: float | None = None,
 ) -> list[PointSequence]:
-    max_sequence_images = constants.MAX_SEQUENCE_LENGTH
-    max_sequence_filesize = max_sequence_filesize_in_bytes
-    def _should_split(image: types.ImageMetadata, sequence_state: dict) -> bool:
-        last_sequence_images = sequence_state.get("last_sequence_images", 0)
-        last_sequence_file_size = sequence_state.get("last_sequence_file_size", 0)
-        last_sequence_pixels = sequence_state.get("last_sequence_pixels", 0)
+    should_splits = []
-        # decent default values if width/height not available
-        width = 1024 if image.width is None else image.width
-        height = 1024 if image.height is None else image.height
-        pixels = width * height
-        if image.filesize is None:
-            filesize = os.path.getsize(image.filename)
-        else:
-            filesize = image.filesize
+    if max_sequence_images is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_max_sequence_images,
+                max_sequence_images=max_sequence_images,
+            )
+        )
-        new_sequence_images = last_sequence_images + 1
-        new_sequence_file_size = last_sequence_file_size + filesize
-        new_sequence_pixels = last_sequence_pixels + pixels
+    if cutoff_time is not None:
+        should_splits.append(
+            functools.partial(_should_split_by_cutoff_time, cutoff_time=cutoff_time)
+        )
-        if max_sequence_images < new_sequence_images:
-            LOG.debug(
-                "Split because the current sequence (%s) reaches the max number of images (%s)",
-                new_sequence_images,
-                max_sequence_images,
+    if cutoff_distance is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_cutoff_distance, cutoff_distance=cutoff_distance
             )
-            start_new_sequence = True
-        elif max_sequence_filesize < new_sequence_file_size:
-            LOG.debug(
-                "Split because the current sequence (%s) reaches the max filesize (%s)",
-                new_sequence_file_size,
-                max_sequence_filesize,
+        )
+    if max_sequence_filesize_in_bytes is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_max_sequence_filesize,
+                max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
             )
-            start_new_sequence = True
-        elif max_sequence_pixels < new_sequence_pixels:
-            LOG.debug(
-                "Split because the current sequence (%s) reaches the max pixels (%s)",
-                new_sequence_pixels,
-                max_sequence_pixels,
+        )
+    if max_sequence_pixels is not None:
+        should_splits.append(
+            functools.partial(
+                _should_split_by_max_sequence_pixels,
+                max_sequence_pixels=max_sequence_pixels,
             )
-            start_new_sequence = True
-        else:
-            start_new_sequence = False
+        )
-        if not start_new_sequence:
-            sequence_state["last_sequence_images"] = new_sequence_images
-            sequence_state["last_sequence_file_size"] = new_sequence_file_size
-            sequence_state["last_sequence_pixels"] = new_sequence_pixels
-        else:
-            sequence_state["last_sequence_images"] = 1
-            sequence_state["last_sequence_file_size"] = filesize
-            sequence_state["last_sequence_pixels"] = pixels
+    def _should_split_agg(
+        state: SplitState, image: types.ImageMetadata
+    ) -> tuple[SplitState, bool]:
+        split = False
-        return start_new_sequence
+        for should_split in should_splits:
+            state, split = should_split(state, image, split=split)
+        return state, split
     output_sequences = []
     for sequence in input_sequences:
         output_sequences.extend(
-            split_sequence_by_agg(
-                sequence, should_split_with_sequence_state=_should_split
+            split_sequence_by(
+                sequence, _should_split_agg, initial=T.cast(SplitState, {})
             )
         )
     assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
-    LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))
+    if len(input_sequences) != len(output_sequences):
+        LOG.info(f"Split sequences: {len(input_sequences)} -> {len(output_sequences)}")
     return output_sequences
@@ -585,12 +603,12 @@ def process_sequence_properties(
     interpolate_directions: bool = False,
     duplicate_distance: float = constants.DUPLICATE_DISTANCE,
     duplicate_angle: float = constants.DUPLICATE_ANGLE,
-    max_avg_speed: float = constants.MAX_AVG_SPEED,
+    max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
 ) -> list[types.MetadataOrError]:
-    max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
-        constants.MAX_SEQUENCE_FILESIZE
-    )
-    max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
+    LOG.info("==> Processing sequences...")
+    max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
+    max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS
     error_metadatas: list[types.ErrorMetadata] = []
     image_metadatas: list[types.ImageMetadata] = []
@@ -604,14 +622,14 @@ def process_sequence_properties(
         elif isinstance(metadata, types.VideoMetadata):
             video_metadatas.append(metadata)
         else:
-            raise RuntimeError(f"invalid metadata type: {metadata}")
+            raise ValueError(f"invalid metadata type: {metadata}")
     if video_metadatas:
         # Check limits for videos
         video_metadatas, video_error_metadatas = _check_video_limits(
             video_metadatas,
             max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-            max_avg_speed=max_avg_speed,
+            max_capture_speed_kmh=max_capture_speed_kmh,
             max_radius_for_stationary_check=10.0,
         )
         error_metadatas.extend(video_error_metadatas)
@@ -632,9 +650,15 @@ def process_sequence_properties(
         for sequence in sequences:
             _interpolate_subsecs_for_sorting(sequence)
-        # Split sequences by cutoff time
+        # Split sequences by max number of images, max filesize, max pixels, and cutoff time
         # NOTE: Do not split by distance here because it affects the speed limit check
-        sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
+        sequences = _split_sequences_by_limits(
+            sequences,
+            max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
+            max_sequence_pixels=max_sequence_pixels,
+            max_sequence_images=constants.MAX_SEQUENCE_LENGTH,
+            cutoff_time=cutoff_time,
+        )
         # Duplication check
         sequences, errors = _check_sequences_duplication(
@@ -651,24 +675,17 @@ def process_sequence_properties(
                     image.angle = None
             geo.interpolate_directions_if_none(sequence)
-        # Split sequences by max number of images, max filesize, and max pixels
-        sequences = _split_sequences_by_limits(
-            sequences,
-            max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-            max_sequence_pixels=max_sequence_pixels,
-        )
         # Check limits for sequences
         sequences, errors = _check_sequences_by_limits(
             sequences,
             max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
-            max_avg_speed=max_avg_speed,
+            max_capture_speed_kmh=max_capture_speed_kmh,
         )
         error_metadatas.extend(errors)
         # Split sequences by cutoff distance
-        # NOTE: The speed limit check probably rejects most of anomalies
-        sequences = _split_sequences_by_cutoff_distance(
+        # NOTE: The speed limit check probably rejects most anomalies
+        sequences = _split_sequences_by_limits(
             sequences, cutoff_distance=cutoff_distance
         )
@@ -691,7 +708,7 @@ def process_sequence_properties(
     results = error_metadatas + image_metadatas + video_metadatas
     assert len(metadatas) == len(results), (
-        f"expected {len(metadatas)} results but got {len(results)}"
+        f"Expected {len(metadatas)} results but got {len(results)}"
     )
     return results

mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.1__py3-none-any.whl

mapillary-tools 0.14.0b1__py3-none-any.whl → 0.14.1__py3-none-any.whl