mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +198 -55
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +10 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +18 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +411 -387
  61. mapillary_tools/upload_api_v4.py +167 -142
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
@@ -1,90 +1,137 @@
1
+ from __future__ import annotations
2
+
3
+ import functools
1
4
  import itertools
2
5
  import logging
3
6
  import math
4
7
  import os
5
8
  import typing as T
6
9
 
7
- from . import constants, geo, types
8
- from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError
10
+ import humanize
11
+
12
+ from . import constants, exceptions, geo, types, utils
13
+ from .serializer.description import DescriptionJSONSerializer
9
14
 
10
15
  LOG = logging.getLogger(__name__)
11
16
 
12
17
 
13
- Point = T.TypeVar("Point", bound=geo.Point)
14
- PointSequence = T.List[Point]
18
+ S = T.TypeVar("S")
19
+ R = T.TypeVar("R")
20
+ PointSequence = T.List[geo.PointLike]
15
21
 
16
22
 
17
- def cut_sequence_by_time_distance(
18
- sequence: PointSequence,
19
- cutoff_distance: float,
20
- cutoff_time: float,
21
- ) -> T.List[PointSequence]:
22
- sequences: T.List[PointSequence] = []
23
+ def split_sequence_by(
24
+ sequence: T.Iterable[S], reduce: T.Callable[[R, S], tuple[R, bool]], initial: R
25
+ ) -> list[list[S]]:
26
+ """
27
+ Split a sequence into multiple subsequences based on a reduction function.
23
28
 
24
- if sequence:
25
- sequences.append([sequence[0]])
29
+ The function processes each element through a reduce function that maintains
30
+ state and determines whether to split the sequence at that point. When a split
31
+ is triggered, a new subsequence starts with the current element.
26
32
 
27
- for prev, cur in geo.pairwise(sequence):
28
- # invariant: prev is processed
29
- distance = geo.gps_distance(
30
- (prev.lat, prev.lon),
31
- (cur.lat, cur.lon),
32
- )
33
- if cutoff_distance <= distance:
34
- sequences.append([cur])
35
- continue
36
- time_diff = cur.time - prev.time
37
- assert 0 <= time_diff, "sequence must be sorted by capture times"
38
- if cutoff_time <= time_diff:
39
- sequences.append([cur])
40
- continue
41
- sequences[-1].append(cur)
42
- # invariant: cur is processed
33
+ Args:
34
+ sequence: An iterable of elements to split
35
+ reduce: A function that takes (accumulated_state, current_element) and
36
+ returns (new_state, should_split). If should_split is True,
37
+ a new subsequence starts with the current element.
38
+ initial: The initial state value passed to the reduce function
39
+
40
+ Returns:
41
+ A list of subsequences, where each subsequence is a list of elements
42
+
43
+ Examples:
44
+ >>> # Split on even numbers
45
+ >>> def split_on_even(count, x):
46
+ ... return count + 1, x % 2 == 0
47
+ >>> split_sequence_by([1, 3, 2, 4, 5, 6, 7], split_on_even, 0)
48
+ [[1, 3], [2], [4, 5], [6, 7]]
49
+
50
+ >>> # Split when sum exceeds threshold
51
+ >>> def split_when_sum_exceeds_5(total, x):
52
+ ... total += x
53
+ ... return (x, True) if total > 5 else (total, False)
54
+ >>> split_sequence_by([1, 2, 3, 4, 1, 2], split_when_sum_exceeds_5, 0)
55
+ [[1, 2], [3], [4, 1], [2]]
56
+
57
+ >>> # Split on specific values
58
+ >>> def split_on_zero(_, x):
59
+ ... return None, x == 0
60
+ >>> split_sequence_by([1, 2, 0, 3, 4, 0, 5], split_on_zero, None)
61
+ [[1, 2], [0, 3, 4], [0, 5]]
62
+
63
+ >>> # Empty sequence
64
+ >>> split_sequence_by([], lambda s, x: (s, False), 0)
65
+ []
66
+
67
+ >>> # Single element
68
+ >>> split_sequence_by([42], lambda s, x: (s, False), 0)
69
+ [[42]]
70
+ """
43
71
 
44
- return sequences
72
+ output_sequences: list[list[S]] = []
73
+
74
+ value = initial
75
+
76
+ for element in sequence:
77
+ value, should = reduce(value, element)
78
+
79
+ if should:
80
+ output_sequences.append([element])
81
+ else:
82
+ if output_sequences:
83
+ output_sequences[-1].append(element)
84
+ else:
85
+ output_sequences.append([element])
86
+
87
+ return output_sequences
45
88
 
46
89
 
47
90
  def duplication_check(
48
91
  sequence: PointSequence,
49
- duplicate_distance: float,
50
- duplicate_angle: float,
51
- ) -> T.Tuple[PointSequence, T.List[types.ErrorMetadata]]:
92
+ *,
93
+ max_duplicate_distance: float,
94
+ max_duplicate_angle: float,
95
+ ) -> tuple[PointSequence, list[types.ErrorMetadata]]:
96
+ """
97
+ >>> duplication_check([], max_duplicate_distance=1, max_duplicate_angle=2)
98
+ ([], [])
99
+ """
100
+
52
101
  dedups: PointSequence = []
53
- dups: T.List[types.ErrorMetadata] = []
102
+ dups: list[types.ErrorMetadata] = []
103
+
104
+ it = iter(sequence)
105
+ prev = next(it, None)
54
106
 
55
- sequence_iter = iter(sequence)
56
- prev = next(sequence_iter)
57
107
  if prev is None:
58
108
  return dedups, dups
109
+
59
110
  dedups.append(prev)
60
111
 
61
- for cur in sequence_iter:
112
+ for cur in it:
62
113
  # invariant: prev is processed
63
- distance = geo.gps_distance(
64
- (prev.lat, prev.lon),
65
- (cur.lat, cur.lon),
66
- )
114
+ distance = geo.gps_distance((prev.lat, prev.lon), (cur.lat, cur.lon))
67
115
 
68
116
  if prev.angle is not None and cur.angle is not None:
69
117
  angle_diff = geo.diff_bearing(prev.angle, cur.angle)
70
118
  else:
71
119
  angle_diff = None
72
120
 
73
- if distance <= duplicate_distance and (
74
- angle_diff is not None and angle_diff <= duplicate_angle
121
+ if distance <= max_duplicate_distance and (
122
+ angle_diff is None or angle_diff <= max_duplicate_angle
75
123
  ):
76
- dups.append(
77
- types.describe_error_metadata(
78
- MapillaryDuplicationError(
79
- f"Duplicate of its previous image in terms of distance <= {duplicate_distance} and angle <= {duplicate_angle}",
80
- types.as_desc(cur),
81
- distance=distance,
82
- angle_diff=angle_diff,
83
- ),
84
- cur.filename,
85
- filetype=types.FileType.IMAGE,
86
- ),
124
+ msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
125
+ ex = exceptions.MapillaryDuplicationError(
126
+ msg,
127
+ DescriptionJSONSerializer.as_desc(cur),
128
+ distance=distance,
129
+ angle_diff=angle_diff,
87
130
  )
131
+ dup = types.describe_error_metadata(
132
+ ex, cur.filename, filetype=types.FileType.IMAGE
133
+ )
134
+ dups.append(dup)
88
135
  # prev does not change
89
136
  else:
90
137
  dedups.append(cur)
@@ -94,95 +141,33 @@ def duplication_check(
94
141
  return dedups, dups
95
142
 
96
143
 
97
- def cut_sequence(
98
- sequence: T.List[types.ImageMetadata],
99
- max_images: int,
100
- max_sequence_filesize: int,
101
- max_sequence_pixels: int,
102
- ) -> T.List[T.List[types.ImageMetadata]]:
103
- """
104
- Cut a sequence into multiple sequences by max_images or max filesize
105
- """
106
- sequences: T.List[T.List[types.ImageMetadata]] = []
107
- last_sequence_file_size = 0
108
- last_sequence_pixels = 0
109
-
110
- for image in sequence:
111
- # decent default values if width/height not available
112
- width = 1024 if image.width is None else image.width
113
- height = 1024 if image.height is None else image.height
114
-
115
- filesize = os.path.getsize(image.filename)
116
-
117
- if len(sequences) == 0:
118
- start_new_sequence = True
119
- else:
120
- if sequences[-1]:
121
- if max_images < len(sequences[-1]):
122
- LOG.debug(
123
- "Cut the sequence because the current sequence (%s) reaches the max number of images (%s)",
124
- len(sequences[-1]),
125
- max_images,
126
- )
127
- start_new_sequence = True
128
- elif max_sequence_filesize < last_sequence_file_size + filesize:
129
- LOG.debug(
130
- "Cut the sequence because the current sequence (%s) reaches the max filesize (%s)",
131
- last_sequence_file_size + filesize,
132
- max_sequence_filesize,
133
- )
134
- start_new_sequence = True
135
- elif max_sequence_pixels < last_sequence_pixels + width * height:
136
- LOG.debug(
137
- "Cut the sequence because the current sequence (%s) reaches the max pixels (%s)",
138
- last_sequence_pixels + width * height,
139
- max_sequence_pixels,
140
- )
141
- start_new_sequence = True
142
- else:
143
- start_new_sequence = False
144
- else:
145
- start_new_sequence = False
146
-
147
- if start_new_sequence:
148
- sequences.append([])
149
- last_sequence_file_size = 0
150
- last_sequence_pixels = 0
151
-
152
- sequences[-1].append(image)
153
- last_sequence_file_size += filesize
154
- last_sequence_pixels += width * height
155
-
156
- assert sum(len(s) for s in sequences) == len(sequence)
157
-
158
- return sequences
159
-
160
-
161
- def _group_sort_images_by_folder(
162
- image_metadatas: T.List[types.ImageMetadata],
163
- ) -> T.List[T.List[types.ImageMetadata]]:
164
- # group images by parent directory
165
- sequences_by_parent: T.Dict[str, T.List[types.ImageMetadata]] = {}
166
- for image_metadata in image_metadatas:
167
- filename = image_metadata.filename.resolve()
168
- sequences_by_parent.setdefault(str(filename.parent), []).append(image_metadata)
169
-
170
- sequences = list(sequences_by_parent.values())
171
- for sequence in sequences:
172
- sequence.sort(
173
- key=lambda metadata: metadata.sort_key(),
174
- )
175
-
176
- return sequences
144
+ def _group_images_by(
145
+ image_metadatas: T.Iterable[types.ImageMetadata],
146
+ group_key_func: T.Callable[[types.ImageMetadata], T.Hashable],
147
+ ) -> dict[T.Hashable, list[types.ImageMetadata]]:
148
+ grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
149
+ for metadata in image_metadatas:
150
+ grouped.setdefault(group_key_func(metadata), []).append(metadata)
151
+ return grouped
177
152
 
178
153
 
179
154
  def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
180
155
  """
181
- Update the timestamps make sure they are unique and sorted
156
+ Update the timestamps to make sure they are unique and sorted
182
157
  in the same order by interpolating subseconds
158
+
183
159
  Examples:
184
- - Input: 1, 1, 1, 1, 1, 2
185
- - Output: 1, 1.2, 1.4, 1.6, 1.8, 2
160
+ >>> def make_point(t):
161
+ ... return geo.Point(lat=0, lon=0, time=t, alt=None, angle=None)
162
+ >>> points = [make_point(t) for t in [1, 1, 1, 1, 1, 2]]
163
+ >>> _interpolate_subsecs_for_sorting(points)
164
+ >>> [p.time for p in points]
165
+ [1.0, 1.2, 1.4, 1.6, 1.8, 2]
166
+
167
+ >>> points = [make_point(t) for t in [1.1]]
168
+ >>> _interpolate_subsecs_for_sorting(points)
169
+ >>> [p.time for p in points]
170
+ [1.1]
186
171
  """
187
172
 
188
173
  gidx = 0
@@ -214,61 +199,420 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
214
199
  )
215
200
 
216
201
 
217
- def _parse_filesize_in_bytes(filesize_str: str) -> int:
218
- filesize_str = filesize_str.strip().upper()
202
+ def _is_video_stationary(
203
+ sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
204
+ ) -> bool:
205
+ if not sequence:
206
+ return 0.0 <= max_radius_in_meters
219
207
 
220
- if filesize_str.endswith("B"):
221
- return int(filesize_str[:-1])
222
- elif filesize_str.endswith("K"):
223
- return int(filesize_str[:-1]) * 1024
224
- elif filesize_str.endswith("M"):
225
- return int(filesize_str[:-1]) * 1024 * 1024
226
- elif filesize_str.endswith("G"):
227
- return int(filesize_str[:-1]) * 1024 * 1024 * 1024
228
- else:
229
- return int(filesize_str)
208
+ start = (sequence[0].lat, sequence[0].lon)
209
+ for p in sequence:
210
+ distance = geo.gps_distance(start, (p.lat, p.lon))
211
+ if distance > max_radius_in_meters:
212
+ return False
213
+
214
+ return True
215
+
216
+
217
+ def _check_video_limits(
218
+ video_metadatas: T.Iterable[types.VideoMetadata],
219
+ max_sequence_filesize_in_bytes: int | None,
220
+ max_capture_speed_kmh: float,
221
+ max_radius_for_stationary_check: float,
222
+ ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
223
+ output_video_metadatas: list[types.VideoMetadata] = []
224
+ error_metadatas: list[types.ErrorMetadata] = []
225
+
226
+ for video_metadata in video_metadatas:
227
+ try:
228
+ is_stationary = _is_video_stationary(
229
+ video_metadata.points,
230
+ max_radius_in_meters=max_radius_for_stationary_check,
231
+ )
232
+ if is_stationary:
233
+ raise exceptions.MapillaryStationaryVideoError("Stationary video")
234
+
235
+ if max_sequence_filesize_in_bytes is not None:
236
+ video_filesize = (
237
+ utils.get_file_size(video_metadata.filename)
238
+ if video_metadata.filesize is None
239
+ else video_metadata.filesize
240
+ )
241
+ if video_filesize > max_sequence_filesize_in_bytes:
242
+ raise exceptions.MapillaryFileTooLargeError(
243
+ f"Video file size {humanize.naturalsize(video_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
244
+ )
245
+
246
+ contains_null_island = any(
247
+ p.lat == 0 and p.lon == 0 for p in video_metadata.points
248
+ )
249
+ if contains_null_island:
250
+ raise exceptions.MapillaryNullIslandError(
251
+ "GPS coordinates in Null Island (0, 0)"
252
+ )
253
+
254
+ avg_speed_kmh = (
255
+ geo.avg_speed(video_metadata.points) * 3.6
256
+ ) # Convert m/s to km/h
257
+ too_fast = (
258
+ len(video_metadata.points) >= 2
259
+ and avg_speed_kmh > max_capture_speed_kmh
260
+ )
261
+ if too_fast:
262
+ raise exceptions.MapillaryCaptureSpeedTooFastError(
263
+ f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
264
+ )
265
+ except exceptions.MapillaryDescriptionError as ex:
266
+ LOG.error(f"{_video_name(video_metadata)}: {ex}")
267
+ error_metadatas.append(
268
+ types.describe_error_metadata(
269
+ exc=ex,
270
+ filename=video_metadata.filename,
271
+ filetype=video_metadata.filetype,
272
+ )
273
+ )
274
+ else:
275
+ output_video_metadatas.append(video_metadata)
276
+
277
+ return output_video_metadatas, error_metadatas
278
+
279
+
280
+ def _video_name(video_metadata: types.VideoMetadata) -> str:
281
+ return video_metadata.filename.name
282
+
283
+
284
+ def _check_sequences_by_limits(
285
+ input_sequences: T.Sequence[PointSequence],
286
+ max_sequence_filesize_in_bytes: int | None,
287
+ max_capture_speed_kmh: float,
288
+ ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
289
+ output_sequences: list[PointSequence] = []
290
+ output_errors: list[types.ErrorMetadata] = []
291
+
292
+ for sequence in input_sequences:
293
+ try:
294
+ if max_sequence_filesize_in_bytes is not None:
295
+ sequence_filesize = sum(
296
+ utils.get_file_size(image.filename)
297
+ if image.filesize is None
298
+ else image.filesize
299
+ for image in sequence
300
+ )
301
+ if sequence_filesize > max_sequence_filesize_in_bytes:
302
+ raise exceptions.MapillaryFileTooLargeError(
303
+ f"Sequence file size {humanize.naturalsize(sequence_filesize)} exceeds max allowed {humanize.naturalsize(max_sequence_filesize_in_bytes)}",
304
+ )
305
+
306
+ contains_null_island = any(
307
+ image.lat == 0 and image.lon == 0 for image in sequence
308
+ )
309
+ if contains_null_island:
310
+ raise exceptions.MapillaryNullIslandError(
311
+ "GPS coordinates in Null Island (0, 0)"
312
+ )
313
+
314
+ avg_speed_kmh = geo.avg_speed(sequence) * 3.6 # Convert m/s to km/h
315
+ too_fast = len(sequence) >= 2 and avg_speed_kmh > max_capture_speed_kmh
316
+ if too_fast:
317
+ raise exceptions.MapillaryCaptureSpeedTooFastError(
318
+ f"Capture speed {avg_speed_kmh:.3f} km/h exceeds max allowed {max_capture_speed_kmh:.3f} km/h",
319
+ )
320
+ except exceptions.MapillaryDescriptionError as ex:
321
+ LOG.error(f"{_sequence_name(sequence)}: {ex}")
322
+ for image in sequence:
323
+ output_errors.append(
324
+ types.describe_error_metadata(
325
+ exc=ex, filename=image.filename, filetype=types.FileType.IMAGE
326
+ )
327
+ )
328
+
329
+ else:
330
+ output_sequences.append(sequence)
331
+
332
+ assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
333
+ len(s) for s in input_sequences
334
+ )
335
+
336
+ return output_sequences, output_errors
337
+
338
+
339
+ def _sequence_name(sequence: T.Sequence[types.ImageMetadata]) -> str:
340
+ if not sequence:
341
+ return "N/A"
342
+ image = sequence[0]
343
+ return f"{image.filename.parent.name}/{image.filename.name}"
230
344
 
231
345
 
232
- def _parse_pixels(pixels_str: str) -> int:
233
- pixels_str = pixels_str.strip().upper()
346
+ def _group_by_folder_and_camera(
347
+ image_metadatas: list[types.ImageMetadata],
348
+ ) -> list[list[types.ImageMetadata]]:
349
+ grouped = _group_images_by(
350
+ image_metadatas,
351
+ lambda metadata: (
352
+ str(metadata.filename.parent),
353
+ metadata.MAPDeviceMake,
354
+ metadata.MAPDeviceModel,
355
+ metadata.width,
356
+ metadata.height,
357
+ ),
358
+ )
359
+ for key in grouped:
360
+ LOG.debug(f"Grouped {len(grouped[key])} images by {key}")
361
+ output_sequences = list(grouped.values())
362
+
363
+ LOG.info(f"Created {len(output_sequences)} sequences by folders and cameras")
364
+
365
+ return output_sequences
366
+
367
+
368
+ def _check_sequences_duplication(
369
+ input_sequences: T.Sequence[PointSequence],
370
+ duplicate_distance: float,
371
+ duplicate_angle: float,
372
+ ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
373
+ output_sequences: list[PointSequence] = []
374
+ output_errors: list[types.ErrorMetadata] = []
375
+
376
+ for sequence in input_sequences:
377
+ output_sequence, errors = duplication_check(
378
+ sequence,
379
+ max_duplicate_distance=duplicate_distance,
380
+ max_duplicate_angle=duplicate_angle,
381
+ )
382
+ assert len(sequence) == len(output_sequence) + len(errors)
383
+ if output_sequence:
384
+ output_sequences.append(output_sequence)
385
+ output_errors.extend(errors)
386
+
387
+ # All input images should be accounted for either in output sequences or errors
388
+ assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
389
+ len(s) for s in input_sequences
390
+ )
391
+
392
+ if output_errors:
393
+ LOG.info(
394
+ f"Duplication check: {len(output_errors)} image duplicates removed (with {duplicate_distance=} and {duplicate_angle=})"
395
+ )
396
+
397
+ return output_sequences, output_errors
398
+
399
+
400
+ class SplitState(T.TypedDict, total=False):
401
+ sequence_images: int
402
+ sequence_file_size: int
403
+ sequence_pixels: int
404
+ image: types.ImageMetadata
405
+
406
+
407
+ def _should_split_by_max_sequence_images(
408
+ state: SplitState,
409
+ image: types.ImageMetadata,
410
+ max_sequence_images: int,
411
+ split: bool = False,
412
+ ) -> tuple[SplitState, bool]:
413
+ if not split:
414
+ new_sequence_images = state.get("sequence_images", 0) + 1
415
+ split = max_sequence_images < new_sequence_images
416
+ if split:
417
+ LOG.info(
418
+ f"Split sequence at {image.filename.name}: too many images ({new_sequence_images} > {max_sequence_images})"
419
+ )
420
+
421
+ if split:
422
+ new_sequence_images = 1
234
423
 
235
- if pixels_str.endswith("K"):
236
- return int(pixels_str[:-1]) * 1000
237
- elif pixels_str.endswith("M"):
238
- return int(pixels_str[:-1]) * 1000 * 1000
239
- elif pixels_str.endswith("G"):
240
- return int(pixels_str[:-1]) * 1000 * 1000 * 1000
424
+ state["sequence_images"] = new_sequence_images
425
+
426
+ return state, split
427
+
428
+
429
+ def _should_split_by_cutoff_time(
430
+ state: SplitState,
431
+ image: types.ImageMetadata,
432
+ cutoff_time: float,
433
+ split: bool = False,
434
+ ) -> tuple[SplitState, bool]:
435
+ if not split:
436
+ last_image = state.get("image")
437
+ if last_image is not None:
438
+ diff = image.time - last_image.time
439
+ split = cutoff_time < diff
440
+ if split:
441
+ LOG.info(
442
+ f"Split sequence at {image.filename.name}: time gap too large ({diff:.6g} seconds > {cutoff_time:.6g} seconds)"
443
+ )
444
+
445
+ state["image"] = image
446
+
447
+ return state, split
448
+
449
+
450
+ def _should_split_by_cutoff_distance(
451
+ state: SplitState,
452
+ image: types.ImageMetadata,
453
+ cutoff_distance: float,
454
+ split: bool = False,
455
+ ) -> tuple[SplitState, bool]:
456
+ if not split:
457
+ last_image = state.get("image")
458
+ if last_image is not None:
459
+ diff = geo.gps_distance(
460
+ (last_image.lat, last_image.lon), (image.lat, image.lon)
461
+ )
462
+ split = cutoff_distance < diff
463
+ if split:
464
+ LOG.info(
465
+ f"Split sequence at {image.filename.name}: distance gap too large ({diff:.6g} meters > {cutoff_distance:.6g} meters)"
466
+ )
467
+
468
+ state["image"] = image
469
+
470
+ return state, split
471
+
472
+
473
+ def _should_split_by_max_sequence_filesize(
474
+ state: SplitState,
475
+ image: types.ImageMetadata,
476
+ max_sequence_filesize_in_bytes: int,
477
+ split: bool = False,
478
+ ) -> tuple[SplitState, bool]:
479
+ if image.filesize is None:
480
+ filesize = os.path.getsize(image.filename)
241
481
  else:
242
- return int(pixels_str)
482
+ filesize = image.filesize
483
+
484
+ if not split:
485
+ new_sequence_file_size = state.get("sequence_file_size", 0) + filesize
486
+ split = max_sequence_filesize_in_bytes < new_sequence_file_size
487
+ if split:
488
+ LOG.info(
489
+ f"Split sequence at {image.filename.name}: filesize too large ({new_sequence_file_size} > {max_sequence_filesize_in_bytes})"
490
+ )
243
491
 
492
+ if split:
493
+ new_sequence_file_size = filesize
244
494
 
245
- def process_sequence_properties(
246
- metadatas: T.Sequence[types.MetadataOrError],
247
- cutoff_distance=constants.CUTOFF_DISTANCE,
248
- cutoff_time=constants.CUTOFF_TIME,
249
- interpolate_directions=False,
250
- duplicate_distance=constants.DUPLICATE_DISTANCE,
251
- duplicate_angle=constants.DUPLICATE_ANGLE,
252
- ) -> T.List[types.MetadataOrError]:
253
- try:
254
- max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
255
- constants.MAX_SEQUENCE_FILESIZE
495
+ state["sequence_file_size"] = new_sequence_file_size
496
+
497
+ return state, split
498
+
499
+
500
+ def _should_split_by_max_sequence_pixels(
501
+ state: SplitState,
502
+ image: types.ImageMetadata,
503
+ max_sequence_pixels: int,
504
+ split: bool = False,
505
+ ) -> tuple[SplitState, bool]:
506
+ # Default values if width/height not available
507
+ width = 1024 if image.width is None else image.width
508
+ height = 1024 if image.height is None else image.height
509
+ pixels = width * height
510
+
511
+ if not split:
512
+ new_sequence_pixels = state.get("sequence_pixels", 0) + pixels
513
+ split = max_sequence_pixels < new_sequence_pixels
514
+ if split:
515
+ LOG.info(
516
+ f"Split sequence at {image.filename.name}: pixels too large ({new_sequence_pixels} > {max_sequence_pixels})"
517
+ )
518
+
519
+ if split:
520
+ new_sequence_pixels = pixels
521
+
522
+ state["sequence_pixels"] = new_sequence_pixels
523
+
524
+ return state, split
525
+
526
+
527
+ def _split_sequences_by_limits(
528
+ input_sequences: T.Sequence[PointSequence],
529
+ max_sequence_filesize_in_bytes: int | None = None,
530
+ max_sequence_pixels: int | None = None,
531
+ max_sequence_images: int | None = None,
532
+ cutoff_time: float | None = None,
533
+ cutoff_distance: float | None = None,
534
+ ) -> list[PointSequence]:
535
+ should_splits = []
536
+
537
+ if max_sequence_images is not None:
538
+ should_splits.append(
539
+ functools.partial(
540
+ _should_split_by_max_sequence_images,
541
+ max_sequence_images=max_sequence_images,
542
+ )
256
543
  )
257
- except ValueError:
258
- raise MapillaryBadParameterError(
259
- f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_FILESIZE to be a valid filesize that ends with B, K, M, or G, but got {constants.MAX_SEQUENCE_FILESIZE}"
544
+
545
+ if cutoff_time is not None:
546
+ should_splits.append(
547
+ functools.partial(_should_split_by_cutoff_time, cutoff_time=cutoff_time)
260
548
  )
261
549
 
262
- try:
263
- max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
264
- except ValueError:
265
- raise MapillaryBadParameterError(
266
- f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_PIXELS to be a valid number of pixels that ends with K, M, or G, but got {constants.MAX_SEQUENCE_PIXELS}"
550
+ if cutoff_distance is not None:
551
+ should_splits.append(
552
+ functools.partial(
553
+ _should_split_by_cutoff_distance, cutoff_distance=cutoff_distance
554
+ )
555
+ )
556
+
557
+ if max_sequence_filesize_in_bytes is not None:
558
+ should_splits.append(
559
+ functools.partial(
560
+ _should_split_by_max_sequence_filesize,
561
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
562
+ )
563
+ )
564
+
565
+ if max_sequence_pixels is not None:
566
+ should_splits.append(
567
+ functools.partial(
568
+ _should_split_by_max_sequence_pixels,
569
+ max_sequence_pixels=max_sequence_pixels,
570
+ )
571
+ )
572
+
573
+ def _should_split_agg(
574
+ state: SplitState, image: types.ImageMetadata
575
+ ) -> tuple[SplitState, bool]:
576
+ split = False
577
+
578
+ for should_split in should_splits:
579
+ state, split = should_split(state, image, split=split)
580
+
581
+ return state, split
582
+
583
+ output_sequences = []
584
+ for sequence in input_sequences:
585
+ output_sequences.extend(
586
+ split_sequence_by(
587
+ sequence, _should_split_agg, initial=T.cast(SplitState, {})
588
+ )
267
589
  )
268
590
 
269
- error_metadatas: T.List[types.ErrorMetadata] = []
270
- image_metadatas: T.List[types.ImageMetadata] = []
271
- video_metadatas: T.List[types.VideoMetadata] = []
591
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
592
+
593
+ if len(input_sequences) != len(output_sequences):
594
+ LOG.info(f"Split sequences: {len(input_sequences)} -> {len(output_sequences)}")
595
+
596
+ return output_sequences
597
+
598
+
599
+ def process_sequence_properties(
600
+ metadatas: T.Sequence[types.MetadataOrError],
601
+ cutoff_distance: float = constants.CUTOFF_DISTANCE,
602
+ cutoff_time: float = constants.CUTOFF_TIME,
603
+ interpolate_directions: bool = False,
604
+ duplicate_distance: float = constants.DUPLICATE_DISTANCE,
605
+ duplicate_angle: float = constants.DUPLICATE_ANGLE,
606
+ max_capture_speed_kmh: float = constants.MAX_CAPTURE_SPEED_KMH,
607
+ ) -> list[types.MetadataOrError]:
608
+ LOG.info("==> Processing sequences...")
609
+
610
+ max_sequence_filesize_in_bytes = constants.MAX_SEQUENCE_FILESIZE
611
+ max_sequence_pixels = constants.MAX_SEQUENCE_PIXELS
612
+
613
+ error_metadatas: list[types.ErrorMetadata] = []
614
+ image_metadatas: list[types.ImageMetadata] = []
615
+ video_metadatas: list[types.VideoMetadata] = []
272
616
 
273
617
  for metadata in metadatas:
274
618
  if isinstance(metadata, types.ErrorMetadata):
@@ -278,68 +622,93 @@ def process_sequence_properties(
278
622
  elif isinstance(metadata, types.VideoMetadata):
279
623
  video_metadatas.append(metadata)
280
624
  else:
281
- raise RuntimeError(f"invalid metadata type: {metadata}")
282
-
283
- sequences_by_folder = _group_sort_images_by_folder(image_metadatas)
284
- # make sure they are sorted
285
- for sequence in sequences_by_folder:
286
- for cur, nxt in geo.pairwise(sequence):
287
- assert cur.time <= nxt.time, "sequence must be sorted"
625
+ raise ValueError(f"invalid metadata type: {metadata}")
626
+
627
+ if video_metadatas:
628
+ # Check limits for videos
629
+ video_metadatas, video_error_metadatas = _check_video_limits(
630
+ video_metadatas,
631
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
632
+ max_capture_speed_kmh=max_capture_speed_kmh,
633
+ max_radius_for_stationary_check=10.0,
634
+ )
635
+ error_metadatas.extend(video_error_metadatas)
288
636
 
289
- for s in sequences_by_folder:
290
- _interpolate_subsecs_for_sorting(s)
637
+ if image_metadatas:
638
+ sequences: list[PointSequence]
291
639
 
292
- # cut sequences
293
- sequences_after_cut: T.List[PointSequence] = []
294
- for sequence in sequences_by_folder:
295
- cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time)
296
- sequences_after_cut.extend(cut)
297
- assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut)
640
+ # Group by folder and camera
641
+ sequences = _group_by_folder_and_camera(image_metadatas)
298
642
 
299
- # reuse imaeg_metadatas to store processed image metadatas
300
- image_metadatas = []
643
+ # Make sure each sequence is sorted (in-place update)
644
+ for sequence in sequences:
645
+ sequence.sort(
646
+ key=lambda metadata: metadata.sort_key(),
647
+ )
301
648
 
302
- sequence_idx = 0
649
+ # Interpolate subseconds for same timestamps (in-place update)
650
+ for sequence in sequences:
651
+ _interpolate_subsecs_for_sorting(sequence)
652
+
653
+ # Split sequences by max number of images, max filesize, max pixels, and cutoff time
654
+ # NOTE: Do not split by distance here because it affects the speed limit check
655
+ sequences = _split_sequences_by_limits(
656
+ sequences,
657
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
658
+ max_sequence_pixels=max_sequence_pixels,
659
+ max_sequence_images=constants.MAX_SEQUENCE_LENGTH,
660
+ cutoff_time=cutoff_time,
661
+ )
303
662
 
304
- for sequence in sequences_after_cut:
305
- # duplication check
306
- dedups, dups = duplication_check(
307
- sequence,
663
+ # Duplication check
664
+ sequences, errors = _check_sequences_duplication(
665
+ sequences,
308
666
  duplicate_distance=duplicate_distance,
309
667
  duplicate_angle=duplicate_angle,
310
668
  )
311
- assert len(sequence) == len(dedups) + len(dups)
312
- error_metadatas.extend(dups)
313
-
314
- # interpolate angles
315
- if interpolate_directions:
316
- for p in dedups:
317
- p.angle = None
318
- geo.interpolate_directions_if_none(dedups)
319
-
320
- # cut sequence per MAX_SEQUENCE_LENGTH images
321
- cut = cut_sequence(
322
- dedups,
323
- constants.MAX_SEQUENCE_LENGTH,
324
- max_sequence_filesize_in_bytes,
325
- max_sequence_pixels,
669
+ error_metadatas.extend(errors)
670
+
671
+ # Interpolate angles (in-place update)
672
+ for sequence in sequences:
673
+ if interpolate_directions:
674
+ for image in sequence:
675
+ image.angle = None
676
+ geo.interpolate_directions_if_none(sequence)
677
+
678
+ # Check limits for sequences
679
+ sequences, errors = _check_sequences_by_limits(
680
+ sequences,
681
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
682
+ max_capture_speed_kmh=max_capture_speed_kmh,
683
+ )
684
+ error_metadatas.extend(errors)
685
+
686
+ # Split sequences by cutoff distance
687
+ # NOTE: The speed limit check probably rejects most anomalies
688
+ sequences = _split_sequences_by_limits(
689
+ sequences, cutoff_distance=cutoff_distance
326
690
  )
327
691
 
328
- # assign sequence UUIDs
329
- for c in cut:
330
- for p in c:
692
+ # Assign sequence UUIDs (in-place update)
693
+ sequence_idx = 0
694
+ for sequence in sequences:
695
+ for image in sequence:
331
696
  # using incremental id as shorter "uuid", so we can save some space for the desc file
332
- p.MAPSequenceUUID = str(sequence_idx)
333
- image_metadatas.append(p)
697
+ image.MAPSequenceUUID = str(sequence_idx)
334
698
  sequence_idx += 1
335
699
 
700
+ image_metadatas = []
701
+ for sequence in sequences:
702
+ image_metadatas.extend(sequence)
703
+
704
+ assert sequence_idx == len(
705
+ set(metadata.MAPSequenceUUID for metadata in image_metadatas)
706
+ )
707
+
336
708
  results = error_metadatas + image_metadatas + video_metadatas
337
709
 
338
710
  assert len(metadatas) == len(results), (
339
- f"expected {len(metadatas)} results but got {len(results)}"
340
- )
341
- assert sequence_idx == len(
342
- set(metadata.MAPSequenceUUID for metadata in image_metadatas)
711
+ f"Expected {len(metadatas)} results but got {len(results)}"
343
712
  )
344
713
 
345
714
  return results