mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +237 -16
  3. mapillary_tools/authenticate.py +325 -64
  4. mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +12 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +18 -9
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +31 -13
  15. mapillary_tools/constants.py +47 -6
  16. mapillary_tools/exceptions.py +34 -35
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +7 -7
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +46 -33
  21. mapillary_tools/exiftool_runner.py +77 -0
  22. mapillary_tools/ffmpeg.py +24 -23
  23. mapillary_tools/geo.py +144 -120
  24. mapillary_tools/geotag/base.py +147 -0
  25. mapillary_tools/geotag/factory.py +291 -0
  26. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  27. mapillary_tools/geotag/geotag_images_from_exiftool.py +126 -82
  28. mapillary_tools/geotag/geotag_images_from_gpx.py +53 -118
  29. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  30. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  31. mapillary_tools/geotag/geotag_images_from_video.py +53 -51
  32. mapillary_tools/geotag/geotag_videos_from_exiftool.py +97 -0
  33. mapillary_tools/geotag/geotag_videos_from_gpx.py +39 -0
  34. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  35. mapillary_tools/geotag/image_extractors/base.py +18 -0
  36. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  37. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  38. mapillary_tools/geotag/options.py +160 -0
  39. mapillary_tools/geotag/utils.py +52 -16
  40. mapillary_tools/geotag/video_extractors/base.py +18 -0
  41. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  42. mapillary_tools/{video_data_extraction/extractors/gpx_parser.py → geotag/video_extractors/gpx.py} +57 -39
  43. mapillary_tools/geotag/video_extractors/native.py +157 -0
  44. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  45. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  46. mapillary_tools/history.py +7 -13
  47. mapillary_tools/mp4/construct_mp4_parser.py +9 -8
  48. mapillary_tools/mp4/io_utils.py +0 -1
  49. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  50. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  51. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  52. mapillary_tools/process_geotag_properties.py +155 -392
  53. mapillary_tools/process_sequence_properties.py +562 -208
  54. mapillary_tools/sample_video.py +13 -20
  55. mapillary_tools/telemetry.py +26 -13
  56. mapillary_tools/types.py +111 -58
  57. mapillary_tools/upload.py +316 -298
  58. mapillary_tools/upload_api_v4.py +55 -122
  59. mapillary_tools/uploader.py +396 -254
  60. mapillary_tools/utils.py +42 -18
  61. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/METADATA +3 -2
  62. mapillary_tools-0.14.0a2.dist-info/RECORD +72 -0
  63. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/WHEEL +1 -1
  64. mapillary_tools/geotag/__init__.py +0 -1
  65. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  66. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  67. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  68. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  69. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  70. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  71. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  72. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  73. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  74. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  75. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  76. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  77. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  78. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  79. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  80. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  81. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/entry_points.txt +0 -0
  82. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info/licenses}/LICENSE +0 -0
  83. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/top_level.txt +0 -0
@@ -1,64 +1,96 @@
1
+ from __future__ import annotations
2
+
1
3
  import itertools
2
4
  import logging
3
5
  import math
4
6
  import os
5
7
  import typing as T
6
8
 
7
- from . import constants, geo, types
8
- from .exceptions import MapillaryBadParameterError, MapillaryDuplicationError
9
+ from . import constants, exceptions, geo, types, utils
9
10
 
10
11
  LOG = logging.getLogger(__name__)
11
12
 
12
13
 
13
- Point = T.TypeVar("Point", bound=geo.Point)
14
- PointSequence = T.List[Point]
14
+ SeqItem = T.TypeVar("SeqItem")
15
+ PointSequence = T.List[geo.PointLike]
15
16
 
16
17
 
17
- def cut_sequence_by_time_distance(
18
- sequence: PointSequence,
19
- cutoff_distance: float,
20
- cutoff_time: float,
21
- ) -> T.List[PointSequence]:
22
- sequences: T.List[PointSequence] = []
18
+ def split_sequence_by(
19
+ sequence: T.Sequence[SeqItem],
20
+ should_split: T.Callable[[SeqItem, SeqItem], bool],
21
+ ) -> list[list[SeqItem]]:
22
+ """
23
+ Split a sequence into multiple sequences by should_split(prev, cur) => True
24
+ """
25
+ output_sequences: list[list[SeqItem]] = []
23
26
 
24
- if sequence:
25
- sequences.append([sequence[0]])
27
+ seq = iter(sequence)
28
+
29
+ prev = next(seq, None)
30
+ if prev is None:
31
+ return output_sequences
32
+
33
+ output_sequences.append([prev])
26
34
 
27
- for prev, cur in geo.pairwise(sequence):
35
+ for cur in seq:
28
36
  # invariant: prev is processed
29
- distance = geo.gps_distance(
30
- (prev.lat, prev.lon),
31
- (cur.lat, cur.lon),
32
- )
33
- if cutoff_distance <= distance:
34
- sequences.append([cur])
35
- continue
36
- time_diff = cur.time - prev.time
37
- assert 0 <= time_diff, "sequence must be sorted by capture times"
38
- if cutoff_time <= time_diff:
39
- sequences.append([cur])
40
- continue
41
- sequences[-1].append(cur)
37
+ if should_split(prev, cur):
38
+ output_sequences.append([cur])
39
+ else:
40
+ output_sequences[-1].append(cur)
41
+ prev = cur
42
42
  # invariant: cur is processed
43
43
 
44
- return sequences
44
+ assert sum(len(s) for s in output_sequences) == len(sequence)
45
+
46
+ return output_sequences
47
+
48
+
49
+ def split_sequence_by_agg(
50
+ sequence: T.Sequence[SeqItem],
51
+ should_split_with_sequence_state: T.Callable[[SeqItem, dict], bool],
52
+ ) -> list[list[SeqItem]]:
53
+ """
54
+ Split a sequence by should_split_with_sequence_state(cur, sequence_state) => True
55
+ """
56
+ output_sequences: list[list[SeqItem]] = []
57
+ sequence_state: dict = {}
58
+
59
+ for cur in sequence:
60
+ start_new_sequence = should_split_with_sequence_state(cur, sequence_state)
61
+
62
+ if not output_sequences:
63
+ output_sequences.append([])
64
+
65
+ if start_new_sequence:
66
+ # DO NOT reset the state because it contains the information of current item
67
+ # sequence_state = {}
68
+ if output_sequences[-1]:
69
+ output_sequences.append([])
70
+
71
+ output_sequences[-1].append(cur)
72
+
73
+ assert sum(len(s) for s in output_sequences) == len(sequence)
74
+
75
+ return output_sequences
45
76
 
46
77
 
47
78
  def duplication_check(
48
79
  sequence: PointSequence,
49
- duplicate_distance: float,
50
- duplicate_angle: float,
51
- ) -> T.Tuple[PointSequence, T.List[types.ErrorMetadata]]:
80
+ max_duplicate_distance: float,
81
+ max_duplicate_angle: float,
82
+ ) -> tuple[PointSequence, list[types.ErrorMetadata]]:
52
83
  dedups: PointSequence = []
53
- dups: T.List[types.ErrorMetadata] = []
84
+ dups: list[types.ErrorMetadata] = []
54
85
 
55
- sequence_iter = iter(sequence)
56
- prev = next(sequence_iter)
86
+ it = iter(sequence)
87
+ prev = next(it)
57
88
  if prev is None:
58
89
  return dedups, dups
90
+
59
91
  dedups.append(prev)
60
92
 
61
- for cur in sequence_iter:
93
+ for cur in it:
62
94
  # invariant: prev is processed
63
95
  distance = geo.gps_distance(
64
96
  (prev.lat, prev.lon),
@@ -70,21 +102,21 @@ def duplication_check(
70
102
  else:
71
103
  angle_diff = None
72
104
 
73
- if distance <= duplicate_distance and (
74
- angle_diff is not None and angle_diff <= duplicate_angle
105
+ if distance <= max_duplicate_distance and (
106
+ angle_diff is None or angle_diff <= max_duplicate_angle
75
107
  ):
76
- dups.append(
77
- types.describe_error_metadata(
78
- MapillaryDuplicationError(
79
- f"Duplicate of its previous image in terms of distance <= {duplicate_distance} and angle <= {duplicate_angle}",
80
- types.as_desc(cur),
81
- distance=distance,
82
- angle_diff=angle_diff,
83
- ),
84
- cur.filename,
85
- filetype=types.FileType.IMAGE,
108
+ msg = f"Duplicate of its previous image in terms of distance <= {max_duplicate_distance} and angle <= {max_duplicate_angle}"
109
+ dup = types.describe_error_metadata(
110
+ exceptions.MapillaryDuplicationError(
111
+ msg,
112
+ types.as_desc(cur),
113
+ distance=distance,
114
+ angle_diff=angle_diff,
86
115
  ),
116
+ cur.filename,
117
+ filetype=types.FileType.IMAGE,
87
118
  )
119
+ dups.append(dup)
88
120
  # prev does not change
89
121
  else:
90
122
  dedups.append(cur)
@@ -94,86 +126,14 @@ def duplication_check(
94
126
  return dedups, dups
95
127
 
96
128
 
97
- def cut_sequence(
98
- sequence: T.List[types.ImageMetadata],
99
- max_images: int,
100
- max_sequence_filesize: int,
101
- max_sequence_pixels: int,
102
- ) -> T.List[T.List[types.ImageMetadata]]:
103
- """
104
- Cut a sequence into multiple sequences by max_images or max filesize
105
- """
106
- sequences: T.List[T.List[types.ImageMetadata]] = []
107
- last_sequence_file_size = 0
108
- last_sequence_pixels = 0
109
-
110
- for image in sequence:
111
- # decent default values if width/height not available
112
- width = 1024 if image.width is None else image.width
113
- height = 1024 if image.height is None else image.height
114
-
115
- filesize = os.path.getsize(image.filename)
116
-
117
- if len(sequences) == 0:
118
- start_new_sequence = True
119
- else:
120
- if sequences[-1]:
121
- if max_images < len(sequences[-1]):
122
- LOG.debug(
123
- "Cut the sequence because the current sequence (%s) reaches the max number of images (%s)",
124
- len(sequences[-1]),
125
- max_images,
126
- )
127
- start_new_sequence = True
128
- elif max_sequence_filesize < last_sequence_file_size + filesize:
129
- LOG.debug(
130
- "Cut the sequence because the current sequence (%s) reaches the max filesize (%s)",
131
- last_sequence_file_size + filesize,
132
- max_sequence_filesize,
133
- )
134
- start_new_sequence = True
135
- elif max_sequence_pixels < last_sequence_pixels + width * height:
136
- LOG.debug(
137
- "Cut the sequence because the current sequence (%s) reaches the max pixels (%s)",
138
- last_sequence_pixels + width * height,
139
- max_sequence_pixels,
140
- )
141
- start_new_sequence = True
142
- else:
143
- start_new_sequence = False
144
- else:
145
- start_new_sequence = False
146
-
147
- if start_new_sequence:
148
- sequences.append([])
149
- last_sequence_file_size = 0
150
- last_sequence_pixels = 0
151
-
152
- sequences[-1].append(image)
153
- last_sequence_file_size += filesize
154
- last_sequence_pixels += width * height
155
-
156
- assert sum(len(s) for s in sequences) == len(sequence)
157
-
158
- return sequences
159
-
160
-
161
- def _group_sort_images_by_folder(
162
- image_metadatas: T.List[types.ImageMetadata],
163
- ) -> T.List[T.List[types.ImageMetadata]]:
164
- # group images by parent directory
165
- sequences_by_parent: T.Dict[str, T.List[types.ImageMetadata]] = {}
166
- for image_metadata in image_metadatas:
167
- filename = image_metadata.filename.resolve()
168
- sequences_by_parent.setdefault(str(filename.parent), []).append(image_metadata)
169
-
170
- sequences = list(sequences_by_parent.values())
171
- for sequence in sequences:
172
- sequence.sort(
173
- key=lambda metadata: metadata.sort_key(),
174
- )
175
-
176
- return sequences
129
+ def _group_by(
130
+ image_metadatas: T.Iterable[types.ImageMetadata],
131
+ group_key_func=T.Callable[[types.ImageMetadata], T.Hashable],
132
+ ) -> dict[T.Hashable, list[types.ImageMetadata]]:
133
+ grouped: dict[T.Hashable, list[types.ImageMetadata]] = {}
134
+ for metadata in image_metadatas:
135
+ grouped.setdefault(group_key_func(metadata), []).append(metadata)
136
+ return grouped
177
137
 
178
138
 
179
139
  def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
@@ -217,58 +177,426 @@ def _interpolate_subsecs_for_sorting(sequence: PointSequence) -> None:
217
177
  def _parse_filesize_in_bytes(filesize_str: str) -> int:
218
178
  filesize_str = filesize_str.strip().upper()
219
179
 
220
- if filesize_str.endswith("B"):
221
- return int(filesize_str[:-1])
222
- elif filesize_str.endswith("K"):
223
- return int(filesize_str[:-1]) * 1024
224
- elif filesize_str.endswith("M"):
225
- return int(filesize_str[:-1]) * 1024 * 1024
226
- elif filesize_str.endswith("G"):
227
- return int(filesize_str[:-1]) * 1024 * 1024 * 1024
228
- else:
229
- return int(filesize_str)
180
+ try:
181
+ if filesize_str.endswith("B"):
182
+ return int(filesize_str[:-1])
183
+ elif filesize_str.endswith("K"):
184
+ return int(filesize_str[:-1]) * 1024
185
+ elif filesize_str.endswith("M"):
186
+ return int(filesize_str[:-1]) * 1024 * 1024
187
+ elif filesize_str.endswith("G"):
188
+ return int(filesize_str[:-1]) * 1024 * 1024 * 1024
189
+ else:
190
+ return int(filesize_str)
191
+ except ValueError:
192
+ raise exceptions.MapillaryBadParameterError(
193
+ f"Expect valid file size that ends with B, K, M, or G, but got {filesize_str}"
194
+ )
230
195
 
231
196
 
232
197
  def _parse_pixels(pixels_str: str) -> int:
233
198
  pixels_str = pixels_str.strip().upper()
234
199
 
235
- if pixels_str.endswith("K"):
236
- return int(pixels_str[:-1]) * 1000
237
- elif pixels_str.endswith("M"):
238
- return int(pixels_str[:-1]) * 1000 * 1000
239
- elif pixels_str.endswith("G"):
240
- return int(pixels_str[:-1]) * 1000 * 1000 * 1000
200
+ try:
201
+ if pixels_str.endswith("K"):
202
+ return int(pixels_str[:-1]) * 1000
203
+ elif pixels_str.endswith("M"):
204
+ return int(pixels_str[:-1]) * 1000 * 1000
205
+ elif pixels_str.endswith("G"):
206
+ return int(pixels_str[:-1]) * 1000 * 1000 * 1000
207
+ else:
208
+ return int(pixels_str)
209
+ except ValueError:
210
+ raise exceptions.MapillaryBadParameterError(
211
+ f"Expect valid number of pixels that ends with K, M, or G, but got {pixels_str}"
212
+ )
213
+
214
+
215
+ def _avg_speed(sequence: T.Sequence[geo.PointLike]) -> float:
216
+ total_distance = 0.0
217
+ for cur, nxt in geo.pairwise(sequence):
218
+ total_distance += geo.gps_distance(
219
+ (cur.lat, cur.lon),
220
+ (nxt.lat, nxt.lon),
221
+ )
222
+
223
+ if sequence:
224
+ time_diff = sequence[-1].time - sequence[0].time
241
225
  else:
242
- return int(pixels_str)
226
+ time_diff = 0.0
243
227
 
228
+ if time_diff == 0.0:
229
+ return float("inf")
244
230
 
245
- def process_sequence_properties(
246
- metadatas: T.Sequence[types.MetadataOrError],
247
- cutoff_distance=constants.CUTOFF_DISTANCE,
248
- cutoff_time=constants.CUTOFF_TIME,
249
- interpolate_directions=False,
250
- duplicate_distance=constants.DUPLICATE_DISTANCE,
251
- duplicate_angle=constants.DUPLICATE_ANGLE,
252
- ) -> T.List[types.MetadataOrError]:
253
- try:
254
- max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
255
- constants.MAX_SEQUENCE_FILESIZE
231
+ return total_distance / time_diff
232
+
233
+
234
+ def _is_video_stationary(
235
+ sequence: T.Sequence[geo.PointLike], max_radius_in_meters: float
236
+ ) -> bool:
237
+ if not sequence:
238
+ return 0.0 <= max_radius_in_meters
239
+
240
+ start = (sequence[0].lat, sequence[0].lon)
241
+ for p in sequence:
242
+ distance = geo.gps_distance(start, (p.lat, p.lon))
243
+ if distance > max_radius_in_meters:
244
+ return False
245
+
246
+ return True
247
+
248
+
249
+ def _check_video_limits(
250
+ video_metadatas: T.Iterable[types.VideoMetadata],
251
+ max_sequence_filesize_in_bytes: int,
252
+ max_avg_speed: float,
253
+ max_radius_for_stationary_check: float,
254
+ ) -> tuple[list[types.VideoMetadata], list[types.ErrorMetadata]]:
255
+ output_video_metadatas: list[types.VideoMetadata] = []
256
+ error_metadatas: list[types.ErrorMetadata] = []
257
+
258
+ for video_metadata in video_metadatas:
259
+ try:
260
+ is_stationary = _is_video_stationary(
261
+ video_metadata.points,
262
+ max_radius_in_meters=max_radius_for_stationary_check,
263
+ )
264
+ if is_stationary:
265
+ raise exceptions.MapillaryStationaryVideoError("Stationary video")
266
+
267
+ video_filesize = (
268
+ utils.get_file_size(video_metadata.filename)
269
+ if video_metadata.filesize is None
270
+ else video_metadata.filesize
271
+ )
272
+ if video_filesize > max_sequence_filesize_in_bytes:
273
+ raise exceptions.MapillaryFileTooLargeError(
274
+ f"Video file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
275
+ )
276
+
277
+ contains_null_island = any(
278
+ p.lat == 0 and p.lon == 0 for p in video_metadata.points
279
+ )
280
+ if contains_null_island:
281
+ raise exceptions.MapillaryNullIslandError(
282
+ "Found GPS coordinates in Null Island (0, 0)",
283
+ )
284
+
285
+ too_fast = (
286
+ len(video_metadata.points) >= 2
287
+ and _avg_speed(video_metadata.points) > max_avg_speed
288
+ )
289
+ if too_fast:
290
+ raise exceptions.MapillaryCaptureSpeedTooFastError(
291
+ f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
292
+ )
293
+ except exceptions.MapillaryDescriptionError as ex:
294
+ error_metadatas.append(
295
+ types.describe_error_metadata(
296
+ exc=ex,
297
+ filename=video_metadata.filename,
298
+ filetype=video_metadata.filetype,
299
+ )
300
+ )
301
+ else:
302
+ output_video_metadatas.append(video_metadata)
303
+
304
+ LOG.info(
305
+ "Found %s videos and %s errors after video limit checks",
306
+ len(output_video_metadatas),
307
+ len(error_metadatas),
308
+ )
309
+
310
+ return output_video_metadatas, error_metadatas
311
+
312
+
313
+ def _check_sequences_by_limits(
314
+ input_sequences: T.Sequence[PointSequence],
315
+ max_sequence_filesize_in_bytes: int,
316
+ max_avg_speed: float,
317
+ ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
318
+ output_sequences: list[PointSequence] = []
319
+ output_errors: list[types.ErrorMetadata] = []
320
+
321
+ for sequence in input_sequences:
322
+ sequence_filesize = sum(
323
+ utils.get_file_size(image.filename)
324
+ if image.filesize is None
325
+ else image.filesize
326
+ for image in sequence
256
327
  )
257
- except ValueError:
258
- raise MapillaryBadParameterError(
259
- f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_FILESIZE to be a valid filesize that ends with B, K, M, or G, but got {constants.MAX_SEQUENCE_FILESIZE}"
328
+
329
+ try:
330
+ if sequence_filesize > max_sequence_filesize_in_bytes:
331
+ raise exceptions.MapillaryFileTooLargeError(
332
+ f"Sequence file size exceeds the maximum allowed file size ({max_sequence_filesize_in_bytes} bytes)",
333
+ )
334
+
335
+ contains_null_island = any(
336
+ image.lat == 0 and image.lon == 0 for image in sequence
337
+ )
338
+ if contains_null_island:
339
+ raise exceptions.MapillaryNullIslandError(
340
+ "Found GPS coordinates in Null Island (0, 0)",
341
+ )
342
+
343
+ too_fast = len(sequence) >= 2 and _avg_speed(sequence) > max_avg_speed
344
+ if too_fast:
345
+ raise exceptions.MapillaryCaptureSpeedTooFastError(
346
+ f"Capture speed too fast (exceeds {round(max_avg_speed, 3)} m/s)",
347
+ )
348
+ except exceptions.MapillaryDescriptionError as ex:
349
+ for image in sequence:
350
+ output_errors.append(
351
+ types.describe_error_metadata(
352
+ exc=ex,
353
+ filename=image.filename,
354
+ filetype=types.FileType.IMAGE,
355
+ )
356
+ )
357
+
358
+ else:
359
+ output_sequences.append(sequence)
360
+
361
+ assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
362
+ len(s) for s in input_sequences
363
+ )
364
+
365
+ LOG.info(
366
+ "Found %s sequences and %s errors after sequence limit checks",
367
+ len(output_sequences),
368
+ len(output_errors),
369
+ )
370
+
371
+ return output_sequences, output_errors
372
+
373
+
374
+ def _group_by_folder_and_camera(
375
+ image_metadatas: list[types.ImageMetadata],
376
+ ) -> list[list[types.ImageMetadata]]:
377
+ grouped = _group_by(
378
+ image_metadatas,
379
+ lambda metadata: (
380
+ str(metadata.filename.parent),
381
+ metadata.MAPDeviceMake,
382
+ metadata.MAPDeviceModel,
383
+ metadata.width,
384
+ metadata.height,
385
+ ),
386
+ )
387
+ for key in grouped:
388
+ LOG.debug("Group sequences by %s: %s images", key, len(grouped[key]))
389
+ output_sequences = list(grouped.values())
390
+
391
+ LOG.info(
392
+ "Found %s sequences from different folders and cameras",
393
+ len(output_sequences),
394
+ )
395
+
396
+ return output_sequences
397
+
398
+
399
+ def _split_sequences_by_cutoff_time(
400
+ input_sequences: T.Sequence[PointSequence], cutoff_time: float
401
+ ) -> list[PointSequence]:
402
+ def _should_split_by_cutoff_time(
403
+ prev: types.ImageMetadata, cur: types.ImageMetadata
404
+ ) -> bool:
405
+ time_diff = cur.time - prev.time
406
+ assert 0 <= time_diff, "sequence must be sorted by capture times"
407
+ should = cutoff_time < time_diff
408
+ if should:
409
+ LOG.debug(
410
+ "Split because the capture time gap %s seconds exceeds cutoff_time (%s seconds): %s: %s -> %s",
411
+ round(time_diff, 2),
412
+ round(cutoff_time, 2),
413
+ prev.filename.parent,
414
+ prev.filename.name,
415
+ cur.filename.name,
416
+ )
417
+ return should
418
+
419
+ output_sequences = []
420
+ for sequence in input_sequences:
421
+ output_sequences.extend(
422
+ split_sequence_by(sequence, should_split=_should_split_by_cutoff_time)
260
423
  )
261
424
 
262
- try:
263
- max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
264
- except ValueError:
265
- raise MapillaryBadParameterError(
266
- f"Expect the envvar {constants._ENV_PREFIX}MAX_SEQUENCE_PIXELS to be a valid number of pixels that ends with K, M, or G, but got {constants.MAX_SEQUENCE_PIXELS}"
425
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
426
+
427
+ LOG.info(
428
+ "Found %s sequences after split by cutoff_time %d seconds",
429
+ len(output_sequences),
430
+ cutoff_time,
431
+ )
432
+
433
+ return output_sequences
434
+
435
+
436
+ def _split_sequences_by_cutoff_distance(
437
+ input_sequences: T.Sequence[PointSequence], cutoff_distance: float
438
+ ) -> list[PointSequence]:
439
+ def _should_split_by_cutoff_distance(
440
+ prev: types.ImageMetadata, cur: types.ImageMetadata
441
+ ) -> bool:
442
+ distance = geo.gps_distance(
443
+ (prev.lat, prev.lon),
444
+ (cur.lat, cur.lon),
445
+ )
446
+ should = cutoff_distance < distance
447
+ if should:
448
+ LOG.debug(
449
+ "Split because the distance gap %s meters exceeds cutoff_distance (%s meters): %s: %s -> %s",
450
+ round(distance, 2),
451
+ round(cutoff_distance, 2),
452
+ prev.filename.parent,
453
+ prev.filename.name,
454
+ cur.filename.name,
455
+ )
456
+ return should
457
+
458
+ output_sequences = []
459
+ for sequence in input_sequences:
460
+ output_sequences.extend(
461
+ split_sequence_by(sequence, _should_split_by_cutoff_distance)
462
+ )
463
+
464
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
465
+
466
+ LOG.info(
467
+ "Found %s sequences after split by cutoff_distance %d meters",
468
+ len(output_sequences),
469
+ cutoff_distance,
470
+ )
471
+
472
+ return output_sequences
473
+
474
+
475
+ def _check_sequences_duplication(
476
+ input_sequences: T.Sequence[PointSequence],
477
+ duplicate_distance: float,
478
+ duplicate_angle: float,
479
+ ) -> tuple[list[PointSequence], list[types.ErrorMetadata]]:
480
+ output_sequences: list[PointSequence] = []
481
+ output_errors: list[types.ErrorMetadata] = []
482
+
483
+ for sequence in input_sequences:
484
+ output_sequence, errors = duplication_check(
485
+ sequence,
486
+ max_duplicate_distance=duplicate_distance,
487
+ max_duplicate_angle=duplicate_angle,
488
+ )
489
+ assert len(sequence) == len(output_sequence) + len(errors)
490
+ output_sequences.append(output_sequence)
491
+ output_errors.extend(errors)
492
+
493
+ assert sum(len(s) for s in output_sequences) + len(output_errors) == sum(
494
+ len(s) for s in input_sequences
495
+ )
496
+
497
+ LOG.info(
498
+ "Found %s sequences and %s errors after duplication check",
499
+ len(output_sequences),
500
+ len(output_errors),
501
+ )
502
+
503
+ return output_sequences, output_errors
504
+
505
+
506
+ def _split_sequences_by_limits(
507
+ input_sequences: T.Sequence[PointSequence],
508
+ max_sequence_filesize_in_bytes: float,
509
+ max_sequence_pixels: float,
510
+ ) -> list[PointSequence]:
511
+ max_sequence_images = constants.MAX_SEQUENCE_LENGTH
512
+ max_sequence_filesize = max_sequence_filesize_in_bytes
513
+
514
+ def _should_split(image: types.ImageMetadata, sequence_state: dict) -> bool:
515
+ last_sequence_images = sequence_state.get("last_sequence_images", 0)
516
+ last_sequence_file_size = sequence_state.get("last_sequence_file_size", 0)
517
+ last_sequence_pixels = sequence_state.get("last_sequence_pixels", 0)
518
+
519
+ # decent default values if width/height not available
520
+ width = 1024 if image.width is None else image.width
521
+ height = 1024 if image.height is None else image.height
522
+ pixels = width * height
523
+
524
+ if image.filesize is None:
525
+ filesize = os.path.getsize(image.filename)
526
+ else:
527
+ filesize = image.filesize
528
+
529
+ new_sequence_images = last_sequence_images + 1
530
+ new_sequence_file_size = last_sequence_file_size + filesize
531
+ new_sequence_pixels = last_sequence_pixels + pixels
532
+
533
+ if max_sequence_images < new_sequence_images:
534
+ LOG.debug(
535
+ "Split because the current sequence (%s) reaches the max number of images (%s)",
536
+ new_sequence_images,
537
+ max_sequence_images,
538
+ )
539
+ start_new_sequence = True
540
+ elif max_sequence_filesize < new_sequence_file_size:
541
+ LOG.debug(
542
+ "Split because the current sequence (%s) reaches the max filesize (%s)",
543
+ new_sequence_file_size,
544
+ max_sequence_filesize,
545
+ )
546
+ start_new_sequence = True
547
+ elif max_sequence_pixels < new_sequence_pixels:
548
+ LOG.debug(
549
+ "Split because the current sequence (%s) reaches the max pixels (%s)",
550
+ new_sequence_pixels,
551
+ max_sequence_pixels,
552
+ )
553
+ start_new_sequence = True
554
+ else:
555
+ start_new_sequence = False
556
+
557
+ if not start_new_sequence:
558
+ sequence_state["last_sequence_images"] = new_sequence_images
559
+ sequence_state["last_sequence_file_size"] = new_sequence_file_size
560
+ sequence_state["last_sequence_pixels"] = new_sequence_pixels
561
+ else:
562
+ sequence_state["last_sequence_images"] = 1
563
+ sequence_state["last_sequence_file_size"] = filesize
564
+ sequence_state["last_sequence_pixels"] = pixels
565
+
566
+ return start_new_sequence
567
+
568
+ output_sequences = []
569
+ for sequence in input_sequences:
570
+ output_sequences.extend(
571
+ split_sequence_by_agg(
572
+ sequence, should_split_with_sequence_state=_should_split
573
+ )
267
574
  )
268
575
 
269
- error_metadatas: T.List[types.ErrorMetadata] = []
270
- image_metadatas: T.List[types.ImageMetadata] = []
271
- video_metadatas: T.List[types.VideoMetadata] = []
576
+ assert sum(len(s) for s in output_sequences) == sum(len(s) for s in input_sequences)
577
+
578
+ LOG.info("Found %s sequences after split by sequence limits", len(output_sequences))
579
+
580
+ return output_sequences
581
+
582
+
583
+ def process_sequence_properties(
584
+ metadatas: T.Sequence[types.MetadataOrError],
585
+ cutoff_distance: float = constants.CUTOFF_DISTANCE,
586
+ cutoff_time: float = constants.CUTOFF_TIME,
587
+ interpolate_directions: bool = False,
588
+ duplicate_distance: float = constants.DUPLICATE_DISTANCE,
589
+ duplicate_angle: float = constants.DUPLICATE_ANGLE,
590
+ max_avg_speed: float = constants.MAX_AVG_SPEED,
591
+ ) -> list[types.MetadataOrError]:
592
+ max_sequence_filesize_in_bytes = _parse_filesize_in_bytes(
593
+ constants.MAX_SEQUENCE_FILESIZE
594
+ )
595
+ max_sequence_pixels = _parse_pixels(constants.MAX_SEQUENCE_PIXELS)
596
+
597
+ error_metadatas: list[types.ErrorMetadata] = []
598
+ image_metadatas: list[types.ImageMetadata] = []
599
+ video_metadatas: list[types.VideoMetadata] = []
272
600
 
273
601
  for metadata in metadatas:
274
602
  if isinstance(metadata, types.ErrorMetadata):
@@ -280,66 +608,92 @@ def process_sequence_properties(
280
608
  else:
281
609
  raise RuntimeError(f"invalid metadata type: {metadata}")
282
610
 
283
- sequences_by_folder = _group_sort_images_by_folder(image_metadatas)
284
- # make sure they are sorted
285
- for sequence in sequences_by_folder:
286
- for cur, nxt in geo.pairwise(sequence):
287
- assert cur.time <= nxt.time, "sequence must be sorted"
611
+ if video_metadatas:
612
+ # Check limits for videos
613
+ video_metadatas, video_error_metadatas = _check_video_limits(
614
+ video_metadatas,
615
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
616
+ max_avg_speed=max_avg_speed,
617
+ max_radius_for_stationary_check=10.0,
618
+ )
619
+ error_metadatas.extend(video_error_metadatas)
288
620
 
289
- for s in sequences_by_folder:
290
- _interpolate_subsecs_for_sorting(s)
621
+ if image_metadatas:
622
+ sequences: list[PointSequence]
291
623
 
292
- # cut sequences
293
- sequences_after_cut: T.List[PointSequence] = []
294
- for sequence in sequences_by_folder:
295
- cut = cut_sequence_by_time_distance(sequence, cutoff_distance, cutoff_time)
296
- sequences_after_cut.extend(cut)
297
- assert len(image_metadatas) == sum(len(s) for s in sequences_after_cut)
624
+ # Group by folder and camera
625
+ sequences = _group_by_folder_and_camera(image_metadatas)
298
626
 
299
- # reuse imaeg_metadatas to store processed image metadatas
300
- image_metadatas = []
627
+ # Make sure each sequence is sorted (in-place update)
628
+ for sequence in sequences:
629
+ sequence.sort(
630
+ key=lambda metadata: metadata.sort_key(),
631
+ )
301
632
 
302
- sequence_idx = 0
633
+ # Interpolate subseconds for same timestamps (in-place update)
634
+ for sequence in sequences:
635
+ _interpolate_subsecs_for_sorting(sequence)
303
636
 
304
- for sequence in sequences_after_cut:
305
- # duplication check
306
- dedups, dups = duplication_check(
307
- sequence,
637
+ # Split sequences by cutoff time
638
+ # NOTE: Do not split by distance here because it affects the speed limit check
639
+ sequences = _split_sequences_by_cutoff_time(sequences, cutoff_time=cutoff_time)
640
+
641
+ # Duplication check
642
+ sequences, errors = _check_sequences_duplication(
643
+ sequences,
308
644
  duplicate_distance=duplicate_distance,
309
645
  duplicate_angle=duplicate_angle,
310
646
  )
311
- assert len(sequence) == len(dedups) + len(dups)
312
- error_metadatas.extend(dups)
313
-
314
- # interpolate angles
315
- if interpolate_directions:
316
- for p in dedups:
317
- p.angle = None
318
- geo.interpolate_directions_if_none(dedups)
319
-
320
- # cut sequence per MAX_SEQUENCE_LENGTH images
321
- cut = cut_sequence(
322
- dedups,
323
- constants.MAX_SEQUENCE_LENGTH,
324
- max_sequence_filesize_in_bytes,
325
- max_sequence_pixels,
647
+ error_metadatas.extend(errors)
648
+
649
+ # Interpolate angles (in-place update)
650
+ for sequence in sequences:
651
+ if interpolate_directions:
652
+ for image in sequence:
653
+ image.angle = None
654
+ geo.interpolate_directions_if_none(sequence)
655
+
656
+ # Split sequences by max number of images, max filesize, and max pixels
657
+ sequences = _split_sequences_by_limits(
658
+ sequences,
659
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
660
+ max_sequence_pixels=max_sequence_pixels,
661
+ )
662
+
663
+ # Check limits for sequences
664
+ sequences, errors = _check_sequences_by_limits(
665
+ sequences,
666
+ max_sequence_filesize_in_bytes=max_sequence_filesize_in_bytes,
667
+ max_avg_speed=max_avg_speed,
668
+ )
669
+ error_metadatas.extend(errors)
670
+
671
+ # Split sequences by cutoff distance
672
+ # NOTE: The speed limit check probably rejects most of anomalies
673
+ sequences = _split_sequences_by_cutoff_distance(
674
+ sequences, cutoff_distance=cutoff_distance
326
675
  )
327
676
 
328
- # assign sequence UUIDs
329
- for c in cut:
330
- for p in c:
677
+ # Assign sequence UUIDs (in-place update)
678
+ sequence_idx = 0
679
+ for sequence in sequences:
680
+ for image in sequence:
331
681
  # using incremental id as shorter "uuid", so we can save some space for the desc file
332
- p.MAPSequenceUUID = str(sequence_idx)
333
- image_metadatas.append(p)
682
+ image.MAPSequenceUUID = str(sequence_idx)
334
683
  sequence_idx += 1
335
684
 
685
+ image_metadatas = []
686
+ for sequence in sequences:
687
+ image_metadatas.extend(sequence)
688
+
689
+ assert sequence_idx == len(
690
+ set(metadata.MAPSequenceUUID for metadata in image_metadatas)
691
+ )
692
+
336
693
  results = error_metadatas + image_metadatas + video_metadatas
337
694
 
338
695
  assert len(metadatas) == len(results), (
339
696
  f"expected {len(metadatas)} results but got {len(results)}"
340
697
  )
341
- assert sequence_idx == len(
342
- set(metadata.MAPSequenceUUID for metadata in image_metadatas)
343
- )
344
698
 
345
699
  return results