PyPI - mapillary-tools - Versions diffs - 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl - Mend

mapillary-tools 0.13.3a1 (py3-none-any.whl) → 0.14.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (87)

mapillary_tools/__init__.py +1 -1
mapillary_tools/api_v4.py +287 -22
mapillary_tools/authenticate.py +326 -64
mapillary_tools/blackvue_parser.py +195 -0
mapillary_tools/camm/camm_builder.py +55 -97
mapillary_tools/camm/camm_parser.py +429 -181
mapillary_tools/commands/__main__.py +17 -8
mapillary_tools/commands/authenticate.py +8 -1
mapillary_tools/commands/process.py +27 -51
mapillary_tools/commands/process_and_upload.py +19 -5
mapillary_tools/commands/sample_video.py +2 -3
mapillary_tools/commands/upload.py +44 -13
mapillary_tools/commands/video_process_and_upload.py +19 -5
mapillary_tools/config.py +65 -26
mapillary_tools/constants.py +141 -18
mapillary_tools/exceptions.py +37 -34
mapillary_tools/exif_read.py +221 -116
mapillary_tools/exif_write.py +10 -8
mapillary_tools/exiftool_read.py +33 -42
mapillary_tools/exiftool_read_video.py +97 -47
mapillary_tools/exiftool_runner.py +57 -0
mapillary_tools/ffmpeg.py +417 -242
mapillary_tools/geo.py +158 -118
mapillary_tools/geotag/__init__.py +0 -1
mapillary_tools/geotag/base.py +147 -0
mapillary_tools/geotag/factory.py +307 -0
mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
mapillary_tools/geotag/geotag_images_from_video.py +88 -51
mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
mapillary_tools/geotag/image_extractors/base.py +18 -0
mapillary_tools/geotag/image_extractors/exif.py +60 -0
mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
mapillary_tools/geotag/options.py +182 -0
mapillary_tools/geotag/utils.py +52 -16
mapillary_tools/geotag/video_extractors/base.py +18 -0
mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
mapillary_tools/geotag/video_extractors/gpx.py +116 -0
mapillary_tools/geotag/video_extractors/native.py +160 -0
mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
mapillary_tools/history.py +134 -20
mapillary_tools/mp4/construct_mp4_parser.py +17 -10
mapillary_tools/mp4/io_utils.py +0 -1
mapillary_tools/mp4/mp4_sample_parser.py +36 -28
mapillary_tools/mp4/simple_mp4_builder.py +10 -9
mapillary_tools/mp4/simple_mp4_parser.py +13 -22
mapillary_tools/process_geotag_properties.py +184 -414
mapillary_tools/process_sequence_properties.py +594 -225
mapillary_tools/sample_video.py +20 -26
mapillary_tools/serializer/description.py +587 -0
mapillary_tools/serializer/gpx.py +132 -0
mapillary_tools/telemetry.py +26 -13
mapillary_tools/types.py +98 -611
mapillary_tools/upload.py +408 -416
mapillary_tools/upload_api_v4.py +172 -174
mapillary_tools/uploader.py +804 -284
mapillary_tools/utils.py +49 -18
{mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
mapillary_tools-0.14.0.dist-info/RECORD +75 -0
{mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
mapillary_tools/geotag/blackvue_parser.py +0 -118
mapillary_tools/geotag/geotag_from_generic.py +0 -22
mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
mapillary_tools/video_data_extraction/cli_options.py +0 -22
mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
{mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
{mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
{mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0

mapillary_tools/ffmpeg.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # pyre-ignore-all-errors[5, 24]
+from __future__ import annotations
 import datetime
 import json
@@ -12,8 +13,7 @@ import typing as T
 from pathlib import Path
 LOG = logging.getLogger(__name__)
-FRAME_EXT = ".jpg"
-NA_STREAM_IDX = "NA"
+_MAX_STDERR_LENGTH = 2048
 class StreamTag(T.TypedDict):
@@ -30,19 +30,19 @@ class Stream(T.TypedDict):
     index: int
     tags: StreamTag
     width: int
+    r_frame_rate: str
+    avg_frame_rate: str
+    nb_frames: str
 class ProbeOutput(T.TypedDict):
-    streams: T.List[Stream]
+    streams: list[Stream]
 class FFmpegNotFoundError(Exception):
     pass
-_MAX_STDERR_LENGTH = 2048
 def _truncate_begin(s: str) -> str:
     if _MAX_STDERR_LENGTH < len(s):
         return "..." + s[-_MAX_STDERR_LENGTH:]
@@ -73,79 +73,43 @@ class FFmpegCalledProcessError(Exception):
 class FFMPEG:
+    FRAME_EXT = ".jpg"
     def __init__(
         self,
         ffmpeg_path: str = "ffmpeg",
         ffprobe_path: str = "ffprobe",
-        stderr: T.Optional[int] = None,
+        stderr: int | None = None,
     ) -> None:
         """
-        ffmpeg_path: path to ffmpeg binary
-        ffprobe_path: path to ffprobe binary
-        stderr: param passed to subprocess.run to control whether to capture stderr
+        Initialize FFMPEG wrapper with paths to ffmpeg and ffprobe binaries.
+        Args:
+            ffmpeg_path: Path to ffmpeg binary executable
+            ffprobe_path: Path to ffprobe binary executable
+            stderr: Parameter passed to subprocess.run to control stderr capture.
+                   Use subprocess.PIPE to capture stderr, None to inherit from parent
         """
         self.ffmpeg_path = ffmpeg_path
         self.ffprobe_path = ffprobe_path
         self.stderr = stderr
-    def _run_ffprobe_json(self, cmd: T.List[str]) -> T.Dict:
-        full_cmd: T.List[str] = [self.ffprobe_path, "-print_format", "json", *cmd]
-        LOG.info(f"Extracting video information: {' '.join(full_cmd)}")
-        try:
-            completed = subprocess.run(
-                full_cmd,
-                check=True,
-                stdout=subprocess.PIPE,
-                stderr=self.stderr,
-            )
-        except FileNotFoundError:
-            raise FFmpegNotFoundError(
-                f'The ffprobe command "{self.ffprobe_path}" not found'
-            )
-        except subprocess.CalledProcessError as ex:
-            raise FFmpegCalledProcessError(ex) from ex
-        try:
-            stdout = completed.stdout.decode("utf-8")
-        except UnicodeDecodeError:
-            raise RuntimeError(
-                f"Error decoding ffprobe output as unicode: {_truncate_end(str(completed.stdout))}"
-            )
-        try:
-            output = json.loads(stdout)
-        except json.JSONDecodeError:
-            raise RuntimeError(
-                f"Error JSON decoding ffprobe output: {_truncate_end(stdout)}"
-            )
-        # This check is for macOS:
-        # ffprobe -hide_banner -print_format json not_exists
-        # you will get exit code == 0 with the following stdout and stderr:
-        # {
-        # }
-        # not_exists: No such file or directory
-        if not output:
-            raise RuntimeError(
-                f"Empty JSON ffprobe output with STDERR: {_truncate_begin(str(completed.stderr))}"
-            )
+    def probe_format_and_streams(self, video_path: Path) -> ProbeOutput:
+        """
+        Probe video file to extract format and stream information using ffprobe.
-        return output
+        Args:
+            video_path: Path to the video file to probe
-    def _run_ffmpeg(self, cmd: T.List[str]) -> None:
-        full_cmd: T.List[str] = [self.ffmpeg_path, *cmd]
-        LOG.info(f"Extracting frames: {' '.join(full_cmd)}")
-        try:
-            subprocess.run(full_cmd, check=True, stderr=self.stderr)
-        except FileNotFoundError:
-            raise FFmpegNotFoundError(
-                f'The ffmpeg command "{self.ffmpeg_path}" not found'
-            )
-        except subprocess.CalledProcessError as ex:
-            raise FFmpegCalledProcessError(ex) from ex
+        Returns:
+            Dictionary containing streams and format information from ffprobe
-    def probe_format_and_streams(self, video_path: Path) -> ProbeOutput:
-        cmd: T.List[str] = [
+        Raises:
+            FFmpegNotFoundError: If ffprobe binary is not found
+            FFmpegCalledProcessError: If ffprobe command fails
+            RuntimeError: If output cannot be decoded or parsed as JSON
+        """
+        cmd: list[str] = [
             "-hide_banner",
             "-show_format",
             "-show_streams",
@@ -153,49 +117,79 @@ class FFMPEG:
         ]
         return T.cast(ProbeOutput, self._run_ffprobe_json(cmd))
-    def extract_frames(
+    def extract_frames_by_interval(
         self,
         video_path: Path,
         sample_dir: Path,
         sample_interval: float,
-        stream_idx: T.Optional[int] = None,
+        stream_specifier: int | str = "v",
     ) -> None:
         """
-        Extract frames by the sample interval from the specified video stream.
-        stream_idx: the stream_index specifier to a **video stream**. If it's None, defaults to "v". See http://ffmpeg.org/ffmpeg.html#Stream-specifiers-1
+        Extract frames from video at regular time intervals using fps filter.
+        Args:
+            video_path: Path to input video file
+            sample_dir: Directory where extracted frame images will be saved
+            sample_interval: Time interval between extracted frames in seconds
+            stream_specifier: Stream specifier to target specific stream(s).
+                              Can be an integer (stream index) or "v" (all video streams)
+                              See https://ffmpeg.org/ffmpeg.html#Stream-specifiers-1
+        Raises:
+            FFmpegNotFoundError: If ffmpeg binary is not found
+            FFmpegCalledProcessError: If ffmpeg command fails
         """
+        self._validate_stream_specifier(stream_specifier)
         sample_prefix = sample_dir.joinpath(video_path.stem)
-        if stream_idx is not None:
-            stream_selector = ["-map", f"0:{stream_idx}"]
-            ouput_template = f"{sample_prefix}_{stream_idx}_%06d{FRAME_EXT}"
-            stream_specifier = f"{stream_idx}"
-        else:
-            stream_selector = []
-            ouput_template = f"{sample_prefix}_{NA_STREAM_IDX}_%06d{FRAME_EXT}"
-            stream_specifier = "v"
-        cmd: T.List[str] = [
-            # global options should be specified first
-            *["-hide_banner", "-nostdin"],
-            # input 0
+        stream_selector = ["-map", f"0:{stream_specifier}"]
+        output_template = f"{sample_prefix}_{stream_specifier}_%06d{self.FRAME_EXT}"
+        cmd: list[str] = [
+            # Global options should be specified first
+            *["-hide_banner"],
+            # Input 0
             *["-i", str(video_path)],
-            # select stream
+            # Select stream
             *stream_selector,
-            # filter videos
+            # Filter videos
             *["-vf", f"fps=1/{sample_interval}"],
-            # video quality level (or the alias -q:v)
-            *[f"-qscale:{stream_specifier}", "2"],
+            # Video quality level (or the alias -q:v)
             # -q:v=1 is the best quality but larger image sizes
             # see https://stackoverflow.com/a/10234065
             # *["-qscale:v", "1", "-qmin", "1"],
-            # output
-            ouput_template,
+            *["-qscale:v", "2"],
+            # Output
+            output_template,
         ]
-        self._run_ffmpeg(cmd)
+        self.run_ffmpeg_non_interactive(cmd)
+    @classmethod
+    def generate_binary_search(cls, sorted_frame_indices: list[int]) -> str:
+        """
+        Generate a binary search expression for ffmpeg select filter.
+        Creates an optimized filter expression that uses binary search logic
+        to efficiently select specific frame numbers from a video stream.
+        Args:
+            sorted_frame_indices: List of frame numbers to select, must be sorted in ascending order
+        Returns:
+            FFmpeg filter expression string using binary search logic
+        Examples:
+            >>> FFMPEG.generate_binary_search([])
+            '0'
+            >>> FFMPEG.generate_binary_search([1])
+            'eq(n\\\\,1)'
+            >>> FFMPEG.generate_binary_search([1, 2])
+            'if(lt(n\\\\,2)\\\\,eq(n\\\\,1)\\\\,eq(n\\\\,2))'
+            >>> FFMPEG.generate_binary_search([1, 2, 3])
+            'if(lt(n\\\\,2)\\\\,eq(n\\\\,1)\\\\,if(lt(n\\\\,3)\\\\,eq(n\\\\,2)\\\\,eq(n\\\\,3)))'
+        """
-    def generate_binary_search(self, sorted_frame_indices: T.Sequence[int]) -> str:
         length = len(sorted_frame_indices)
         if length == 0:
@@ -204,39 +198,50 @@ class FFMPEG:
         if length == 1:
             return f"eq(n\\,{sorted_frame_indices[0]})"
-        middle = length // 2
-        return f"if(lt(n\\,{sorted_frame_indices[middle]})\\,{self.generate_binary_search(sorted_frame_indices[:middle])}\\,{self.generate_binary_search(sorted_frame_indices[middle:])})"
+        middle_idx = length // 2
+        left = cls.generate_binary_search(sorted_frame_indices[:middle_idx])
+        right = cls.generate_binary_search(sorted_frame_indices[middle_idx:])
+        return f"if(lt(n\\,{sorted_frame_indices[middle_idx]})\\,{left}\\,{right})"
     def extract_specified_frames(
         self,
         video_path: Path,
         sample_dir: Path,
-        frame_indices: T.Set[int],
-        stream_idx: T.Optional[int] = None,
+        frame_indices: set[int],
+        stream_specifier: int | str = "v",
     ) -> None:
         """
-        Extract specified frames from the specified video stream.
-        stream_idx: the stream_index specifier to a **video stream**. If it's None, defaults to "v". See http://ffmpeg.org/ffmpeg.html#Stream-specifiers-1
+        Extract specific frames from video by frame number using select filter.
+        Uses a binary search filter expression to efficiently select only the
+        specified frame numbers from the video stream.
+        Args:
+            video_path: Path to input video file
+            sample_dir: Directory where extracted frame images will be saved
+            frame_indices: Set of specific frame numbers to extract (0-based)
+            stream_specifier: Stream specifier to target specific stream(s).
+                              Can be an integer (stream index) or "v" (all video streams)
+                              See https://ffmpeg.org/ffmpeg.html#Stream-specifiers-1
+        Raises:
+            FFmpegNotFoundError: If ffmpeg binary is not found
+            FFmpegCalledProcessError: If ffmpeg command fails
+        Note:
+            Frame indices are 0-based but ffmpeg output files are numbered starting from 1.
+            Creates temporary filter script file on Windows to avoid command line length limits.
         """
+        self._validate_stream_specifier(stream_specifier)
         if not frame_indices:
             return
         sample_prefix = sample_dir.joinpath(video_path.stem)
-        if stream_idx is not None:
-            stream_selector = ["-map", f"0:{stream_idx}"]
-            ouput_template = f"{sample_prefix}_{stream_idx}_%06d{FRAME_EXT}"
-            stream_specifier = f"{stream_idx}"
-        else:
-            stream_selector = []
-            ouput_template = f"{sample_prefix}_{NA_STREAM_IDX}_%06d{FRAME_EXT}"
-            stream_specifier = "v"
-        # Write the select filter to a temp file because:
-        # The select filter could be large and
-        # the maximum command line length for the CreateProcess function is 32767 characters
-        # https://devblogs.microsoft.com/oldnewthing/20031210-00/?p=41553
+        stream_selector = ["-map", f"0:{stream_specifier}"]
+        output_template = f"{sample_prefix}_{stream_specifier}_%06d{self.FRAME_EXT}"
         eqs = self.generate_binary_search(sorted(frame_indices))
@@ -246,6 +251,10 @@ class FFMPEG:
         else:
             delete = True
+        # Write the select filter to a temp file because:
+        # The select filter could be large and
+        # the maximum command line length for the CreateProcess function is 32767 characters
+        # https://devblogs.microsoft.com/oldnewthing/20031210-00/?p=41553
         with tempfile.NamedTemporaryFile(mode="w+", delete=delete) as select_file:
             try:
                 select_file.write(f"select={eqs}")
@@ -253,14 +262,14 @@ class FFMPEG:
                 # If not close, error "The process cannot access the file because it is being used by another process"
                 if not delete:
                     select_file.close()
-                cmd: T.List[str] = [
-                    # global options should be specified first
-                    *["-hide_banner", "-nostdin"],
-                    # input 0
+                cmd: list[str] = [
+                    # Global options should be specified first
+                    *["-hide_banner"],
+                    # Input 0
                     *["-i", str(video_path)],
-                    # select stream
+                    # Select stream
                     *stream_selector,
-                    # filter videos
+                    # Filter videos
                     *[
                         *["-filter_script:v", select_file.name],
                         # Each frame is passed with its timestamp from the demuxer to the muxer
@@ -268,8 +277,8 @@ class FFMPEG:
                         # vsync is deprecated by fps_mode,
                         # but fps_mode is not avaliable on some older versions ;(
                         # *[f"-fps_mode:{stream_specifier}", "passthrough"],
-                        # Set the number of video frames to output
-                        *[f"-frames:{stream_specifier}", str(len(frame_indices))],
+                        # Set the number of video frames to output (this is an optimization to let ffmpeg stop early)
+                        *["-frames:v", str(len(frame_indices))],
                         # Disabled because it doesn't always name the sample images as expected
                         # For example "select(n\,1)" we expected the first sample to be IMG_001.JPG
                         # but it could be IMG_005.JPG
@@ -277,15 +286,15 @@ class FFMPEG:
                         # If set to 1, expand the filename with pts from pkt->pts. Default value is 0.
                         # *["-frame_pts", "1"],
                     ],
-                    # video quality level (or the alias -q:v)
-                    *[f"-qscale:{stream_specifier}", "2"],
+                    # Video quality level (or the alias -q:v)
                     # -q:v=1 is the best quality but larger image sizes
                     # see https://stackoverflow.com/a/10234065
                     # *["-qscale:v", "1", "-qmin", "1"],
+                    *["-qscale:v", "2"],
                     # output
-                    ouput_template,
+                    output_template,
                 ]
-                self._run_ffmpeg(cmd)
+                self.run_ffmpeg_non_interactive(cmd)
             finally:
                 if not delete:
                     try:
@@ -293,45 +302,286 @@ class FFMPEG:
                     except FileNotFoundError:
                         pass
+    @classmethod
+    def sort_selected_samples(
+        cls,
+        sample_dir: Path,
+        video_path: Path,
+        selected_stream_specifiers: list[int | str] | None = None,
+    ) -> list[tuple[int, list[Path | None]]]:
+        """
+        Group extracted frame samples by frame index across multiple streams.
+        Groups frames so that the Nth group contains all frames from the selected
+        streams at frame index N, allowing synchronized access to multi-stream frames.
+        Args:
+            sample_dir: Directory containing extracted frame files
+            video_path: Original video file path (used to match frame filenames)
+            selected_stream_specifiers: List of stream specifiers to include in output.
+                                       Can contain integers (stream indices) or "v" (all video streams).
+                                       If None, defaults to ["v"]
+        Returns:
+            List of tuples where each tuple contains:
+            - frame_idx (int): The frame index
+            - sample_paths (list[Path | None]): Paths to frame files from each selected stream,
+              or None if no frame exists for that stream at this index
+        Note:
+            Output is sorted by frame index in ascending order.
+        """
+        if selected_stream_specifiers is None:
+            selected_stream_specifiers = ["v"]
+        for stream_specifier in selected_stream_specifiers:
+            cls._validate_stream_specifier(stream_specifier)
+        stream_samples: dict[int, list[tuple[str, Path]]] = {}
+        for stream_specifier, frame_idx, sample_path in cls.iterate_samples(
+            sample_dir, video_path
+        ):
+            stream_samples.setdefault(frame_idx, []).append(
+                (str(stream_specifier), sample_path)
+            )
+        selected: list[tuple[int, list[Path | None]]] = []
+        for frame_idx in sorted(stream_samples.keys()):
+            indexed_by_specifier = {
+                specifier: sample_path
+                for specifier, sample_path in stream_samples[frame_idx]
+            }
+            selected_sample_paths = [
+                indexed_by_specifier.get(str(specifier))
+                for specifier in selected_stream_specifiers
+            ]
+            selected.append((frame_idx, selected_sample_paths))
+        return selected
+    @classmethod
+    def iterate_samples(
+        cls, sample_dir: Path, video_path: Path
+    ) -> T.Generator[tuple[str, int, Path], None, None]:
+        """
+        Iterate over all extracted frame samples in a directory.
+        Searches for frame files matching the expected naming pattern and yields
+        information about each frame including stream specifier, frame index, and file path.
+        Args:
+            sample_dir: Directory containing extracted frame files
+            video_path: Original video file path (used to match frame filenames)
+        Yields:
+            Tuple containing:
+            - stream_specifier (str): Stream specifier (number or "v")
+            - frame_idx (int): Frame index (0-based or 1-based depending on extraction method)
+            - sample_path (Path): Path to the frame image file
+        Note:
+            Expected filename pattern: {video_stem}_{stream_specifier}_{frame_idx:06d}.jpg
+            where stream_specifier can be a number or "v" for video streams.
+        """
+        sample_basename_pattern = re.compile(
+            rf"""
+            ^{re.escape(video_path.stem)}  # Match the video stem
+            _(?P<stream_specifier>\d+|v)   # Stream specifier can be a number or "v"
+            _(?P<frame_idx>\d+)$           # Frame index, can be 0-padded
+            """,
+            re.X,
+        )
+        for sample_path in sample_dir.iterdir():
+            result = cls._extract_stream_frame_idx(
+                sample_path.name, sample_basename_pattern
+            )
+            if result is not None:
+                stream_specifier, frame_idx = result
+                yield (stream_specifier, frame_idx, sample_path)
+    def run_ffmpeg_non_interactive(self, cmd: list[str]) -> None:
+        """
+        Execute ffmpeg command in non-interactive mode.
+        Runs ffmpeg with the given command arguments, automatically adding
+        the -nostdin flag to prevent interactive prompts.
+        Args:
+            cmd: List of command line arguments to pass to ffmpeg
+        Raises:
+            FFmpegNotFoundError: If ffmpeg binary is not found
+            FFmpegCalledProcessError: If ffmpeg command fails
+        """
+        full_cmd: list[str] = [self.ffmpeg_path, "-nostdin", *cmd]
+        LOG.info(f"Running ffmpeg: {' '.join(full_cmd)}")
+        try:
+            subprocess.run(full_cmd, check=True, stderr=self.stderr)
+        except FileNotFoundError:
+            raise FFmpegNotFoundError(
+                f'The ffmpeg command "{self.ffmpeg_path}" not found'
+            )
+        except subprocess.CalledProcessError as ex:
+            raise FFmpegCalledProcessError(ex) from ex
+    @classmethod
+    def _extract_stream_frame_idx(
+        cls, sample_basename: str, pattern: T.Pattern[str]
+    ) -> tuple[str, int] | None:
+        """
+        Extract stream specifier and frame index from sample basename
+        Returns:
+            If returning None, it means the basename does not match the pattern
+        Examples:
+            * basename GX010001_v_000000.jpg will extract ("v", 0)
+            * basename GX010001_1_000002.jpg will extract ("1", 2)
+        """
+        image_no_ext, ext = os.path.splitext(sample_basename)
+        if ext.lower() != cls.FRAME_EXT.lower():
+            return None
+        match = pattern.match(image_no_ext)
+        if not match:
+            return None
+        stream_specifier = match.group("stream_specifier")
+        # Convert 0-padded numbers to int
+        # e.g. 000000 -> 0
+        # e.g. 000001 -> 1
+        frame_idx_str = match.group("frame_idx")
+        frame_idx_str = frame_idx_str.lstrip("0") or "0"
+        try:
+            frame_idx = int(frame_idx_str)
+        except ValueError:
+            return None
+        return stream_specifier, frame_idx
+    def _run_ffprobe_json(self, cmd: list[str]) -> dict:
+        full_cmd: list[str] = [self.ffprobe_path, "-print_format", "json", *cmd]
+        LOG.info(f"Extracting video information: {' '.join(full_cmd)}")
+        try:
+            completed = subprocess.run(
+                full_cmd, check=True, stdout=subprocess.PIPE, stderr=self.stderr
+            )
+        except FileNotFoundError:
+            raise FFmpegNotFoundError(
+                f'The ffprobe command "{self.ffprobe_path}" not found'
+            )
+        except subprocess.CalledProcessError as ex:
+            raise FFmpegCalledProcessError(ex) from ex
+        try:
+            stdout = completed.stdout.decode("utf-8")
+        except UnicodeDecodeError:
+            raise RuntimeError(
+                f"Error decoding ffprobe output as unicode: {_truncate_end(str(completed.stdout))}"
+            )
+        try:
+            output = json.loads(stdout)
+        except json.JSONDecodeError:
+            raise RuntimeError(
+                f"Error JSON decoding ffprobe output: {_truncate_end(stdout)}"
+            )
+        # This check is for macOS:
+        # ffprobe -hide_banner -print_format json not_exists
+        # you will get exit code == 0 with the following stdout and stderr:
+        # {
+        # }
+        # not_exists: No such file or directory
+        if not output:
+            raise RuntimeError(
+                f"Empty JSON ffprobe output with STDERR: {_truncate_begin(str(completed.stderr))}"
+            )
+        return output
+    @classmethod
+    def _validate_stream_specifier(cls, stream_specifier: int | str) -> None:
+        if isinstance(stream_specifier, str):
+            if stream_specifier in ["v"]:
+                pass
+            else:
+                try:
+                    int(stream_specifier)
+                except ValueError:
+                    raise ValueError(f"Invalid stream specifier: {stream_specifier}")
 class Probe:
-    probe: ProbeOutput
+    probe_output: ProbeOutput
-    def __init__(self, probe: ProbeOutput) -> None:
-        self.probe = probe
+    def __init__(self, probe_output: ProbeOutput) -> None:
+        """
+        Initialize Probe with ffprobe output data.
-    def probe_video_start_time(self) -> T.Optional[datetime.datetime]:
+        Args:
+            probe_output: Dictionary containing streams and format information from ffprobe
         """
-        Find video start time of the given video.
-        It searches video creation time and duration in video streams first and then the other streams.
-        Once found, return stream creation time - stream duration as the video start time.
+        self.probe_output = probe_output
+    def probe_video_start_time(self) -> datetime.datetime | None:
         """
-        streams = self.probe.get("streams", [])
+        Determine the start time of the video by analyzing stream metadata.
+        Searches for creation time and duration information in video streams first,
+        then falls back to other stream types. Calculates start time as:
+        creation_time - duration
-        # search start time from video streams
+        Returns:
+            Video start time as datetime object, or None if cannot be determined
+        Note:
+            Prioritizes video streams with highest resolution when multiple exist.
+        """
+        streams = self.probe_output.get("streams", [])
+        # Search start time from video streams
         video_streams = self.probe_video_streams()
         video_streams.sort(
             key=lambda s: s.get("width", 0) * s.get("height", 0), reverse=True
         )
         for stream in video_streams:
-            start_time = extract_stream_start_time(stream)
+            start_time = self.extract_stream_start_time(stream)
             if start_time is not None:
                 return start_time
-        # search start time from the other streams
+        # Search start time from the other streams
         for stream in streams:
             if stream.get("codec_type") != "video":
-                start_time = extract_stream_start_time(stream)
+                start_time = self.extract_stream_start_time(stream)
                 if start_time is not None:
                     return start_time
         return None
-    def probe_video_streams(self) -> T.List[Stream]:
-        streams = self.probe.get("streams", [])
+    def probe_video_streams(self) -> list[Stream]:
+        """
+        Extract all video streams from the probe output.
+        Returns:
+            List of video stream dictionaries containing metadata like codec,
+            dimensions, frame rate, etc.
+        """
+        streams = self.probe_output.get("streams", [])
         return [stream for stream in streams if stream.get("codec_type") == "video"]
-    def probe_video_with_max_resolution(self) -> T.Optional[Stream]:
+    def probe_video_with_max_resolution(self) -> Stream | None:
+        """
+        Find the video stream with the highest resolution.
+        Sorts all video streams by width × height and returns the one with
+        the largest resolution.
+        Returns:
+            Stream dictionary for the highest resolution video stream,
+            or None if no video streams exist
+        """
         video_streams = self.probe_video_streams()
         video_streams.sort(
             key=lambda s: s.get("width", 0) * s.get("height", 0), reverse=True
@@ -340,112 +590,37 @@ class Probe:
             return None
         return video_streams[0]
+    @classmethod
+    def extract_stream_start_time(cls, stream: Stream) -> datetime.datetime | None:
+        """
+        Calculate the start time of a specific stream.
-def extract_stream_start_time(stream: Stream) -> T.Optional[datetime.datetime]:
-    """
-    Find the start time of the given stream.
-    Start time is the creation time of the stream minus the duration of the stream.
-    """
-    duration_str = stream.get("duration")
-    LOG.debug("Extracted video duration: %s", duration_str)
-    if duration_str is None:
-        return None
-    duration = float(duration_str)
+        Determines start time by subtracting stream duration from creation time:
+        start_time = creation_time - duration
-    creation_time_str = stream.get("tags", {}).get("creation_time")
-    LOG.debug("Extracted video creation time: %s", creation_time_str)
-    if creation_time_str is None:
-        return None
-    try:
-        creation_time = datetime.datetime.fromisoformat(creation_time_str)
-    except ValueError:
-        creation_time = datetime.datetime.strptime(
-            creation_time_str, "%Y-%m-%dT%H:%M:%S.%f%z"
-        )
-    return creation_time - datetime.timedelta(seconds=duration)
-def _extract_stream_frame_idx(
-    sample_basename: str,
-    sample_basename_pattern: T.Pattern[str],
-) -> T.Optional[T.Tuple[T.Optional[int], int]]:
-    """
-    extract stream id and frame index from sample basename
-    e.g. basename GX010001_NA_000000.jpg will extract (None, 0)
-    e.g. basename GX010001_1_000002.jpg will extract (1, 2)
-    If returning None, it means the basename does not match the pattern
-    """
-    image_no_ext, ext = os.path.splitext(sample_basename)
-    if ext.lower() != FRAME_EXT.lower():
-        return None
+        Args:
+            stream: Stream dictionary containing metadata including tags and duration
-    match = sample_basename_pattern.match(image_no_ext)
-    if not match:
-        return None
+        Returns:
+            Stream start time as datetime object, or None if required metadata is missing
-    g1 = match.group("stream_idx")
-    try:
-        if g1 == NA_STREAM_IDX:
-            stream_idx = None
-        else:
-            stream_idx = int(g1)
-    except ValueError:
-        return None
-    # convert 0-padded numbers to int
-    # e.g. 000000 -> 0
-    # e.g. 000001 -> 1
-    g2 = match.group("frame_idx")
-    g2 = g2.lstrip("0") or "0"
-    try:
-        frame_idx = int(g2)
-    except ValueError:
-        return None
+        Note:
+            Handles multiple datetime formats including ISO format and custom patterns.
+        """
+        duration_str = stream.get("duration")
+        LOG.debug("Extracted video duration: %s", duration_str)
+        if duration_str is None:
+            return None
+        duration = float(duration_str)
-    return stream_idx, frame_idx
-def iterate_samples(
-    sample_dir: Path, video_path: Path
-) -> T.Generator[T.Tuple[T.Optional[int], int, Path], None, None]:
-    """
-    Search all samples in the sample_dir,
-    and return a generator of the tuple: (stream ID, frame index, sample path).
-    The frame index could be 0-based or 1-based depending on how it's sampled.
-    """
-    sample_basename_pattern = re.compile(
-        rf"^{re.escape(video_path.stem)}_(?P<stream_idx>\d+|{re.escape(NA_STREAM_IDX)})_(?P<frame_idx>\d+)$"
-    )
-    for sample_path in sample_dir.iterdir():
-        stream_frame_idx = _extract_stream_frame_idx(
-            sample_path.name,
-            sample_basename_pattern,
-        )
-        if stream_frame_idx is not None:
-            stream_idx, frame_idx = stream_frame_idx
-            yield (stream_idx, frame_idx, sample_path)
-def sort_selected_samples(
-    sample_dir: Path, video_path: Path, selected_stream_indices: T.List[T.Optional[int]]
-) -> T.List[T.Tuple[int, T.List[T.Optional[Path]]]]:
-    """
-    Group frames by frame index, so that
-    the Nth group contains all the frames from the selected streams at frame index N.
-    """
-    stream_samples: T.Dict[int, T.List[T.Tuple[T.Optional[int], Path]]] = {}
-    for stream_idx, frame_idx, sample_path in iterate_samples(sample_dir, video_path):
-        stream_samples.setdefault(frame_idx, []).append((stream_idx, sample_path))
-    selected: T.List[T.Tuple[int, T.List[T.Optional[Path]]]] = []
-    for frame_idx in sorted(stream_samples.keys()):
-        indexed = {
-            stream_idx: sample_path
-            for stream_idx, sample_path in stream_samples[frame_idx]
-        }
-        selected_sample_paths = [
-            indexed.get(stream_idx) for stream_idx in selected_stream_indices
-        ]
-        selected.append((frame_idx, selected_sample_paths))
-    return selected
+        creation_time_str = stream.get("tags", {}).get("creation_time")
+        LOG.debug("Extracted video creation time: %s", creation_time_str)
+        if creation_time_str is None:
+            return None
+        try:
+            creation_time = datetime.datetime.fromisoformat(creation_time_str)
+        except ValueError:
+            creation_time = datetime.datetime.strptime(
+                creation_time_str, "%Y-%m-%dT%H:%M:%S.%f%z"
+            )
+        return creation_time - datetime.timedelta(seconds=duration)

mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl

mapillary-tools 0.13.3a1py3-none-any.whl → 0.14.0py3-none-any.whl