mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +287 -22
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +17 -8
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +19 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +408 -416
  61. mapillary_tools/upload_api_v4.py +172 -174
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
@@ -1,129 +1,114 @@
- import enum
+ from __future__ import annotations
+
  import io
- import json
- import logging
  import os
  import random
+ import sys
  import typing as T
  import uuid
+ from pathlib import Path
+
+ if sys.version_info >= (3, 12):
+     from typing import override
+ else:
+     from typing_extensions import override
+
+ import tempfile

  import requests

- from .api_v4 import MAPILLARY_GRAPH_API_ENDPOINT, request_get, request_post
+ from .api_v4 import (
+     HTTPContentError,
+     jsonify_response,
+     request_get,
+     request_post,
+     REQUESTS_TIMEOUT,
+ )

- LOG = logging.getLogger(__name__)
  MAPILLARY_UPLOAD_ENDPOINT = os.getenv(
      "MAPILLARY_UPLOAD_ENDPOINT", "https://rupload.facebook.com/mapillary_public_uploads"
  )
- DEFAULT_CHUNK_SIZE = 1024 * 1024 * 16  # 16MB
- # According to the docs, UPLOAD_REQUESTS_TIMEOUT can be a tuple of
- # (connection_timeout, read_timeout): https://requests.readthedocs.io/en/latest/user/advanced/#timeouts
- # In my test, however, the connection_timeout rules both connection timeout and read timeout.
- # i.e. if your the server does not respond within this timeout, it will throw:
- # ConnectionError: ('Connection aborted.', timeout('The write operation timed out'))
- # So let us make sure the largest possible chunks can be uploaded before this timeout for now,
- REQUESTS_TIMEOUT = (20, 20)  # 20 seconds
- UPLOAD_REQUESTS_TIMEOUT = (30 * 60, 30 * 60)  # 30 minutes
-
-
- class ClusterFileType(enum.Enum):
-     ZIP = "zip"
-     BLACKVUE = "mly_blackvue_video"
-     CAMM = "mly_camm_video"
-
-
- def _sanitize_headers(headers: T.Dict):
-     return {
-         k: v
-         for k, v in headers.items()
-         if k.lower() not in ["authorization", "cookie", "x-fb-access-token"]
-     }
-
-
- _S = T.TypeVar("_S", str, bytes)
-
-
- def _truncate_end(s: _S) -> _S:
-     MAX_LENGTH = 512
-     if MAX_LENGTH < len(s):
-         if isinstance(s, bytes):
-             return s[:MAX_LENGTH] + b"..."
-         else:
-             return str(s[:MAX_LENGTH]) + "..."
-     else:
-         return s


  class UploadService:
+     """
+     Upload byte streams to the Upload Service.
+     """
+
      user_access_token: str
      session_key: str
-     callbacks: T.List[T.Callable[[bytes, T.Optional[requests.Response]], None]]
-     cluster_filetype: ClusterFileType
-     organization_id: T.Optional[T.Union[str, int]]
-     chunk_size: int
-
-     MIME_BY_CLUSTER_TYPE: T.Dict[ClusterFileType, str] = {
-         ClusterFileType.ZIP: "application/zip",
-         ClusterFileType.BLACKVUE: "video/mp4",
-         ClusterFileType.CAMM: "video/mp4",
-     }
-
-     def __init__(
-         self,
-         user_access_token: str,
-         session_key: str,
-         organization_id: T.Optional[T.Union[str, int]] = None,
-         cluster_filetype: ClusterFileType = ClusterFileType.ZIP,
-         chunk_size: int = DEFAULT_CHUNK_SIZE,
-     ):
-         if chunk_size <= 0:
-             raise ValueError("Expect positive chunk size")

+     def __init__(self, user_access_token: str, session_key: str):
          self.user_access_token = user_access_token
          self.session_key = session_key
-         self.organization_id = organization_id
-         # validate the input
-         self.cluster_filetype = ClusterFileType(cluster_filetype)
-         self.callbacks = []
-         self.chunk_size = chunk_size

      def fetch_offset(self) -> int:
          headers = {
              "Authorization": f"OAuth {self.user_access_token}",
          }
          url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
-         LOG.debug("GET %s", url)
-         resp = request_get(
-             url,
-             headers=headers,
-             timeout=REQUESTS_TIMEOUT,
-         )
-         LOG.debug("HTTP response %s: %s", resp.status_code, resp.content)
+         resp = request_get(url, headers=headers, timeout=REQUESTS_TIMEOUT)
+
          resp.raise_for_status()
-         data = resp.json()
-         return data["offset"]

-     def upload(
-         self,
-         data: T.IO[bytes],
-         offset: T.Optional[int] = None,
-     ) -> str:
-         chunks = self._chunkize_byte_stream(data)
-         return self.upload_chunks(chunks, offset=offset)
+         data = jsonify_response(resp)
+
+         try:
+             return data["offset"]
+         except KeyError:
+             raise HTTPContentError("Offset not found in the response", resp)

-     def _chunkize_byte_stream(
-         self, stream: T.IO[bytes]
+     @classmethod
+     def chunkize_byte_stream(
+         cls, stream: T.IO[bytes], chunk_size: int
      ) -> T.Generator[bytes, None, None]:
+         """
+         Chunkize a byte stream into chunks of the specified size.
+
+         >>> list(UploadService.chunkize_byte_stream(io.BytesIO(b"foo"), 1))
+         [b'f', b'o', b'o']
+
+         >>> list(UploadService.chunkize_byte_stream(io.BytesIO(b"foo"), 10))
+         [b'foo']
+         """
+
+         if chunk_size <= 0:
+             raise ValueError("Expect positive chunk size")
+
          while True:
-             data = stream.read(self.chunk_size)
+             data = stream.read(chunk_size)
              if not data:
                  break
              yield data

-     def _offset_chunks(
-         self, chunks: T.Iterable[bytes], offset: int
+     @classmethod
+     def shift_chunks(
+         cls, chunks: T.Iterable[bytes], offset: int
      ) -> T.Generator[bytes, None, None]:
-         assert offset >= 0, f"Expect non-negative offset but got {offset}"
+         """
+         Shift the chunks by the offset.
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 0))
+         [b'foo', b'bar']
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 1))
+         [b'oo', b'bar']
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 3))
+         [b'bar']
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 6))
+         []
+
+         >>> list(UploadService.shift_chunks([b"foo", b"bar"], 7))
+         []
+
+         >>> list(UploadService.shift_chunks([], 0))
+         []
+         """
+
+         if offset < 0:
+             raise ValueError(f"Expect non-negative offset but got {offset}")

          for chunk in chunks:
              if offset:
@@ -135,128 +120,141 @@ class UploadService:
              else:
                  yield chunk

-     def _attach_callbacks(
-         self, chunks: T.Iterable[bytes]
-     ) -> T.Generator[bytes, None, None]:
-         for chunk in chunks:
-             yield chunk
-             for callback in self.callbacks:
-                 callback(chunk, None)
+     def upload_byte_stream(
+         self,
+         stream: T.IO[bytes],
+         offset: int | None = None,
+         chunk_size: int = 2 * 1024 * 1024,  # 2MB
+         read_timeout: float | None = None,
+     ) -> str:
+         if offset is None:
+             offset = self.fetch_offset()
+         return self.upload_chunks(
+             self.chunkize_byte_stream(stream, chunk_size),
+             offset,
+             read_timeout=read_timeout,
+         )

      def upload_chunks(
          self,
          chunks: T.Iterable[bytes],
-         offset: T.Optional[int] = None,
+         offset: int | None = None,
+         read_timeout: float | None = None,
      ) -> str:
          if offset is None:
              offset = self.fetch_offset()
+         shifted_chunks = self.shift_chunks(chunks, offset)
+         return self.upload_shifted_chunks(
+             shifted_chunks, offset, read_timeout=read_timeout
+         )

-         chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+     def upload_shifted_chunks(
+         self,
+         shifted_chunks: T.Iterable[bytes],
+         offset: int,
+         read_timeout: float | None = None,
+     ) -> str:
+         """
+         Upload the chunks that must already be shifted by the offset (e.g. fp.seek(offset, io.SEEK_SET))
+         """

          headers = {
              "Authorization": f"OAuth {self.user_access_token}",
              "Offset": f"{offset}",
              "X-Entity-Name": self.session_key,
-             "X-Entity-Type": self.MIME_BY_CLUSTER_TYPE[self.cluster_filetype],
          }
          url = f"{MAPILLARY_UPLOAD_ENDPOINT}/{self.session_key}"
-         LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers)))
          resp = request_post(
              url,
              headers=headers,
-             data=chunks,
-             timeout=UPLOAD_REQUESTS_TIMEOUT,
+             data=shifted_chunks,
+             timeout=(REQUESTS_TIMEOUT, read_timeout),
          )
-         LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content))
-
-         payload = resp.json()
-         try:
-             return payload["h"]
-         except KeyError:
-             raise RuntimeError(
-                 f"Upload server error: File handle not found in the upload response {resp.text}"
-             )
-
-     def finish(self, file_handle: str) -> str:
-         headers = {
-             "Authorization": f"OAuth {self.user_access_token}",
-         }
-         data: T.Dict[str, T.Union[str, int]] = {
-             "file_handle": file_handle,
-             "file_type": self.cluster_filetype.value,
-         }
-         if self.organization_id is not None:
-             data["organization_id"] = self.organization_id
-
-         url = f"{MAPILLARY_GRAPH_API_ENDPOINT}/finish_upload"
-
-         LOG.debug("POST %s HEADERS %s", url, json.dumps(_sanitize_headers(headers)))
-         resp = request_post(
-             url,
-             headers=headers,
-             json=data,
-             timeout=REQUESTS_TIMEOUT,
-         )
-         LOG.debug("HTTP response %s: %s", resp.status_code, _truncate_end(resp.content))

          resp.raise_for_status()

-         data = resp.json()
-
-         cluster_id = data.get("cluster_id")
-         if cluster_id is None:
-             raise RuntimeError(
-                 f"Upload server error: failed to create the cluster {resp.text}"
-             )
+         data = jsonify_response(resp)

-         return T.cast(str, cluster_id)
+         try:
+             return data["h"]
+         except KeyError:
+             raise HTTPContentError("File handle not found in the response", resp)


  # A mock class for testing only
  class FakeUploadService(UploadService):
-     def __init__(self, *args, **kwargs):
+     """
+     A mock upload service that simulates the upload process for testing purposes.
+     It writes the uploaded data to a file in a temporary directory and generates a fake file handle.
+     """
+
+     FILE_HANDLE_DIR: str = "file_handles"
+
+     def __init__(
+         self,
+         *args,
+         upload_path: Path | None = None,
+         transient_error_ratio: float = 0.0,
+         **kwargs,
+     ):
          super().__init__(*args, **kwargs)
-         self._upload_path = os.getenv(
-             "MAPILLARY_UPLOAD_PATH", "mapillary_public_uploads"
-         )
-         self._error_ratio = 0.1
+         if upload_path is None:
+             upload_path = Path(tempfile.gettempdir()).joinpath(
+                 "mapillary_public_uploads"
+             )
+         self._upload_path = upload_path
+         self._transient_error_ratio = transient_error_ratio

-     def upload_chunks(
+     @override
+     def upload_shifted_chunks(
          self,
-         chunks: T.Iterable[bytes],
-         offset: T.Optional[int] = None,
+         shifted_chunks: T.Iterable[bytes],
+         offset: int,
+         read_timeout: float | None = None,
      ) -> str:
-         if offset is None:
-             offset = self.fetch_offset()
-
-         chunks = self._attach_callbacks(self._offset_chunks(chunks, offset))
+         expected_offset = self.fetch_offset()
+         if offset != expected_offset:
+             raise ValueError(
+                 f"Expect offset {expected_offset} but got {offset} for session {self.session_key}"
+             )

          os.makedirs(self._upload_path, exist_ok=True)
-         filename = os.path.join(self._upload_path, self.session_key)
-         with open(filename, "ab") as fp:
-             for chunk in chunks:
-                 if random.random() <= self._error_ratio:
-                     raise requests.ConnectionError(
-                         f"TEST ONLY: Failed to upload with error ratio {self._error_ratio}"
-                     )
+         filename = self._upload_path.joinpath(self.session_key)
+         with filename.open("ab") as fp:
+             for chunk in shifted_chunks:
+                 self._randomly_raise_transient_error()
                  fp.write(chunk)
-                 if random.random() <= self._error_ratio:
-                     raise requests.ConnectionError(
-                         f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
-                     )
-         return uuid.uuid4().hex
+                 self._randomly_raise_transient_error()
+
+         file_handle_dir = self._upload_path.joinpath(self.FILE_HANDLE_DIR)
+         file_handle_path = file_handle_dir.joinpath(self.session_key)
+         if not file_handle_path.exists():
+             os.makedirs(file_handle_dir, exist_ok=True)
+             random_file_handle = uuid.uuid4().hex
+             file_handle_path.write_text(random_file_handle)

-     def finish(self, _: str) -> str:
-         return "0"
+         return file_handle_path.read_text()

+     @override
      def fetch_offset(self) -> int:
-         if random.random() <= self._error_ratio:
-             raise requests.ConnectionError(
-                 f"TEST ONLY: Partially uploaded with error ratio {self._error_ratio}"
-             )
-         filename = os.path.join(self._upload_path, self.session_key)
-         if not os.path.exists(filename):
+         self._randomly_raise_transient_error()
+         filename = self._upload_path.joinpath(self.session_key)
+         if not filename.exists():
              return 0
          with open(filename, "rb") as fp:
              fp.seek(0, io.SEEK_END)
             return fp.tell()
+
+     @property
+     def upload_path(self) -> Path:
+         return self._upload_path
+
+     def _randomly_raise_transient_error(self):
+         """
+         Randomly raise a transient error based on the configured error ratio.
+         This is for testing purposes only.
+         """
+         if random.random() <= self._transient_error_ratio:
+             raise requests.ConnectionError(
+                 f"[TEST ONLY]: Transient error with ratio {self._transient_error_ratio}"
+             )
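
The reworked API above makes resumable uploads a short loop: fetch_offset() reports how many bytes the server already holds, and upload_byte_stream() skips past those bytes (via shift_chunks) before POSTing the remainder. Below is a minimal sketch of that loop, run against FakeUploadService so it needs no network; the constructor parameters and method signatures come from the diff above, while the token and session key are placeholder values.

    import io

    import requests

    from mapillary_tools.upload_api_v4 import FakeUploadService

    # Placeholder credentials: FakeUploadService appends to a file under the
    # temp directory instead of contacting the upload endpoint.
    service = FakeUploadService(
        "fake-user-access-token",
        "test_session_key",
        transient_error_ratio=0.3,  # make roughly 30% of operations fail
    )

    payload = b"0123456789" * 100_000  # ~1MB

    # Retry until the whole payload is persisted. Each attempt asks
    # fetch_offset() where the previous one stopped and resumes from there,
    # so no byte is uploaded twice.
    while True:
        try:
            file_handle = service.upload_byte_stream(io.BytesIO(payload))
            break
        except requests.ConnectionError:
            continue  # transient error; resume on the next attempt

    assert service.fetch_offset() == len(payload)
    print("uploaded; file handle:", file_handle)

The same loop should work against the real service unchanged, since UploadService.upload_byte_stream() has an identical signature; only construction differs (the real class takes just the user access token and session key).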