mapillary-tools 0.14.0a2__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff shows the content changes between two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
Files changed (49)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +66 -262
  3. mapillary_tools/authenticate.py +54 -46
  4. mapillary_tools/blackvue_parser.py +79 -22
  5. mapillary_tools/commands/__main__.py +15 -16
  6. mapillary_tools/commands/upload.py +33 -4
  7. mapillary_tools/config.py +38 -17
  8. mapillary_tools/constants.py +127 -43
  9. mapillary_tools/exceptions.py +4 -0
  10. mapillary_tools/exif_read.py +2 -1
  11. mapillary_tools/exif_write.py +3 -1
  12. mapillary_tools/exiftool_read_video.py +52 -15
  13. mapillary_tools/exiftool_runner.py +4 -24
  14. mapillary_tools/ffmpeg.py +406 -232
  15. mapillary_tools/geo.py +16 -0
  16. mapillary_tools/geotag/__init__.py +0 -0
  17. mapillary_tools/geotag/base.py +8 -4
  18. mapillary_tools/geotag/factory.py +106 -89
  19. mapillary_tools/geotag/geotag_images_from_exiftool.py +27 -20
  20. mapillary_tools/geotag/geotag_images_from_gpx.py +7 -6
  21. mapillary_tools/geotag/geotag_images_from_video.py +35 -0
  22. mapillary_tools/geotag/geotag_videos_from_exiftool.py +61 -14
  23. mapillary_tools/geotag/geotag_videos_from_gpx.py +22 -9
  24. mapillary_tools/geotag/options.py +25 -3
  25. mapillary_tools/geotag/utils.py +9 -12
  26. mapillary_tools/geotag/video_extractors/base.py +1 -1
  27. mapillary_tools/geotag/video_extractors/exiftool.py +1 -1
  28. mapillary_tools/geotag/video_extractors/gpx.py +61 -70
  29. mapillary_tools/geotag/video_extractors/native.py +34 -31
  30. mapillary_tools/history.py +128 -8
  31. mapillary_tools/http.py +211 -0
  32. mapillary_tools/mp4/construct_mp4_parser.py +8 -2
  33. mapillary_tools/process_geotag_properties.py +47 -35
  34. mapillary_tools/process_sequence_properties.py +340 -325
  35. mapillary_tools/sample_video.py +8 -8
  36. mapillary_tools/serializer/description.py +587 -0
  37. mapillary_tools/serializer/gpx.py +132 -0
  38. mapillary_tools/types.py +44 -610
  39. mapillary_tools/upload.py +327 -352
  40. mapillary_tools/upload_api_v4.py +125 -72
  41. mapillary_tools/uploader.py +797 -216
  42. mapillary_tools/utils.py +57 -5
  43. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/METADATA +91 -34
  44. mapillary_tools-0.14.1.dist-info/RECORD +76 -0
  45. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/WHEEL +1 -1
  46. mapillary_tools-0.14.0a2.dist-info/RECORD +0 -72
  47. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/entry_points.txt +0 -0
  48. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/licenses/LICENSE +0 -0
  49. {mapillary_tools-0.14.0a2.dist-info → mapillary_tools-0.14.1.dist-info}/top_level.txt +0 -0
mapillary_tools/history.py
@@ -1,12 +1,25 @@
  from __future__ import annotations

+ import contextlib
+ import dbm
  import json
  import logging
  import string
+ import threading
+ import time
  import typing as T
  from pathlib import Path

+ # dbm modules are dynamically imported, so here we explicitly import dbm.sqlite3 to make sure pyinstaller include it
+ # Otherwise you will see: ImportError: no dbm clone found; tried ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
+ try:
+     import dbm.sqlite3  # type: ignore
+ except ImportError:
+     pass
+
+
  from . import constants, types
+ from .serializer.description import DescriptionJSONSerializer

  JSONDict = T.Dict[str, T.Union[str, int, float, None]]

@@ -35,10 +48,21 @@ def history_desc_path(md5sum: str) -> Path:
      )


- def is_uploaded(md5sum: str) -> bool:
+ def read_history_record(md5sum: str) -> None | T.Dict[str, T.Any]:
      if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
-         return False
-     return history_desc_path(md5sum).is_file()
+         return None
+
+     path = history_desc_path(md5sum)
+
+     if not path.is_file():
+         return None
+
+     with path.open("r") as fp:
+         try:
+             return json.load(fp)
+         except json.JSONDecodeError as ex:
+             LOG.error(f"Failed to read upload history {path}: {ex}")
+             return None


  def write_history(
@@ -52,11 +76,107 @@ def write_history(
      path = history_desc_path(md5sum)
      LOG.debug("Writing upload history: %s", path)
      path.resolve().parent.mkdir(parents=True, exist_ok=True)
-     history: dict[str, T.Any] = {
-         "params": params,
-         "summary": summary,
-     }
+     history: dict[str, T.Any] = {"params": params, "summary": summary}
      if metadatas is not None:
-         history["descs"] = [types.as_desc(metadata) for metadata in metadatas]
+         history["descs"] = [
+             DescriptionJSONSerializer.as_desc(metadata) for metadata in metadatas
+         ]
      with open(path, "w") as fp:
          fp.write(json.dumps(history))
+
+
+ class PersistentCache:
+     _lock: contextlib.nullcontext | threading.Lock
+
+     def __init__(self, file: str):
+         # SQLite3 backend supports concurrent access without a lock
+         if dbm.whichdb(file) == "dbm.sqlite3":
+             self._lock = contextlib.nullcontext()
+         else:
+             self._lock = threading.Lock()
+         self._file = file
+
+     def get(self, key: str) -> str | None:
+         s = time.perf_counter()
+
+         with self._lock:
+             with dbm.open(self._file, flag="c") as db:
+                 value: bytes | None = db.get(key)
+
+         if value is None:
+             return None
+
+         payload = self._decode(value)
+
+         if self._is_expired(payload):
+             return None
+
+         file_handle = payload.get("file_handle")
+
+         LOG.debug(
+             f"Found file handle for {key} in cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
+         )
+
+         return T.cast(str, file_handle)
+
+     def set(self, key: str, file_handle: str, expires_in: int = 3600 * 24 * 2) -> None:
+         s = time.perf_counter()
+
+         payload = {
+             "expires_at": time.time() + expires_in,
+             "file_handle": file_handle,
+         }
+
+         value: bytes = json.dumps(payload).encode("utf-8")
+
+         with self._lock:
+             with dbm.open(self._file, flag="c") as db:
+                 db[key] = value
+
+         LOG.debug(
+             f"Cached file handle for {key} ({(time.perf_counter() - s) * 1000:.0f} ms)"
+         )
+
+     def clear_expired(self) -> list[str]:
+         s = time.perf_counter()
+
+         expired_keys: list[str] = []
+
+         with self._lock:
+             with dbm.open(self._file, flag="c") as db:
+                 if hasattr(db, "items"):
+                     items: T.Iterable[tuple[str | bytes, bytes]] = db.items()
+                 else:
+                     items = ((key, db[key]) for key in db.keys())
+
+                 for key, value in items:
+                     payload = self._decode(value)
+                     if self._is_expired(payload):
+                         del db[key]
+                         expired_keys.append(T.cast(str, key))
+
+         if expired_keys:
+             LOG.debug(
+                 f"Cleared {len(expired_keys)} expired entries from the cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
+             )
+
+         return expired_keys
+
+     def _is_expired(self, payload: JSONDict) -> bool:
+         expires_at = payload.get("expires_at")
+         if isinstance(expires_at, (int, float)):
+             return expires_at is None or expires_at <= time.time()
+         return False
+
+     def _decode(self, value: bytes) -> JSONDict:
+         try:
+             payload = json.loads(value.decode("utf-8"))
+         except json.JSONDecodeError as ex:
+             LOG.warning(f"Failed to decode cache value: {ex}")
+             return {}
+
+         if not isinstance(payload, dict):
+             LOG.warning(f"Invalid cache value format: {payload}")
+             return {}
+
+         return payload
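
A minimal usage sketch of the new PersistentCache above, assuming an installed mapillary_tools; the cache file name and key below are illustrative, not the values the tool itself uses:

from mapillary_tools.history import PersistentCache

cache = PersistentCache("upload_cache.db")  # backing dbm file is created on first use (flag="c")
cache.set("md5:0123abcd", "FILE_HANDLE_FROM_UPLOAD_API", expires_in=3600)
print(cache.get("md5:0123abcd"))  # the cached file handle, or None once expired
print(cache.clear_expired())      # keys whose entries were dropped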
mapillary_tools/http.py
@@ -0,0 +1,211 @@
+ from __future__ import annotations
+
+ import logging
+
+ import ssl
+ import sys
+ import typing as T
+ from json import dumps
+
+ if sys.version_info >= (3, 12):
+     from typing import override
+ else:
+     from typing_extensions import override
+
+ import requests
+ from requests.adapters import HTTPAdapter
+
+
+ LOG = logging.getLogger(__name__)
+
+
+ class HTTPSystemCertsAdapter(HTTPAdapter):
+     """
+     This adapter uses the system's certificate store instead of the certifi module.
+
+     The implementation is based on the project https://pypi.org/project/pip-system-certs/,
+     which has a system-wide effect.
+     """
+
+     def init_poolmanager(self, *args, **kwargs):
+         ssl_context = ssl.create_default_context()
+         ssl_context.load_default_certs()
+         kwargs["ssl_context"] = ssl_context
+
+         super().init_poolmanager(*args, **kwargs)
+
+     def cert_verify(self, *args, **kwargs):
+         super().cert_verify(*args, **kwargs)
+
+         # By default Python requests uses the ca_certs from the certifi module
+         # But we want to use the certificate store instead.
+         # By clearing the ca_certs variable we force it to fall back on that behaviour (handled in urllib3)
+         if "conn" in kwargs:
+             conn = kwargs["conn"]
+         else:
+             conn = args[0]
+
+         conn.ca_certs = None
+
+
+ class Session(requests.Session):
+     # NOTE: This is a global flag that affects all Session instances
+     USE_SYSTEM_CERTS: T.ClassVar[bool] = False
+     # Instance variables
+     disable_logging_request: bool = False
+     disable_logging_response: bool = False
+     # Avoid mounting twice
+     _mounted: bool = False
+
+     @override
+     def request(self, method: str | bytes, url: str | bytes, *args, **kwargs):
+         self._log_debug_request(method, url, *args, **kwargs)
+
+         if Session.USE_SYSTEM_CERTS:
+             if not self._mounted:
+                 self.mount("https://", HTTPSystemCertsAdapter())
+                 self._mounted = True
+             resp = super().request(method, url, *args, **kwargs)
+         else:
+             try:
+                 resp = super().request(method, url, *args, **kwargs)
+             except requests.exceptions.SSLError as ex:
+                 if "SSLCertVerificationError" not in str(ex):
+                     raise ex
+                 Session.USE_SYSTEM_CERTS = True
+                 # HTTPSConnectionPool(host='graph.mapillary.com', port=443): Max retries exceeded with url: /login (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1018)')))
+                 LOG.warning(
+                     "SSL error occurred, falling back to system SSL certificates: %s",
+                     ex,
+                 )
+                 return self.request(method, url, *args, **kwargs)
+
+         self._log_debug_response(resp)
+
+         return resp
+
+     def _log_debug_request(self, method: str | bytes, url: str | bytes, **kwargs):
+         if self.disable_logging_request:
+             return
+
+         if not LOG.isEnabledFor(logging.DEBUG):
+             return
+
+         if isinstance(method, str) and isinstance(url, str):
+             msg = f"HTTP {method} {url}"
+         else:
+             msg = f"HTTP {method!r} {url!r}"
+
+         if Session.USE_SYSTEM_CERTS:
+             msg += " (w/sys_certs)"
+
+         json = kwargs.get("json")
+         if json is not None:
+             t = _truncate(dumps(_sanitize(json)))
+             msg += f" JSON={t}"
+
+         params = kwargs.get("params")
+         if params is not None:
+             msg += f" PARAMS={_sanitize(params)}"
+
+         headers = kwargs.get("headers")
+         if headers is not None:
+             msg += f" HEADERS={_sanitize(headers)}"
+
+         timeout = kwargs.get("timeout")
+         if timeout is not None:
+             msg += f" TIMEOUT={timeout}"
+
+         msg = msg.replace("\n", "\\n")
+
+         LOG.debug(msg)
+
+     def _log_debug_response(self, resp: requests.Response):
+         if self.disable_logging_response:
+             return
+
+         if not LOG.isEnabledFor(logging.DEBUG):
+             return
+
+         elapsed = resp.elapsed.total_seconds() * 1000  # Convert to milliseconds
+         msg = f"HTTP {resp.status_code} {resp.reason} ({elapsed:.0f} ms): {str(_truncate_response_content(resp))}"
+
+         LOG.debug(msg)
+
+
+ def readable_http_error(ex: requests.HTTPError) -> str:
+     return readable_http_response(ex.response)
+
+
+ def readable_http_response(resp: requests.Response) -> str:
+     return f"{resp.request.method} {resp.url} => {resp.status_code} {resp.reason}: {str(_truncate_response_content(resp))}"
+
+
+ @T.overload
+ def _truncate(s: bytes, limit: int = 256) -> bytes | str: ...
+
+
+ @T.overload
+ def _truncate(s: str, limit: int = 256) -> str: ...
+
+
+ def _truncate(s, limit=256):
+     if limit < len(s):
+         if isinstance(s, bytes):
+             try:
+                 s = s.decode("utf-8")
+             except UnicodeDecodeError:
+                 pass
+         remaining = len(s) - limit
+         if isinstance(s, bytes):
+             return s[:limit] + f"...({remaining} bytes truncated)".encode("utf-8")
+         else:
+             return str(s[:limit]) + f"...({remaining} chars truncated)"
+     else:
+         return s
+
+
+ def _sanitize(headers: T.Mapping[T.Any, T.Any]) -> T.Mapping[T.Any, T.Any]:
+     new_headers = {}
+
+     for k, v in headers.items():
+         if k.lower() in [
+             "authorization",
+             "cookie",
+             "x-fb-access-token",
+             "access-token",
+             "access_token",
+             "password",
+             "user_upload_token",
+         ]:
+             new_headers[k] = "[REDACTED]"
+         else:
+             if isinstance(v, (str, bytes)):
+                 new_headers[k] = T.cast(T.Any, _truncate(v))
+             else:
+                 new_headers[k] = v
+
+     return new_headers
+
+
+ def _truncate_response_content(resp: requests.Response) -> str | bytes:
+     try:
+         json_data = resp.json()
+     except requests.JSONDecodeError:
+         if resp.content is not None:
+             data = _truncate(resp.content)
+         else:
+             data = ""
+     else:
+         if isinstance(json_data, dict):
+             data = _truncate(dumps(_sanitize(json_data)))
+         else:
+             data = _truncate(str(json_data))
+
+     if isinstance(data, bytes):
+         return data.replace(b"\n", b"\\n")
+
+     elif isinstance(data, str):
+         return data.replace("\n", "\\n")
+
+     return data
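
A rough sketch of how the new Session and helpers above can be exercised, assuming an installed mapillary_tools; the URL is only an example endpoint:

import logging

from mapillary_tools.http import Session, readable_http_response

logging.basicConfig(level=logging.DEBUG)  # request/response lines are emitted at DEBUG level

session = Session()
# On an SSLCertVerificationError the request is retried against the system
# certificate store and Session.USE_SYSTEM_CERTS stays set for all Session instances.
resp = session.get("https://graph.mapillary.com/", timeout=10)
print(readable_http_response(resp))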
mapillary_tools/mp4/construct_mp4_parser.py
@@ -370,6 +370,10 @@ BoxHeader64 = C.Struct(
  SwitchMapType = T.Dict[BoxType, T.Union[C.Construct, "SwitchMapType"]]


+ class BoxNotFoundError(Exception):
+     pass
+
+
  class Box64ConstructBuilder:
      """
      Build a box struct that **parses** MP4 boxes with both 32-bit and 64-bit sizes.
@@ -567,7 +571,9 @@ def _new_cmap_without_boxes(
  # pyre-ignore[9]: pyre does not support recursive type SwitchMapType
  MP4_WITHOUT_STBL_CMAP: SwitchMapType = {
      # pyre-ignore[6]: pyre does not support recursive type SwitchMapType
-     b"moov": _new_cmap_without_boxes(CMAP[b"moov"], [b"stbl"]),
+     b"moov": _new_cmap_without_boxes(
+         CMAP[b"moov"], T.cast(T.Sequence[BoxType], [b"stbl"])
+     ),
  }

  # for parsing mp4 only
@@ -589,7 +595,7 @@ def find_box_at_pathx(
  ) -> BoxDict:
      found = find_box_at_path(box, path)
      if found is None:
-         raise ValueError(f"box at path {path} not found")
+         raise BoxNotFoundError(f"box at path {path} not found")
      return found


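A small sketch of what the new BoxNotFoundError means for callers of find_box_at_pathx: the class derives from Exception directly (see the hunk above), so an existing "except ValueError" no longer catches it. The raise below only imitates the parser's message; no MP4 is parsed here:

from mapillary_tools.mp4.construct_mp4_parser import BoxNotFoundError

assert not issubclass(BoxNotFoundError, ValueError)

try:
    raise BoxNotFoundError("box at path [b'moov'] not found")
except BoxNotFoundError as ex:
    print(ex)
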
mapillary_tools/process_geotag_properties.py
@@ -1,12 +1,11 @@
  from __future__ import annotations

- import collections
  import datetime
- import json
  import logging
  import typing as T
  from pathlib import Path

+ import humanize
  from tqdm import tqdm

  from . import constants, exceptions, exif_write, types, utils
@@ -17,6 +16,11 @@ from .geotag.options import (
      SourcePathOption,
      SourceType,
  )
+ from .serializer.description import (
+     DescriptionJSONSerializer,
+     validate_and_fail_metadata,
+ )
+ from .serializer.gpx import GPXSerializer

  LOG = logging.getLogger(__name__)
  DEFAULT_GEOTAG_SOURCE_OPTIONS = [
@@ -42,8 +46,10 @@ def _parse_source_options(
  ) -> list[SourceOption]:
      parsed_options: list[SourceOption] = []

-     for s in geotag_source:
-         parsed_options.extend(parse_source_option(s))
+     if video_geotag_source and geotag_source:
+         LOG.warning(
+             "Video source options will be processed BEFORE the generic source options"
+         )

      for s in video_geotag_source:
          for video_option in parse_source_option(s):
@@ -52,6 +58,9 @@
              )
              parsed_options.append(video_option)

+     for s in geotag_source:
+         parsed_options.extend(parse_source_option(s))
+
      if geotag_source_path is not None:
          for parsed_option in parsed_options:
              if parsed_option.source_path is None:
@@ -163,7 +172,7 @@ def _overwrite_exif_tags(
          metadatas,
          desc="Overwriting EXIF",
          unit="images",
-         disable=LOG.getEffectiveLevel() <= logging.DEBUG,
+         disable=LOG.isEnabledFor(logging.DEBUG),
      ):
          dt = datetime.datetime.fromtimestamp(metadata.time, datetime.timezone.utc)
          dt = dt.replace(tzinfo=datetime.timezone.utc)
@@ -200,25 +209,33 @@ def _write_metadatas(
      desc_path: str,
  ) -> None:
      if desc_path == "-":
-         descs = [types.as_desc(metadata) for metadata in metadatas]
-         print(json.dumps(descs, indent=2))
+         descs = DescriptionJSONSerializer.serialize(metadatas)
+         print(descs.decode("utf-8"))
      else:
-         descs = [types.as_desc(metadata) for metadata in metadatas]
-         with open(desc_path, "w") as fp:
-             json.dump(descs, fp)
+         normalized_suffix = Path(desc_path).suffix.strip().lower()
+         if normalized_suffix in [".gpx"]:
+             descs = GPXSerializer.serialize(metadatas)
+         else:
+             descs = DescriptionJSONSerializer.serialize(metadatas)
+         with open(desc_path, "wb") as fp:
+             fp.write(descs)
          LOG.info("Check the description file for details: %s", desc_path)


- def _is_error_skipped(error_type: str, skipped_process_errors: set[T.Type[Exception]]):
-     skipped_process_error_names = set(err.__name__ for err in skipped_process_errors)
-     skip_all = Exception in skipped_process_errors
-     return skip_all or error_type in skipped_process_error_names
+ def _is_error_skipped(
+     error_type: type[Exception], skipped_process_errors: set[type[Exception]]
+ ):
+     return (Exception in skipped_process_errors) or (
+         error_type in skipped_process_errors
+     )


  def _show_stats(
      metadatas: T.Sequence[types.MetadataOrError],
      skipped_process_errors: set[T.Type[Exception]],
  ) -> None:
+     LOG.info("==> Process summary")
+
      metadatas_by_filetype: dict[types.FileType, list[types.MetadataOrError]] = {}
      for metadata in metadatas:
          if isinstance(metadata, types.ImageMetadata):
@@ -234,9 +251,7 @@ def _show_stats(
          metadata
          for metadata in metadatas
          if isinstance(metadata, types.ErrorMetadata)
-         and not _is_error_skipped(
-             metadata.error.__class__.__name__, skipped_process_errors
-         )
+         and not _is_error_skipped(type(metadata.error), skipped_process_errors)
      ]
      if critical_error_metadatas:
          raise exceptions.MapillaryProcessError(
@@ -252,38 +267,35 @@ def _show_stats_per_filetype(
      good_metadatas: list[types.Metadata]
      good_metadatas, error_metadatas = types.separate_errors(metadatas)

-     filesize_to_upload = sum(
-         [0 if m.filesize is None else m.filesize for m in good_metadatas]
-     )
-
-     LOG.info("%8d %s(s) read in total", len(metadatas), filetype.value)
+     LOG.info(f"{len(metadatas)} {filetype.value} read in total")
      if good_metadatas:
+         total_filesize = sum(
+             [0 if m.filesize is None else m.filesize for m in good_metadatas]
+         )
          LOG.info(
-             "\t %8d %s(s) (%s MB) are ready to be uploaded",
-             len(good_metadatas),
-             filetype.value,
-             round(filesize_to_upload / 1024 / 1024, 1),
+             f"\t{len(good_metadatas)} ({humanize.naturalsize(total_filesize)}) ready"
          )

-     error_counter = collections.Counter(
-         metadata.error.__class__.__name__ for metadata in error_metadatas
-     )
+     errors_by_type: dict[type[Exception], list[types.ErrorMetadata]] = {}
+     for metadata in error_metadatas:
+         errors_by_type.setdefault(type(metadata.error), []).append(metadata)

-     for error_type, count in error_counter.items():
+     for error_type, errors in errors_by_type.items():
+         total_filesize = sum([utils.get_file_size_quietly(m.filename) for m in errors])
          if _is_error_skipped(error_type, skipped_process_errors):
              LOG.warning(
-                 "\t %8d %s(s) skipped due to %s", count, filetype.value, error_type
+                 f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
              )
          else:
              LOG.error(
-                 "\t %8d %s(s) failed due to %s", count, filetype.value, error_type
+                 f"\t{len(errors)} ({humanize.naturalsize(total_filesize)}) {error_type.__name__}"
              )


  def _validate_metadatas(
      metadatas: T.Collection[types.MetadataOrError], num_processes: int | None
  ) -> list[types.MetadataOrError]:
-     LOG.debug("Validating %d metadatas", len(metadatas))
+     LOG.info(f"==> Validating {len(metadatas)} metadatas...")

      # validating metadatas is slow, hence multiprocessing

@@ -293,7 +305,7 @@
      # See https://stackoverflow.com/a/61432070
      good_metadatas, error_metadatas = types.separate_errors(metadatas)
      map_results = utils.mp_map_maybe(
-         types.validate_and_fail_metadata,
+         validate_and_fail_metadata,
          T.cast(T.Iterable[types.Metadata], good_metadatas),
          num_processes=num_processes,
      )
@@ -308,7 +320,7 @@
              )
          )

-     return validated_metadatas + error_metadatas
+     return T.cast(list[types.MetadataOrError], validated_metadatas + error_metadatas)


  def process_finalize(
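
A standalone sketch of the new output dispatch in _write_metadatas shown above: a ".gpx" suffix on the description path now selects GPXSerializer, while "-" (stdout) and every other path keep the JSON serializer. pick_serializer is a hypothetical helper written only for this illustration:

from pathlib import Path

def pick_serializer(desc_path: str) -> str:
    if desc_path == "-":
        return "DescriptionJSONSerializer"  # stdout always gets JSON
    if Path(desc_path).suffix.strip().lower() in [".gpx"]:
        return "GPXSerializer"
    return "DescriptionJSONSerializer"

assert pick_serializer("captures/track.gpx") == "GPXSerializer"
assert pick_serializer("mapillary_image_description.json") == "DescriptionJSONSerializer"
assert pick_serializer("-") == "DescriptionJSONSerializer"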