mapillary-tools 0.14.2__py3-none-any.whl → 0.14.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mapillary_tools/__init__.py CHANGED
@@ -1 +1 @@
- VERSION = "0.14.2"
+ VERSION = "0.14.4"
mapillary_tools/history.py CHANGED
@@ -1,24 +1,17 @@
  from __future__ import annotations

- import contextlib
- import dbm
  import json
  import logging
+ import os
+ import sqlite3
  import string
  import threading
  import time
  import typing as T
+ from functools import wraps
  from pathlib import Path

- # dbm modules are dynamically imported, so here we explicitly import dbm.sqlite3 to make sure pyinstaller include it
- # Otherwise you will see: ImportError: no dbm clone found; tried ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
- try:
-     import dbm.sqlite3  # type: ignore
- except ImportError:
-     pass
-
-
- from . import constants, types
+ from . import constants, store, types
  from .serializer.description import DescriptionJSONSerializer

  JSONDict = T.Dict[str, T.Union[str, int, float, None]]
@@ -85,98 +78,140 @@ def write_history(
          fp.write(json.dumps(history))


+ def _retry_on_database_lock_error(fn):
+     """
+     Decorator to retry a function if it raises a sqlite3.OperationalError with
+     "database is locked" in the message.
+     """
+
+     @wraps(fn)
+     def wrapper(*args, **kwargs):
+         while True:
+             try:
+                 return fn(*args, **kwargs)
+             except sqlite3.OperationalError as ex:
+                 if "database is locked" in str(ex).lower():
+                     LOG.warning(f"{str(ex)}")
+                     LOG.info("Retrying in 1 second...")
+                     time.sleep(1)
+                 else:
+                     raise ex
+
+     return wrapper
+
+
  class PersistentCache:
-     _lock: contextlib.nullcontext | threading.Lock
+     _lock: threading.Lock

      def __init__(self, file: str):
-         # SQLite3 backend supports concurrent access without a lock
-         if dbm.whichdb(file) == "dbm.sqlite3":
-             self._lock = contextlib.nullcontext()
-         else:
-             self._lock = threading.Lock()
          self._file = file
+         self._lock = threading.Lock()

      def get(self, key: str) -> str | None:
+         if not self._db_existed():
+             return None
+
          s = time.perf_counter()

-         with self._lock:
-             with dbm.open(self._file, flag="c") as db:
-                 value: bytes | None = db.get(key)
+         with store.KeyValueStore(self._file, flag="r") as db:
+             try:
+                 raw_payload: bytes | None = db.get(key)  # data retrieved from db[key]
+             except Exception as ex:
+                 if self._table_not_found(ex):
+                     return None
+                 raise ex

-         if value is None:
+         if raw_payload is None:
              return None

-         payload = self._decode(value)
+         data: JSONDict = self._decode(raw_payload)  # JSON dict decoded from db[key]

-         if self._is_expired(payload):
+         if self._is_expired(data):
              return None

-         file_handle = payload.get("file_handle")
+         cached_value = data.get("value")  # value in the JSON dict decoded from db[key]

          LOG.debug(
              f"Found file handle for {key} in cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
          )

-         return T.cast(str, file_handle)
+         return T.cast(str, cached_value)

-     def set(self, key: str, file_handle: str, expires_in: int = 3600 * 24 * 2) -> None:
+     @_retry_on_database_lock_error
+     def set(self, key: str, value: str, expires_in: int = 3600 * 24 * 2) -> None:
          s = time.perf_counter()

-         payload = {
+         data = {
              "expires_at": time.time() + expires_in,
-             "file_handle": file_handle,
+             "value": value,
          }

-         value: bytes = json.dumps(payload).encode("utf-8")
+         payload: bytes = json.dumps(data).encode("utf-8")

          with self._lock:
-             with dbm.open(self._file, flag="c") as db:
-                 db[key] = value
+             with store.KeyValueStore(self._file, flag="c") as db:
+                 db[key] = payload

          LOG.debug(
              f"Cached file handle for {key} ({(time.perf_counter() - s) * 1000:.0f} ms)"
          )

+     @_retry_on_database_lock_error
      def clear_expired(self) -> list[str]:
-         s = time.perf_counter()
-
          expired_keys: list[str] = []

-         with self._lock:
-             with dbm.open(self._file, flag="c") as db:
-                 if hasattr(db, "items"):
-                     items: T.Iterable[tuple[str | bytes, bytes]] = db.items()
-                 else:
-                     items = ((key, db[key]) for key in db.keys())
+         s = time.perf_counter()

-                 for key, value in items:
-                     payload = self._decode(value)
-                     if self._is_expired(payload):
+         with self._lock:
+             with store.KeyValueStore(self._file, flag="c") as db:
+                 for key, raw_payload in db.items():
+                     data = self._decode(raw_payload)
+                     if self._is_expired(data):
                          del db[key]
                          expired_keys.append(T.cast(str, key))

-         if expired_keys:
-             LOG.debug(
-                 f"Cleared {len(expired_keys)} expired entries from the cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
-             )
+         LOG.debug(
+             f"Cleared {len(expired_keys)} expired entries from the cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
+         )

          return expired_keys

-     def _is_expired(self, payload: JSONDict) -> bool:
-         expires_at = payload.get("expires_at")
+     def keys(self) -> list[str]:
+         if not self._db_existed():
+             return []
+
+         try:
+             with store.KeyValueStore(self._file, flag="r") as db:
+                 return [key.decode("utf-8") for key in db.keys()]
+         except Exception as ex:
+             if self._table_not_found(ex):
+                 return []
+             raise ex
+
+     def _is_expired(self, data: JSONDict) -> bool:
+         expires_at = data.get("expires_at")
          if isinstance(expires_at, (int, float)):
              return expires_at is None or expires_at <= time.time()
          return False

-     def _decode(self, value: bytes) -> JSONDict:
+     def _decode(self, raw_payload: bytes) -> JSONDict:
          try:
-             payload = json.loads(value.decode("utf-8"))
+             data = json.loads(raw_payload.decode("utf-8"))
          except json.JSONDecodeError as ex:
              LOG.warning(f"Failed to decode cache value: {ex}")
              return {}

-         if not isinstance(payload, dict):
-             LOG.warning(f"Invalid cache value format: {payload}")
+         if not isinstance(data, dict):
+             LOG.warning(f"Invalid cache value format: {raw_payload!r}")
              return {}

-         return payload
+         return data
+
+     def _db_existed(self) -> bool:
+         return os.path.exists(self._file)
+
+     def _table_not_found(self, ex: Exception) -> bool:
+         if isinstance(ex, sqlite3.OperationalError):
+             if "no such table" in str(ex):
+                 return True
+         return False
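
For orientation, a minimal sketch of how the reworked PersistentCache above is meant to be used (it assumes the package layout shown in this diff; the cache file path and keys are made up for illustration):

    from mapillary_tools.history import PersistentCache

    # Hypothetical cache file; in production the path comes from
    # _build_upload_cache_path() in uploader.py further down.
    cache = PersistentCache("/tmp/cached_file_handles")

    # set() stores {"expires_at": ..., "value": ...} as JSON under the key and
    # retries automatically while SQLite reports "database is locked".
    cache.set("image-session-key", "file-handle-123", expires_in=60)

    assert cache.get("image-session-key") == "file-handle-123"  # not expired yet
    print(cache.keys())           # ['image-session-key']
    print(cache.clear_expired())  # [] until entries actually expire
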
mapillary_tools/store.py ADDED
@@ -0,0 +1,128 @@
+ """
+ This module provides a persistent key-value store based on SQLite.
+
+ This implementation is mostly copied from dbm.sqlite3 in the Python standard library,
+ but works for Python >= 3.9, whereas dbm.sqlite3 is only available for Python 3.13.
+
+ Source: https://github.com/python/cpython/blob/3.13/Lib/dbm/sqlite3.py
+ """
+
+ import os
+ import sqlite3
+ import sys
+ from collections.abc import MutableMapping
+ from contextlib import closing, suppress
+ from pathlib import Path
+
+ BUILD_TABLE = """
+   CREATE TABLE IF NOT EXISTS Dict (
+     key BLOB UNIQUE NOT NULL,
+     value BLOB NOT NULL
+   )
+ """
+ GET_SIZE = "SELECT COUNT (key) FROM Dict"
+ LOOKUP_KEY = "SELECT value FROM Dict WHERE key = CAST(? AS BLOB)"
+ STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))"
+ DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)"
+ ITER_KEYS = "SELECT key FROM Dict"
+
+
+ def _normalize_uri(path):
+     path = Path(path)
+     uri = path.absolute().as_uri()
+     while "//" in uri:
+         uri = uri.replace("//", "/")
+     return uri
+
+
+ class KeyValueStore(MutableMapping):
+     def __init__(self, path, /, *, flag="r", mode=0o666):
+         """Open a key-value database and return the object.
+
+         The 'path' parameter is the name of the database file.
+
+         The optional 'flag' parameter can be one of ...:
+             'r' (default): open an existing database for read only access
+             'w': open an existing database for read/write access
+             'c': create a database if it does not exist; open for read/write access
+             'n': always create a new, empty database; open for read/write access
+
+         The optional 'mode' parameter is the Unix file access mode of the database;
+         only used when creating a new database. Default: 0o666.
+         """
+         path = os.fsdecode(path)
+         if flag == "r":
+             flag = "ro"
+         elif flag == "w":
+             flag = "rw"
+         elif flag == "c":
+             flag = "rwc"
+             Path(path).touch(mode=mode, exist_ok=True)
+         elif flag == "n":
+             flag = "rwc"
+             Path(path).unlink(missing_ok=True)
+             Path(path).touch(mode=mode)
+         else:
+             raise ValueError(f"Flag must be one of 'r', 'w', 'c', or 'n', not {flag!r}")
+
+         # We use the URI format when opening the database.
+         uri = _normalize_uri(path)
+         uri = f"{uri}?mode={flag}"
+
+         if sys.version_info >= (3, 12):
+             # This is the preferred way, but only available in Python 3.10 and newer.
+             self._cx = sqlite3.connect(uri, autocommit=True, uri=True)
+         else:
+             self._cx = sqlite3.connect(uri, uri=True)
+
+         # This is an optimization only; it's ok if it fails.
+         with suppress(sqlite3.OperationalError):
+             self._cx.execute("PRAGMA journal_mode = wal")
+
+         if flag == "rwc":
+             self._execute(BUILD_TABLE)
+
+     def _execute(self, *args, **kwargs):
+         if sys.version_info >= (3, 12):
+             return closing(self._cx.execute(*args, **kwargs))
+         else:
+             # Use a context manager to commit the changes
+             with self._cx:
+                 return closing(self._cx.execute(*args, **kwargs))
+
+     def __len__(self):
+         with self._execute(GET_SIZE) as cu:
+             row = cu.fetchone()
+             return row[0]
+
+     def __getitem__(self, key):
+         with self._execute(LOOKUP_KEY, (key,)) as cu:
+             row = cu.fetchone()
+             if not row:
+                 raise KeyError(key)
+             return row[0]
+
+     def __setitem__(self, key, value):
+         self._execute(STORE_KV, (key, value))
+
+     def __delitem__(self, key):
+         with self._execute(DELETE_KEY, (key,)) as cu:
+             if not cu.rowcount:
+                 raise KeyError(key)
+
+     def __iter__(self):
+         with self._execute(ITER_KEYS) as cu:
+             for row in cu:
+                 yield row[0]
+
+     def close(self):
+         self._cx.close()
+
+     def keys(self):
+         return list(super().keys())
+
+     def __enter__(self):
+         return self
+
+     def __exit__(self, *args):
+         self.close()
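
A rough usage sketch of the new store module (the file path is illustrative; keys and values are stored as SQLite BLOBs, so bytes round-trip cleanly):

    from mapillary_tools.store import KeyValueStore

    # flag="c" creates the database file and the Dict table if needed.
    with KeyValueStore("/tmp/example.sqlite3", flag="c") as db:
        db[b"greeting"] = b"hello"
        print(len(db))  # 1

    # flag="r" opens the database read-only; PersistentCache.get() relies on
    # this so that readers never create the file as a side effect.
    with KeyValueStore("/tmp/example.sqlite3", flag="r") as db:
        print(bytes(db[b"greeting"]))  # b'hello'
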
mapillary_tools/uploader.py CHANGED
@@ -2,6 +2,9 @@ from __future__ import annotations

  import concurrent.futures
  import dataclasses
+ import datetime
+ import email.utils
+ import hashlib
  import io
  import json
  import logging
@@ -56,6 +59,9 @@ class UploadOptions:
      user_items: config.UserItem
      chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
      num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+     # When set, upload cache will be read/write there
+     # This option is exposed for testing purpose. In PROD, the path is calculated based on envvar and user_items
+     upload_cache_path: Path | None = None
      dry_run: bool = False
      nofinish: bool = False
      noresume: bool = False
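
A hypothetical test-only construction showing the new field (it assumes UploadOptions is the dataclass shown above; the token and path are placeholders):

    from pathlib import Path

    from mapillary_tools.uploader import UploadOptions

    options = UploadOptions(
        user_items={"user_upload_token": "TEST_TOKEN"},
        upload_cache_path=Path("/tmp/test_cached_file_handles"),
    )
    # _maybe_create_persistent_cache_instance() will use this path directly
    # instead of deriving one from constants.UPLOAD_CACHE_DIR.
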
@@ -471,7 +477,7 @@ class ZipUploader:
                  # Arcname should be unique, the name does not matter
                  arcname = f"{idx}.jpg"
                  zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-                 zipf.writestr(zipinfo, SingleImageUploader.dump_image_bytes(metadata))
+                 zipf.writestr(zipinfo, CachedImageUploader.dump_image_bytes(metadata))
              assert len(sequence) == len(set(zipf.namelist()))
              zipf.comment = json.dumps(
                  {"sequence_md5sum": sequence_md5sum},
@@ -537,6 +543,13 @@ class ImageSequenceUploader:
      def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
          self.upload_options = upload_options
          self.emitter = emitter
+         # Create a single shared SingleImageUploader instance that will be used across all uploads
+         cache = _maybe_create_persistent_cache_instance(self.upload_options)
+         if cache:
+             cache.clear_expired()
+         self.cached_image_uploader = CachedImageUploader(
+             self.upload_options, cache=cache
+         )

      def upload_images(
          self, image_metadatas: T.Sequence[types.ImageMetadata]
@@ -688,10 +701,6 @@ class ImageSequenceUploader:
          with api_v4.create_user_session(
              self.upload_options.user_items["user_upload_token"]
          ) as user_session:
-             single_image_uploader = SingleImageUploader(
-                 self.upload_options, user_session=user_session
-             )
-
              while True:
                  # Assert that all images are already pushed into the queue
                  try:
@@ -710,8 +719,8 @@
                  }

                  # image_progress will be updated during uploading
-                 file_handle = single_image_uploader.upload(
-                     image_metadata, image_progress
+                 file_handle = self.cached_image_uploader.upload(
+                     user_session, image_metadata, image_progress
                  )

                  # Update chunk_size (it was constant if set)
@@ -731,24 +740,27 @@
          return indexed_file_handles


- class SingleImageUploader:
+ class CachedImageUploader:
      def __init__(
          self,
          upload_options: UploadOptions,
-         user_session: requests.Session | None = None,
+         cache: history.PersistentCache | None = None,
      ):
          self.upload_options = upload_options
-         self.user_session = user_session
-         self.cache = self._maybe_create_persistent_cache_instance(
-             self.upload_options.user_items, upload_options
-         )
+         self.cache = cache
+         if self.cache:
+             self.cache.clear_expired()

+     # Thread-safe
      def upload(
-         self, image_metadata: types.ImageMetadata, image_progress: dict[str, T.Any]
+         self,
+         user_session: requests.Session,
+         image_metadata: types.ImageMetadata,
+         image_progress: dict[str, T.Any],
      ) -> str:
          image_bytes = self.dump_image_bytes(image_metadata)

-         uploader = Uploader(self.upload_options, user_session=self.user_session)
+         uploader = Uploader(self.upload_options, user_session=user_session)

          session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)

@@ -786,51 +798,7 @@ class SingleImageUploader:
                  f"Failed to dump EXIF bytes: {ex}", metadata.filename
              ) from ex

-     @classmethod
-     def _maybe_create_persistent_cache_instance(
-         cls, user_items: config.UserItem, upload_options: UploadOptions
-     ) -> history.PersistentCache | None:
-         if not constants.UPLOAD_CACHE_DIR:
-             LOG.debug(
-                 "Upload cache directory is set empty, skipping caching upload file handles"
-             )
-             return None
-
-         if upload_options.dry_run:
-             LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
-             return None
-
-         # Different python/CLI versions use different cache (dbm) formats.
-         # Separate them to avoid conflicts
-         py_version_parts = [str(part) for part in sys.version_info[:3]]
-         version = f"py_{'_'.join(py_version_parts)}_{VERSION}"
-
-         cache_path_dir = (
-             Path(constants.UPLOAD_CACHE_DIR)
-             .joinpath(version)
-             .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
-             .joinpath(
-                 user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
-             )
-         )
-         cache_path_dir.mkdir(parents=True, exist_ok=True)
-         cache_path = cache_path_dir.joinpath("cached_file_handles")
-
-         # Sanitize sensitive segments for logging
-         sanitized_cache_path = (
-             Path(constants.UPLOAD_CACHE_DIR)
-             .joinpath(version)
-             .joinpath("***")
-             .joinpath("***")
-             .joinpath("cached_file_handles")
-         )
-         LOG.debug(f"File handle cache path: {sanitized_cache_path}")
-
-         cache = history.PersistentCache(str(cache_path.resolve()))
-         cache.clear_expired()
-
-         return cache
-
+     # Thread-safe
      def _get_cached_file_handle(self, key: str) -> str | None:
          if self.cache is None:
              return None
@@ -840,6 +808,7 @@ class SingleImageUploader:

          return self.cache.get(key)

+     # Thread-safe
      def _set_file_handle_cache(self, key: str, value: str) -> None:
          if self.cache is None:
              return
@@ -979,27 +948,33 @@ class Uploader:
          begin_offset = progress.get("begin_offset")
          offset = progress.get("offset")

-         if retries <= constants.MAX_UPLOAD_RETRIES and _is_retriable_exception(ex):
-             self.emitter.emit("upload_retrying", progress)
+         LOG.warning(
+             f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
+         )

-             LOG.warning(
-                 f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
-             )
+         if retries <= constants.MAX_UPLOAD_RETRIES:
+             retriable, retry_after_sec = _is_retriable_exception(ex)
+             if retriable:
+                 self.emitter.emit("upload_retrying", progress)

-             # Keep things immutable here. Will increment retries in the caller
-             retries += 1
-             if _is_immediate_retriable_exception(ex):
-                 sleep_for = 0
-             else:
-                 sleep_for = min(2**retries, 16)
-             LOG.info(
-                 f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
-             )
-             if sleep_for:
-                 time.sleep(sleep_for)
-         else:
-             self.emitter.emit("upload_failed", progress)
-             raise ex
+                 # Keep things immutable here. Will increment retries in the caller
+                 retries += 1
+                 if _is_immediate_retriable_exception(ex):
+                     sleep_for = 0
+                 else:
+                     sleep_for = min(2**retries, 16)
+                     sleep_for += retry_after_sec
+
+                 LOG.info(
+                     f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
+                 )
+                 if sleep_for:
+                     time.sleep(sleep_for)
+
+                 return
+
+         self.emitter.emit("upload_failed", progress)
+         raise ex

      @classmethod
      def _upload_name(cls, progress: UploaderProgress):
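
For reference, a standalone sketch (not part of the package) of the backoff schedule this change produces when the server supplies a 10-second hint:

    # Exponential backoff capped at 16 seconds, plus the server-provided delay
    # (retry_after_sec comes from _is_retriable_exception, defined below).
    retry_after_sec = 10
    for retries in range(1, 7):
        sleep_for = min(2**retries, 16) + retry_after_sec
        print(retries, sleep_for)  # 1->12, 2->14, 3->18, 4->26, 5->26, 6->26
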
@@ -1116,23 +1091,188 @@ def _is_immediate_retriable_exception(ex: BaseException) -> bool:
      return False


- def _is_retriable_exception(ex: BaseException) -> bool:
+ def _is_retriable_exception(ex: BaseException) -> tuple[bool, int]:
+     """
+     Determine if an exception should be retried and how long to wait.
+
+     Args:
+         ex: Exception to check for retryability
+
+     Returns:
+         Tuple of (retriable, retry_after_sec) where:
+         - retriable: True if the exception should be retried
+         - retry_after_sec: Seconds to wait before retry (>= 0)
+
+     Examples:
+         >>> resp = requests.Response()
+         >>> resp._content = b"foo"
+         >>> resp.status_code = 400
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (False, 0)
+         >>> resp._content = b'{"backoff": 13000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+         >>> resp.status_code = 400
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 13)
+         >>> resp._content = b'{"backoff": "foo", "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+         >>> resp.status_code = 400
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 10)
+         >>> resp._content = b'{"debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+         >>> resp.status_code = 400
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 10)
+         >>> resp._content = b"foo"
+         >>> resp.status_code = 429
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 10)
+         >>> resp._content = b"foo"
+         >>> resp.status_code = 429
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 10)
+         >>> resp._content = b'{"backoff": 12000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+         >>> resp.status_code = 429
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 12)
+         >>> resp._content = b'{"backoff": 12000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+         >>> resp.headers = {"Retry-After": "1"}
+         >>> resp.status_code = 503
+         >>> ex = requests.HTTPError("error", response=resp)
+         >>> _is_retriable_exception(ex)
+         (True, 1)
+     """
+
+     DEFAULT_RETRY_AFTER_RATE_LIMIT_SEC = 10
+
      if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
-         return True
+         return True, 0

      if isinstance(ex, requests.HTTPError) and isinstance(
          ex.response, requests.Response
      ):
-         if 400 <= ex.response.status_code < 500:
+         status_code = ex.response.status_code
+
+         # Always retry with some delay
+         if status_code == 429:
+             retry_after_sec = (
+                 _parse_retry_after_from_header(ex.response)
+                 or DEFAULT_RETRY_AFTER_RATE_LIMIT_SEC
+             )
+
              try:
-                 resp = ex.response.json()
-             except json.JSONDecodeError:
-                 return False
-             return resp.get("debug_info", {}).get("retriable", False)
-         else:
-             return True
+                 data = ex.response.json()
+             except requests.JSONDecodeError:
+                 return True, retry_after_sec

-     return False
+             backoff_ms = _parse_backoff(data.get("backoff"))
+             if backoff_ms is None:
+                 return True, retry_after_sec
+             else:
+                 return True, max(0, int(int(backoff_ms) / 1000))
+
+         if 400 <= status_code < 500:
+             try:
+                 data = ex.response.json()
+             except requests.JSONDecodeError:
+                 return False, (_parse_retry_after_from_header(ex.response) or 0)
+
+             debug_info = data.get("debug_info", {})
+
+             if isinstance(debug_info, dict):
+                 error_type = debug_info.get("type")
+             else:
+                 error_type = None
+
+             # The server may respond 429 RequestRateLimitedError but with retryable=False
+             # We should retry for this case regardless
+             # e.g. HTTP 429 {"backoff": 10000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}
+             if error_type == "RequestRateLimitedError":
+                 backoff_ms = _parse_backoff(data.get("backoff"))
+                 if backoff_ms is None:
+                     return True, (
+                         _parse_retry_after_from_header(ex.response)
+                         or DEFAULT_RETRY_AFTER_RATE_LIMIT_SEC
+                     )
+                 else:
+                     return True, max(0, int(int(backoff_ms) / 1000))
+
+             return debug_info.get("retriable", False), 0
+
+         if 500 <= status_code < 600:
+             return True, (_parse_retry_after_from_header(ex.response) or 0)
+
+     return False, 0
+
+
+ def _parse_backoff(backoff: T.Any) -> int | None:
+     if backoff is not None:
+         try:
+             backoff_ms = int(backoff)
+         except (ValueError, TypeError):
+             backoff_ms = None
+     else:
+         backoff_ms = None
+     return backoff_ms
+
+
+ def _parse_retry_after_from_header(resp: requests.Response) -> int | None:
+     """
+     Parse Retry-After header from HTTP response.
+     See See https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After
+
+     Args:
+         resp: HTTP response object with headers
+
+     Returns:
+         Number of seconds to wait (>= 0) or None if header missing/invalid.
+
+     Examples:
+         >>> resp = requests.Response()
+         >>> resp.headers = {"Retry-After": "1"}
+         >>> _parse_retry_after_from_header(resp)
+         1
+         >>> resp.headers = {"Retry-After": "-1"}
+         >>> _parse_retry_after_from_header(resp)
+         0
+         >>> resp.headers = {"Retry-After": "Wed, 21 Oct 2015 07:28:00 GMT"}
+         >>> _parse_retry_after_from_header(resp)
+         0
+         >>> resp.headers = {"Retry-After": "Wed, 21 Oct 2315 07:28:00"}
+         >>> _parse_retry_after_from_header(resp)
+     """
+
+     value = resp.headers.get("Retry-After")
+     if value is None:
+         return None
+
+     try:
+         return max(0, int(value))
+     except (ValueError, TypeError):
+         pass
+
+     # e.g. "Wed, 21 Oct 2015 07:28:00 GMT"
+     try:
+         dt = email.utils.parsedate_to_datetime(value)
+     except (ValueError, TypeError):
+         dt = None
+
+     if dt is None:
+         LOG.warning(f"Error parsing Retry-After: {value}")
+         return None
+
+     try:
+         delta = dt - datetime.datetime.now(datetime.timezone.utc)
+     except (TypeError, ValueError):
+         # e.g. TypeError: can't subtract offset-naive and offset-aware datetimes
+         return None
+
+     return max(0, int(delta.total_seconds()))


  _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
@@ -1168,3 +1308,57 @@ def _prefixed_uuid4():

  def _is_uuid(key: str) -> bool:
      return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")
+
+
+ def _build_upload_cache_path(upload_options: UploadOptions) -> Path:
+     # Different python/CLI versions use different cache formats.
+     # Separate them to avoid conflicts
+     py_version_parts = [str(part) for part in sys.version_info[:3]]
+     version = f"py_{'_'.join(py_version_parts)}_{VERSION}"
+     # File handles are not sharable between different users
+     user_id = str(
+         upload_options.user_items.get(
+             "MAPSettingsUserKey", upload_options.user_items["user_upload_token"]
+         )
+     )
+     # Use hash to avoid log sensitive data
+     user_fingerprint = utils.md5sum_fp(
+         io.BytesIO((api_v4.MAPILLARY_CLIENT_TOKEN + user_id).encode("utf-8")),
+         md5=hashlib.sha256(),
+     ).hexdigest()[:24]
+
+     cache_path = (
+         Path(constants.UPLOAD_CACHE_DIR)
+         .joinpath(version)
+         .joinpath(user_fingerprint)
+         .joinpath("cached_file_handles")
+     )
+
+     return cache_path
+
+
+ def _maybe_create_persistent_cache_instance(
+     upload_options: UploadOptions,
+ ) -> history.PersistentCache | None:
+     """Create a persistent cache instance if caching is enabled."""
+
+     if upload_options.dry_run:
+         LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+         return None
+
+     if upload_options.upload_cache_path is None:
+         if not constants.UPLOAD_CACHE_DIR:
+             LOG.debug(
+                 "Upload cache directory is set empty, skipping caching upload file handles"
+             )
+             return None
+
+         cache_path = _build_upload_cache_path(upload_options)
+     else:
+         cache_path = upload_options.upload_cache_path
+
+     LOG.debug(f"File handle cache path: {cache_path}")
+
+     cache_path.parent.mkdir(parents=True, exist_ok=True)
+
+     return history.PersistentCache(str(cache_path.resolve()))
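
The resulting cache location can be pictured as follows (a sketch with made-up values; hashing with hashlib directly is assumed to be equivalent to the utils.md5sum_fp call above, which feeds the same bytes into a sha256 hasher):

    import hashlib

    client_token = "MLY|1234|abcdef"  # placeholder for api_v4.MAPILLARY_CLIENT_TOKEN
    user_id = "some-user-key"         # MAPSettingsUserKey, or the upload token as a fallback
    fingerprint = hashlib.sha256((client_token + user_id).encode("utf-8")).hexdigest()[:24]

    # e.g. <constants.UPLOAD_CACHE_DIR>/py_3_11_9_0.14.4/<fingerprint>/cached_file_handles
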
mapillary_tools/utils.py CHANGED
@@ -247,8 +247,8 @@ def configure_logger(
      try:
          # Disable globally for now. TODO Disable it in non-interactive mode only
          raise ImportError
-         from rich.console import Console  # type: ignore
-         from rich.logging import RichHandler  # type: ignore
+         from rich.console import Console  # type: ignore[import]
+         from rich.logging import RichHandler  # type: ignore[import]
      except ImportError:
          formatter = logging.Formatter(
              "%(asctime)s.%(msecs)03d - %(levelname)-7s - %(message)s",
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mapillary_tools
- Version: 0.14.2
+ Version: 0.14.4
  Summary: Mapillary Image/Video Import Pipeline
  Author-email: Mapillary <support@mapillary.com>
  License: BSD
@@ -1,4 +1,4 @@
- mapillary_tools/__init__.py,sha256=qdi1NvyXrEYAkFGfNSU2jMM-Ua-ehuWizcwvW341obw,19
+ mapillary_tools/__init__.py,sha256=SlLq3LO5ZN4Ap8_XR8x6nXyckJ4qcIktiWVQg34oqMY,19
  mapillary_tools/api_v4.py,sha256=bckAU_atUs0pSuqySeY4W0Rs011a21ClJHo_mbbcXXw,4864
  mapillary_tools/authenticate.py,sha256=mmaOwjQ444DcX4lRw2ms3naBg5Y_xwIJAIWeVdsQfqM,11742
  mapillary_tools/blackvue_parser.py,sha256=ea2JtU9MWU6yB0bQlF970_Of0bJVofSTRq1P30WKW-0,5623
@@ -12,18 +12,19 @@ mapillary_tools/exiftool_read_video.py,sha256=23O_bjUOVq6j7i3xMz6fY-XIEsjinsCejK
  mapillary_tools/exiftool_runner.py,sha256=g4gSyqeh3D6EnMJ-c3s-RnO2EP_jD354Qkaz0Y-4D04,1658
  mapillary_tools/ffmpeg.py,sha256=akpvvsjAR-Iiv-hOrUoJvPM9vUU3JqMQ5HJL1_NgwB8,22908
  mapillary_tools/geo.py,sha256=mWaESfDf_zHmyvnt5aVFro4FGrjiULNsuZ6HfGUWvSA,11009
- mapillary_tools/history.py,sha256=LP6e0zEYVBwRGUbFaGoE_AaBIEdpB4XrZsg9qwJVvRI,5344
+ mapillary_tools/history.py,sha256=MoJVp2D-JUPoORDvNhGt-2dgBstPLZ4nyPToIuqIAg4,6287
  mapillary_tools/http.py,sha256=-df_oGyImO2AOmPnXcKMcztlL4LOZLArE6ki81NMGUA,6411
  mapillary_tools/ipc.py,sha256=DwWQb9hNshx0bg0Fo5NjY0mXjs-FkbR6tIQmjMgMtmg,1089
  mapillary_tools/process_geotag_properties.py,sha256=3EaVvjfKB-O38OjopBcxeEdP6qI5IPIxqmO6isjcXKM,14205
  mapillary_tools/process_sequence_properties.py,sha256=n4VjQHrgVjksIr3WoBviRhrQIBBDHGXMClolfyz6tu4,24057
  mapillary_tools/sample_video.py,sha256=pKSj1Vc8e5p1XGjykBuKY9XieTOskc-9L3F4L407jDM,13935
+ mapillary_tools/store.py,sha256=dA1D0afDvhVm0MYEI1yA5FfqjQM5etYK4pcfMc4nKAU,4077
  mapillary_tools/telemetry.py,sha256=lL6qQbtOZft4DZZrCNK3njlwHT_30zLyYS_YRN5pgHY,1568
  mapillary_tools/types.py,sha256=pIU2wcxiOUWT5Pd05pgNzY9EVEDlwoldtlF2IIYYvE0,5909
  mapillary_tools/upload.py,sha256=XejAgmVW4Y33MiQ2g-shvHZA_zXTekEsOUHUHNx2AE4,24047
  mapillary_tools/upload_api_v4.py,sha256=VgOf7RhfUuzmlSBUp5CpekKIJ0xQrC0r-r0Ds9-wU4I,7344
- mapillary_tools/uploader.py,sha256=Rw-1AkxE4TnddJNU6EW--9wmKYRqHbcTeheujdaluiM,39813
- mapillary_tools/utils.py,sha256=cP9idKt4EJqfC0qqOGneSoPNpPiYhaW8VjQ9CLYjESc,8092
+ mapillary_tools/uploader.py,sha256=T2BNlncuFibg5RJ7c2qweUXDAg6-zYc-rdgbV_JEKDU,46636
+ mapillary_tools/utils.py,sha256=HjTZ01GQv_UNGySaTZ_Mc1Gn_Y0x3knQf7Vh17whDFw,8108
  mapillary_tools/camm/camm_builder.py,sha256=ub6Z9ijep8zAo1NOlU51Gxk95kQ2vfN58YgVCLmNMRk,9211
  mapillary_tools/camm/camm_parser.py,sha256=aNHP65hNXYQBWBTfhaj_S5XYzmAHhjwcAfGhbm83__o,18043
  mapillary_tools/commands/__init__.py,sha256=41CFrPLGlG3566uhxssEF3TGAtSpADFPPcDMHbViU0E,171
@@ -68,9 +69,9 @@ mapillary_tools/mp4/simple_mp4_builder.py,sha256=9TUGk1hzI6mQFN1P30jwHL3dCYz3Zz7
  mapillary_tools/mp4/simple_mp4_parser.py,sha256=g3vvPhBoNu7anhVzC5_XQCV7IwfRWro1vJ6d6GyDkHE,6315
  mapillary_tools/serializer/description.py,sha256=ECnQxC-1LOgkAKE5qFi9Y2KuCeH8KPUjjNFDiwebjvo,18647
  mapillary_tools/serializer/gpx.py,sha256=_xx6gHjaWHrlXaUpB5GGBrbRKzbExFyIzWWAH-CvksI,4383
- mapillary_tools-0.14.2.dist-info/licenses/LICENSE,sha256=l2D8cKfFmmJq_wcVq_JElPJrlvWQOzNWx7gMLINucxc,1292
- mapillary_tools-0.14.2.dist-info/METADATA,sha256=rEmF5Twbh9m-zYNF1klQjGLOIrmiv5mCAQnbOb1Z8GE,22200
- mapillary_tools-0.14.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- mapillary_tools-0.14.2.dist-info/entry_points.txt,sha256=A3f3LP-BO_P-U8Y29QfpT4jx6Mjk3sXjTi2Yew4bvj8,75
- mapillary_tools-0.14.2.dist-info/top_level.txt,sha256=FbDkMgOrt1S70ho1WSBrOwzKOSkJFDwwqFOoY5-527s,16
- mapillary_tools-0.14.2.dist-info/RECORD,,
+ mapillary_tools-0.14.4.dist-info/licenses/LICENSE,sha256=l2D8cKfFmmJq_wcVq_JElPJrlvWQOzNWx7gMLINucxc,1292
+ mapillary_tools-0.14.4.dist-info/METADATA,sha256=DmtexHTWyoTovIswRtvkT638DbdPNXcfVfPcaCflu4w,22200
+ mapillary_tools-0.14.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ mapillary_tools-0.14.4.dist-info/entry_points.txt,sha256=A3f3LP-BO_P-U8Y29QfpT4jx6Mjk3sXjTi2Yew4bvj8,75
+ mapillary_tools-0.14.4.dist-info/top_level.txt,sha256=FbDkMgOrt1S70ho1WSBrOwzKOSkJFDwwqFOoY5-527s,16
+ mapillary_tools-0.14.4.dist-info/RECORD,,