mapillary-tools 0.14.1__tar.gz → 0.14.4__tar.gz
This diff reflects the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/PKG-INFO +1 -1
- mapillary_tools-0.14.4/mapillary_tools/__init__.py +1 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/authenticate.py +1 -1
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/config.py +5 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/options.py +4 -1
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/history.py +88 -53
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/process_geotag_properties.py +4 -11
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/serializer/description.py +12 -2
- mapillary_tools-0.14.4/mapillary_tools/store.py +128 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/upload.py +1 -2
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/uploader.py +284 -82
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/utils.py +2 -2
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/PKG-INFO +1 -1
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/SOURCES.txt +1 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/pyproject.toml +1 -0
- mapillary_tools-0.14.1/mapillary_tools/__init__.py +0 -1
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/LICENSE +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/README.md +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/api_v4.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/blackvue_parser.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/camm/camm_builder.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/camm/camm_parser.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/__init__.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/__main__.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/authenticate.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/process.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/process_and_upload.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/sample_video.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/upload.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/video_process.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/video_process_and_upload.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/commands/zip.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/constants.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/exceptions.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/exif_read.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/exif_write.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/exiftool_read.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/exiftool_read_video.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/exiftool_runner.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/ffmpeg.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geo.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/__init__.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/base.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/factory.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_images_from_exif.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_images_from_exiftool.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_images_from_gpx.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_images_from_gpx_file.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_images_from_nmea_file.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_images_from_video.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_videos_from_exiftool.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_videos_from_gpx.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/geotag_videos_from_video.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/image_extractors/base.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/image_extractors/exif.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/image_extractors/exiftool.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/utils.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/video_extractors/base.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/video_extractors/exiftool.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/video_extractors/gpx.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/video_extractors/native.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/gpmf/gpmf_gps_filter.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/gpmf/gpmf_parser.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/gpmf/gps_filter.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/http.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/ipc.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/mp4/__init__.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/mp4/construct_mp4_parser.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/mp4/io_utils.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/mp4/mp4_sample_parser.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/mp4/simple_mp4_builder.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/mp4/simple_mp4_parser.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/process_sequence_properties.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/sample_video.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/serializer/gpx.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/telemetry.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/types.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/upload_api_v4.py +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/dependency_links.txt +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/entry_points.txt +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/requires.txt +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/top_level.txt +0 -0
- {mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/setup.cfg +0 -0
mapillary_tools-0.14.4/mapillary_tools/__init__.py ADDED
@@ -0,0 +1 @@
+VERSION = "0.14.4"
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/authenticate.py RENAMED
@@ -165,7 +165,7 @@ def _prompt(message: str) -> str:
 
 def _validate_profile(user_items: config.UserItem) -> config.UserItem:
     try:
-        …
+        config.UserItemSchemaValidator.validate(user_items)
     except jsonschema.ValidationError as ex:
         raise exceptions.MapillaryBadParameterError(
             f"Invalid profile format: {ex.message}"
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/config.py RENAMED
@@ -6,6 +6,8 @@ import sys
 import typing as T
 from typing import TypedDict
 
+import jsonschema
+
 if sys.version_info >= (3, 11):
     from typing import Required
 else:
@@ -50,6 +52,9 @@ UserItemSchema = {
 }
 
 
+UserItemSchemaValidator = jsonschema.Draft202012Validator(UserItemSchema)
+
+
 def _load_config(config_path: str) -> configparser.ConfigParser:
     config = configparser.ConfigParser()
     # Override to not change option names (by default it will lower them)
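The recurring change in this release is precompiling JSON Schema validators instead of calling jsonschema.validate(...) per call; jsonschema.validate checks the schema itself and builds a fresh validator on every invocation. A minimal sketch of the pattern with a made-up schema (not part of the diff):

import jsonschema

# Made-up schema for illustration; the real ones are UserItemSchema,
# SourceOptionSchema, and the description-file schemas.
Schema = {
    "type": "object",
    "properties": {"user_upload_token": {"type": "string"}},
    "required": ["user_upload_token"],
}

# Compiled once at import time; jsonschema.validate(instance, Schema) would
# re-check the schema and rebuild a validator on every call.
SchemaValidator = jsonschema.Draft202012Validator(Schema)

SchemaValidator.validate({"user_upload_token": "abc"})  # passes silently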
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/geotag/options.py RENAMED
@@ -173,8 +173,11 @@ SourceOptionSchema = {
 }
 
 
+SourceOptionSchemaValidator = jsonschema.Draft202012Validator(SourceOptionSchema)
+
+
 def validate_option(instance):
-    …
+    SourceOptionSchemaValidator.validate(instance=instance)
 
 
 if __name__ == "__main__":
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/history.py RENAMED
@@ -1,24 +1,17 @@
 from __future__ import annotations
 
-import contextlib
-import dbm
 import json
 import logging
+import os
+import sqlite3
 import string
 import threading
 import time
 import typing as T
+from functools import wraps
 from pathlib import Path
 
-
-# Otherwise you will see: ImportError: no dbm clone found; tried ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
-try:
-    import dbm.sqlite3  # type: ignore
-except ImportError:
-    pass
-
-
-from . import constants, types
+from . import constants, store, types
 from .serializer.description import DescriptionJSONSerializer
 
 JSONDict = T.Dict[str, T.Union[str, int, float, None]]
@@ -85,98 +78,140 @@ def write_history(
         fp.write(json.dumps(history))
 
 
+def _retry_on_database_lock_error(fn):
+    """
+    Decorator to retry a function if it raises a sqlite3.OperationalError with
+    "database is locked" in the message.
+    """
+
+    @wraps(fn)
+    def wrapper(*args, **kwargs):
+        while True:
+            try:
+                return fn(*args, **kwargs)
+            except sqlite3.OperationalError as ex:
+                if "database is locked" in str(ex).lower():
+                    LOG.warning(f"{str(ex)}")
+                    LOG.info("Retrying in 1 second...")
+                    time.sleep(1)
+                else:
+                    raise ex
+
+    return wrapper
+
+
 class PersistentCache:
-    _lock:
+    _lock: threading.Lock
 
     def __init__(self, file: str):
-        # SQLite3 backend supports concurrent access without a lock
-        if dbm.whichdb(file) == "dbm.sqlite3":
-            self._lock = contextlib.nullcontext()
-        else:
-            self._lock = threading.Lock()
         self._file = file
+        self._lock = threading.Lock()
 
     def get(self, key: str) -> str | None:
+        if not self._db_existed():
+            return None
+
         s = time.perf_counter()
 
-        with self.
-            …
-            …
+        with store.KeyValueStore(self._file, flag="r") as db:
+            try:
+                raw_payload: bytes | None = db.get(key)  # data retrieved from db[key]
+            except Exception as ex:
+                if self._table_not_found(ex):
+                    return None
+                raise ex
 
-        if
+        if raw_payload is None:
             return None
 
-        …
+        data: JSONDict = self._decode(raw_payload)  # JSON dict decoded from db[key]
 
-        if self._is_expired(
+        if self._is_expired(data):
             return None
 
-        …
+        cached_value = data.get("value")  # value in the JSON dict decoded from db[key]
 
         LOG.debug(
             f"Found file handle for {key} in cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
         )
 
-        return T.cast(str,
+        return T.cast(str, cached_value)
 
-        …
+    @_retry_on_database_lock_error
+    def set(self, key: str, value: str, expires_in: int = 3600 * 24 * 2) -> None:
         s = time.perf_counter()
 
-        …
+        data = {
             "expires_at": time.time() + expires_in,
-            "
+            "value": value,
         }
 
-        …
+        payload: bytes = json.dumps(data).encode("utf-8")
 
         with self._lock:
-            with
-                db[key] =
+            with store.KeyValueStore(self._file, flag="c") as db:
+                db[key] = payload
 
         LOG.debug(
             f"Cached file handle for {key} ({(time.perf_counter() - s) * 1000:.0f} ms)"
         )
 
+    @_retry_on_database_lock_error
     def clear_expired(self) -> list[str]:
-        s = time.perf_counter()
-
         expired_keys: list[str] = []
 
-
-        with dbm.open(self._file, flag="c") as db:
-            if hasattr(db, "items"):
-                items: T.Iterable[tuple[str | bytes, bytes]] = db.items()
-            else:
-                items = ((key, db[key]) for key in db.keys())
+        s = time.perf_counter()
 
-        …
-        …
-        …
+        with self._lock:
+            with store.KeyValueStore(self._file, flag="c") as db:
+                for key, raw_payload in db.items():
+                    data = self._decode(raw_payload)
+                    if self._is_expired(data):
                         del db[key]
                         expired_keys.append(T.cast(str, key))
 
-        …
-        …
-        …
-        )
+        LOG.debug(
+            f"Cleared {len(expired_keys)} expired entries from the cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
+        )
 
         return expired_keys
 
-    def
-        …
+    def keys(self) -> list[str]:
+        if not self._db_existed():
+            return []
+
+        try:
+            with store.KeyValueStore(self._file, flag="r") as db:
+                return [key.decode("utf-8") for key in db.keys()]
+        except Exception as ex:
+            if self._table_not_found(ex):
+                return []
+            raise ex
+
+    def _is_expired(self, data: JSONDict) -> bool:
+        expires_at = data.get("expires_at")
         if isinstance(expires_at, (int, float)):
             return expires_at is None or expires_at <= time.time()
         return False
 
-    def _decode(self,
+    def _decode(self, raw_payload: bytes) -> JSONDict:
         try:
-            …
+            data = json.loads(raw_payload.decode("utf-8"))
         except json.JSONDecodeError as ex:
             LOG.warning(f"Failed to decode cache value: {ex}")
             return {}
 
-        if not isinstance(
-            LOG.warning(f"Invalid cache value format: {
+        if not isinstance(data, dict):
+            LOG.warning(f"Invalid cache value format: {raw_payload!r}")
             return {}
 
-        return
+        return data
+
+    def _db_existed(self) -> bool:
+        return os.path.exists(self._file)
+
+    def _table_not_found(self, ex: Exception) -> bool:
+        if isinstance(ex, sqlite3.OperationalError):
+            if "no such table" in str(ex):
+                return True
+        return False
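The rewritten PersistentCache keeps its own threading.Lock, stores each entry as a JSON payload with an "expires_at" timestamp, and treats a missing database file or missing Dict table as a cache miss. A rough usage sketch with made-up paths and keys (not part of the diff):

import os
import tempfile

from mapillary_tools.history import PersistentCache

path = os.path.join(tempfile.mkdtemp(), "cached_file_handles")
cache = PersistentCache(path)

# set() JSON-encodes {"expires_at": ..., "value": ...} and writes it via
# store.KeyValueStore; get() returns None for missing or expired entries.
cache.set("image_sha256", "file_handle_123", expires_in=60)
assert cache.get("image_sha256") == "file_handle_123"

cache.set("stale", "old_handle", expires_in=-1)  # already expired
assert cache.get("stale") is None
print(cache.clear_expired())  # keys removed from the store, e.g. [b'stale']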
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/process_geotag_properties.py RENAMED
@@ -304,19 +304,12 @@ def _validate_metadatas(
     # TypeError: __init__() missing 3 required positional arguments: 'image_time', 'gpx_start_time', and 'gpx_end_time'
     # See https://stackoverflow.com/a/61432070
     good_metadatas, error_metadatas = types.separate_errors(metadatas)
-    map_results = utils.mp_map_maybe(
-        validate_and_fail_metadata,
-        T.cast(T.Iterable[types.Metadata], good_metadatas),
-        num_processes=num_processes,
-    )
 
     validated_metadatas = list(
-        …
-        …
-        …
-        …
-            disable=LOG.getEffectiveLevel() <= logging.DEBUG,
-            total=len(good_metadatas),
+        utils.mp_map_maybe(
+            validate_and_fail_metadata,
+            T.cast(T.Iterable[types.Metadata], good_metadatas),
+            num_processes=num_processes,
         )
     )
 
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/serializer/description.py RENAMED
@@ -259,6 +259,11 @@ ImageDescriptionFileSchema = _merge_schema(
 )
 
 
+ImageDescriptionFileSchemaValidator = jsonschema.Draft202012Validator(
+    ImageDescriptionFileSchema
+)
+
+
 VideoDescriptionFileSchema = _merge_schema(
     VideoDescriptionSchema,
     {
@@ -295,6 +300,11 @@ VideoDescriptionFileSchema = _merge_schema(
 )
 
 
+VideoDescriptionFileSchemaValidator = jsonschema.Draft202012Validator(
+    VideoDescriptionFileSchema
+)
+
+
 ImageVideoDescriptionFileSchema = {
     "oneOf": [VideoDescriptionFileSchema, ImageDescriptionFileSchema]
 }
@@ -520,7 +530,7 @@ def parse_capture_time(time: str) -> datetime.datetime:
 
 
 def validate_image_desc(desc: T.Any) -> None:
     try:
-        …
+        ImageDescriptionFileSchemaValidator.validate(desc)
     except jsonschema.ValidationError as ex:
         # do not use str(ex) which is more verbose
         raise exceptions.MapillaryMetadataValidationError(ex.message) from ex
@@ -533,7 +543,7 @@ def validate_image_desc(desc: T.Any) -> None:
 
 
 def validate_video_desc(desc: T.Any) -> None:
     try:
-        …
+        VideoDescriptionFileSchemaValidator.validate(desc)
     except jsonschema.ValidationError as ex:
         # do not use str(ex) which is more verbose
         raise exceptions.MapillaryMetadataValidationError(ex.message) from ex
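The ex.message vs str(ex) distinction noted in the comments above matters for user-facing errors. A small illustration with a made-up schema (not part of the diff):

import jsonschema

# Made-up schema for illustration
validator = jsonschema.Draft202012Validator({"type": "object"})
try:
    validator.validate([])  # wrong type on purpose
except jsonschema.ValidationError as ex:
    print(ex.message)  # short: "[] is not of type 'object'"
    print(str(ex))     # same message plus a dump of the failing schema and instance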
mapillary_tools-0.14.4/mapillary_tools/store.py ADDED
@@ -0,0 +1,128 @@
+"""
+This module provides a persistent key-value store based on SQLite.
+
+This implementation is mostly copied from dbm.sqlite3 in the Python standard library,
+but works for Python >= 3.9, whereas dbm.sqlite3 is only available for Python 3.13.
+
+Source: https://github.com/python/cpython/blob/3.13/Lib/dbm/sqlite3.py
+"""
+
+import os
+import sqlite3
+import sys
+from collections.abc import MutableMapping
+from contextlib import closing, suppress
+from pathlib import Path
+
+BUILD_TABLE = """
+  CREATE TABLE IF NOT EXISTS Dict (
+    key BLOB UNIQUE NOT NULL,
+    value BLOB NOT NULL
+  )
+"""
+GET_SIZE = "SELECT COUNT (key) FROM Dict"
+LOOKUP_KEY = "SELECT value FROM Dict WHERE key = CAST(? AS BLOB)"
+STORE_KV = "REPLACE INTO Dict (key, value) VALUES (CAST(? AS BLOB), CAST(? AS BLOB))"
+DELETE_KEY = "DELETE FROM Dict WHERE key = CAST(? AS BLOB)"
+ITER_KEYS = "SELECT key FROM Dict"
+
+
+def _normalize_uri(path):
+    path = Path(path)
+    uri = path.absolute().as_uri()
+    while "//" in uri:
+        uri = uri.replace("//", "/")
+    return uri
+
+
+class KeyValueStore(MutableMapping):
+    def __init__(self, path, /, *, flag="r", mode=0o666):
+        """Open a key-value database and return the object.
+
+        The 'path' parameter is the name of the database file.
+
+        The optional 'flag' parameter can be one of ...:
+        'r' (default): open an existing database for read only access
+        'w': open an existing database for read/write access
+        'c': create a database if it does not exist; open for read/write access
+        'n': always create a new, empty database; open for read/write access
+
+        The optional 'mode' parameter is the Unix file access mode of the database;
+        only used when creating a new database. Default: 0o666.
+        """
+        path = os.fsdecode(path)
+        if flag == "r":
+            flag = "ro"
+        elif flag == "w":
+            flag = "rw"
+        elif flag == "c":
+            flag = "rwc"
+            Path(path).touch(mode=mode, exist_ok=True)
+        elif flag == "n":
+            flag = "rwc"
+            Path(path).unlink(missing_ok=True)
+            Path(path).touch(mode=mode)
+        else:
+            raise ValueError(f"Flag must be one of 'r', 'w', 'c', or 'n', not {flag!r}")
+
+        # We use the URI format when opening the database.
+        uri = _normalize_uri(path)
+        uri = f"{uri}?mode={flag}"
+
+        if sys.version_info >= (3, 12):
+            # This is the preferred way, but only available in Python 3.10 and newer.
+            self._cx = sqlite3.connect(uri, autocommit=True, uri=True)
+        else:
+            self._cx = sqlite3.connect(uri, uri=True)
+
+        # This is an optimization only; it's ok if it fails.
+        with suppress(sqlite3.OperationalError):
+            self._cx.execute("PRAGMA journal_mode = wal")
+
+        if flag == "rwc":
+            self._execute(BUILD_TABLE)
+
+    def _execute(self, *args, **kwargs):
+        if sys.version_info >= (3, 12):
+            return closing(self._cx.execute(*args, **kwargs))
+        else:
+            # Use a context manager to commit the changes
+            with self._cx:
+                return closing(self._cx.execute(*args, **kwargs))
+
+    def __len__(self):
+        with self._execute(GET_SIZE) as cu:
+            row = cu.fetchone()
+        return row[0]
+
+    def __getitem__(self, key):
+        with self._execute(LOOKUP_KEY, (key,)) as cu:
+            row = cu.fetchone()
+        if not row:
+            raise KeyError(key)
+        return row[0]
+
+    def __setitem__(self, key, value):
+        self._execute(STORE_KV, (key, value))
+
+    def __delitem__(self, key):
+        with self._execute(DELETE_KEY, (key,)) as cu:
+            if not cu.rowcount:
+                raise KeyError(key)
+
+    def __iter__(self):
+        with self._execute(ITER_KEYS) as cu:
+            for row in cu:
+                yield row[0]
+
+    def close(self):
+        self._cx.close()
+
+    def keys(self):
+        return list(super().keys())
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *args):
+        self.close()
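store.KeyValueStore is a MutableMapping over a single-table SQLite database, backported from dbm.sqlite3 so it imports on Python 3.9+. A quick behavioral sketch with a made-up temporary path (not part of the diff):

import os
import tempfile

from mapillary_tools.store import KeyValueStore

path = os.path.join(tempfile.mkdtemp(), "kv.sqlite3")

# flag="c" creates the file and the Dict table if needed
with KeyValueStore(path, flag="c") as db:
    db[b"key"] = b"value"          # keys and values are stored as BLOBs
    assert db[b"key"] == b"value"
    assert len(db) == 1

# flag="r" reopens the same database read-only
with KeyValueStore(path, flag="r") as db:
    assert list(db.keys()) == [b"key"]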
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/upload.py RENAMED
@@ -10,7 +10,6 @@ import uuid
 from pathlib import Path
 
 import humanize
-import jsonschema
 import requests
 from tqdm import tqdm
 
@@ -57,7 +56,7 @@ def upload(
 
     metadatas = _load_descs(_metadatas_from_process, import_paths, desc_path)
 
-    …
+    config.UserItemSchemaValidator.validate(user_items)
 
     # Setup the emitter -- the order matters here
 
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/uploader.py RENAMED
@@ -2,6 +2,9 @@ from __future__ import annotations
 
 import concurrent.futures
 import dataclasses
+import datetime
+import email.utils
+import hashlib
 import io
 import json
 import logging
@@ -36,6 +39,7 @@ from . import (
     types,
     upload_api_v4,
     utils,
+    VERSION,
 )
 from .camm import camm_builder, camm_parser
 from .gpmf import gpmf_parser
@@ -55,6 +59,9 @@ class UploadOptions:
     user_items: config.UserItem
     chunk_size: int = int(constants.UPLOAD_CHUNK_SIZE_MB * 1024 * 1024)
     num_upload_workers: int = constants.MAX_IMAGE_UPLOAD_WORKERS
+    # When set, upload cache will be read/write there
+    # This option is exposed for testing purpose. In PROD, the path is calculated based on envvar and user_items
+    upload_cache_path: Path | None = None
     dry_run: bool = False
     nofinish: bool = False
     noresume: bool = False
@@ -470,7 +477,7 @@ class ZipUploader:
             # Arcname should be unique, the name does not matter
             arcname = f"{idx}.jpg"
             zipinfo = zipfile.ZipInfo(arcname, date_time=(1980, 1, 1, 0, 0, 0))
-            zipf.writestr(zipinfo,
+            zipf.writestr(zipinfo, CachedImageUploader.dump_image_bytes(metadata))
         assert len(sequence) == len(set(zipf.namelist()))
         zipf.comment = json.dumps(
             {"sequence_md5sum": sequence_md5sum},
@@ -536,6 +543,13 @@ class ImageSequenceUploader:
     def __init__(self, upload_options: UploadOptions, emitter: EventEmitter):
         self.upload_options = upload_options
         self.emitter = emitter
+        # Create a single shared SingleImageUploader instance that will be used across all uploads
+        cache = _maybe_create_persistent_cache_instance(self.upload_options)
+        if cache:
+            cache.clear_expired()
+        self.cached_image_uploader = CachedImageUploader(
+            self.upload_options, cache=cache
+        )
 
     def upload_images(
         self, image_metadatas: T.Sequence[types.ImageMetadata]
@@ -687,10 +701,6 @@ class ImageSequenceUploader:
         with api_v4.create_user_session(
             self.upload_options.user_items["user_upload_token"]
         ) as user_session:
-            single_image_uploader = SingleImageUploader(
-                self.upload_options, user_session=user_session
-            )
-
             while True:
                 # Assert that all images are already pushed into the queue
                 try:
@@ -709,8 +719,8 @@ class ImageSequenceUploader:
                 }
 
                 # image_progress will be updated during uploading
-                file_handle =
-                    image_metadata, image_progress
+                file_handle = self.cached_image_uploader.upload(
+                    user_session, image_metadata, image_progress
                 )
 
                 # Update chunk_size (it was constant if set)
@@ -730,24 +740,27 @@ class ImageSequenceUploader:
         return indexed_file_handles
 
 
-class
+class CachedImageUploader:
     def __init__(
         self,
         upload_options: UploadOptions,
-        …
+        cache: history.PersistentCache | None = None,
     ):
         self.upload_options = upload_options
-        self.
-        self.cache
-        self.
-        )
+        self.cache = cache
+        if self.cache:
+            self.cache.clear_expired()
 
+    # Thread-safe
     def upload(
-        self,
+        self,
+        user_session: requests.Session,
+        image_metadata: types.ImageMetadata,
+        image_progress: dict[str, T.Any],
     ) -> str:
         image_bytes = self.dump_image_bytes(image_metadata)
 
-        uploader = Uploader(self.upload_options, user_session=
+        uploader = Uploader(self.upload_options, user_session=user_session)
 
         session_key = uploader._gen_session_key(io.BytesIO(image_bytes), image_progress)
 
@@ -785,44 +798,7 @@ class SingleImageUploader:
                 f"Failed to dump EXIF bytes: {ex}", metadata.filename
             ) from ex
 
-
-    def _maybe_create_persistent_cache_instance(
-        cls, user_items: config.UserItem, upload_options: UploadOptions
-    ) -> history.PersistentCache | None:
-        if not constants.UPLOAD_CACHE_DIR:
-            LOG.debug(
-                "Upload cache directory is set empty, skipping caching upload file handles"
-            )
-            return None
-
-        if upload_options.dry_run:
-            LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
-            return None
-
-        cache_path_dir = (
-            Path(constants.UPLOAD_CACHE_DIR)
-            .joinpath(api_v4.MAPILLARY_CLIENT_TOKEN.replace("|", "_"))
-            .joinpath(
-                user_items.get("MAPSettingsUserKey", user_items["user_upload_token"])
-            )
-        )
-        cache_path_dir.mkdir(parents=True, exist_ok=True)
-        cache_path = cache_path_dir.joinpath("cached_file_handles")
-
-        # Sanitize sensitive segments for logging
-        sanitized_cache_path = (
-            Path(constants.UPLOAD_CACHE_DIR)
-            .joinpath("***")
-            .joinpath("***")
-            .joinpath("cached_file_handles")
-        )
-        LOG.debug(f"File handle cache path: {sanitized_cache_path}")
-
-        cache = history.PersistentCache(str(cache_path.resolve()))
-        cache.clear_expired()
-
-        return cache
-
+    # Thread-safe
     def _get_cached_file_handle(self, key: str) -> str | None:
         if self.cache is None:
             return None
@@ -832,6 +808,7 @@ class SingleImageUploader:
 
         return self.cache.get(key)
 
+    # Thread-safe
     def _set_file_handle_cache(self, key: str, value: str) -> None:
         if self.cache is None:
             return
@@ -971,27 +948,33 @@ class Uploader:
             begin_offset = progress.get("begin_offset")
             offset = progress.get("offset")
 
-            …
-            self.
+            LOG.warning(
+                f"Error uploading {self._upload_name(progress)} at {offset=} since {begin_offset=}: {ex.__class__.__name__}: {ex}"
+            )
 
-            …
-            …
-            …
+            if retries <= constants.MAX_UPLOAD_RETRIES:
+                retriable, retry_after_sec = _is_retriable_exception(ex)
+                if retriable:
+                    self.emitter.emit("upload_retrying", progress)
 
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
-            …
+                    # Keep things immutable here. Will increment retries in the caller
+                    retries += 1
+                    if _is_immediate_retriable_exception(ex):
+                        sleep_for = 0
+                    else:
+                        sleep_for = min(2**retries, 16)
+                    sleep_for += retry_after_sec
+
+                    LOG.info(
+                        f"Retrying in {sleep_for} seconds ({retries}/{constants.MAX_UPLOAD_RETRIES})"
+                    )
+                    if sleep_for:
+                        time.sleep(sleep_for)
+
+                    return
+
+            self.emitter.emit("upload_failed", progress)
+            raise ex
 
     @classmethod
     def _upload_name(cls, progress: UploaderProgress):
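The retry loop now separates whether to retry from how long to wait: _is_retriable_exception returns (retriable, retry_after_sec), and the sleep combines capped exponential backoff with the server-requested delay. A standalone sketch of that arithmetic, with MAX_UPLOAD_RETRIES assumed to be 6 (not part of the diff):

MAX_UPLOAD_RETRIES = 6  # assumed value for illustration

def backoff_seconds(retries: int, retry_after_sec: int, immediate: bool) -> int:
    # Immediately retriable errors (e.g. connection resets) skip the
    # exponential part; everything else backs off, capped at 16 seconds,
    # plus whatever the server asked for via Retry-After/backoff.
    sleep_for = 0 if immediate else min(2**retries, 16)
    return sleep_for + retry_after_sec

# Retries 1..6 with a server-requested 10-second delay:
print([backoff_seconds(r, 10, False) for r in range(1, MAX_UPLOAD_RETRIES + 1)])
# -> [12, 14, 18, 26, 26, 26]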
@@ -1108,23 +1091,188 @@ def _is_immediate_retriable_exception(ex: BaseException) -> bool:
     return False
 
 
-def _is_retriable_exception(ex: BaseException) -> bool:
+def _is_retriable_exception(ex: BaseException) -> tuple[bool, int]:
+    """
+    Determine if an exception should be retried and how long to wait.
+
+    Args:
+        ex: Exception to check for retryability
+
+    Returns:
+        Tuple of (retriable, retry_after_sec) where:
+        - retriable: True if the exception should be retried
+        - retry_after_sec: Seconds to wait before retry (>= 0)
+
+    Examples:
+        >>> resp = requests.Response()
+        >>> resp._content = b"foo"
+        >>> resp.status_code = 400
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (False, 0)
+        >>> resp._content = b'{"backoff": 13000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+        >>> resp.status_code = 400
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 13)
+        >>> resp._content = b'{"backoff": "foo", "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+        >>> resp.status_code = 400
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 10)
+        >>> resp._content = b'{"debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+        >>> resp.status_code = 400
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 10)
+        >>> resp._content = b"foo"
+        >>> resp.status_code = 429
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 10)
+        >>> resp._content = b"foo"
+        >>> resp.status_code = 429
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 10)
+        >>> resp._content = b'{"backoff": 12000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+        >>> resp.status_code = 429
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 12)
+        >>> resp._content = b'{"backoff": 12000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}'
+        >>> resp.headers = {"Retry-After": "1"}
+        >>> resp.status_code = 503
+        >>> ex = requests.HTTPError("error", response=resp)
+        >>> _is_retriable_exception(ex)
+        (True, 1)
+    """
+
+    DEFAULT_RETRY_AFTER_RATE_LIMIT_SEC = 10
+
     if isinstance(ex, (requests.ConnectionError, requests.Timeout)):
-        return True
+        return True, 0
 
     if isinstance(ex, requests.HTTPError) and isinstance(
         ex.response, requests.Response
     ):
-        …
+        status_code = ex.response.status_code
+
+        # Always retry with some delay
+        if status_code == 429:
+            retry_after_sec = (
+                _parse_retry_after_from_header(ex.response)
+                or DEFAULT_RETRY_AFTER_RATE_LIMIT_SEC
+            )
+
             try:
-                …
-            except
-                return
-            return resp.get("debug_info", {}).get("retriable", False)
-        else:
-            return True
+                data = ex.response.json()
+            except requests.JSONDecodeError:
+                return True, retry_after_sec
 
-        …
+            backoff_ms = _parse_backoff(data.get("backoff"))
+            if backoff_ms is None:
+                return True, retry_after_sec
+            else:
+                return True, max(0, int(int(backoff_ms) / 1000))
+
+        if 400 <= status_code < 500:
+            try:
+                data = ex.response.json()
+            except requests.JSONDecodeError:
+                return False, (_parse_retry_after_from_header(ex.response) or 0)
+
+            debug_info = data.get("debug_info", {})
+
+            if isinstance(debug_info, dict):
+                error_type = debug_info.get("type")
+            else:
+                error_type = None
+
+            # The server may respond 429 RequestRateLimitedError but with retryable=False
+            # We should retry for this case regardless
+            # e.g. HTTP 429 {"backoff": 10000, "debug_info": {"retriable": false, "type": "RequestRateLimitedError", "message": "Request rate limit has been exceeded"}}
+            if error_type == "RequestRateLimitedError":
+                backoff_ms = _parse_backoff(data.get("backoff"))
+                if backoff_ms is None:
+                    return True, (
+                        _parse_retry_after_from_header(ex.response)
+                        or DEFAULT_RETRY_AFTER_RATE_LIMIT_SEC
+                    )
+                else:
+                    return True, max(0, int(int(backoff_ms) / 1000))
+
+            return debug_info.get("retriable", False), 0
+
+        if 500 <= status_code < 600:
+            return True, (_parse_retry_after_from_header(ex.response) or 0)
+
+        return False, 0
+
+
+def _parse_backoff(backoff: T.Any) -> int | None:
+    if backoff is not None:
+        try:
+            backoff_ms = int(backoff)
+        except (ValueError, TypeError):
+            backoff_ms = None
+    else:
+        backoff_ms = None
+    return backoff_ms
+
+
+def _parse_retry_after_from_header(resp: requests.Response) -> int | None:
+    """
+    Parse Retry-After header from HTTP response.
+    See See https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After
+
+    Args:
+        resp: HTTP response object with headers
+
+    Returns:
+        Number of seconds to wait (>= 0) or None if header missing/invalid.
+
+    Examples:
+        >>> resp = requests.Response()
+        >>> resp.headers = {"Retry-After": "1"}
+        >>> _parse_retry_after_from_header(resp)
+        1
+        >>> resp.headers = {"Retry-After": "-1"}
+        >>> _parse_retry_after_from_header(resp)
+        0
+        >>> resp.headers = {"Retry-After": "Wed, 21 Oct 2015 07:28:00 GMT"}
+        >>> _parse_retry_after_from_header(resp)
+        0
+        >>> resp.headers = {"Retry-After": "Wed, 21 Oct 2315 07:28:00"}
+        >>> _parse_retry_after_from_header(resp)
+    """
+
+    value = resp.headers.get("Retry-After")
+    if value is None:
+        return None
+
+    try:
+        return max(0, int(value))
+    except (ValueError, TypeError):
+        pass
+
+    # e.g. "Wed, 21 Oct 2015 07:28:00 GMT"
+    try:
+        dt = email.utils.parsedate_to_datetime(value)
+    except (ValueError, TypeError):
+        dt = None
+
+    if dt is None:
+        LOG.warning(f"Error parsing Retry-After: {value}")
+        return None
+
+    try:
+        delta = dt - datetime.datetime.now(datetime.timezone.utc)
+    except (TypeError, ValueError):
+        # e.g. TypeError: can't subtract offset-naive and offset-aware datetimes
+        return None
+
+    return max(0, int(delta.total_seconds()))
 
 
 _SUFFIX_MAP: dict[api_v4.ClusterFileType | types.FileType, str] = {
@@ -1160,3 +1308,57 @@ def _prefixed_uuid4():
 
 def _is_uuid(key: str) -> bool:
     return key.startswith("uuid_") or key.startswith("mly_tools_uuid_")
+
+
+def _build_upload_cache_path(upload_options: UploadOptions) -> Path:
+    # Different python/CLI versions use different cache formats.
+    # Separate them to avoid conflicts
+    py_version_parts = [str(part) for part in sys.version_info[:3]]
+    version = f"py_{'_'.join(py_version_parts)}_{VERSION}"
+    # File handles are not sharable between different users
+    user_id = str(
+        upload_options.user_items.get(
+            "MAPSettingsUserKey", upload_options.user_items["user_upload_token"]
+        )
+    )
+    # Use hash to avoid log sensitive data
+    user_fingerprint = utils.md5sum_fp(
+        io.BytesIO((api_v4.MAPILLARY_CLIENT_TOKEN + user_id).encode("utf-8")),
+        md5=hashlib.sha256(),
+    ).hexdigest()[:24]
+
+    cache_path = (
+        Path(constants.UPLOAD_CACHE_DIR)
+        .joinpath(version)
+        .joinpath(user_fingerprint)
+        .joinpath("cached_file_handles")
+    )
+
+    return cache_path
+
+
+def _maybe_create_persistent_cache_instance(
+    upload_options: UploadOptions,
+) -> history.PersistentCache | None:
+    """Create a persistent cache instance if caching is enabled."""
+
+    if upload_options.dry_run:
+        LOG.debug("Dry-run mode enabled, skipping caching upload file handles")
+        return None
+
+    if upload_options.upload_cache_path is None:
+        if not constants.UPLOAD_CACHE_DIR:
+            LOG.debug(
+                "Upload cache directory is set empty, skipping caching upload file handles"
+            )
+            return None
+
+        cache_path = _build_upload_cache_path(upload_options)
+    else:
+        cache_path = upload_options.upload_cache_path
+
+    LOG.debug(f"File handle cache path: {cache_path}")
+
+    cache_path.parent.mkdir(parents=True, exist_ok=True)
+
+    return history.PersistentCache(str(cache_path.resolve()))
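The cache path now encodes the Python/CLI version plus a truncated SHA-256 fingerprint of the client token and user id, so the path itself is safe to log (the old code logged a ***-sanitized variant instead). A standalone sketch of the resulting layout with made-up credentials; the real code hashes through utils.md5sum_fp (not part of the diff):

import hashlib
import sys
from pathlib import Path

UPLOAD_CACHE_DIR = Path("/tmp/mly_cache")   # assumed; comes from an envvar
MAPILLARY_CLIENT_TOKEN = "MLY|fake|token"   # made-up credential
user_id = "1234567890"                      # made-up user key

version = f"py_{'_'.join(str(p) for p in sys.version_info[:3])}_0.14.4"
user_fingerprint = hashlib.sha256(
    (MAPILLARY_CLIENT_TOKEN + user_id).encode("utf-8")
).hexdigest()[:24]

cache_path = UPLOAD_CACHE_DIR / version / user_fingerprint / "cached_file_handles"
print(cache_path)  # e.g. /tmp/mly_cache/py_3_11_9_0.14.4/5a3e.../cached_file_handles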
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools/utils.py RENAMED
@@ -247,8 +247,8 @@ def configure_logger(
     try:
         # Disable globally for now. TODO Disable it in non-interactive mode only
         raise ImportError
-        from rich.console import Console  # type: ignore
-        from rich.logging import RichHandler  # type: ignore
+        from rich.console import Console  # type: ignore[import]
+        from rich.logging import RichHandler  # type: ignore[import]
     except ImportError:
         formatter = logging.Formatter(
             "%(asctime)s.%(msecs)03d - %(levelname)-7s - %(message)s",
{mapillary_tools-0.14.1 → mapillary_tools-0.14.4}/mapillary_tools.egg-info/SOURCES.txt RENAMED
@@ -21,6 +21,7 @@ mapillary_tools/ipc.py
 mapillary_tools/process_geotag_properties.py
 mapillary_tools/process_sequence_properties.py
 mapillary_tools/sample_video.py
+mapillary_tools/store.py
 mapillary_tools/telemetry.py
 mapillary_tools/types.py
 mapillary_tools/upload.py
mapillary_tools-0.14.1/mapillary_tools/__init__.py DELETED
@@ -1 +0,0 @@
-VERSION = "0.14.1"