mapillary-downloader 0.8.1__tar.gz → 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/PKG-INFO +1 -1
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/pyproject.toml +1 -1
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/client.py +3 -5
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/downloader.py +3 -2
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_check.py +3 -3
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_stats.py +16 -8
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/metadata_reader.py +0 -47
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/utils.py +3 -5
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/webp_converter.py +4 -9
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/worker.py +1 -1
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/LICENSE.md +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/README.md +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/__init__.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/__main__.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/exif_writer.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_meta.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/logging_config.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/tar_sequences.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/worker_pool.py +0 -0
- {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/xmp_writer.py +0 -0
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/client.py
RENAMED
@@ -22,11 +22,12 @@ class MapillaryClient:
         self.session = requests.Session()
         self.session.headers.update({"Authorization": f"OAuth {access_token}"})
 
-    def get_user_images(self, username, bbox=None, limit=2000):
+    def get_user_images(self, username, quality, bbox=None, limit=2000):
         """Get images uploaded by a specific user.
 
         Args:
             username: Mapillary username
+            quality: Image quality (256, 1024, 2048, or original)
             bbox: Optional bounding box [west, south, east, north]
             limit: Number of results per page (max 2000)
 
@@ -56,10 +57,7 @@ class MapillaryClient:
                 "computed_rotation",
                 "height",
                 "width",
-                "thumb_256_url",
-                "thumb_1024_url",
-                "thumb_2048_url",
-                "thumb_original_url",
+                f"thumb_{quality}_url",
             ]
         ),
     }
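Net effect for callers: the four hard-coded thumbnail fields collapse into one interpolated field, so the Graph API only returns the size that will actually be downloaded. A minimal usage sketch under the new signature (the constructor argument and token value are assumptions for illustration, not shown in this diff):

    from mapillary_downloader.client import MapillaryClient

    client = MapillaryClient("MLY|1234|example-token")  # hypothetical access token
    for image in client.get_user_images("someuser", quality=1024, limit=100):
        url = image.get("thumb_1024_url")  # only the requested size is present now
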
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/downloader.py
RENAMED
@@ -8,6 +8,7 @@ import shutil
 import threading
 import time
 from pathlib import Path
+import requests
 from mapillary_downloader.utils import format_size, format_time, safe_json_save
 from mapillary_downloader.ia_meta import generate_ia_metadata
 from mapillary_downloader.ia_check import check_ia_exists
@@ -219,7 +220,7 @@ class MapillaryDownloader:
         # Check if collection already exists on Internet Archive
         if self.check_ia and self.collection_name:
             logger.info(f"Checking if {self.collection_name} exists on Internet Archive...")
-            if check_ia_exists(self.collection_name):
+            if check_ia_exists(requests.Session(), self.collection_name):
                 logger.info("Collection already exists on archive.org, skipping download")
                 return
 
@@ -261,7 +262,7 @@ class MapillaryDownloader:
         try:
             logger.debug("API fetch thread starting")
             with open(self.metadata_file, "a") as meta_f:
-                for image in self.client.get_user_images(self.username, bbox=bbox):
+                for image in self.client.get_user_images(self.username, self.quality, bbox=bbox):
                     new_images_count[0] += 1
 
                     # Save metadata (don't dedupe here, let the tailer handle it)
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_check.py
RENAMED
@@ -6,20 +6,20 @@ import requests
 logger = logging.getLogger("mapillary_downloader")
 
 
-def check_ia_exists(collection_name):
+def check_ia_exists(session, collection_name):
     """Check if a collection exists on Internet Archive.
 
     Args:
+        session: requests.Session for connection pooling
        collection_name: Name of the collection (e.g., mapillary-username-original-webp)
 
     Returns:
         Boolean indicating if the collection exists on IA
     """
-    # IA identifier format
     ia_url = f"https://archive.org/metadata/{collection_name}"
 
     try:
-        response = requests.get(ia_url, timeout=10)
+        response = session.get(ia_url, timeout=10)
         # If we get a 200, the item exists
         if response.status_code == 200:
             data = response.json()
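With the session injected, callers decide how connections are pooled instead of the module making ad-hoc requests. A sketch of the new call pattern (the collection identifier is illustrative):

    import requests
    from mapillary_downloader.ia_check import check_ia_exists

    session = requests.Session()
    # Hits https://archive.org/metadata/<identifier>; a 200 response with a
    # JSON body means the item already exists on IA.
    if check_ia_exists(session, "mapillary-someuser-original-webp"):
        print("Collection already archived, skipping")
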
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_stats.py
RENAMED
@@ -3,6 +3,7 @@
 import json
 import logging
 import re
+import requests
 from mapillary_downloader.utils import safe_json_save, http_get_with_retry, format_size
 from mapillary_downloader.downloader import get_cache_dir
 
@@ -11,9 +12,12 @@ logger = logging.getLogger("mapillary_downloader")
 CACHE_FILE = get_cache_dir() / ".stats.json"
 
 
-def search_ia_collections():
+def search_ia_collections(session):
     """Search IA for all mapillary_downloader collections.
 
+    Args:
+        session: requests.Session for connection pooling
+
     Returns:
         List of dicts with: identifier, description, item_size, collection
     """
@@ -27,7 +31,7 @@ def search_ia_collections():
         "output": "json",
     }
 
-    response = http_get_with_retry(url, params=params, max_retries=3)
+    response = http_get_with_retry(session, url, params=params, max_retries=3)
     data = response.json()
 
     collections = data["response"]["docs"]
@@ -36,10 +40,11 @@ def search_ia_collections():
     return collections
 
 
-def fetch_uploader(identifier):
+def fetch_uploader(session, identifier):
     """Fetch uploader email from item metadata.
 
     Args:
+        session: requests.Session for connection pooling
         identifier: IA item identifier
 
     Returns:
@@ -47,7 +52,7 @@ def fetch_uploader(identifier):
     """
     url = f"https://archive.org/metadata/{identifier}/metadata/uploader"
     try:
-        response = http_get_with_retry(url, max_retries=2)
+        response = http_get_with_retry(session, url, max_retries=2)
         data = response.json()
         return data.get("result")
     except Exception:
@@ -195,10 +200,11 @@ def aggregate_stats(cache):
     return stats
 
 
-def format_stats(stats, cache):
+def format_stats(session, stats, cache):
     """Format statistics as human-readable text.
 
     Args:
+        session: requests.Session for connection pooling
         stats: Dict from aggregate_stats()
         cache: Dict of collection data
 
@@ -257,7 +263,7 @@ def format_stats(stats, cache):
     logger.info(f"Fetching uploader info for {len(need_uploader_fetch)} items...")
     for i, identifier in enumerate(need_uploader_fetch, 1):
         logger.info(f"  [{i}/{len(need_uploader_fetch)}] {identifier}")
-        uploader = fetch_uploader(identifier)
+        uploader = fetch_uploader(session, identifier)
         if uploader:
             cache[identifier]["uploader"] = uploader
             # Save updated cache with uploaders
@@ -307,9 +313,11 @@ def show_stats(refresh=True):
     Args:
         refresh: If True, fetch fresh data from IA. If False, use cache only.
     """
+    session = requests.Session()
+
     if refresh:
         try:
-            ia_collections = search_ia_collections()
+            ia_collections = search_ia_collections(session)
             cache = update_cache(ia_collections)
         except Exception as e:
             logger.error(f"Failed to fetch IA data: {e}")
@@ -323,4 +331,4 @@ def show_stats(refresh=True):
         return
 
     stats = aggregate_stats(cache)
-    print(format_stats(stats, cache))
+    print(format_stats(session, stats, cache))
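show_stats now opens a single Session and threads it through every IA call, so the collection search, the per-item uploader lookups, and the stats formatting all reuse one connection pool. A sketch of the flow, mirroring what show_stats does (limited to three items for illustration):

    import requests
    from mapillary_downloader.ia_stats import search_ia_collections, fetch_uploader

    session = requests.Session()  # shared pool for the whole stats run
    collections = search_ia_collections(session)
    for item in collections[:3]:
        uploader = fetch_uploader(session, item["identifier"])  # email or None
        print(item["identifier"], uploader)
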
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/metadata_reader.py
RENAMED
@@ -65,53 +65,6 @@ class MetadataReader:
         except Exception:
             return False
 
-    def iter_images(self, quality_field=None, downloaded_ids=None):
-        """Stream images from metadata file with filtering.
-
-        Args:
-            quality_field: Optional field to check exists (e.g., 'thumb_1024_url')
-            downloaded_ids: Optional set of already downloaded IDs to skip
-
-        Yields:
-            Image metadata dicts that pass filters
-        """
-        if not self.metadata_file.exists():
-            return
-
-        # Handle gzipped files
-        if self.metadata_file.suffix == ".gz":
-            file_handle = gzip.open(self.metadata_file, "rt")
-        else:
-            file_handle = open(self.metadata_file)
-
-        with file_handle as f:
-            for line in f:
-                line = line.strip()
-                if not line:
-                    continue
-
-                image = json.loads(line)
-
-                # Check for completion marker
-                if image.get("__complete__"):
-                    self.is_complete = True
-                    logger.debug("Found API fetch completion marker")
-                    continue
-
-                image_id = image.get("id")
-                if not image_id:
-                    continue
-
-                # Filter by downloaded status
-                if downloaded_ids and image_id in downloaded_ids:
-                    continue
-
-                # Filter by quality field availability
-                if quality_field and not image.get(quality_field):
-                    continue
-
-                yield image
-
     def get_all_ids(self):
         """Get set of all image IDs in metadata file.
 
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/utils.py
RENAMED
@@ -5,7 +5,6 @@ import logging
 import os
 import time
 from pathlib import Path
-import requests
 from requests.exceptions import RequestException
 
 logger = logging.getLogger("mapillary_downloader")
@@ -77,16 +76,16 @@ def safe_json_save(file_path, data):
     temp_file.replace(file_path)
 
 
-def http_get_with_retry(url, params=None, max_retries=5, base_delay=1.0, timeout=60, session=None):
+def http_get_with_retry(session, url, params=None, max_retries=5, base_delay=1.0, timeout=60):
     """HTTP GET with exponential backoff retry.
 
     Args:
+        session: requests.Session for connection pooling
         url: URL to fetch
         params: Optional query parameters
         max_retries: Maximum retry attempts (default: 5)
         base_delay: Initial delay in seconds (default: 1.0)
         timeout: Request timeout in seconds (default: 60)
-        session: Optional requests.Session for connection pooling
 
     Returns:
         requests.Response object
@@ -94,10 +93,9 @@ def http_get_with_retry(url, params=None, max_retries=5, base_delay=1.0, timeout
     Raises:
         requests.RequestException: If all retries exhausted
     """
-    getter = session or requests
     for attempt in range(max_retries):
         try:
-            response = getter.get(url, params=params, timeout=timeout)
+            response = session.get(url, params=params, timeout=timeout)
             response.raise_for_status()
             return response
         except RequestException as e:
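The session moves from an optional trailing keyword to a required first positional argument, and the `getter = session or requests` fallback disappears, so every caller must now supply a Session. A migration sketch (the URL is a real public IA metadata endpoint, used here only for illustration):

    import requests
    from mapillary_downloader.utils import http_get_with_retry

    session = requests.Session()

    # 0.8.1 style, no longer valid: http_get_with_retry(url, session=session)
    # 0.9.0 style: session comes first and is mandatory.
    response = http_get_with_retry(session, "https://archive.org/metadata/nasa",
                                   max_retries=3, timeout=10)
    print(response.status_code)
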
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/webp_converter.py
RENAMED
@@ -17,25 +17,20 @@ def check_cwebp_available():
     return shutil.which("cwebp") is not None
 
 
-def convert_to_webp(jpg_path, output_path=None, delete_original=True):
+def convert_to_webp(jpg_path, output_path, delete_original=True):
     """Convert a JPG image to WebP format, preserving EXIF metadata.
 
     Args:
         jpg_path: Path to the JPG file
-        output_path: Optional path for the WebP output
+        output_path: Path for the WebP output
         delete_original: Whether to delete the original JPG after conversion (default: True)
 
     Returns:
         Path object to the new WebP file, or None if conversion failed
     """
     jpg_path = Path(jpg_path)
-
-    if output_path is None:
-        webp_path = jpg_path.with_suffix(".webp")
-    else:
-        webp_path = Path(output_path)
-        # Ensure output directory exists
-        webp_path.parent.mkdir(parents=True, exist_ok=True)
+    webp_path = Path(output_path)
+    webp_path.parent.mkdir(parents=True, exist_ok=True)
 
     try:
         # Convert with cwebp, preserving all metadata
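output_path is now required instead of defaulting to a .webp beside the source, and the output's parent directory is always created. A usage sketch (file names are placeholders):

    from pathlib import Path
    from mapillary_downloader.webp_converter import convert_to_webp

    # Requires the cwebp binary on PATH (see check_cwebp_available).
    webp = convert_to_webp("capture_001.jpg", Path("webp/capture_001.webp"),
                           delete_original=False)
    if webp is None:
        print("conversion failed")
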
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/worker.py
RENAMED
@@ -106,7 +106,7 @@ def download_and_convert_image(image_data, output_dir, quality, convert_webp, se
 
     try:
         # Use retry logic with 3 attempts for image downloads
-        response = http_get_with_retry(image_url, max_retries=3, base_delay=1.0, timeout=60, session=session)
+        response = http_get_with_retry(session, image_url, max_retries=3, base_delay=1.0, timeout=60)
 
         with open(jpg_path, "wb") as f:
             for chunk in response.iter_content(chunk_size=8192):
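The worker keeps the same retry budget but passes its pooled session positionally. A standalone sketch of the download-and-write loop it wraps (URL and path are placeholders):

    import requests
    from mapillary_downloader.utils import http_get_with_retry

    session = requests.Session()
    response = http_get_with_retry(session, "https://example.com/thumb.jpg",
                                   max_retries=3, base_delay=1.0, timeout=60)
    with open("/tmp/thumb.jpg", "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
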
{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/LICENSE.md
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/README.md
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/__init__.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/__main__.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/exif_writer.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_meta.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/logging_config.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/tar_sequences.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/worker_pool.py
RENAMED
File without changes

{mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/xmp_writer.py
RENAMED
File without changes