mapillary-downloader 0.5.1__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/PKG-INFO +1 -1
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/pyproject.toml +1 -1
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__main__.py +4 -4
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/downloader.py +15 -7
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker.py +4 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker_pool.py +6 -8
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/LICENSE.md +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/README.md +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__init__.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/client.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/exif_writer.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_check.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_meta.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/logging_config.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/metadata_reader.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/tar_sequences.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/utils.py +0 -0
- {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/webp_converter.py +0 -0
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__main__.py
RENAMED
@@ -41,10 +41,10 @@ def main():
         help="Don't convert to WebP (WebP conversion is enabled by default, saves ~70%% disk space)",
     )
     parser.add_argument(
-        "--workers",
+        "--max-workers",
         type=int,
-        default=
-        help="
+        default=128,
+        help="Maximum number of parallel workers (default: 128)",
     )
     parser.add_argument(
         "--no-tar",
@@ -114,7 +114,7 @@ def main():
         args.output,
         username,
         args.quality,
-
+        max_workers=args.max_workers,
         tar_sequences=not args.no_tar,
         convert_webp=convert_webp,
         check_ia=not args.no_check_ia,
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/downloader.py
RENAMED
@@ -45,7 +45,7 @@ class MapillaryDownloader:
         output_dir,
         username=None,
         quality=None,
-
+        max_workers=128,
         tar_sequences=True,
         convert_webp=False,
         check_ia=True,
@@ -57,7 +57,7 @@ class MapillaryDownloader:
             output_dir: Base directory to save downloads (final destination)
             username: Mapillary username (for collection directory)
             quality: Image quality (for collection directory)
-
+            max_workers: Maximum number of parallel workers (default: 128)
             tar_sequences: Whether to tar sequence directories after download (default: True)
             convert_webp: Whether to convert images to WebP (affects collection name)
             check_ia: Whether to check if collection exists on Internet Archive (default: True)
@@ -66,7 +66,8 @@ class MapillaryDownloader:
         self.base_output_dir = Path(output_dir)
         self.username = username
         self.quality = quality
-        self.
+        self.max_workers = max_workers
+        self.initial_workers = os.cpu_count() or 1 # Start with CPU count
         self.tar_sequences = tar_sequences
         self.convert_webp = convert_webp
         self.check_ia = check_ia
@@ -177,7 +178,7 @@ class MapillaryDownloader:
         logger.info(f"Downloading images for user: {self.username}")
         logger.info(f"Output directory: {self.output_dir}")
         logger.info(f"Quality: {self.quality}")
-        logger.info(f"
+        logger.info(f"Worker pool: {self.initial_workers} initial, {self.max_workers} max")
 
         start_time = time.time()
 
@@ -191,8 +192,10 @@
 
         # Step 2: Start worker pool
         # Since workers do both I/O (download) and CPU (WebP), need many more workers
-        #
-        pool = AdaptiveWorkerPool(
+        # Start with CPU count and scale up based on throughput
+        pool = AdaptiveWorkerPool(
+            worker_process, min_workers=self.initial_workers, max_workers=self.max_workers, monitoring_interval=10
+        )
         pool.start()
 
         # Step 3: Download images from metadata file while fetching new from API
@@ -247,8 +250,9 @@
         # Helper to process results from queue
         def process_results():
             nonlocal downloaded_count, total_bytes, failed_count
+            # Drain ALL available results to prevent queue from filling up
             while True:
-                result = pool.get_result(timeout=0
+                result = pool.get_result(timeout=0) # Non-blocking
                 if result is None:
                     break
 
@@ -379,6 +383,10 @@
 
                 last_position = f.tell()
 
+                # If API is already complete, we've read the whole file, so break
+                if api_fetch_complete is None:
+                    break
+
                 # Sleep briefly before next tail iteration
                 time.sleep(0.1)
 
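The `process_results` change polls with `timeout=0` so every result already queued is drained before the loop moves on. A rough sketch of that non-blocking drain pattern using a plain `queue.Queue`; the diff implies the pool's `get_result` returns `None` when nothing is ready, so the wrapper below is a hypothetical stand-in for it:

```python
import queue

def get_result(result_queue, timeout=0):
    """Hypothetical stand-in: return the next result, or None if nothing is ready."""
    try:
        return result_queue.get(block=timeout > 0, timeout=timeout or None)
    except queue.Empty:
        return None

def process_results(result_queue):
    # Drain ALL available results so the queue cannot fill up between polls
    drained = []
    while True:
        result = get_result(result_queue, timeout=0)  # non-blocking
        if result is None:
            break
        drained.append(result)
    return drained

q = queue.Queue()
for n in range(3):
    q.put(n)
print(process_results(q))  # [0, 1, 2]
```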
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker.py
RENAMED
@@ -1,6 +1,7 @@
 """Worker process for parallel image download and conversion."""
 
 import os
+import signal
 import tempfile
 from pathlib import Path
 import requests
@@ -17,6 +18,9 @@ def worker_process(work_queue, result_queue, worker_id):
         result_queue: Queue to push results to
         worker_id: Unique worker identifier
     """
+    # Ignore SIGINT in worker process - parent will handle it
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
     # Create session once per worker (reuse HTTP connections)
     session = requests.Session()
 
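The added `signal.SIG_IGN` line keeps Ctrl+C from reaching the worker processes, so only the parent reacts to the interrupt. A minimal sketch of that pattern in isolation (the sleep loop is a placeholder for the real download and conversion work):

```python
import signal
import time
from multiprocessing import Process

def worker(worker_id):
    # Ignore SIGINT in the child; the parent decides when this process stops
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    while True:
        time.sleep(0.5)  # placeholder for download + WebP conversion work

if __name__ == "__main__":
    p = Process(target=worker, args=(0,))
    p.start()
    try:
        p.join()
    except KeyboardInterrupt:
        # Only the parent sees Ctrl+C; it shuts the child down explicitly
        p.terminate()
        p.join()
```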
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker_pool.py
RENAMED
@@ -185,20 +185,18 @@ class AdaptiveWorkerPool:
         else:
             logger.info(f"At optimal worker count: {current_workers} workers, {current_throughput:.1f} items/s")
 
-    def shutdown(self, timeout=
+    def shutdown(self, timeout=2):
         """Shutdown the worker pool gracefully."""
         logger.info("Shutting down worker pool...")
         self.running = False
 
-        #
-        for _ in self.workers:
-            self.work_queue.put(None)
-
-        # Wait for workers to finish
+        # Terminate all workers immediately (they ignore SIGINT so we need to be forceful)
         for p in self.workers:
-            p.join(timeout=timeout)
             if p.is_alive():
-                logger.warning(f"Worker {p.pid} did not exit cleanly, terminating")
                 p.terminate()
 
+        # Give them a brief moment to exit
+        for p in self.workers:
+            p.join(timeout=timeout)
+
         logger.info("Worker pool shutdown complete")
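The reworked shutdown no longer enqueues sentinels and waits for workers to drain them; it terminates any worker still alive and only then joins each one briefly. A small standalone sketch of that terminate-then-join order (the busy loop is a placeholder, not the pool's actual worker):

```python
import time
from multiprocessing import Process

def busy_worker():
    while True:
        time.sleep(0.1)  # placeholder for real work

if __name__ == "__main__":
    workers = [Process(target=busy_worker) for _ in range(4)]
    for p in workers:
        p.start()

    # Terminate anything still alive (the workers ignore SIGINT, so be forceful)...
    for p in workers:
        if p.is_alive():
            p.terminate()

    # ...then give each process a brief moment to exit
    for p in workers:
        p.join(timeout=2)
```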
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/LICENSE.md
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/README.md
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__init__.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/client.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/exif_writer.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_check.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_meta.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/logging_config.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/metadata_reader.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/tar_sequences.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/utils.py
RENAMED
File without changes
{mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/webp_converter.py
RENAMED
File without changes