mapillary-downloader 0.5.1.tar.gz → 0.5.2.tar.gz

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (18)
  1. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/PKG-INFO +1 -1
  2. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/pyproject.toml +1 -1
  3. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__main__.py +4 -4
  4. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/downloader.py +15 -7
  5. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker.py +4 -0
  6. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker_pool.py +6 -8
  7. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/LICENSE.md +0 -0
  8. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/README.md +0 -0
  9. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__init__.py +0 -0
  10. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/client.py +0 -0
  11. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/exif_writer.py +0 -0
  12. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_check.py +0 -0
  13. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_meta.py +0 -0
  14. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/logging_config.py +0 -0
  15. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/metadata_reader.py +0 -0
  16. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/tar_sequences.py +0 -0
  17. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/utils.py +0 -0
  18. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/webp_converter.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.5.1
+Version: 0.5.2
 Summary: Download your Mapillary data before it's gone
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10
pyproject.toml

@@ -1,7 +1,7 @@
 [project]
 name = "mapillary_downloader"
 description = "Download your Mapillary data before it's gone"
-version = "0.5.1"
+version = "0.5.2"
 authors = [
     { name = "Gareth Davidson", email = "gaz@bitplane.net" }
 ]
src/mapillary_downloader/__main__.py

@@ -41,10 +41,10 @@ def main():
         help="Don't convert to WebP (WebP conversion is enabled by default, saves ~70%% disk space)",
     )
     parser.add_argument(
-        "--workers",
+        "--max-workers",
         type=int,
-        default=None,
-        help="Number of parallel workers (default: half of CPU cores)",
+        default=128,
+        help="Maximum number of parallel workers (default: 128)",
     )
     parser.add_argument(
         "--no-tar",
@@ -114,7 +114,7 @@ def main():
         args.output,
         username,
         args.quality,
-        workers=args.workers,
+        max_workers=args.max_workers,
         tar_sequences=not args.no_tar,
         convert_webp=convert_webp,
         check_ia=not args.no_check_ia,
src/mapillary_downloader/downloader.py

@@ -45,7 +45,7 @@ class MapillaryDownloader:
         output_dir,
         username=None,
         quality=None,
-        workers=None,
+        max_workers=128,
         tar_sequences=True,
         convert_webp=False,
         check_ia=True,
@@ -57,7 +57,7 @@ class MapillaryDownloader:
             output_dir: Base directory to save downloads (final destination)
             username: Mapillary username (for collection directory)
             quality: Image quality (for collection directory)
-            workers: Number of parallel workers (default: half of cpu_count)
+            max_workers: Maximum number of parallel workers (default: 128)
             tar_sequences: Whether to tar sequence directories after download (default: True)
             convert_webp: Whether to convert images to WebP (affects collection name)
             check_ia: Whether to check if collection exists on Internet Archive (default: True)
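For orientation, this is how the renamed parameter reaches the constructor after this release; a minimal sketch using the signature shown in the hunk above, with every argument value illustrative rather than taken from the diff:

from mapillary_downloader.downloader import MapillaryDownloader

downloader = MapillaryDownloader(
    "./downloads",           # output_dir: final destination (illustrative path)
    username="some_user",    # hypothetical Mapillary username
    quality="original",      # illustrative; valid quality values aren't shown in this diff
    max_workers=128,         # new keyword, replacing the old workers=None
    tar_sequences=True,
    convert_webp=True,
    check_ia=True,
)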
@@ -66,7 +66,8 @@ class MapillaryDownloader:
         self.base_output_dir = Path(output_dir)
         self.username = username
         self.quality = quality
-        self.workers = workers if workers is not None else max(1, os.cpu_count() // 2)
+        self.max_workers = max_workers
+        self.initial_workers = os.cpu_count() or 1  # Start with CPU count
         self.tar_sequences = tar_sequences
         self.convert_webp = convert_webp
         self.check_ia = check_ia
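The `or 1` guard on the new initial_workers line is load-bearing: os.cpu_count() is documented to return None when the core count cannot be determined (some containers and unusual platforms). A standalone illustration:

import os

cores = os.cpu_count()        # int, or None when undeterminable
initial_workers = cores or 1  # None -> 1, so later arithmetic never sees None

print(f"starting with {initial_workers} workers")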
@@ -177,7 +178,7 @@ class MapillaryDownloader:
         logger.info(f"Downloading images for user: {self.username}")
         logger.info(f"Output directory: {self.output_dir}")
         logger.info(f"Quality: {self.quality}")
-        logger.info(f"Using {self.workers} parallel workers")
+        logger.info(f"Worker pool: {self.initial_workers} initial, {self.max_workers} max")
 
         start_time = time.time()
 
@@ -191,8 +192,10 @@ class MapillaryDownloader:
 
         # Step 2: Start worker pool
         # Since workers do both I/O (download) and CPU (WebP), need many more workers
-        # Cap at 128 for now - will build proper dynamic scaling on a new branch later
-        pool = AdaptiveWorkerPool(worker_process, min_workers=self.workers, max_workers=128, monitoring_interval=10)
+        # Start with CPU count and scale up based on throughput
+        pool = AdaptiveWorkerPool(
+            worker_process, min_workers=self.initial_workers, max_workers=self.max_workers, monitoring_interval=10
+        )
         pool.start()
 
         # Step 3: Download images from metadata file while fetching new from API
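The pool's scaling logic itself is not part of this diff; the following is only a minimal sketch of the throughput-based rule the new comment implies, with the function name, thresholds, and arguments entirely hypothetical:

def scale_decision(current_workers, max_workers, throughput_history):
    """Hypothetical rule: grow while throughput keeps improving.

    throughput_history: list of (worker_count, items_per_sec) samples,
    one per monitoring interval (10 s in the diff above).
    """
    if len(throughput_history) < 2:
        return min(current_workers + 1, max_workers)  # not enough data: probe upward
    (_, prev_rate), (_, curr_rate) = throughput_history[-2:]
    if curr_rate > prev_rate * 1.05 and current_workers < max_workers:
        return current_workers + 1    # still gaining throughput: scale up
    if curr_rate < prev_rate * 0.95 and current_workers > 1:
        return current_workers - 1    # throughput regressed: back off
    return current_workers            # plateau: hold steady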
@@ -247,8 +250,9 @@ class MapillaryDownloader:
         # Helper to process results from queue
         def process_results():
             nonlocal downloaded_count, total_bytes, failed_count
+            # Drain ALL available results to prevent queue from filling up
             while True:
-                result = pool.get_result(timeout=0.001)
+                result = pool.get_result(timeout=0)  # Non-blocking
                 if result is None:
                     break
 
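A timeout of 0 implies a genuinely non-blocking read rather than a 1 ms wait per empty poll. The pool's get_result is not shown in this diff; one plausible implementation over multiprocessing.Queue, offered purely as a sketch, would be:

import queue  # multiprocessing.Queue raises queue.Empty on an empty read

class ResultMixin:
    """Sketch only: assumes self.result_queue is a multiprocessing.Queue."""

    def get_result(self, timeout=0):
        """Return the next result, or None if nothing is ready right now.

        timeout=0 maps to get_nowait(), so the caller's drain loop
        (process_results above) never stalls on an empty queue.
        """
        try:
            if timeout == 0:
                return self.result_queue.get_nowait()
            return self.result_queue.get(timeout=timeout)
        except queue.Empty:
            return None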
@@ -379,6 +383,10 @@ class MapillaryDownloader:
 
             last_position = f.tell()
 
+            # If API is already complete, we've read the whole file, so break
+            if api_fetch_complete is None:
+                break
+
             # Sleep briefly before next tail iteration
             time.sleep(0.1)
 
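The enclosing tail loop is mostly out of frame here. As a self-contained sketch of the pattern being patched (follow a growing file, then stop once the producer has finished), with producer_done a hypothetical stand-in for the api_fetch_complete check:

import time

def tail_lines(path, producer_done):
    """Yield lines as they are appended; exit once the producer has
    finished and the file has been read to the end.

    producer_done is a zero-argument callable (hypothetical) reporting
    whether the writer has completed.
    """
    with open(path) as f:
        while True:
            line = f.readline()
            if line:
                yield line.rstrip("\n")
                continue
            # No new data: if the writer is finished, we are done too
            if producer_done():
                break
            time.sleep(0.1)  # brief sleep before the next tail iteration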
src/mapillary_downloader/worker.py

@@ -1,6 +1,7 @@
 """Worker process for parallel image download and conversion."""
 
 import os
+import signal
 import tempfile
 from pathlib import Path
 import requests
@@ -17,6 +18,9 @@ def worker_process(work_queue, result_queue, worker_id):
         result_queue: Queue to push results to
         worker_id: Unique worker identifier
     """
+    # Ignore SIGINT in worker process - parent will handle it
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
     # Create session once per worker (reuse HTTP connections)
     session = requests.Session()
 
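Context for this change: Ctrl-C delivers SIGINT to the whole foreground process group, so without SIG_IGN every worker would raise its own KeyboardInterrupt mid-transfer. A self-contained sketch of the division of labor (names hypothetical):

import signal
import time
from multiprocessing import Process

def worker():
    # Children share the terminal's foreground process group, so Ctrl-C
    # would hit them too; ignore SIGINT and let the parent coordinate.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    time.sleep(60)  # stand-in for download/convert work

if __name__ == "__main__":
    procs = [Process(target=worker) for _ in range(4)]
    for p in procs:
        p.start()
    try:
        for p in procs:
            p.join()
    except KeyboardInterrupt:
        # Only the parent reacts to Ctrl-C and shuts the workers down
        for p in procs:
            p.terminate()
        for p in procs:
            p.join(timeout=2)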
src/mapillary_downloader/worker_pool.py

@@ -185,20 +185,18 @@ class AdaptiveWorkerPool:
         else:
             logger.info(f"At optimal worker count: {current_workers} workers, {current_throughput:.1f} items/s")
 
-    def shutdown(self, timeout=30):
+    def shutdown(self, timeout=2):
         """Shutdown the worker pool gracefully."""
         logger.info("Shutting down worker pool...")
         self.running = False
 
-        # Send stop signals
-        for _ in self.workers:
-            self.work_queue.put(None)
-
-        # Wait for workers to finish
+        # Terminate all workers immediately (they ignore SIGINT so we need to be forceful)
         for p in self.workers:
-            p.join(timeout=timeout)
             if p.is_alive():
-                logger.warning(f"Worker {p.pid} did not exit cleanly, terminating")
                 p.terminate()
 
+        # Give them a brief moment to exit
+        for p in self.workers:
+            p.join(timeout=timeout)
+
         logger.info("Worker pool shutdown complete")