mapillary-downloader 0.8.0__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/PKG-INFO +1 -1
  2. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/pyproject.toml +1 -1
  3. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/downloader.py +77 -110
  4. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/logging_config.py +5 -0
  5. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/webp_converter.py +0 -4
  6. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/LICENSE.md +0 -0
  7. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/README.md +0 -0
  8. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/__init__.py +0 -0
  9. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/__main__.py +0 -0
  10. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/client.py +0 -0
  11. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/exif_writer.py +0 -0
  12. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_check.py +0 -0
  13. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_meta.py +0 -0
  14. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/ia_stats.py +0 -0
  15. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/metadata_reader.py +0 -0
  16. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/tar_sequences.py +0 -0
  17. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/utils.py +0 -0
  18. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/worker.py +0 -0
  19. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/worker_pool.py +0 -0
  20. {mapillary_downloader-0.8.0 → mapillary_downloader-0.8.1}/src/mapillary_downloader/xmp_writer.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.8.0
+Version: 0.8.1
 Summary: Archive user data from Mapillary
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10
pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "mapillary_downloader"
 description = "Archive user data from Mapillary"
-version = "0.8.0"
+version = "0.8.1"
 authors = [
     { name = "Gareth Davidson", email = "gaz@bitplane.net" }
 ]
src/mapillary_downloader/downloader.py
@@ -5,6 +5,7 @@ import json
 import logging
 import os
 import shutil
+import threading
 import time
 from pathlib import Path
 from mapillary_downloader.utils import format_size, format_time, safe_json_save
@@ -146,6 +147,65 @@ class MapillaryDownloader:
         # Write atomically using utility function
         safe_json_save(self.progress_file, progress)

+    def _submit_metadata_batch(self, file_handle, quality_field, pool, convert_webp, process_results, base_submitted):
+        """Read metadata lines from current position, submit to workers.
+
+        Args:
+            file_handle: Open file positioned at read point
+            quality_field: Field name for quality URL (e.g., "thumb_1024_url")
+            pool: Worker pool to submit to
+            convert_webp: Whether to convert to webp
+            process_results: Callback to drain result queue
+            base_submitted: Running total for cumulative logging
+
+        Returns:
+            tuple: (submitted_count, skipped_count) for this batch
+        """
+        submitted = 0
+        skipped = 0
+
+        for line in file_handle:
+            line = line.strip()
+            if not line:
+                continue
+
+            try:
+                image = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+
+            if image.get("__complete__"):
+                continue
+
+            image_id = image.get("id")
+            if not image_id:
+                continue
+
+            if image_id in self.downloaded:
+                skipped += 1
+                continue
+
+            if not image.get(quality_field):
+                continue
+
+            work_item = (
+                image,
+                str(self.output_dir),
+                self.quality,
+                convert_webp,
+                self.client.access_token,
+            )
+            pool.submit(work_item)
+            submitted += 1
+
+            total = base_submitted + submitted
+            if total % 1000 == 0:
+                logger.info(f"Queue: submitted {total:,} images")
+
+            process_results()
+
+        return submitted, skipped
+
     def download_user_data(self, bbox=None, convert_webp=False):
         """Download all images for a user using streaming queue-based architecture.

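Note: the new `_submit_metadata_batch` helper assumes the metadata file is JSONL — one image record per line, terminated by a `{"__complete__": true}` sentinel once the API fetch finishes. A minimal sketch of a compatible producer (the writer function and file name here are illustrative, not part of the package):

```python
import json

def write_metadata(path, images):
    """Hypothetical producer for the JSONL file the batch reader tails."""
    with open(path, "a") as f:
        for image in images:
            # Each record needs an "id" and a quality URL field such as
            # "thumb_1024_url" for the reader to submit it for download.
            f.write(json.dumps(image) + "\n")
            f.flush()  # make the line visible to the tailing reader promptly
        # Sentinel object marking the end of the API fetch.
        f.write(json.dumps({"__complete__": True}) + "\n")

write_metadata("metadata.jsonl", [
    {"id": "123", "thumb_1024_url": "https://example.com/123.jpg"},
])
```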
@@ -187,13 +247,13 @@
         total_bytes = 0
         failed_count = 0
         submitted = 0
+        skipped_count = 0

         try:
             # Step 3a: Fetch metadata from API in parallel (write-only, don't block on queue)
-            if not api_complete:
-                import threading
+            api_fetch_complete = threading.Event()

-                api_fetch_complete = threading.Event()
+            if not api_complete:
                 new_images_count = [0]  # Mutable so thread can update it

                 def fetch_api_metadata():
@@ -221,7 +281,7 @@
                 api_thread = threading.Thread(target=fetch_api_metadata, daemon=True)
                 api_thread.start()
             else:
-                api_fetch_complete = None
+                api_fetch_complete.set()

             # Step 3b: Tail metadata file and submit to workers
             logger.debug("Starting metadata tail and download queue feeder")
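The two hunks above replace the `None` sentinel with a `threading.Event` that always exists: the fetch thread sets it when done, and the no-fetch path sets it immediately, so the tail loop below needs only one completion check instead of the old `is None` / `is_set()` pair. A minimal sketch of the pattern (the flag and sleeps are illustrative):

```python
import threading
import time

done = threading.Event()  # always created, never None

need_fetch = True  # stand-in for `not api_complete`
if need_fetch:
    def producer():
        time.sleep(0.5)  # stand-in for the API fetch
        done.set()       # signal completion, like the fetch thread
    threading.Thread(target=producer, daemon=True).start()
else:
    done.set()  # nothing to fetch: complete from the start

while not done.is_set():  # consumer has a single, uniform check
    time.sleep(0.1)
print("producer finished")
```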
@@ -244,9 +304,10 @@
                     total_bytes += bytes_dl

                     # Log every download for first 10, then every 100
+                    total_downloaded = len(self.downloaded)
                     should_log = downloaded_count <= 10 or downloaded_count % 100 == 0
                     if should_log:
-                        logger.info(f"Downloaded: {downloaded_count:,} ({format_size(total_bytes)})")
+                        logger.info(f"Downloaded: {total_downloaded:,} ({format_size(total_bytes)} this session)")

                     if downloaded_count % 100 == 0:
                         pool.check_throughput(downloaded_count)
@@ -260,117 +321,20 @@

             # Tail the metadata file and submit to workers
             while True:
-                # Check if API fetch is done and we've processed everything
-                if api_fetch_complete and api_fetch_complete.is_set():
-                    # Read any remaining lines
-                    if self.metadata_file.exists():
-                        with open(self.metadata_file) as f:
-                            f.seek(last_position)
-                            for line in f:
-                                line = line.strip()
-                                if not line:
-                                    continue
-
-                                try:
-                                    image = json.loads(line)
-                                except json.JSONDecodeError:
-                                    # Incomplete line, will retry
-                                    continue
-
-                                # Skip completion marker
-                                if image.get("__complete__"):
-                                    continue
-
-                                image_id = image.get("id")
-                                if not image_id:
-                                    continue
-
-                                # Skip if already downloaded or no quality URL
-                                if image_id in self.downloaded:
-                                    downloaded_count += 1
-                                    continue
-                                if not image.get(quality_field):
-                                    continue
-
-                                # Submit to workers
-                                work_item = (
-                                    image,
-                                    str(self.output_dir),
-                                    self.quality,
-                                    convert_webp,
-                                    self.client.access_token,
-                                )
-                                pool.submit(work_item)
-                                submitted += 1
-
-                                if submitted % 1000 == 0:
-                                    logger.info(f"Queue: submitted {submitted:,} images")
-
-                                # Process results while submitting
-                                process_results()
-
-                            last_position = f.tell()
-
-                    # API done and all lines processed, break
-                    break
-
-                # API still running or API was already complete, tail the file
                 if self.metadata_file.exists():
                     with open(self.metadata_file) as f:
                         f.seek(last_position)
-                        for line in f:
-                            line = line.strip()
-                            if not line:
-                                continue
-
-                            try:
-                                image = json.loads(line)
-                            except json.JSONDecodeError:
-                                # Incomplete line, will retry next iteration
-                                continue
-
-                            # Skip completion marker
-                            if image.get("__complete__"):
-                                continue
-
-                            image_id = image.get("id")
-                            if not image_id:
-                                continue
-
-                            # Skip if already downloaded or no quality URL
-                            if image_id in self.downloaded:
-                                downloaded_count += 1
-                                continue
-                            if not image.get(quality_field):
-                                continue
-
-                            # Submit to workers
-                            work_item = (
-                                image,
-                                str(self.output_dir),
-                                self.quality,
-                                convert_webp,
-                                self.client.access_token,
-                            )
-                            pool.submit(work_item)
-                            submitted += 1
-
-                            if submitted % 1000 == 0:
-                                logger.info(f"Queue: submitted {submitted:,} images")
-
-                            # Process results while submitting
-                            process_results()
-
+                        batch_submitted, batch_skipped = self._submit_metadata_batch(
+                            f, quality_field, pool, convert_webp, process_results, submitted
+                        )
+                        submitted += batch_submitted
+                        skipped_count += batch_skipped
                         last_position = f.tell()

-                # If API is already complete, we've read the whole file, so break
-                if api_fetch_complete is None:
+                if api_fetch_complete.is_set():
                     break

-                # Sleep briefly before next tail iteration
                 time.sleep(0.1)
-
-            # Process any results that came in
             process_results()

             # Send shutdown signals
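The rewritten loop is the classic tail-a-growing-file pattern: remember the byte offset, hand any new complete lines to the batch submitter, and exit once the completion event is observed. A race-free standalone sketch of the same idea, which samples the event before reading so lines flushed just before `set()` are never missed (`done` and `handle` are illustrative names):

```python
import time
from pathlib import Path

def tail_file(path, done, handle, poll=0.1):
    """Tail a growing file until `done` is set, passing new lines to `handle`."""
    last_position = 0
    while True:
        # Sample completion *before* reading: anything flushed before
        # done.set() is guaranteed to be picked up by this final pass.
        finished = done.is_set()
        p = Path(path)
        if p.exists():
            with open(p) as f:
                f.seek(last_position)
                for line in f:
                    handle(line)
                last_position = f.tell()
        if finished:
            break
        time.sleep(poll)
```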
@@ -397,7 +361,7 @@
                     total_bytes += bytes_dl

                 if downloaded_count % 100 == 0:
-                    logger.info(f"Downloaded: {downloaded_count:,} ({format_size(total_bytes)})")
+                    logger.info(f"Downloaded: {len(self.downloaded):,} ({format_size(total_bytes)} this session)")
                     pool.check_throughput(downloaded_count)
                 # Save progress every 5 minutes
                 if time.time() - self._last_save_time >= 300:
@@ -414,7 +378,10 @@
         self._save_progress()
         elapsed = time.time() - start_time

-        logger.info(f"Complete! Downloaded {downloaded_count:,} ({format_size(total_bytes)}), failed {failed_count:,}")
+        logger.info(
+            f"Complete! Downloaded {downloaded_count:,} this session ({format_size(total_bytes)}), "
+            f"{len(self.downloaded):,} total, skipped {skipped_count:,}, failed {failed_count:,}"
+        )
         logger.info(f"Total time: {format_time(elapsed)}")

         # Tar sequence directories for efficient IA uploads
src/mapillary_downloader/logging_config.py
@@ -15,6 +15,7 @@ class ColoredFormatter(logging.Formatter):
         "DEBUG": "\033[94m",  # Blue
         "RESET": "\033[0m",
     }
+    CYAN = "\033[96m"

     def __init__(self, fmt=None, datefmt=None, use_color=True):
         """Initialize the formatter.
@@ -41,6 +42,10 @@ class ColoredFormatter(logging.Formatter):
         if levelname in self.COLORS:
             record.levelname = f"{self.COLORS[levelname]}{levelname}{self.COLORS['RESET']}"

+        # Color API messages differently so they stand out
+        if record.msg.startswith("API"):
+            record.msg = f"{self.CYAN}{record.msg}{self.COLORS['RESET']}"
+
         return super().format(record)

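The formatter change colors records by message prefix using raw ANSI escapes. A self-contained sketch of the same technique (the formatter name and format string are illustrative):

```python
import logging

CYAN = "\033[96m"
RESET = "\033[0m"

class PrefixColorFormatter(logging.Formatter):
    """Color records whose message starts with "API" cyan."""

    def format(self, record):
        # Guard on str: record.msg can be any object.
        if isinstance(record.msg, str) and record.msg.startswith("API"):
            record.msg = f"{CYAN}{record.msg}{RESET}"
        return super().format(record)

handler = logging.StreamHandler()
handler.setFormatter(PrefixColorFormatter("%(levelname)s %(message)s"))
logging.basicConfig(level=logging.INFO, handlers=[handler])
logging.info("API page 3 fetched")  # rendered cyan on ANSI terminals
logging.info("Downloaded: 1,000")   # default color
```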
src/mapillary_downloader/webp_converter.py
@@ -43,7 +43,6 @@ def convert_to_webp(jpg_path, output_path=None, delete_original=True):
             ["cwebp", "-metadata", "all", str(jpg_path), "-o", str(webp_path)],
             capture_output=True,
             text=True,
-            timeout=60,
         )

         if result.returncode != 0:
@@ -55,9 +54,6 @@ def convert_to_webp(jpg_path, output_path=None, delete_original=True):
             jpg_path.unlink()
             return webp_path

-    except subprocess.TimeoutExpired:
-        logger.error(f"cwebp conversion timed out for {jpg_path}")
-        return None
     except Exception as e:
         logger.error(f"Error converting {jpg_path} to WebP: {e}")
         return None
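Both webp_converter.py hunks remove the 60-second cap: without a `timeout=` argument, `subprocess.run` blocks until cwebp exits on its own, so `TimeoutExpired` can no longer be raised and its handler goes too. The resulting call shape (file paths illustrative):

```python
import subprocess

# No timeout: a very large JPEG converts without being killed mid-run.
result = subprocess.run(
    ["cwebp", "-metadata", "all", "big_photo.jpg", "-o", "big_photo.webp"],
    capture_output=True,
    text=True,
)
if result.returncode != 0:
    print(result.stderr)
```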