mapillary-downloader 0.8.1__tar.gz → 0.9.0__tar.gz

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/PKG-INFO +1 -1
  2. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/pyproject.toml +1 -1
  3. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/client.py +3 -5
  4. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/downloader.py +3 -2
  5. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_check.py +3 -3
  6. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_stats.py +16 -8
  7. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/metadata_reader.py +0 -47
  8. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/utils.py +3 -5
  9. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/webp_converter.py +4 -9
  10. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/worker.py +1 -1
  11. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/LICENSE.md +0 -0
  12. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/README.md +0 -0
  13. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/__init__.py +0 -0
  14. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/__main__.py +0 -0
  15. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/exif_writer.py +0 -0
  16. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/ia_meta.py +0 -0
  17. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/logging_config.py +0 -0
  18. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/tar_sequences.py +0 -0
  19. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/worker_pool.py +0 -0
  20. {mapillary_downloader-0.8.1 → mapillary_downloader-0.9.0}/src/mapillary_downloader/xmp_writer.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.8.1
+Version: 0.9.0
 Summary: Archive user data from Mapillary
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10

pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "mapillary_downloader"
 description = "Archive user data from Mapillary"
-version = "0.8.1"
+version = "0.9.0"
 authors = [
     { name = "Gareth Davidson", email = "gaz@bitplane.net" }
 ]

src/mapillary_downloader/client.py
@@ -22,11 +22,12 @@ class MapillaryClient:
         self.session = requests.Session()
         self.session.headers.update({"Authorization": f"OAuth {access_token}"})

-    def get_user_images(self, username, bbox=None, limit=2000):
+    def get_user_images(self, username, quality, bbox=None, limit=2000):
         """Get images uploaded by a specific user.

         Args:
             username: Mapillary username
+            quality: Image quality (256, 1024, 2048, or original)
             bbox: Optional bounding box [west, south, east, north]
             limit: Number of results per page (max 2000)

@@ -56,10 +57,7 @@ class MapillaryClient:
                     "computed_rotation",
                     "height",
                     "width",
-                    "thumb_256_url",
-                    "thumb_1024_url",
-                    "thumb_2048_url",
-                    "thumb_original_url",
+                    f"thumb_{quality}_url",
                 ]
             ),
         }
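
With only f"thumb_{quality}_url" in the requested field list, callers must pass the quality up front and the API response carries just one thumbnail URL. A minimal usage sketch of the new signature — the token and username are placeholders, and the constructor argument is inferred from the __init__ hunk above, not stated elsewhere in this diff:

    client = MapillaryClient("YOUR_ACCESS_TOKEN")
    # quality is the string used in the field name: "256", "1024", "2048", or "original"
    for image in client.get_user_images("some_user", "2048"):
        url = image["thumb_2048_url"]  # the only thumb field the API was asked for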

src/mapillary_downloader/downloader.py
@@ -8,6 +8,7 @@ import shutil
 import threading
 import time
 from pathlib import Path
+import requests
 from mapillary_downloader.utils import format_size, format_time, safe_json_save
 from mapillary_downloader.ia_meta import generate_ia_metadata
 from mapillary_downloader.ia_check import check_ia_exists
@@ -219,7 +220,7 @@ class MapillaryDownloader:
         # Check if collection already exists on Internet Archive
         if self.check_ia and self.collection_name:
             logger.info(f"Checking if {self.collection_name} exists on Internet Archive...")
-            if check_ia_exists(self.collection_name):
+            if check_ia_exists(requests.Session(), self.collection_name):
                 logger.info("Collection already exists on archive.org, skipping download")
                 return

@@ -261,7 +262,7 @@ class MapillaryDownloader:
         try:
             logger.debug("API fetch thread starting")
             with open(self.metadata_file, "a") as meta_f:
-                for image in self.client.get_user_images(self.username, bbox=bbox):
+                for image in self.client.get_user_images(self.username, self.quality, bbox=bbox):
                     new_images_count[0] += 1

                     # Save metadata (don't dedupe here, let the tailer handle it)

src/mapillary_downloader/ia_check.py
@@ -6,20 +6,20 @@ import requests
 logger = logging.getLogger("mapillary_downloader")


-def check_ia_exists(collection_name):
+def check_ia_exists(session, collection_name):
     """Check if a collection exists on Internet Archive.

     Args:
+        session: requests.Session for connection pooling
         collection_name: Name of the collection (e.g., mapillary-username-original-webp)

     Returns:
         Boolean indicating if the collection exists on IA
     """
-    # IA identifier format
     ia_url = f"https://archive.org/metadata/{collection_name}"

     try:
-        response = requests.get(ia_url, timeout=10)
+        response = session.get(ia_url, timeout=10)
         # If we get a 200, the item exists
         if response.status_code == 200:
             data = response.json()
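
The caller now owns the HTTP session: in downloader.py above, the call site passes a throwaway requests.Session(), which keeps the signature uniform even for this one-off check. A standalone sketch of the new call — the collection name is a hypothetical one following the documented format:

    import requests

    session = requests.Session()
    # e.g. mapillary-<username>-original-webp, per the docstring
    if check_ia_exists(session, "mapillary-someuser-original-webp"):
        print("already on archive.org, skipping")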

src/mapillary_downloader/ia_stats.py
@@ -3,6 +3,7 @@
 import json
 import logging
 import re
+import requests
 from mapillary_downloader.utils import safe_json_save, http_get_with_retry, format_size
 from mapillary_downloader.downloader import get_cache_dir

@@ -11,9 +12,12 @@ logger = logging.getLogger("mapillary_downloader")
 CACHE_FILE = get_cache_dir() / ".stats.json"


-def search_ia_collections():
+def search_ia_collections(session):
     """Search IA for all mapillary_downloader collections.

+    Args:
+        session: requests.Session for connection pooling
+
     Returns:
         List of dicts with: identifier, description, item_size, collection
     """
@@ -27,7 +31,7 @@ def search_ia_collections():
         "output": "json",
     }

-    response = http_get_with_retry(url, params=params, max_retries=3)
+    response = http_get_with_retry(session, url, params=params, max_retries=3)
     data = response.json()

     collections = data["response"]["docs"]
@@ -36,10 +40,11 @@
     return collections


-def fetch_uploader(identifier):
+def fetch_uploader(session, identifier):
     """Fetch uploader email from item metadata.

     Args:
+        session: requests.Session for connection pooling
         identifier: IA item identifier

     Returns:
@@ -47,7 +52,7 @@ def fetch_uploader(identifier):
     """
     url = f"https://archive.org/metadata/{identifier}/metadata/uploader"
     try:
-        response = http_get_with_retry(url, max_retries=2)
+        response = http_get_with_retry(session, url, max_retries=2)
         data = response.json()
         return data.get("result")
     except Exception:
@@ -195,10 +200,11 @@ def aggregate_stats(cache):
     return stats


-def format_stats(stats, cache):
+def format_stats(session, stats, cache):
     """Format statistics as human-readable text.

     Args:
+        session: requests.Session for connection pooling
         stats: Dict from aggregate_stats()
         cache: Dict of collection data

@@ -257,7 +263,7 @@ def format_stats(stats, cache):
        logger.info(f"Fetching uploader info for {len(need_uploader_fetch)} items...")
        for i, identifier in enumerate(need_uploader_fetch, 1):
            logger.info(f"  [{i}/{len(need_uploader_fetch)}] {identifier}")
-           uploader = fetch_uploader(identifier)
+           uploader = fetch_uploader(session, identifier)
            if uploader:
                cache[identifier]["uploader"] = uploader
        # Save updated cache with uploaders
@@ -307,9 +313,11 @@ def show_stats(refresh=True):
     Args:
         refresh: If True, fetch fresh data from IA. If False, use cache only.
     """
+    session = requests.Session()
+
     if refresh:
         try:
-            ia_collections = search_ia_collections()
+            ia_collections = search_ia_collections(session)
             cache = update_cache(ia_collections)
         except Exception as e:
             logger.error(f"Failed to fetch IA data: {e}")
@@ -323,4 +331,4 @@ def show_stats(refresh=True):
        return

    stats = aggregate_stats(cache)
-   print(format_stats(stats, cache))
+   print(format_stats(session, stats, cache))
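
Taken together, the ia_stats.py changes make show_stats() build one requests.Session and thread it through every helper, so all archive.org requests share a single connection pool instead of opening a fresh connection per call. The resulting call chain, sketched from the hunks above without the surrounding error handling:

    session = requests.Session()                     # one pool for the whole run
    ia_collections = search_ia_collections(session)  # IA search query
    cache = update_cache(ia_collections)
    stats = aggregate_stats(cache)
    print(format_stats(session, stats, cache))       # may call fetch_uploader(session, ...)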

src/mapillary_downloader/metadata_reader.py
@@ -65,53 +65,6 @@ class MetadataReader:
         except Exception:
             return False

-    def iter_images(self, quality_field=None, downloaded_ids=None):
-        """Stream images from metadata file with filtering.
-
-        Args:
-            quality_field: Optional field to check exists (e.g., 'thumb_1024_url')
-            downloaded_ids: Optional set of already downloaded IDs to skip
-
-        Yields:
-            Image metadata dicts that pass filters
-        """
-        if not self.metadata_file.exists():
-            return
-
-        # Handle gzipped files
-        if self.metadata_file.suffix == ".gz":
-            file_handle = gzip.open(self.metadata_file, "rt")
-        else:
-            file_handle = open(self.metadata_file)
-
-        with file_handle as f:
-            for line in f:
-                line = line.strip()
-                if not line:
-                    continue
-
-                image = json.loads(line)
-
-                # Check for completion marker
-                if image.get("__complete__"):
-                    self.is_complete = True
-                    logger.debug("Found API fetch completion marker")
-                    continue
-
-                image_id = image.get("id")
-                if not image_id:
-                    continue
-
-                # Filter by downloaded status
-                if downloaded_ids and image_id in downloaded_ids:
-                    continue
-
-                # Filter by quality field availability
-                if quality_field and not image.get(quality_field):
-                    continue
-
-                yield image
-
     def get_all_ids(self):
         """Get set of all image IDs in metadata file.


src/mapillary_downloader/utils.py
@@ -5,7 +5,6 @@ import logging
 import os
 import time
 from pathlib import Path
-import requests
 from requests.exceptions import RequestException

 logger = logging.getLogger("mapillary_downloader")
@@ -77,16 +76,16 @@ def safe_json_save(file_path, data):
     temp_file.replace(file_path)


-def http_get_with_retry(url, params=None, max_retries=5, base_delay=1.0, timeout=60, session=None):
+def http_get_with_retry(session, url, params=None, max_retries=5, base_delay=1.0, timeout=60):
     """HTTP GET with exponential backoff retry.

     Args:
+        session: requests.Session for connection pooling
         url: URL to fetch
         params: Optional query parameters
         max_retries: Maximum retry attempts (default: 5)
         base_delay: Initial delay in seconds (default: 1.0)
         timeout: Request timeout in seconds (default: 60)
-        session: Optional requests.Session for connection pooling

     Returns:
         requests.Response object
@@ -94,10 +93,9 @@ def http_get_with_retry(url, params=None, max_retries=5, base_delay=1.0, timeout=60, session=None):
     Raises:
         requests.RequestException: If all retries exhausted
     """
-    getter = session or requests
     for attempt in range(max_retries):
         try:
-            response = getter.get(url, params=params, timeout=timeout)
+            response = session.get(url, params=params, timeout=timeout)
             response.raise_for_status()
             return response
         except RequestException as e:
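
session is now a required first positional argument, and the getter = session or requests fallback is gone, so every caller must hold a Session. A minimal sketch of a call under the new signature — the URL is illustrative only:

    import requests

    session = requests.Session()
    # raises requests.RequestException only after all retries are exhausted
    response = http_get_with_retry(session, "https://archive.org/metadata/some-item", max_retries=3)
    data = response.json()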

src/mapillary_downloader/webp_converter.py
@@ -17,25 +17,20 @@ def check_cwebp_available():
     return shutil.which("cwebp") is not None


-def convert_to_webp(jpg_path, output_path=None, delete_original=True):
+def convert_to_webp(jpg_path, output_path, delete_original=True):
     """Convert a JPG image to WebP format, preserving EXIF metadata.

     Args:
         jpg_path: Path to the JPG file
-        output_path: Optional path for the WebP output. If None, uses jpg_path with .webp extension
+        output_path: Path for the WebP output
         delete_original: Whether to delete the original JPG after conversion (default: True)

     Returns:
         Path object to the new WebP file, or None if conversion failed
     """
     jpg_path = Path(jpg_path)
-
-    if output_path is None:
-        webp_path = jpg_path.with_suffix(".webp")
-    else:
-        webp_path = Path(output_path)
-        # Ensure output directory exists
-        webp_path.parent.mkdir(parents=True, exist_ok=True)
+    webp_path = Path(output_path)
+    webp_path.parent.mkdir(parents=True, exist_ok=True)

     try:
         # Convert with cwebp, preserving all metadata
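
With output_path now required, the old fallback to jpg_path.with_suffix(".webp") is gone: the caller decides where output lands, and the parent directory is always created. A short example with placeholder paths:

    # Both paths are illustrative; out/seq/ is created automatically.
    webp = convert_to_webp("cache/123.jpg", "out/seq/123.webp")
    if webp is None:
        ...  # conversion failed (original JPG presumably left in place)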

src/mapillary_downloader/worker.py
@@ -106,7 +106,7 @@ def download_and_convert_image(image_data, output_dir, quality, convert_webp, session):

     try:
         # Use retry logic with 3 attempts for image downloads
-        response = http_get_with_retry(image_url, max_retries=3, base_delay=1.0, timeout=60, session=session)
+        response = http_get_with_retry(session, image_url, max_retries=3, base_delay=1.0, timeout=60)

         with open(jpg_path, "wb") as f:
             for chunk in response.iter_content(chunk_size=8192):