mapillary-downloader 0.7.0__tar.gz → 0.7.3__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/PKG-INFO +2 -2
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/README.md +1 -1
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/pyproject.toml +1 -1
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/__main__.py +2 -2
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/downloader.py +22 -45
- mapillary_downloader-0.7.3/src/mapillary_downloader/graphql_web.py +193 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/ia_meta.py +1 -1
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/ia_stats.py +91 -7
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/worker_pool.py +6 -14
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/LICENSE.md +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/__init__.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/client.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/exif_writer.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/ia_check.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/logging_config.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/metadata_reader.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/tar_sequences.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/utils.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/webp_converter.py +0 -0
- {mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/worker.py +0 -0
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/PKG-INFO
RENAMED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.7.0
+Version: 0.7.3
 Summary: Archive user data from Mapillary
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10
@@ -66,7 +66,7 @@ mapillary-downloader --output ./downloads USERNAME1
 | `--quality` | 256, 1024, 2048 or original | `original` |
 | `--bbox` | `west,south,east,north` | `None` |
 | `--no-webp` | Don't convert to WebP | `False` |
-| `--max-workers` | Maximum number of parallel download workers |
+| `--max-workers` | Maximum number of parallel download workers | CPU count |
 | `--no-tar` | Don't tar bucket directories | `False` |
 | `--no-check-ia` | Don't check if exists on Internet Archive | `False` |
```
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/README.md
RENAMED

```diff
@@ -36,7 +36,7 @@ mapillary-downloader --output ./downloads USERNAME1
 | `--quality` | 256, 1024, 2048 or original | `original` |
 | `--bbox` | `west,south,east,north` | `None` |
 | `--no-webp` | Don't convert to WebP | `False` |
-| `--max-workers` | Maximum number of parallel download workers |
+| `--max-workers` | Maximum number of parallel download workers | CPU count |
 | `--no-tar` | Don't tar bucket directories | `False` |
 | `--no-check-ia` | Don't check if exists on Internet Archive | `False` |
```
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/__main__.py
RENAMED

```diff
@@ -43,8 +43,8 @@ def main():
     parser.add_argument(
         "--max-workers",
         type=int,
-        default=
-        help="Maximum number of parallel workers (default:
+        default=os.cpu_count() or 8,
+        help=f"Maximum number of parallel workers (default: CPU count = {os.cpu_count() or 8})",
     )
     parser.add_argument(
         "--no-tar",
```
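For context on the new default: `os.cpu_count()` can return `None` (for example in some restricted containers), which is why the diff guards it with `or 8`. A minimal standalone sketch of the same pattern (the surrounding parser setup outside the hunk is assumed):

```python
import argparse
import os

parser = argparse.ArgumentParser(prog="mapillary-downloader")
parser.add_argument(
    "--max-workers",
    type=int,
    # os.cpu_count() may return None, so fall back to 8
    default=os.cpu_count() or 8,
    help=f"Maximum number of parallel workers (default: CPU count = {os.cpu_count() or 8})",
)

args = parser.parse_args(["--max-workers", "4"])
print(args.max_workers)  # -> 4
```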
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/downloader.py
RENAMED

```diff
@@ -67,7 +67,6 @@ class MapillaryDownloader:
         self.username = username
         self.quality = quality
         self.max_workers = max_workers
-        self.initial_workers = os.cpu_count() or 1  # Start with CPU count
         self.tar_sequences = tar_sequences
         self.convert_webp = convert_webp
         self.check_ia = check_ia
@@ -100,7 +99,7 @@
         # Set up file logging for archival with timestamp for incremental runs
         timestamp = time.strftime("%Y%m%d-%H%M%S")
         log_file = self.output_dir / f"download.log.{timestamp}"
-        add_file_handler(log_file)
+        self.file_handler = add_file_handler(log_file)
         logger.info(f"Logging to: {log_file}")

         self.metadata_file = self.output_dir / "metadata.jsonl"
@@ -170,27 +169,16 @@

         quality_field = f"thumb_{self.quality}_url"

-        logger.info(f"Downloading
-        logger.info(f"Output directory: {self.output_dir}")
-        logger.info(f"Quality: {self.quality}")
-        logger.info(f"Worker pool: {self.initial_workers} initial, {self.max_workers} max")
+        logger.info(f"Downloading {self.username} @ {self.quality} (max {self.max_workers} workers)")

         start_time = time.time()

         # Step 1: Check if API fetch is already complete
         reader = MetadataReader(self.metadata_file)
         api_complete = reader.is_complete
-        if api_complete:
-            logger.info("API fetch already complete, will only download")
-        else:
-            logger.info("API fetch incomplete, will fetch and download in parallel")

         # Step 2: Start worker pool
-
-        # Start with CPU count and scale up based on throughput
-        pool = AdaptiveWorkerPool(
-            worker_process, min_workers=self.initial_workers, max_workers=self.max_workers, monitoring_interval=10
-        )
+        pool = AdaptiveWorkerPool(worker_process, max_workers=self.max_workers, monitoring_interval=10)
         pool.start()

         # Step 3: Download images from metadata file while fetching new from API
@@ -198,9 +186,6 @@
         total_bytes = 0
         failed_count = 0
         submitted = 0
-        batch_start = time.time()
-
-        logger.info("Starting parallel download and API fetch...")

         try:
             # Step 3a: Fetch metadata from API in parallel (write-only, don't block on queue)
@@ -213,7 +198,7 @@
             def fetch_api_metadata():
                 """Fetch metadata from API and write to file (runs in thread)."""
                 try:
-                    logger.
+                    logger.debug("API fetch thread starting")
                     with open(self.metadata_file, "a") as meta_f:
                         for image in self.client.get_user_images(self.username, bbox=bbox):
                             new_images_count[0] += 1
@@ -223,11 +208,11 @@
                             meta_f.flush()

                             if new_images_count[0] % 1000 == 0:
-                                logger.info(f"API:
+                                logger.info(f"API: fetched {new_images_count[0]:,} image URLs")

                     # Mark as complete
                     MetadataReader.mark_complete(self.metadata_file)
-                    logger.info(f"API fetch complete: {new_images_count[0]} images")
+                    logger.info(f"API fetch complete: {new_images_count[0]:,} images")
                 finally:
                     api_fetch_complete.set()
@@ -235,11 +220,10 @@
                 api_thread = threading.Thread(target=fetch_api_metadata, daemon=True)
                 api_thread.start()
             else:
-                logger.info("API fetch already complete, skipping API thread")
                 api_fetch_complete = None

             # Step 3b: Tail metadata file and submit to workers
-            logger.
+            logger.debug("Starting metadata tail and download queue feeder")
             last_position = 0

             # Helper to process results from queue
@@ -261,12 +245,7 @@
                     # Log every download for first 10, then every 100
                     should_log = downloaded_count <= 10 or downloaded_count % 100 == 0
                     if should_log:
-
-                        rate = downloaded_count / elapsed if elapsed > 0 else 0
-                        logger.info(
-                            f"Downloaded: {downloaded_count} ({format_size(total_bytes)}) "
-                            f"- Rate: {rate:.1f} images/sec"
-                        )
+                        logger.info(f"Downloaded: {downloaded_count:,} ({format_size(total_bytes)})")

                     if downloaded_count % 100 == 0:
                         self._save_progress()
@@ -304,6 +283,7 @@

                 # Skip if already downloaded or no quality URL
                 if image_id in self.downloaded:
+                    downloaded_count += 1
                     continue
                 if not image.get(quality_field):
                     continue
@@ -320,7 +300,7 @@
                 submitted += 1

                 if submitted % 1000 == 0:
-                    logger.info(f"Queue:
+                    logger.info(f"Queue: submitted {submitted:,} images")

                 # Process results while submitting
                 process_results()
@@ -355,6 +335,7 @@

                 # Skip if already downloaded or no quality URL
                 if image_id in self.downloaded:
+                    downloaded_count += 1
                     continue
                 if not image.get(quality_field):
                     continue
@@ -371,7 +352,7 @@
                 submitted += 1

                 if submitted % 1000 == 0:
-                    logger.info(f"Queue:
+                    logger.info(f"Queue: submitted {submitted:,} images")

                 # Process results while submitting
                 process_results()
@@ -389,7 +370,7 @@
             process_results()

             # Send shutdown signals
-            logger.
+            logger.debug(f"Submitted {submitted:,} images, waiting for workers")
             for _ in range(pool.current_workers):
                 pool.submit(None)
@@ -411,16 +392,8 @@
                     downloaded_count += 1
                     total_bytes += bytes_dl

-                    if downloaded_count %
-
-                        rate = downloaded_count / elapsed if elapsed > 0 else 0
-                        remaining = submitted - completed
-                        eta_seconds = remaining / rate if rate > 0 else 0
-
-                        logger.info(
-                            f"Downloaded: {downloaded_count}/{submitted} ({format_size(total_bytes)}) "
-                            f"- ETA: {format_time(eta_seconds)}"
-                        )
+                    if downloaded_count % 100 == 0:
+                        logger.info(f"Downloaded: {downloaded_count:,} ({format_size(total_bytes)})")
                         self._save_progress()
                         pool.check_throughput(downloaded_count)
                 else:
@@ -434,7 +407,7 @@
         self._save_progress()
         elapsed = time.time() - start_time

-        logger.info(f"Complete! Downloaded {downloaded_count} ({format_size(total_bytes)}),
+        logger.info(f"Complete! Downloaded {downloaded_count:,} ({format_size(total_bytes)}), failed {failed_count:,}")
         logger.info(f"Total time: {format_time(elapsed)}")

         # Tar sequence directories for efficient IA uploads
@@ -463,12 +436,16 @@
         # Generate IA metadata
         generate_ia_metadata(self.output_dir)

+        # Close log file handler before moving directory
+        self.file_handler.close()
+        logger.removeHandler(self.file_handler)
+
         # Move from staging to final destination
-        logger.info("Moving
+        logger.info("Moving to final destination...")
         if self.final_dir.exists():
             logger.warning(f"Destination already exists, removing: {self.final_dir}")
             shutil.rmtree(self.final_dir)

         self.final_dir.parent.mkdir(parents=True, exist_ok=True)
         shutil.move(str(self.staging_dir), str(self.final_dir))
-        logger.info(f"
+        logger.info(f"Done: {self.final_dir}")
```
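One subtle fix in this file: the handler returned by `add_file_handler` is now kept and explicitly closed before the staging directory is moved. An open `logging.FileHandler` holds a file descriptor inside the directory being moved, which fails outright on Windows and can leave the relocated log truncated elsewhere. A self-contained sketch of the pattern (the names here are illustrative, not the project's):

```python
import logging
import shutil
import tempfile
from pathlib import Path

logger = logging.getLogger("example")

staging = Path(tempfile.mkdtemp()) / "staging"
staging.mkdir()
handler = logging.FileHandler(staging / "download.log")
logger.addHandler(handler)
logger.warning("work happens here")

# Close and detach the handler before moving the directory it writes into
handler.close()
logger.removeHandler(handler)

final = staging.parent / "final"
shutil.move(str(staging), str(final))
print(f"Done: {final}")
```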
mapillary_downloader-0.7.3/src/mapillary_downloader/graphql_web.py
ADDED

```diff
@@ -0,0 +1,193 @@
+"""GraphQL web API utilities (unofficial, experimental).
+
+This module provides access to Mapillary's GraphQL endpoint used by the web interface.
+Unlike the official v4 REST API, this requires a public web token extracted from the
+JavaScript bundle.
+
+Use cases:
+- Get user image counts without pagination
+- Access leaderboard data
+- Check for updates to existing downloads
+
+WARNING: This is not officially documented and may break at any time.
+"""
+
+import json
+import logging
+import re
+from datetime import datetime
+from urllib.parse import urlencode, quote
+import requests
+
+logger = logging.getLogger("mapillary_downloader")
+
+# Fallback token (extracted from main JS bundle as of 2025-01-09)
+FALLBACK_TOKEN = "MLY|4223665974375089|d62822dd792b6a823d0794ef26450398"
+
+
+def extract_token_from_js():
+    """Extract public web token from Mapillary's JavaScript bundle.
+
+    This fetches the main page, finds the main JS bundle, and extracts
+    the hardcoded MLY token used for GraphQL queries.
+
+    Returns:
+        Token string (e.g., "MLY|123|abc...") or None if extraction failed
+    """
+    try:
+        # Fetch main page to find JS bundle URL
+        # Need consent cookie to get actual page (not GDPR banner)
+        logger.debug("Fetching Mapillary main page...")
+        # Generate today's date in the format YYYY_MM_DD for cookie
+        today = datetime.now().strftime("%Y_%m_%d")
+        cookies = {
+            "mly_cb": f'{{"version":"1","date":"{today}","third_party_consent":"withdrawn","categories":{{"content_and_media":"withdrawn"}},"integration_controls":{{"YOUTUBE":"withdrawn"}}}}'
+        }
+        headers = {
+            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+            "Accept-Language": "en-GB,en;q=0.5",
+            "Sec-GPC": "1",
+            "Upgrade-Insecure-Requests": "1",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1",
+        }
+        response = requests.get("https://www.mapillary.com/app/", cookies=cookies, headers=headers, timeout=30)
+        response.raise_for_status()
+
+        # Find main JS file URL
+        # Pattern: <script src="main.{hash}.js" type="module"></script>
+        js_match = re.search(r'src="(main\.[a-f0-9]+\.js)"', response.text)
+        if not js_match:
+            logger.warning("Could not find main JS bundle URL in page")
+            return None
+
+        # URL is relative to /app/ base path
+        js_url = f"https://www.mapillary.com/app/{js_match.group(1)}"
+        logger.debug(f"Found JS bundle: {js_url}")
+
+        # Fetch JS bundle
+        logger.debug("Fetching JS bundle...")
+        js_response = requests.get(js_url, timeout=30)
+        js_response.raise_for_status()
+
+        # Extract token
+        # Pattern: "MLY|{client_id}|{secret}"
+        token_match = re.search(r'"(MLY\|[^"]+)"', js_response.text)
+        if not token_match:
+            logger.warning("Could not find MLY token in JS bundle")
+            return None
+
+        token = token_match.group(1)
+        logger.info(f"Extracted web token: {token[:20]}...")
+        return token
+
+    except requests.RequestException as e:
+        logger.error(f"Failed to extract web token: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error extracting web token: {e}")
+        return None
+
+
+def get_leaderboard(key="global", token=None):
+    """Get leaderboard data from Mapillary GraphQL API.
+
+    Args:
+        key: Leaderboard key (e.g., "global", country name, etc.)
+        token: MLY token (if None, will extract from JS bundle or use fallback)
+
+    Returns:
+        Dict with leaderboard data, or None on error
+    """
+    if token is None:
+        token = extract_token_from_js()
+        if token is None:
+            logger.warning("Failed to extract token, using fallback")
+            token = FALLBACK_TOKEN
+
+    # GraphQL query for leaderboard (lifetime stats only)
+    query = """query getUserLeaderboard($key: String!) {
+  user_leaderboards(key: $key) {
+    lifetime {
+      count
+      user {
+        id
+        username
+        profile_photo_url
+        __typename
+      }
+      __typename
+    }
+    __typename
+  }
+}"""
+
+    try:
+        headers = {
+            "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0",
+            "Accept": "*/*",
+            "Accept-Language": "en-GB,en;q=0.5",
+            "Referer": "https://www.mapillary.com/",
+            "content-type": "application/json",
+            "authorization": f"OAuth {token}",
+            "Origin": "https://www.mapillary.com",
+            "Sec-Fetch-Dest": "empty",
+            "Sec-Fetch-Mode": "cors",
+            "Sec-Fetch-Site": "same-site",
+        }
+
+        # Build query params - use quote_via=quote to get %20 instead of +
+        # Note: both 'doc' and 'query' params seem to be required (from observed curl)
+        params = {
+            "doc": query,
+            "query": query,
+            "operationName": "getUserLeaderboard",
+            "variables": json.dumps({"key": key}, separators=(',', ':')),
+        }
+
+        # Build URL with proper percent encoding (not + for spaces)
+        # Don't encode parentheses to match curl behavior
+        query_string = urlencode(params, quote_via=lambda s, safe='', encoding=None, errors=None: quote(s, safe='()!'))
+        url = f"https://graph.mapillary.com/graphql?{query_string}"
+
+        logger.debug(f"Querying leaderboard for key: {key}")
+
+        response = requests.get(
+            url,
+            headers=headers,
+            timeout=30
+        )
+        response.raise_for_status()
+
+        return response.json()
+
+    except requests.RequestException as e:
+        logger.error(f"Failed to query leaderboard: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"Unexpected error querying leaderboard: {e}")
+        return None
+
+
+if __name__ == "__main__":
+    # Test the extraction and leaderboard query
+    logging.basicConfig(level=logging.DEBUG)
+
+    print("=== Extracting token ===")
+    token = extract_token_from_js()
+    if token:
+        print(f"Success! Token: {token}")
+    else:
+        print("Failed to extract token")
+        print(f"Fallback: {FALLBACK_TOKEN}")
+        token = FALLBACK_TOKEN
+
+    print("\n=== Querying global leaderboard ===")
+    data = get_leaderboard("global", token=token)
+    if data:
+        print(json.dumps(data, indent=2))
+    else:
+        print("Failed to get leaderboard data")
```
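A quick usage sketch for the new module. The response envelope (`data` → `user_leaderboards` → `lifetime`) is inferred from the GraphQL query above rather than confirmed against the live endpoint, so treat the field access as an assumption:

```python
from mapillary_downloader.graphql_web import extract_token_from_js, get_leaderboard

token = extract_token_from_js()  # may be None; get_leaderboard() falls back internally
data = get_leaderboard("global", token=token)
if data:
    # Assumed shape: standard GraphQL envelope around the query's fields
    for entry in data["data"]["user_leaderboards"]["lifetime"]:
        print(f'{entry["user"]["username"]}: {entry["count"]:,} images')
```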
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/ia_meta.py
RENAMED

```diff
@@ -182,7 +182,7 @@ def generate_ia_metadata(collection_dir):
     write_meta_tag(meta_dir, "coverage", f"{first_date} - {last_date}")
     write_meta_tag(meta_dir, "licenseurl", "https://creativecommons.org/licenses/by-sa/4.0/")
     write_meta_tag(meta_dir, "mediatype", "data")
-    write_meta_tag(meta_dir, "collection", "
+    write_meta_tag(meta_dir, "collection", "mapillary-images")

     # Source and scanner metadata
     write_meta_tag(meta_dir, "source", f"https://www.mapillary.com/app/user/{username}")
```
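`write_meta_tag` itself is outside this diff; a plausible reading, assuming it persists one archive.org metadata tag per file so a later upload step can pick them up (the filename convention here is a guess, not the project's confirmed layout):

```python
from pathlib import Path

def write_meta_tag(meta_dir: Path, name: str, value: str) -> None:
    """Hypothetical sketch: store one IA metadata tag as <name>.txt in the meta dir."""
    meta_dir.mkdir(parents=True, exist_ok=True)
    (meta_dir / f"{name}.txt").write_text(value + "\n")

meta_dir = Path("meta")
write_meta_tag(meta_dir, "mediatype", "data")
write_meta_tag(meta_dir, "collection", "mapillary-images")
```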
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/ia_stats.py
RENAMED

```diff
@@ -15,14 +15,14 @@ def search_ia_collections():
     """Search IA for all mapillary_downloader collections.

     Returns:
-        List of dicts with: identifier, description, item_size,
+        List of dicts with: identifier, description, item_size, collection
     """
     logger.info("Searching archive.org for mapillary_downloader collections...")

     url = "https://archive.org/advancedsearch.php"
     params = {
         "q": "mapillary_downloader:*",
-        "fl[]": ["identifier", "description", "item_size", "
+        "fl[]": ["identifier", "description", "item_size", "collection"],
         "rows": 10000,
         "output": "json",
     }
@@ -31,11 +31,29 @@
     data = response.json()

     collections = data["response"]["docs"]
-    logger.info(f"Found {len(collections)} collections on archive.org")
+    logger.info(f"Found {len(collections):,} collections on archive.org")

     return collections


+def fetch_uploader(identifier):
+    """Fetch uploader email from item metadata.
+
+    Args:
+        identifier: IA item identifier
+
+    Returns:
+        Uploader email or None
+    """
+    url = f"https://archive.org/metadata/{identifier}/metadata/uploader"
+    try:
+        response = http_get_with_retry(url, max_retries=2)
+        data = response.json()
+        return data.get("result")
+    except Exception:
+        return None
+
+
 def parse_collection_info(identifier):
     """Parse username, quality, webp from collection identifier.

@@ -104,19 +122,28 @@ def update_cache(ia_collections):

         image_count = extract_image_count(item.get("description"))

+        # Get IA collection(s) - can be a string or list
+        ia_collection = item.get("collection", [])
+        if isinstance(ia_collection, str):
+            ia_collection = [ia_collection]
+
+        # Preserve existing uploader if we have it cached
+        existing = cache.get(identifier, {})
+
         # Update cache entry
         cache[identifier] = {
             "size": size_bytes,
-            "uploader":
+            "uploader": existing.get("uploader"),  # Preserve cached uploader
             "images": image_count,
             "quality": info["quality"],
             "username": info["username"],
             "is_webp": info["is_webp"],
+            "ia_collection": ia_collection,
         }

     # Save updated cache
     safe_json_save(CACHE_FILE, cache)
-    logger.info(f"Updated cache with {len(cache)} collections")
+    logger.info(f"Updated cache with {len(cache):,} collections")

     return cache
@@ -168,11 +195,12 @@ def aggregate_stats(cache):
     return stats


-def format_stats(stats):
+def format_stats(stats, cache):
     """Format statistics as human-readable text.

     Args:
         stats: Dict from aggregate_stats()
+        cache: Dict of collection data

     Returns:
         Formatted string
@@ -212,6 +240,62 @@
     )

     output.append("")
+
+    # Find items not in mapillary-images and fetch uploaders
+    not_in_mapillary_images = []
+    need_uploader_fetch = []
+
+    for identifier, data in cache.items():
+        ia_collections = data.get("ia_collection", [])
+        if "mapillary-images" not in ia_collections:
+            not_in_mapillary_images.append(identifier)
+            if not data.get("uploader"):
+                need_uploader_fetch.append(identifier)
+
+    # Fetch missing uploaders
+    if need_uploader_fetch:
+        logger.info(f"Fetching uploader info for {len(need_uploader_fetch)} items...")
+        for i, identifier in enumerate(need_uploader_fetch, 1):
+            logger.info(f"  [{i}/{len(need_uploader_fetch)}] {identifier}")
+            uploader = fetch_uploader(identifier)
+            if uploader:
+                cache[identifier]["uploader"] = uploader
+        # Save updated cache with uploaders
+        safe_json_save(CACHE_FILE, cache)
+
+    # Group by uploader (only for items not in mapillary-images)
+    by_uploader = {}
+    for identifier in not_in_mapillary_images:
+        uploader = cache[identifier].get("uploader") or "unknown"
+        if uploader not in by_uploader:
+            by_uploader[uploader] = {"items": [], "images": 0, "size": 0}
+        by_uploader[uploader]["items"].append(identifier)
+        by_uploader[uploader]["images"] += cache[identifier].get("images") or 0
+        by_uploader[uploader]["size"] += cache[identifier].get("size") or 0
+
+    # By uploader (only those with items outside mapillary-images)
+    if by_uploader:
+        output.append("Uploaders with items outside mapillary-images:")
+        output.append("-" * 70)
+        for uploader, data in sorted(by_uploader.items(), key=lambda x: -len(x[1]["items"])):
+            output.append(
+                f"  {uploader}: {len(data['items'])} items, " f"{data['images']:,} images, {format_size(data['size'])}"
+            )
+        output.append("")
+
+    # Items not in mapillary-images, grouped by uploader
+    if not_in_mapillary_images:
+        output.append(f"Items NOT in mapillary-images ({len(not_in_mapillary_images)}):")
+        output.append("-" * 70)
+        for uploader, data in sorted(by_uploader.items(), key=lambda x: x[0].lower()):
+            output.append(f"{uploader}:")
+            for identifier in sorted(data["items"]):
+                output.append(identifier)
+            output.append("")
+    else:
+        output.append("All items are in mapillary-images collection!")
+        output.append("")
+
     output.append(f"Cache: {CACHE_FILE}")

     return "\n".join(output)
@@ -239,4 +323,4 @@ def show_stats(refresh=True):
         return

     stats = aggregate_stats(cache)
-    print(format_stats(stats))
+    print(format_stats(stats, cache))
```
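The new `fetch_uploader` relies on a handy property of archive.org's metadata API: a sub-path of `/metadata/{identifier}` addresses a single field and comes back wrapped as `{"result": ...}`. A standalone version using plain `requests` in place of the project's `http_get_with_retry` wrapper (the identifier below is hypothetical):

```python
import requests

def fetch_uploader(identifier: str):
    """Fetch the uploader field for one archive.org item, or None on any failure."""
    url = f"https://archive.org/metadata/{identifier}/metadata/uploader"
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json().get("result")
    except (requests.RequestException, ValueError):
        return None

print(fetch_uploader("example-item-identifier"))  # hypothetical identifier
```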
{mapillary_downloader-0.7.0 → mapillary_downloader-0.7.3}/src/mapillary_downloader/worker_pool.py
RENAMED

```diff
@@ -17,17 +17,15 @@ class AdaptiveWorkerPool:
     - If throughput plateauing/decreasing: reduce workers
     """

-    def __init__(self, worker_func,
+    def __init__(self, worker_func, max_workers=16, monitoring_interval=10):
         """Initialize adaptive worker pool.

         Args:
             worker_func: Function to run in each worker (must accept work_queue, result_queue)
-            min_workers: Minimum number of workers
             max_workers: Maximum number of workers
             monitoring_interval: Seconds between throughput checks
         """
         self.worker_func = worker_func
-        self.min_workers = min_workers
         self.max_workers = max_workers
         self.monitoring_interval = monitoring_interval
@@ -37,7 +35,8 @@

         # Worker management
         self.workers = []
-
+        # Start at 25% of max_workers (at least 1)
+        self.current_workers = max(1, int(max_workers * 0.25))

         # Throughput monitoring
         self.throughput_history = deque(maxlen=5)  # Last 5 measurements
@@ -50,7 +49,7 @@
     def start(self):
         """Start the worker pool."""
         self.running = True
-        logger.
+        logger.debug(f"Starting worker pool with {self.current_workers} workers")

         for i in range(self.current_workers):
             self._add_worker(i)
@@ -100,10 +99,7 @@
         self.last_processed = total_processed
         self.last_check_time = now

-        logger.info(
-            f"Throughput: {throughput:.1f} items/s (workers: {current_workers}/{self.max_workers}, "
-            f"history: {len(self.throughput_history)} measurements)"
-        )
+        logger.info(f"Throughput: {throughput:.1f} items/s (workers: {current_workers}/{self.max_workers})")

         # Need at least 2 measurements to calculate gain per worker
         if len(self.throughput_history) < 2:
@@ -182,12 +178,10 @@
                 self.current_workers += 1
                 added += 1
             logger.info(f"Ramping up: added {added} workers (now {self.current_workers}/{self.max_workers})")
-        else:
-            logger.info(f"At optimal worker count: {current_workers} workers, {current_throughput:.1f} items/s")

     def shutdown(self, timeout=2):
         """Shutdown the worker pool gracefully."""
-        logger.
+        logger.debug("Shutting down worker pool")
         self.running = False

         # Terminate all workers immediately (they ignore SIGINT so we need to be forceful)
@@ -198,5 +192,3 @@
         # Give them a brief moment to exit
         for p in self.workers:
             p.join(timeout=timeout)
-
-        logger.info("Worker pool shutdown complete")
```
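The pool now seeds itself at 25% of `max_workers` and ramps up while throughput keeps improving; the measurement loop is only partially visible in this diff. A toy model of the idea (the two-sample gain estimate and thresholds are assumptions, not the project's exact heuristic):

```python
from collections import deque

class ThroughputScaler:
    """Toy model of adaptive scaling: add workers while each addition still pays off."""

    def __init__(self, max_workers=16):
        self.max_workers = max_workers
        self.current_workers = max(1, int(max_workers * 0.25))  # start at 25% of max
        self.history = deque(maxlen=5)  # recent (workers, items/s) samples

    def record(self, workers, throughput):
        self.history.append((workers, throughput))

    def should_scale_up(self):
        if len(self.history) < 2:
            return False  # need two samples to estimate gain per worker
        (w0, t0), (w1, t1) = self.history[-2], self.history[-1]
        if w1 <= w0 or self.current_workers >= self.max_workers:
            return False
        gain_per_worker = (t1 - t0) / (w1 - w0)
        return gain_per_worker > 0  # keep ramping while extra workers still help

scaler = ThroughputScaler(max_workers=16)
scaler.record(4, 10.0)
scaler.record(6, 14.0)
print(scaler.should_scale_up())  # True: each added worker gained ~2 items/s
```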
The remaining files (LICENSE.md, __init__.py, client.py, exif_writer.py, ia_check.py, logging_config.py, metadata_reader.py, tar_sequences.py, utils.py, webp_converter.py, worker.py) were renamed with the version bump but have no content changes.