PyPI - mapillary-downloader - Versions diffs - 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

mapillary-downloader 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

mapillary_downloader/tar_sequences.py CHANGED Viewed

@@ -23,51 +23,43 @@ def tar_sequence_directories(collection_dir):
         logger.error(f"Collection directory not found: {collection_dir}")
         return 0, 0
-    # Find all sequence directories (skip special dirs)
+    # Find all bucket directories (skip special dirs)
+    # Now we tar entire bucket dirs (e.g., a/, b/, etc) to get ~62 tar files
     skip_dirs = {".meta", "__pycache__"}
-    sequence_dirs = []
+    bucket_dirs = []
     for item in collection_dir.iterdir():
         if item.is_dir() and item.name not in skip_dirs:
-            sequence_dirs.append(item)
+            # Check if this is a bucket dir (single char)
+            if len(item.name) == 1:
+                bucket_dirs.append(item)
-    if not sequence_dirs:
-        logger.info("No sequence directories to tar")
+    if not bucket_dirs:
+        logger.info("No bucket directories to tar")
         return 0, 0
-    logger.info(f"Tarring {len(sequence_dirs)} sequence directories...")
+    logger.info(f"Tarring {len(bucket_dirs)} bucket directories...")
     tarred_count = 0
     total_files = 0
     total_tar_bytes = 0
-    for seq_dir in sequence_dirs:
-        seq_name = seq_dir.name
-        tar_path = collection_dir / f"{seq_name}.tar"
+    for bucket_dir in bucket_dirs:
+        bucket_name = bucket_dir.name
+        tar_path = collection_dir / f"{bucket_name}.tar"
-        # Handle naming collision - find next available name
-        counter = 1
-        while tar_path.exists():
-            counter += 1
-            tar_path = collection_dir / f"{seq_name}.{counter}.tar"
-        # Count files in sequence
-        files = list(seq_dir.glob("*"))
-        file_count = len([f for f in files if f.is_file()])
+        # Count files in bucket
+        files_to_tar = sorted([f for f in bucket_dir.rglob("*") if f.is_file()], key=lambda x: str(x))
+        file_count = len(files_to_tar)
         if file_count == 0:
-            logger.warning(f"Skipping empty directory: {seq_name}")
+            logger.warning(f"Skipping empty bucket directory: {bucket_name}")
             continue
         try:
-            # Create reproducible uncompressed tar (WebP already compressed)
-            # Sort files by name for deterministic ordering
-            files_to_tar = sorted([f for f in seq_dir.rglob("*") if f.is_file()], key=lambda x: x.name)
-            if not files_to_tar:
-                logger.warning(f"Skipping directory with no files: {seq_name}")
-                continue
+            logger.info(f"Tarring bucket '{bucket_name}' ({file_count} files)...")
+            # Create reproducible uncompressed tar (WebP already compressed)
             with tarfile.open(tar_path, "w") as tar:
                 for file_path in files_to_tar:
                     # Get path relative to collection_dir for tar archive
@@ -92,33 +84,32 @@ def tar_sequence_directories(collection_dir):
                 tar_size = tar_path.stat().st_size
                 total_tar_bytes += tar_size
-                # Remove original directory
-                for file in seq_dir.rglob("*"):
+                # Remove original bucket directory
+                for file in bucket_dir.rglob("*"):
                     if file.is_file():
                         file.unlink()
                 # Remove empty subdirs and main dir
-                for subdir in list(seq_dir.rglob("*")):
+                for subdir in list(bucket_dir.rglob("*")):
                     if subdir.is_dir():
                         try:
                             subdir.rmdir()
                         except OSError:
                             pass  # Not empty yet
-                seq_dir.rmdir()
+                bucket_dir.rmdir()
                 tarred_count += 1
                 total_files += file_count
-                if tarred_count % 10 == 0:
-                    logger.info(f"Tarred {tarred_count}/{len(sequence_dirs)} sequences...")
+                logger.info(f"Tarred bucket '{bucket_name}': {file_count:,} files, {format_size(tar_size)}")
             else:
                 logger.error(f"Tar file empty or not created: {tar_path}")
                 if tar_path.exists():
                     tar_path.unlink()
         except Exception as e:
-            logger.error(f"Error tarring {seq_name}: {e}")
+            logger.error(f"Error tarring bucket {bucket_name}: {e}")
             if tar_path.exists():
                 tar_path.unlink()

mapillary_downloader/worker.py CHANGED Viewed

@@ -69,11 +69,13 @@ def download_and_convert_image(image_data, output_dir, quality, convert_webp, se
         if not image_url:
             return (image_id, 0, False, f"No {quality} URL")
-        # Determine final output directory
+        # Determine final output directory - organize by first char of sequence ID
         output_dir = Path(output_dir)
         sequence_id = image_data.get("sequence")
         if sequence_id:
-            img_dir = output_dir / sequence_id
+            # Use first character as bucket (gives us ~62 dirs instead of millions)
+            first_char = sequence_id[0]
+            img_dir = output_dir / first_char / sequence_id
             img_dir.mkdir(parents=True, exist_ok=True)
         else:
             img_dir = output_dir

{mapillary_downloader-0.5.2.dist-info → mapillary_downloader-0.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.5.2
+Version: 0.6.0
 Summary: Download your Mapillary data before it's gone
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10
@@ -66,8 +66,8 @@ mapillary-downloader --output ./downloads USERNAME1
 | `--quality`     | 256, 1024, 2048 or original                  | `original`         |
 | `--bbox`        | `west,south,east,north`                      | `None`             |
 | `--no-webp`     | Don't convert to WebP                        | `False`            |
-| `--workers`     | Number of parallel download workers          | Half of CPU count  |
-| `--no-tar`      | Don't tar sequence directories               | `False`            |
+| `--max-workers` | Maximum number of parallel download workers  | `128`              |
+| `--no-tar`      | Don't tar bucket directories                 | `False`            |
 | `--no-check-ia` | Don't check if exists on Internet Archive    | `False`            |
 The downloader will:
@@ -98,11 +98,23 @@ To disable WebP conversion and keep original JPEGs, use `--no-webp`:
 mapillary-downloader --no-webp USERNAME
 ```
-## Sequence Tarball Creation
+## Tarballs
-By default, sequence directories are automatically tarred after download because
-if they weren't, you'd spend more time setting up upload metadata than actually
-uploading files to IA.
+Images are organized by sequence ID, bucketed by the first character of the
+sequence to reduce directory count:
+```
+mapillary-username-quality/
+  a/
+    abc123/
+      image1.webp
+      image2.webp
+```
+By default, these bucket directories are automatically tarred after download
+(resulting in `a.tar`, `b.tar`, etc. - about 62 tar files total). This is done
+because large collections with millions of images would otherwise create hundreds
+of thousands of tiny tars, and anger the archive gods.
 To keep individual files instead of creating tars, use the `--no-tar` flag.

{mapillary_downloader-0.5.2.dist-info → mapillary_downloader-0.6.0.dist-info}/RECORD RENAMED Viewed

@@ -7,13 +7,13 @@ mapillary_downloader/ia_check.py,sha256=L2MEbG_KmlAd5NLmo2HQkO8HWvRN0brE5wXXoyNM
 mapillary_downloader/ia_meta.py,sha256=78rcybHIPnQDsF02KGj6RYmDXzYzrU8sdVx4Q9Y0sfI,6266
 mapillary_downloader/logging_config.py,sha256=Z-wNq34nt7aIhJWdeKc1feTY46P9-Or7HtiX7eUFjEI,2324
 mapillary_downloader/metadata_reader.py,sha256=Re-HN0Vfc7Hs1eOut7uOoW7jWJ2PIbKoNzC7Ak3ah5o,4933
-mapillary_downloader/tar_sequences.py,sha256=mqs5p3N7osV_bxTkw6i34GVmxCBBEbIiKKxeh-fWNdU,4430
+mapillary_downloader/tar_sequences.py,sha256=758yVQGSLC_x8tT7h1qzAdo8b-4OmARZYseNacM1Nv8,4223
 mapillary_downloader/utils.py,sha256=yzVgS1mwsklDAqrimaFafgTTXtRYQUbKP98Xgh9d2KA,1174
 mapillary_downloader/webp_converter.py,sha256=vYLLQxDmdnqRz0nm7wXwRUd4x9mQZNah-DrncpA8sNs,1901
-mapillary_downloader/worker.py,sha256=n9m6PzSjlLOOYZJd9j1vH-2ag9aOeNndfgRlunzI14s,4637
+mapillary_downloader/worker.py,sha256=Q82Q1mnTL_CUwNXum9GAg2Fz40dolh_gByDkeN72p9o,4814
 mapillary_downloader/worker_pool.py,sha256=iGRq5uFwBNNVQnI4vEjbKHkbKTaEVCdmvMvXcRGuDMg,8203
-mapillary_downloader-0.5.2.dist-info/entry_points.txt,sha256=PdYtxOXHMJrUhmiPO4G-F98VuhUI4MN9D_T4KPrVZ5w,75
-mapillary_downloader-0.5.2.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
-mapillary_downloader-0.5.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-mapillary_downloader-0.5.2.dist-info/METADATA,sha256=PHO4jDVxqsIo9Hs9GX3J2Cfnfc8gy_PI6xhssE9jrMk,4982
-mapillary_downloader-0.5.2.dist-info/RECORD,,
+mapillary_downloader-0.6.0.dist-info/entry_points.txt,sha256=PdYtxOXHMJrUhmiPO4G-F98VuhUI4MN9D_T4KPrVZ5w,75
+mapillary_downloader-0.6.0.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
+mapillary_downloader-0.6.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+mapillary_downloader-0.6.0.dist-info/METADATA,sha256=dvPNrWfk-wB_xIFoowuIH5-17Oib14hpHpik4FpqC7k,5277
+mapillary_downloader-0.6.0.dist-info/RECORD,,

{mapillary_downloader-0.5.2.dist-info → mapillary_downloader-0.6.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{mapillary_downloader-0.5.2.dist-info → mapillary_downloader-0.6.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{mapillary_downloader-0.5.2.dist-info → mapillary_downloader-0.6.0.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

mapillary-downloader 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

mapillary-downloader 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl