mapillary-downloader 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,7 +26,7 @@ def main():
26
26
  default=os.environ.get("MAPILLARY_TOKEN"),
27
27
  help="Mapillary API access token (or set MAPILLARY_TOKEN env var)",
28
28
  )
29
- parser.add_argument("--username", required=True, help="Mapillary username")
29
+ parser.add_argument("usernames", nargs="+", help="Mapillary username(s) to download")
30
30
  parser.add_argument("--output", default="./mapillary_data", help="Output directory (default: ./mapillary_data)")
31
31
  parser.add_argument(
32
32
  "--quality",
@@ -36,9 +36,9 @@ def main():
36
36
  )
37
37
  parser.add_argument("--bbox", help="Bounding box: west,south,east,north")
38
38
  parser.add_argument(
39
- "--webp",
39
+ "--no-webp",
40
40
  action="store_true",
41
- help="Convert images to WebP format (saves ~70%% disk space, requires cwebp binary)",
41
+ help="Don't convert to WebP (WebP conversion is enabled by default, saves ~70%% disk space)",
42
42
  )
43
43
  parser.add_argument(
44
44
  "--workers",
@@ -51,6 +51,11 @@ def main():
51
51
  action="store_true",
52
52
  help="Don't tar sequence directories (keep individual files)",
53
53
  )
54
+ parser.add_argument(
55
+ "--no-check-ia",
56
+ action="store_true",
57
+ help="Don't check if collection exists on Internet Archive before downloading",
58
+ )
54
59
 
55
60
  args = parser.parse_args()
56
61
 
@@ -69,19 +74,41 @@ def main():
69
74
  logger.error("Error: bbox must be four comma-separated numbers")
70
75
  sys.exit(1)
71
76
 
72
- # Check for cwebp binary if WebP conversion is requested
73
- if args.webp:
77
+ # WebP is enabled by default, disabled with --no-webp
78
+ convert_webp = not args.no_webp
79
+
80
+ # Check for cwebp binary if WebP conversion is enabled
81
+ if convert_webp:
74
82
  if not check_cwebp_available():
75
- logger.error("Error: cwebp binary not found. Install webp package (e.g., apt install webp)")
83
+ logger.error(
84
+ "Error: cwebp binary not found. Install webp package (e.g., apt install webp) or use --no-webp"
85
+ )
76
86
  sys.exit(1)
77
87
  logger.info("WebP conversion enabled - images will be converted after download")
78
88
 
79
89
  try:
80
90
  client = MapillaryClient(args.token)
81
- downloader = MapillaryDownloader(
82
- client, args.output, args.username, args.quality, workers=args.workers, tar_sequences=not args.no_tar
83
- )
84
- downloader.download_user_data(bbox=bbox, convert_webp=args.webp)
91
+
92
+ # Process each username
93
+ for username in args.usernames:
94
+ logger.info("")
95
+ logger.info("=" * 60)
96
+ logger.info(f"Processing user: {username}")
97
+ logger.info("=" * 60)
98
+ logger.info("")
99
+
100
+ downloader = MapillaryDownloader(
101
+ client,
102
+ args.output,
103
+ username,
104
+ args.quality,
105
+ workers=args.workers,
106
+ tar_sequences=not args.no_tar,
107
+ convert_webp=convert_webp,
108
+ check_ia=not args.no_check_ia,
109
+ )
110
+ downloader.download_user_data(bbox=bbox, convert_webp=convert_webp)
111
+
85
112
  except KeyboardInterrupt:
86
113
  logger.info("\nInterrupted by user")
87
114
  sys.exit(1)
@@ -1,13 +1,16 @@
1
1
  """Main downloader logic."""
2
2
 
3
+ import gzip
3
4
  import json
4
5
  import logging
5
6
  import os
7
+ import shutil
6
8
  import time
7
9
  from pathlib import Path
8
10
  from concurrent.futures import ProcessPoolExecutor, as_completed
9
11
  from mapillary_downloader.utils import format_size, format_time
10
12
  from mapillary_downloader.ia_meta import generate_ia_metadata
13
+ from mapillary_downloader.ia_check import check_ia_exists
11
14
  from mapillary_downloader.worker import download_and_convert_image
12
15
  from mapillary_downloader.tar_sequences import tar_sequence_directories
13
16
  from mapillary_downloader.logging_config import add_file_handler
@@ -15,19 +18,48 @@ from mapillary_downloader.logging_config import add_file_handler
15
18
  logger = logging.getLogger("mapillary_downloader")
16
19
 
17
20
 
21
def get_cache_dir():
    """Get XDG cache directory for staging downloads.

    Uses ``$XDG_CACHE_HOME`` when it is set to an absolute path, otherwise
    falls back to ``~/.cache``. The ``mapillary_downloader`` subdirectory is
    created (including parents) if it does not exist yet.

    Returns:
        Path to cache directory for mapillary_downloader
    """
    xdg_cache = os.environ.get("XDG_CACHE_HOME")
    # The XDG Base Directory spec requires absolute paths and tells
    # implementations to ignore relative values; honouring one would put the
    # staging area somewhere relative to the current working directory.
    if xdg_cache and os.path.isabs(xdg_cache):
        cache_dir = Path(xdg_cache)
    else:
        cache_dir = Path.home() / ".cache"

    mapillary_cache = cache_dir / "mapillary_downloader"
    mapillary_cache.mkdir(parents=True, exist_ok=True)
    return mapillary_cache
36
+
37
+
18
38
  class MapillaryDownloader:
19
39
  """Handles downloading Mapillary data for a user."""
20
40
 
21
- def __init__(self, client, output_dir, username=None, quality=None, workers=None, tar_sequences=True):
41
+ def __init__(
42
+ self,
43
+ client,
44
+ output_dir,
45
+ username=None,
46
+ quality=None,
47
+ workers=None,
48
+ tar_sequences=True,
49
+ convert_webp=False,
50
+ check_ia=True,
51
+ ):
22
52
  """Initialize the downloader.
23
53
 
24
54
  Args:
25
55
  client: MapillaryClient instance
26
- output_dir: Base directory to save downloads
56
+ output_dir: Base directory to save downloads (final destination)
27
57
  username: Mapillary username (for collection directory)
28
58
  quality: Image quality (for collection directory)
29
59
  workers: Number of parallel workers (default: half of cpu_count)
30
60
  tar_sequences: Whether to tar sequence directories after download (default: True)
61
+ convert_webp: Whether to convert images to WebP (affects collection name)
62
+ check_ia: Whether to check if collection exists on Internet Archive (default: True)
31
63
  """
32
64
  self.client = client
33
65
  self.base_output_dir = Path(output_dir)
@@ -35,18 +67,37 @@ class MapillaryDownloader:
35
67
  self.quality = quality
36
68
  self.workers = workers if workers is not None else max(1, os.cpu_count() // 2)
37
69
  self.tar_sequences = tar_sequences
70
+ self.convert_webp = convert_webp
71
+ self.check_ia = check_ia
38
72
 
39
- # If username and quality provided, create collection directory
73
+ # Determine collection name
40
74
  if username and quality:
41
75
  collection_name = f"mapillary-{username}-{quality}"
42
- self.output_dir = self.base_output_dir / collection_name
76
+ if convert_webp:
77
+ collection_name += "-webp"
78
+ self.collection_name = collection_name
43
79
  else:
44
- self.output_dir = self.base_output_dir
80
+ self.collection_name = None
45
81
 
82
+ # Set up staging directory in cache
83
+ cache_dir = get_cache_dir()
84
+ if self.collection_name:
85
+ self.staging_dir = cache_dir / self.collection_name
86
+ self.final_dir = self.base_output_dir / self.collection_name
87
+ else:
88
+ self.staging_dir = cache_dir / "download"
89
+ self.final_dir = self.base_output_dir
90
+
91
+ # Work in staging directory during download
92
+ self.output_dir = self.staging_dir
46
93
  self.output_dir.mkdir(parents=True, exist_ok=True)
47
94
 
48
- # Set up file logging for archival
49
- log_file = self.output_dir / "download.log"
95
+ logger.info(f"Staging directory: {self.staging_dir}")
96
+ logger.info(f"Final destination: {self.final_dir}")
97
+
98
+ # Set up file logging for archival with timestamp for incremental runs
99
+ timestamp = time.strftime("%Y%m%d-%H%M%S")
100
+ log_file = self.output_dir / f"download.log.{timestamp}"
50
101
  add_file_handler(log_file)
51
102
  logger.info(f"Logging to: {log_file}")
52
103
 
@@ -80,6 +131,18 @@ class MapillaryDownloader:
80
131
  if not self.username or not self.quality:
81
132
  raise ValueError("Username and quality must be provided during initialization")
82
133
 
134
+ # Check if collection already exists on Internet Archive
135
+ if self.check_ia and self.collection_name:
136
+ logger.info(f"Checking if {self.collection_name} exists on Internet Archive...")
137
+ if check_ia_exists(self.collection_name):
138
+ logger.info("Collection already exists on archive.org, skipping download")
139
+ return
140
+
141
+ # Check if collection already exists in final destination
142
+ if self.final_dir.exists():
143
+ logger.info(f"Collection already exists at {self.final_dir}, skipping download")
144
+ return
145
+
83
146
  quality_field = f"thumb_{self.quality}_url"
84
147
 
85
148
  logger.info(f"Downloading images for user: {self.username}")
@@ -174,9 +237,38 @@ class MapillaryDownloader:
174
237
  if self.tar_sequences:
175
238
  tar_sequence_directories(self.output_dir)
176
239
 
240
+ # Gzip metadata.jsonl to save space
241
+ if self.metadata_file.exists():
242
+ logger.info("Compressing metadata.jsonl...")
243
+ original_size = self.metadata_file.stat().st_size
244
+ gzipped_file = self.metadata_file.with_suffix(".jsonl.gz")
245
+
246
+ with open(self.metadata_file, "rb") as f_in:
247
+ with gzip.open(gzipped_file, "wb", compresslevel=9) as f_out:
248
+ shutil.copyfileobj(f_in, f_out)
249
+
250
+ compressed_size = gzipped_file.stat().st_size
251
+ self.metadata_file.unlink()
252
+
253
+ savings = 100 * (1 - compressed_size / original_size)
254
+ logger.info(
255
+ f"Compressed metadata: {format_size(original_size)} → {format_size(compressed_size)} "
256
+ f"({savings:.1f}% savings)"
257
+ )
258
+
177
259
  # Generate IA metadata
178
260
  generate_ia_metadata(self.output_dir)
179
261
 
262
+ # Move from staging to final destination
263
+ logger.info("Moving collection from staging to final destination...")
264
+ if self.final_dir.exists():
265
+ logger.warning(f"Destination already exists, removing: {self.final_dir}")
266
+ shutil.rmtree(self.final_dir)
267
+
268
+ self.final_dir.parent.mkdir(parents=True, exist_ok=True)
269
+ shutil.move(str(self.staging_dir), str(self.final_dir))
270
+ logger.info(f"Collection moved to: {self.final_dir}")
271
+
180
272
  def _download_images_parallel(self, images, convert_webp):
181
273
  """Download images in parallel using worker pool.
182
274
 
@@ -190,6 +282,7 @@ class MapillaryDownloader:
190
282
  downloaded_count = 0
191
283
  total_bytes = 0
192
284
  failed_count = 0
285
+ batch_start_time = time.time()
193
286
 
194
287
  with ProcessPoolExecutor(max_workers=self.workers) as executor:
195
288
  # Submit all tasks
@@ -215,7 +308,16 @@ class MapillaryDownloader:
215
308
  total_bytes += bytes_dl
216
309
 
217
310
  if downloaded_count % 10 == 0:
218
- logger.info(f"Downloaded: {downloaded_count}/{len(images)} ({format_size(total_bytes)})")
311
+ # Calculate ETA
312
+ elapsed = time.time() - batch_start_time
313
+ rate = downloaded_count / elapsed if elapsed > 0 else 0
314
+ remaining = len(images) - downloaded_count
315
+ eta_seconds = remaining / rate if rate > 0 else 0
316
+
317
+ logger.info(
318
+ f"Downloaded: {downloaded_count}/{len(images)} ({format_size(total_bytes)}) "
319
+ f"- ETA: {format_time(eta_seconds)}"
320
+ )
219
321
  self._save_progress()
220
322
  else:
221
323
  failed_count += 1
@@ -0,0 +1,33 @@
1
+ """Check if collections exist on Internet Archive."""
2
+
3
+ import logging
4
+ import requests
5
+
6
+ logger = logging.getLogger("mapillary_downloader")
7
+
8
+
9
def check_ia_exists(collection_name):
    """Check if a collection exists on Internet Archive.

    Requests ``https://archive.org/metadata/<collection_name>`` and inspects
    the JSON reply. This is a best-effort check: any failure to fetch or
    parse the reply is logged and treated as "does not exist", because
    downloading again is preferable to wrongly skipping a collection.

    Args:
        collection_name: Name of the collection (e.g., mapillary-username-original-webp)

    Returns:
        Boolean indicating if the collection exists on IA
    """
    # IA identifier format
    ia_url = f"https://archive.org/metadata/{collection_name}"

    try:
        response = requests.get(ia_url, timeout=10)
        # Anything other than a 200 is treated as "item not found"
        if response.status_code != 200:
            return False
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers requests releases whose Response.json() raises a
        # plain JSON decode error instead of a RequestException subclass.
        logger.warning(f"Failed to check IA for {collection_name}: {e}")
        # On error, assume it doesn't exist (better to download than skip)
        return False

    # A 200 alone is not proof: require a "metadata" section and reject
    # items flagged is_dark. Non-dict payloads cannot describe an item.
    if not isinstance(data, dict):
        return False
    return "metadata" in data and data.get("is_dark") is not True
@@ -1,5 +1,6 @@
1
1
  """Internet Archive metadata generation for Mapillary collections."""
2
2
 
3
+ import gzip
3
4
  import json
4
5
  import logging
5
6
  import re
@@ -14,22 +15,22 @@ def parse_collection_name(directory):
14
15
  """Parse username and quality from directory name.
15
16
 
16
17
  Args:
17
- directory: Path to collection directory (e.g., mapillary-username-original)
18
+ directory: Path to collection directory (e.g., mapillary-username-original or mapillary-username-original-webp)
18
19
 
19
20
  Returns:
20
21
  Tuple of (username, quality) or (None, None) if parsing fails
21
22
  """
22
- match = re.match(r"mapillary-(.+)-(256|1024|2048|original)$", Path(directory).name)
23
+ match = re.match(r"mapillary-(.+)-(256|1024|2048|original)(?:-webp)?$", Path(directory).name)
23
24
  if match:
24
25
  return match.group(1), match.group(2)
25
26
  return None, None
26
27
 
27
28
 
28
29
  def get_date_range(metadata_file):
29
- """Get first and last captured_at dates from metadata.jsonl.
30
+ """Get first and last captured_at dates from metadata.jsonl.gz.
30
31
 
31
32
  Args:
32
- metadata_file: Path to metadata.jsonl file
33
+ metadata_file: Path to metadata.jsonl.gz file
33
34
 
34
35
  Returns:
35
36
  Tuple of (first_date, last_date) as ISO format strings, or (None, None)
@@ -38,7 +39,7 @@ def get_date_range(metadata_file):
38
39
  return None, None
39
40
 
40
41
  timestamps = []
41
- with open(metadata_file) as f:
42
+ with gzip.open(metadata_file, "rt") as f:
42
43
  for line in f:
43
44
  if line.strip():
44
45
  data = json.loads(line)
@@ -59,10 +60,10 @@ def get_date_range(metadata_file):
59
60
 
60
61
 
61
62
  def count_images(metadata_file):
62
- """Count number of images in metadata.jsonl.
63
+ """Count number of images in metadata.jsonl.gz.
63
64
 
64
65
  Args:
65
- metadata_file: Path to metadata.jsonl file
66
+ metadata_file: Path to metadata.jsonl.gz file
66
67
 
67
68
  Returns:
68
69
  Number of images
@@ -71,7 +72,7 @@ def count_images(metadata_file):
71
72
  return 0
72
73
 
73
74
  count = 0
74
- with open(metadata_file) as f:
75
+ with gzip.open(metadata_file, "rt") as f:
75
76
  for line in f:
76
77
  if line.strip():
77
78
  count += 1
@@ -112,9 +113,9 @@ def generate_ia_metadata(collection_dir):
112
113
  logger.error(f"Could not parse username/quality from directory: {collection_dir.name}")
113
114
  return False
114
115
 
115
- metadata_file = collection_dir / "metadata.jsonl"
116
+ metadata_file = collection_dir / "metadata.jsonl.gz"
116
117
  if not metadata_file.exists():
117
- logger.error(f"metadata.jsonl not found in {collection_dir}")
118
+ logger.error(f"metadata.jsonl.gz not found in {collection_dir}")
118
119
  return False
119
120
 
120
121
  logger.info(f"Generating IA metadata for {collection_dir.name}...")
@@ -127,6 +128,10 @@ def generate_ia_metadata(collection_dir):
127
128
  logger.warning("Could not determine date range from metadata")
128
129
  first_date = last_date = "unknown"
129
130
 
131
+ # Detect WebP conversion and tarring
132
+ is_webp = "-webp" in collection_dir.name
133
+ has_tars = len(list(collection_dir.glob("*.tar"))) > 0
134
+
130
135
  # Create .meta directory
131
136
  meta_dir = collection_dir / ".meta"
132
137
  meta_dir.mkdir(exist_ok=True)
@@ -138,14 +143,31 @@ def generate_ia_metadata(collection_dir):
138
143
  f"Mapillary images by {username}",
139
144
  )
140
145
 
146
+ # Build resolution string
147
+ if quality == "original":
148
+ resolution_str = "original resolution"
149
+ else:
150
+ resolution_str = f"{quality}px resolution"
151
+
152
+ # Build description with processing details
141
153
  description = (
142
154
  f"Street-level imagery from Mapillary user '{username}'. "
143
- f"Contains {image_count:,} images captured between {first_date} and {last_date}. "
144
- f"Images are organized by sequence ID and include EXIF metadata with GPS coordinates, "
145
- f"camera information, and compass direction.\n\n"
146
- f"Downloaded using mapillary_downloader (https://bitplane.net/dev/python/mapillary_downloader/). "
147
- f"Uploaded using rip (https://bitplane.net/dev/sh/rip)."
155
+ f"Contains {image_count:,} images in {resolution_str} captured between {first_date} and {last_date}."
148
156
  )
157
+
158
+ if has_tars:
159
+ description += " Sequences have been individually tarred."
160
+
161
+ if is_webp:
162
+ description += " Images were recompressed with WebP."
163
+
164
+ description += (
165
+ " Images are organized by sequence ID and include EXIF metadata with GPS coordinates, "
166
+ "camera information, and compass direction.\n\n"
167
+ "Downloaded using mapillary_downloader (https://bitplane.net/dev/python/mapillary_downloader/). "
168
+ "Uploaded using rip (https://bitplane.net/dev/sh/rip)."
169
+ )
170
+
149
171
  write_meta_tag(meta_dir, "description", description)
150
172
 
151
173
  # Subject tags
@@ -1,8 +1,9 @@
1
1
  """Tar sequence directories for efficient Internet Archive uploads."""
2
2
 
3
3
  import logging
4
- import subprocess
4
+ import tarfile
5
5
  from pathlib import Path
6
+ from mapillary_downloader.utils import format_size
6
7
 
7
8
  logger = logging.getLogger("mapillary_downloader")
8
9
 
@@ -38,6 +39,7 @@ def tar_sequence_directories(collection_dir):
38
39
 
39
40
  tarred_count = 0
40
41
  total_files = 0
42
+ total_tar_bytes = 0
41
43
 
42
44
  for seq_dir in sequence_dirs:
43
45
  seq_name = seq_dir.name
@@ -58,22 +60,38 @@ def tar_sequence_directories(collection_dir):
58
60
  continue
59
61
 
60
62
  try:
61
- # Create uncompressed tar (WebP already compressed)
62
- # Use -C to change directory so paths in tar are relative
63
- # Use -- to prevent sequence IDs starting with - from being interpreted as options
64
- result = subprocess.run(
65
- ["tar", "-cf", str(tar_path), "-C", str(collection_dir), "--", seq_name],
66
- capture_output=True,
67
- text=True,
68
- timeout=300, # 5 minute timeout per tar
69
- )
70
-
71
- if result.returncode != 0:
72
- logger.error(f"Failed to tar {seq_name}: {result.stderr}")
63
+ # Create reproducible uncompressed tar (WebP already compressed)
64
+ # Sort files by name for deterministic ordering
65
+ files_to_tar = sorted([f for f in seq_dir.rglob("*") if f.is_file()], key=lambda x: x.name)
66
+
67
+ if not files_to_tar:
68
+ logger.warning(f"Skipping directory with no files: {seq_name}")
73
69
  continue
74
70
 
71
+ with tarfile.open(tar_path, "w") as tar:
72
+ for file_path in files_to_tar:
73
+ # Get path relative to collection_dir for tar archive
74
+ arcname = file_path.relative_to(collection_dir)
75
+
76
+ # Create TarInfo for reproducibility
77
+ tarinfo = tar.gettarinfo(str(file_path), arcname=str(arcname))
78
+
79
+ # Normalize for reproducibility across platforms
80
+ tarinfo.uid = 0
81
+ tarinfo.gid = 0
82
+ tarinfo.uname = ""
83
+ tarinfo.gname = ""
84
+ # mtime already set on file by worker, preserve it
85
+
86
+ # Add file to tar
87
+ with open(file_path, "rb") as f:
88
+ tar.addfile(tarinfo, f)
89
+
75
90
  # Verify tar was created and has size
76
91
  if tar_path.exists() and tar_path.stat().st_size > 0:
92
+ tar_size = tar_path.stat().st_size
93
+ total_tar_bytes += tar_size
94
+
77
95
  # Remove original directory
78
96
  for file in seq_dir.rglob("*"):
79
97
  if file.is_file():
@@ -99,14 +117,12 @@ def tar_sequence_directories(collection_dir):
99
117
  if tar_path.exists():
100
118
  tar_path.unlink()
101
119
 
102
- except subprocess.TimeoutExpired:
103
- logger.error(f"Timeout tarring {seq_name}")
104
- if tar_path.exists():
105
- tar_path.unlink()
106
120
  except Exception as e:
107
121
  logger.error(f"Error tarring {seq_name}: {e}")
108
122
  if tar_path.exists():
109
123
  tar_path.unlink()
110
124
 
111
- logger.info(f"Tarred {tarred_count} sequences ({total_files:,} files total)")
125
+ logger.info(
126
+ f"Tarred {tarred_count} sequences ({total_files:,} files, {format_size(total_tar_bytes)} total tar size)"
127
+ )
112
128
  return tarred_count, total_files
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mapillary_downloader
3
- Version: 0.3.2
3
+ Version: 0.4.1
4
4
  Summary: Download your Mapillary data before it's gone
5
5
  Author-email: Gareth Davidson <gaz@bitplane.net>
6
6
  Requires-Python: >=3.10
@@ -47,37 +47,42 @@ First, get your Mapillary API access token from
47
47
  [the developer dashboard](https://www.mapillary.com/dashboard/developers)
48
48
 
49
49
  ```bash
50
- # Set token via environment variable
50
+ # Set token via environment variable (recommended)
51
51
  export MAPILLARY_TOKEN=YOUR_TOKEN
52
- mapillary-downloader --username SOME_USERNAME --output ./downloads
52
+ mapillary-downloader USERNAME1 USERNAME2 USERNAME3
53
53
 
54
54
  # Or pass token directly, and have it in your shell history 💩👀
55
- mapillary-downloader --token YOUR_TOKEN --username SOME_USERNAME --output ./downloads
55
+ mapillary-downloader --token YOUR_TOKEN USERNAME1 USERNAME2
56
+
57
+ # Download to specific directory
58
+ mapillary-downloader --output ./downloads USERNAME1
56
59
  ```
57
60
 
58
- | option | because | default |
59
- | ------------- | ------------------------------------- | ------------------ |
60
- | `--username` | Mapillary username | None (required) |
61
- | `--token` | Mapillary API token (or env var) | `$MAPILLARY_TOKEN` |
62
- | `--output` | Output directory | `./mapillary_data` |
63
- | `--quality` | 256, 1024, 2048 or original | `original` |
64
- | `--bbox` | `west,south,east,north` | `None` |
65
- | `--webp` | Convert to WebP (saves ~70% space) | `False` |
66
- | `--workers` | Number of parallel download workers | Half of CPU count |
67
- | `--no-tar` | Don't tar sequence directories | `False` |
61
+ | option | because | default |
62
+ | --------------- | -------------------------------------------- | ------------------ |
63
+ | `usernames` | One or more Mapillary usernames | (required) |
64
+ | `--token` | Mapillary API token (or env var) | `$MAPILLARY_TOKEN` |
65
+ | `--output` | Output directory | `./mapillary_data` |
66
+ | `--quality` | 256, 1024, 2048 or original | `original` |
67
+ | `--bbox` | `west,south,east,north` | `None` |
68
+ | `--no-webp` | Don't convert to WebP | `False` |
69
+ | `--workers` | Number of parallel download workers | Half of CPU count |
70
+ | `--no-tar` | Don't tar sequence directories | `False` |
71
+ | `--no-check-ia` | Don't check if exists on Internet Archive | `False` |
68
72
 
69
73
  The downloader will:
70
74
 
71
- * 📷 Download a user's images organized by sequence
75
+ * 🏛️ Check Internet Archive to avoid duplicate downloads
76
+ * 📷 Download multiple users' images organized by sequence
72
77
  * 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
73
78
  compass direction)
79
+ * 🗜️ Convert to WebP (by default) to save ~70% disk space
74
80
  * 🛟 Save progress so you can safely resume if interrupted
75
- * 🗜️ Optionally convert to WebP to save space
76
- * 📦 Tar sequence directories for faster uploads
81
+ * 📦 Tar sequence directories (by default) for faster uploads to Internet Archive
77
82
 
78
83
  ## WebP Conversion
79
84
 
80
- You'll need `cwebp` to use the `--webp` flag. So install it:
85
+ You'll need the `cwebp` binary installed:
81
86
 
82
87
  ```bash
83
88
  # Debian/Ubuntu
@@ -87,24 +92,25 @@ sudo apt install webp
87
92
  brew install webp
88
93
  ```
89
94
 
95
+ To disable WebP conversion and keep original JPEGs, use `--no-webp`:
96
+
97
+ ```bash
98
+ mapillary-downloader --no-webp USERNAME
99
+ ```
100
+
90
101
  ## Sequence Tarball Creation
91
102
 
92
103
  By default, sequence directories are automatically tarred after download because
93
104
  if they weren't, you'd spend more time setting up upload metadata than actually
94
105
  uploading files to IA.
95
106
 
96
- To keep individual files instead of creating tars, use the `--no-tar` flag:
97
-
98
- ```bash
99
- mapillary-downloader --username WHOEVER --no-tar
100
- ```
107
+ To keep individual files instead of creating tars, use the `--no-tar` flag.
101
108
 
102
109
  ## Internet Archive upload
103
110
 
104
111
  I've written a bash tool to rip media then tag, queue, and upload to The
105
- Internet Archive. The metadata is in the same format. If you copy completed
106
- download dirs into the `4.ship` dir, they'll find their way into an
107
- appropriately named item.
112
+ Internet Archive. The metadata is in the same format. If you symlink your
113
+ `./mapillary_data` dir to `rip`'s `4.ship` dir, the collections in it will be queued for upload.
108
114
 
109
115
  See inlay for details:
110
116
 
@@ -126,7 +132,7 @@ make help # See other make options
126
132
  * [📖 pydoc](https://bitplane.net/dev/python/mapillary_downloader/pydoc)
127
133
  * [🐍 pypi](https://pypi.org/project/mapillary-downloader)
128
134
  * [🐱 github](https://github.com/bitplane/mapillary_downloader)
129
- * [📀 rip](https://bitplane.net/dev/sh/rip
135
+ * [📀 rip](https://bitplane.net/dev/sh/rip)
130
136
 
131
137
  ## License
132
138
 
@@ -0,0 +1,17 @@
1
+ mapillary_downloader/__init__.py,sha256=KEjiBRghXDeA7E15RJeLBfQm-yNJkowZarL59QOh_1w,120
2
+ mapillary_downloader/__main__.py,sha256=avh546grDz379HbA4JOOH2ovSH64Z69okGZO8LKciJ8,3964
3
+ mapillary_downloader/client.py,sha256=O7JgshaM3QKUv0xXuBbe_uPqsTr4lgyuVUHYndvXTfA,4611
4
+ mapillary_downloader/downloader.py,sha256=cVV24uIc3nQ_YXzqpwdVSr-L4fkME3sXq3pCfFS-0Ls,12476
5
+ mapillary_downloader/exif_writer.py,sha256=Bn1u3QULfHtae86FnUGcqN450NccJwtwW9wVaSRyx9E,4615
6
+ mapillary_downloader/ia_check.py,sha256=L2MEbG_KmlAd5NLmo2HQkO8HWvRN0brE5wXXoyNMbq8,1100
7
+ mapillary_downloader/ia_meta.py,sha256=78rcybHIPnQDsF02KGj6RYmDXzYzrU8sdVx4Q9Y0sfI,6266
8
+ mapillary_downloader/logging_config.py,sha256=Z-wNq34nt7aIhJWdeKc1feTY46P9-Or7HtiX7eUFjEI,2324
9
+ mapillary_downloader/tar_sequences.py,sha256=mqs5p3N7osV_bxTkw6i34GVmxCBBEbIiKKxeh-fWNdU,4430
10
+ mapillary_downloader/utils.py,sha256=yzVgS1mwsklDAqrimaFafgTTXtRYQUbKP98Xgh9d2KA,1174
11
+ mapillary_downloader/webp_converter.py,sha256=vYLLQxDmdnqRz0nm7wXwRUd4x9mQZNah-DrncpA8sNs,1901
12
+ mapillary_downloader/worker.py,sha256=eqaBhP5NE_VoJSTZfFb4OAqGyVX65xyoVUp2vosYBM8,3722
13
+ mapillary_downloader-0.4.1.dist-info/entry_points.txt,sha256=PdYtxOXHMJrUhmiPO4G-F98VuhUI4MN9D_T4KPrVZ5w,75
14
+ mapillary_downloader-0.4.1.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
15
+ mapillary_downloader-0.4.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
16
+ mapillary_downloader-0.4.1.dist-info/METADATA,sha256=wgVRFgLesT4OFb-dsyQ-14KvXQTagnx7WjqHkAt2aFQ,4982
17
+ mapillary_downloader-0.4.1.dist-info/RECORD,,
@@ -1,16 +0,0 @@
1
- mapillary_downloader/__init__.py,sha256=KEjiBRghXDeA7E15RJeLBfQm-yNJkowZarL59QOh_1w,120
2
- mapillary_downloader/__main__.py,sha256=Zvvib_XFhErPLY6AWviQ83iT0TiXFHWV4CDJ5vtzNYQ,3187
3
- mapillary_downloader/client.py,sha256=O7JgshaM3QKUv0xXuBbe_uPqsTr4lgyuVUHYndvXTfA,4611
4
- mapillary_downloader/downloader.py,sha256=JmR-0HbCBXGoRylgVzVbEdvlRey3pklZffhf2BRCw9s,8476
5
- mapillary_downloader/exif_writer.py,sha256=Bn1u3QULfHtae86FnUGcqN450NccJwtwW9wVaSRyx9E,4615
6
- mapillary_downloader/ia_meta.py,sha256=zUG23Vv2X8hFa_oNL_wfvFVt7yqkQkvXiZ4feoKt7S0,5596
7
- mapillary_downloader/logging_config.py,sha256=Z-wNq34nt7aIhJWdeKc1feTY46P9-Or7HtiX7eUFjEI,2324
8
- mapillary_downloader/tar_sequences.py,sha256=X5Vru0vp4pm_unmFjdEIZ7SBIk0gwi49NwOLPhUKeYk,3809
9
- mapillary_downloader/utils.py,sha256=yzVgS1mwsklDAqrimaFafgTTXtRYQUbKP98Xgh9d2KA,1174
10
- mapillary_downloader/webp_converter.py,sha256=vYLLQxDmdnqRz0nm7wXwRUd4x9mQZNah-DrncpA8sNs,1901
11
- mapillary_downloader/worker.py,sha256=eqaBhP5NE_VoJSTZfFb4OAqGyVX65xyoVUp2vosYBM8,3722
12
- mapillary_downloader-0.3.2.dist-info/entry_points.txt,sha256=PdYtxOXHMJrUhmiPO4G-F98VuhUI4MN9D_T4KPrVZ5w,75
13
- mapillary_downloader-0.3.2.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
14
- mapillary_downloader-0.3.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
15
- mapillary_downloader-0.3.2.dist-info/METADATA,sha256=ckXRXc0rHTQdPleWCAo8oRwbu-FKJwOSdkc7g5DrKug,4607
16
- mapillary_downloader-0.3.2.dist-info/RECORD,,