mapillary-downloader 0.5.1.tar.gz → 0.5.2.tar.gz

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (18)
  1. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/PKG-INFO +1 -1
  2. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/pyproject.toml +1 -1
  3. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__main__.py +4 -4
  4. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/downloader.py +15 -7
  5. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker.py +4 -0
  6. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/worker_pool.py +6 -8
  7. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/LICENSE.md +0 -0
  8. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/README.md +0 -0
  9. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/__init__.py +0 -0
  10. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/client.py +0 -0
  11. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/exif_writer.py +0 -0
  12. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_check.py +0 -0
  13. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/ia_meta.py +0 -0
  14. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/logging_config.py +0 -0
  15. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/metadata_reader.py +0 -0
  16. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/tar_sequences.py +0 -0
  17. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/utils.py +0 -0
  18. {mapillary_downloader-0.5.1 → mapillary_downloader-0.5.2}/src/mapillary_downloader/webp_converter.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.5.1
+Version: 0.5.2
 Summary: Download your Mapillary data before it's gone
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10
pyproject.toml

@@ -1,7 +1,7 @@
 [project]
 name = "mapillary_downloader"
 description = "Download your Mapillary data before it's gone"
-version = "0.5.1"
+version = "0.5.2"
 authors = [
     { name = "Gareth Davidson", email = "gaz@bitplane.net" }
 ]
src/mapillary_downloader/__main__.py

@@ -41,10 +41,10 @@ def main():
         help="Don't convert to WebP (WebP conversion is enabled by default, saves ~70%% disk space)",
     )
     parser.add_argument(
-        "--workers",
+        "--max-workers",
         type=int,
-        default=None,
-        help="Number of parallel workers (default: half of CPU cores)",
+        default=128,
+        help="Maximum number of parallel workers (default: 128)",
     )
     parser.add_argument(
         "--no-tar",
@@ -114,7 +114,7 @@ def main():
         args.output,
         username,
         args.quality,
-        workers=args.workers,
+        max_workers=args.max_workers,
         tar_sequences=not args.no_tar,
         convert_webp=convert_webp,
         check_ia=not args.no_check_ia,
src/mapillary_downloader/downloader.py

@@ -45,7 +45,7 @@ class MapillaryDownloader:
         output_dir,
         username=None,
         quality=None,
-        workers=None,
+        max_workers=128,
         tar_sequences=True,
         convert_webp=False,
         check_ia=True,
@@ -57,7 +57,7 @@ class MapillaryDownloader:
             output_dir: Base directory to save downloads (final destination)
             username: Mapillary username (for collection directory)
             quality: Image quality (for collection directory)
-            workers: Number of parallel workers (default: half of cpu_count)
+            max_workers: Maximum number of parallel workers (default: 128)
             tar_sequences: Whether to tar sequence directories after download (default: True)
             convert_webp: Whether to convert images to WebP (affects collection name)
             check_ia: Whether to check if collection exists on Internet Archive (default: True)
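For orientation, this is how the renamed parameter reaches the constructor after this release; a minimal sketch using the signature shown in the hunk above, with every argument value illustrative rather than taken from the diff:

from mapillary_downloader.downloader import MapillaryDownloader

downloader = MapillaryDownloader(
    "./downloads",           # output_dir: final destination (illustrative path)
    username="some_user",    # hypothetical Mapillary username
    quality="original",      # illustrative; valid quality values aren't shown in this diff
    max_workers=128,         # new keyword, replacing the old workers=None
    tar_sequences=True,
    convert_webp=True,
    check_ia=True,
)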
@@ -66,7 +66,8 @@ class MapillaryDownloader:
         self.base_output_dir = Path(output_dir)
         self.username = username
         self.quality = quality
-        self.workers = workers if workers is not None else max(1, os.cpu_count() // 2)
+        self.max_workers = max_workers
+        self.initial_workers = os.cpu_count() or 1  # Start with CPU count
         self.tar_sequences = tar_sequences
         self.convert_webp = convert_webp
         self.check_ia = check_ia
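The `or 1` guard on the new initial_workers line is load-bearing: os.cpu_count() is documented to return None when the core count cannot be determined (some containers and unusual platforms). A standalone illustration:

import os

cores = os.cpu_count()        # int, or None when undeterminable
initial_workers = cores or 1  # None -> 1, so later arithmetic never sees None

print(f"starting with {initial_workers} workers")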
@@ -177,7 +178,7 @@ class MapillaryDownloader:
         logger.info(f"Downloading images for user: {self.username}")
         logger.info(f"Output directory: {self.output_dir}")
         logger.info(f"Quality: {self.quality}")
-        logger.info(f"Using {self.workers} parallel workers")
+        logger.info(f"Worker pool: {self.initial_workers} initial, {self.max_workers} max")
 
         start_time = time.time()
 
@@ -191,8 +192,10 @@ class MapillaryDownloader:
 
         # Step 2: Start worker pool
         # Since workers do both I/O (download) and CPU (WebP), need many more workers
-        # Cap at 128 for now - will build proper dynamic scaling on a new branch later
-        pool = AdaptiveWorkerPool(worker_process, min_workers=self.workers, max_workers=128, monitoring_interval=10)
+        # Start with CPU count and scale up based on throughput
+        pool = AdaptiveWorkerPool(
+            worker_process, min_workers=self.initial_workers, max_workers=self.max_workers, monitoring_interval=10
+        )
         pool.start()
 
         # Step 3: Download images from metadata file while fetching new from API
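The pool's scaling logic itself is not part of this diff; the following is only a minimal sketch of the throughput-based rule the new comment implies, with the function name, thresholds, and arguments entirely hypothetical:

def scale_decision(current_workers, max_workers, throughput_history):
    """Hypothetical rule: grow while throughput keeps improving.

    throughput_history: list of (worker_count, items_per_sec) samples,
    one per monitoring interval (10 s in the diff above).
    """
    if len(throughput_history) < 2:
        return min(current_workers + 1, max_workers)  # not enough data: probe upward
    (_, prev_rate), (_, curr_rate) = throughput_history[-2:]
    if curr_rate > prev_rate * 1.05 and current_workers < max_workers:
        return current_workers + 1    # still gaining throughput: scale up
    if curr_rate < prev_rate * 0.95 and current_workers > 1:
        return current_workers - 1    # throughput regressed: back off
    return current_workers            # plateau: hold steady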
@@ -247,8 +250,9 @@ class MapillaryDownloader:
         # Helper to process results from queue
         def process_results():
             nonlocal downloaded_count, total_bytes, failed_count
+            # Drain ALL available results to prevent queue from filling up
             while True:
-                result = pool.get_result(timeout=0.001)
+                result = pool.get_result(timeout=0)  # Non-blocking
                 if result is None:
                     break
 
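A timeout of 0 implies a genuinely non-blocking read rather than a 1 ms wait per empty poll. The pool's get_result is not shown in this diff; one plausible implementation over multiprocessing.Queue, offered purely as a sketch, would be:

import queue  # multiprocessing.Queue raises queue.Empty on an empty read

class ResultMixin:
    """Sketch only: assumes self.result_queue is a multiprocessing.Queue."""

    def get_result(self, timeout=0):
        """Return the next result, or None if nothing is ready right now.

        timeout=0 maps to get_nowait(), so the caller's drain loop
        (process_results above) never stalls on an empty queue.
        """
        try:
            if timeout == 0:
                return self.result_queue.get_nowait()
            return self.result_queue.get(timeout=timeout)
        except queue.Empty:
            return None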
@@ -379,6 +383,10 @@ class MapillaryDownloader:
 
             last_position = f.tell()
 
+            # If API is already complete, we've read the whole file, so break
+            if api_fetch_complete is None:
+                break
+
             # Sleep briefly before next tail iteration
             time.sleep(0.1)
 
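The enclosing tail loop is mostly out of frame here. As a self-contained sketch of the pattern being patched (follow a growing file, then stop once the producer has finished), with producer_done a hypothetical stand-in for the api_fetch_complete check:

import time

def tail_lines(path, producer_done):
    """Yield lines as they are appended; exit once the producer has
    finished and the file has been read to the end.

    producer_done is a zero-argument callable (hypothetical) reporting
    whether the writer has completed.
    """
    with open(path) as f:
        while True:
            line = f.readline()
            if line:
                yield line.rstrip("\n")
                continue
            # No new data: if the writer is finished, we are done too
            if producer_done():
                break
            time.sleep(0.1)  # brief sleep before the next tail iteration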
src/mapillary_downloader/worker.py

@@ -1,6 +1,7 @@
 """Worker process for parallel image download and conversion."""
 
 import os
+import signal
 import tempfile
 from pathlib import Path
 import requests
@@ -17,6 +18,9 @@ def worker_process(work_queue, result_queue, worker_id):
         result_queue: Queue to push results to
         worker_id: Unique worker identifier
     """
+    # Ignore SIGINT in worker process - parent will handle it
+    signal.signal(signal.SIGINT, signal.SIG_IGN)
+
     # Create session once per worker (reuse HTTP connections)
     session = requests.Session()
 
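Context for this change: Ctrl-C delivers SIGINT to the whole foreground process group, so without SIG_IGN every worker would raise its own KeyboardInterrupt mid-transfer. A self-contained sketch of the division of labor (names hypothetical):

import signal
import time
from multiprocessing import Process

def worker():
    # Children share the terminal's foreground process group, so Ctrl-C
    # would hit them too; ignore SIGINT and let the parent coordinate.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    time.sleep(60)  # stand-in for download/convert work

if __name__ == "__main__":
    procs = [Process(target=worker) for _ in range(4)]
    for p in procs:
        p.start()
    try:
        for p in procs:
            p.join()
    except KeyboardInterrupt:
        # Only the parent reacts to Ctrl-C and shuts the workers down
        for p in procs:
            p.terminate()
        for p in procs:
            p.join(timeout=2)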
src/mapillary_downloader/worker_pool.py

@@ -185,20 +185,18 @@ class AdaptiveWorkerPool:
         else:
             logger.info(f"At optimal worker count: {current_workers} workers, {current_throughput:.1f} items/s")
 
-    def shutdown(self, timeout=30):
+    def shutdown(self, timeout=2):
         """Shutdown the worker pool gracefully."""
         logger.info("Shutting down worker pool...")
         self.running = False
 
-        # Send stop signals
-        for _ in self.workers:
-            self.work_queue.put(None)
-
-        # Wait for workers to finish
+        # Terminate all workers immediately (they ignore SIGINT so we need to be forceful)
         for p in self.workers:
-            p.join(timeout=timeout)
             if p.is_alive():
-                logger.warning(f"Worker {p.pid} did not exit cleanly, terminating")
                 p.terminate()
 
+        # Give them a brief moment to exit
+        for p in self.workers:
+            p.join(timeout=timeout)
+
         logger.info("Worker pool shutdown complete")