mapillary-downloader 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mapillary_downloader
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Download your Mapillary data before it's gone
5
5
  Author-email: Gareth Davidson <gaz@bitplane.net>
6
6
  Requires-Python: >=3.10
@@ -63,7 +63,7 @@ mapillary-downloader --token YOUR_TOKEN --username SOME_USERNAME --output ./down
63
63
  | `--quality` | 256, 1024, 2048 or original | `original` |
64
64
  | `--bbox` | `west,south,east,north` | `None` |
65
65
  | `--webp` | Convert to WebP (saves ~70% space) | `False` |
66
- | `--workers` | Number of parallel download workers | CPU count |
66
+ | `--workers` | Number of parallel download workers | Half of CPU count |
67
67
  | `--no-tar` | Don't tar sequence directories | `False` |
68
68
 
69
69
  The downloader will:
@@ -33,7 +33,7 @@ mapillary-downloader --token YOUR_TOKEN --username SOME_USERNAME --output ./down
33
33
  | `--quality` | 256, 1024, 2048 or original | `original` |
34
34
  | `--bbox` | `west,south,east,north` | `None` |
35
35
  | `--webp` | Convert to WebP (saves ~70% space) | `False` |
36
- | `--workers` | Number of parallel download workers | CPU count |
36
+ | `--workers` | Number of parallel download workers | Half of CPU count |
37
37
  | `--no-tar` | Don't tar sequence directories | `False` |
38
38
 
39
39
  The downloader will:
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mapillary_downloader"
3
3
  description = "Download your Mapillary data before it's gone"
4
- version = "0.3.0"
4
+ version = "0.3.2"
5
5
  authors = [
6
6
  { name = "Gareth Davidson", email = "gaz@bitplane.net" }
7
7
  ]
@@ -3,6 +3,7 @@
3
3
  import argparse
4
4
  import os
5
5
  import sys
6
+ from importlib.metadata import version
6
7
  from mapillary_downloader.client import MapillaryClient
7
8
  from mapillary_downloader.downloader import MapillaryDownloader
8
9
  from mapillary_downloader.logging_config import setup_logging
@@ -15,6 +16,11 @@ def main():
15
16
  logger = setup_logging()
16
17
 
17
18
  parser = argparse.ArgumentParser(description="Download your Mapillary data before it's gone")
19
+ parser.add_argument(
20
+ "--version",
21
+ action="version",
22
+ version=f"%(prog)s {version('mapillary-downloader')}",
23
+ )
18
24
  parser.add_argument(
19
25
  "--token",
20
26
  default=os.environ.get("MAPILLARY_TOKEN"),
@@ -38,7 +44,7 @@ def main():
38
44
  "--workers",
39
45
  type=int,
40
46
  default=None,
41
- help="Number of parallel workers (default: number of CPU cores)",
47
+ help="Number of parallel workers (default: half of CPU cores)",
42
48
  )
43
49
  parser.add_argument(
44
50
  "--no-tar",
@@ -10,6 +10,7 @@ from mapillary_downloader.utils import format_size, format_time
10
10
  from mapillary_downloader.ia_meta import generate_ia_metadata
11
11
  from mapillary_downloader.worker import download_and_convert_image
12
12
  from mapillary_downloader.tar_sequences import tar_sequence_directories
13
+ from mapillary_downloader.logging_config import add_file_handler
13
14
 
14
15
  logger = logging.getLogger("mapillary_downloader")
15
16
 
@@ -25,14 +26,14 @@ class MapillaryDownloader:
25
26
  output_dir: Base directory to save downloads
26
27
  username: Mapillary username (for collection directory)
27
28
  quality: Image quality (for collection directory)
28
- workers: Number of parallel workers (default: cpu_count)
29
+ workers: Number of parallel workers (default: half of cpu_count)
29
30
  tar_sequences: Whether to tar sequence directories after download (default: True)
30
31
  """
31
32
  self.client = client
32
33
  self.base_output_dir = Path(output_dir)
33
34
  self.username = username
34
35
  self.quality = quality
35
- self.workers = workers if workers is not None else os.cpu_count()
36
+ self.workers = workers if workers is not None else max(1, os.cpu_count() // 2)
36
37
  self.tar_sequences = tar_sequences
37
38
 
38
39
  # If username and quality provided, create collection directory
@@ -44,6 +45,11 @@ class MapillaryDownloader:
44
45
 
45
46
  self.output_dir.mkdir(parents=True, exist_ok=True)
46
47
 
48
+ # Set up file logging for archival
49
+ log_file = self.output_dir / "download.log"
50
+ add_file_handler(log_file)
51
+ logger.info(f"Logging to: {log_file}")
52
+
47
53
  self.metadata_file = self.output_dir / "metadata.jsonl"
48
54
  self.progress_file = self.output_dir / "progress.json"
49
55
  self.downloaded = self._load_progress()
@@ -135,7 +135,7 @@ def generate_ia_metadata(collection_dir):
135
135
  write_meta_tag(
136
136
  meta_dir,
137
137
  "title",
138
- f"Mapillary images by {username} ({quality} quality)",
138
+ f"Mapillary images by {username}",
139
139
  )
140
140
 
141
141
  description = (
@@ -60,3 +60,23 @@ def setup_logging(level=logging.INFO):
60
60
  logger.addHandler(handler)
61
61
 
62
62
  return logger
63
+
64
+
65
def add_file_handler(log_file, level=logging.INFO):
    """Attach a plain-text file handler to the package logger for archival.

    The call is idempotent per file: if the "mapillary_downloader" logger
    already has a FileHandler writing to ``log_file``, that handler is reused
    (with its level set to ``level``) instead of attaching a second one, which
    would write every record twice and hold an extra open file handle.

    Args:
        log_file: Path to log file (str or os.PathLike)
        level: Logging level for file handler

    Returns:
        The logging.FileHandler attached to the logger for ``log_file``.
    """
    # Local import so this function does not depend on the module's import block.
    import os

    logger = logging.getLogger("mapillary_downloader")

    # FileHandler stores its target as an absolute path in ``baseFilename``,
    # so normalise the requested path the same way before comparing.
    target = os.path.abspath(os.fspath(log_file))
    for existing in logger.handlers:
        if isinstance(existing, logging.FileHandler) and existing.baseFilename == target:
            existing.setLevel(level)
            return existing

    # Use plain formatter for file (no colors)
    formatter = logging.Formatter(
        fmt="%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    handler = logging.FileHandler(log_file, mode="a", encoding="utf-8")
    handler.setFormatter(formatter)
    handler.setLevel(level)
    logger.addHandler(handler)

    return handler
@@ -1,10 +1,10 @@
1
1
  """Worker process for parallel image download and conversion."""
2
2
 
3
+ import os
3
4
  import tempfile
4
5
  from pathlib import Path
5
6
  import requests
6
7
  from requests.exceptions import RequestException
7
- import time
8
8
  from mapillary_downloader.exif_writer import write_exif_to_image
9
9
  from mapillary_downloader.webp_converter import convert_to_webp
10
10
 
@@ -54,29 +54,23 @@ def download_and_convert_image(image_data, output_dir, quality, convert_webp, ac
54
54
  final_path = jpg_path
55
55
 
56
56
  # Download image
57
+ # No retries for CDN images - they're cheap, just skip failures and move on
57
58
  session = requests.Session()
58
59
  session.headers.update({"Authorization": f"OAuth {access_token}"})
59
60
 
60
- max_retries = 10
61
- base_delay = 1.0
62
61
  bytes_downloaded = 0
63
62
 
64
- for attempt in range(max_retries):
65
- try:
66
- response = session.get(image_url, stream=True, timeout=60)
67
- response.raise_for_status()
68
-
69
- with open(jpg_path, "wb") as f:
70
- for chunk in response.iter_content(chunk_size=8192):
71
- f.write(chunk)
72
- bytes_downloaded += len(chunk)
73
- break
74
- except RequestException as e:
75
- if attempt == max_retries - 1:
76
- return (image_id, 0, False, f"Download failed: {e}")
63
+ try:
64
+ # 60 second timeout applies to the connect and to each socket read, not to the download as a whole
65
+ response = session.get(image_url, stream=True, timeout=60)
66
+ response.raise_for_status()
77
67
 
78
- delay = base_delay * (2**attempt)
79
- time.sleep(delay)
68
+ with open(jpg_path, "wb") as f:
69
+ for chunk in response.iter_content(chunk_size=8192):
70
+ f.write(chunk)
71
+ bytes_downloaded += len(chunk)
72
+ except RequestException as e:
73
+ return (image_id, 0, False, f"Download failed: {e}")
80
74
 
81
75
  # Write EXIF metadata
82
76
  write_exif_to_image(jpg_path, image_data)
@@ -87,6 +81,12 @@ def download_and_convert_image(image_data, output_dir, quality, convert_webp, ac
87
81
  if not webp_path:
88
82
  return (image_id, bytes_downloaded, False, "WebP conversion failed")
89
83
 
84
+ # Set file mtime to captured_at timestamp for reproducibility
85
+ if "captured_at" in image_data:
86
+ # captured_at is in milliseconds, convert to seconds
87
+ mtime = image_data["captured_at"] / 1000
88
+ os.utime(final_path, (mtime, mtime))
89
+
90
90
  return (image_id, bytes_downloaded, True, None)
91
91
 
92
92
  except Exception as e: