mapillary-downloader 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mapillary_downloader
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: Download your Mapillary data before it's gone
5
5
  Author-email: Gareth Davidson <gaz@bitplane.net>
6
6
  Requires-Python: >=3.10
@@ -28,7 +28,7 @@ Project-URL: Issues, https://github.com/bitplane/mapillary_downloader/issues
28
28
  Project-URL: Repository, https://github.com/bitplane/mapillary_downloader
29
29
  Provides-Extra: dev
30
30
 
31
- # Mapillary Downloader
31
+ # 🗺️ Mapillary Downloader
32
32
 
33
33
  Download your Mapillary data before it's gone.
34
34
 
@@ -59,13 +59,35 @@ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --output ./down
59
59
  | `--output` | Output directory | `./mapillary_data` |
60
60
  | `--quality` | 256, 1024, 2048 or original | `original` |
61
61
  | `--bbox` | `west,south,east,north` | `None` |
62
+ | `--webp` | Convert to WebP (saves ~70% space) | `False` |
62
63
 
63
64
  The downloader will:
64
65
 
65
66
  * 💾 Fetch all your uploaded images from Mapillary
66
- - 📷 Download full-resolution images organized by sequence
67
- - 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps, compass direction)
68
- - 🛟 Save progress so you can safely resume if interrupted
67
+ * 📷 Download full-resolution images organized by sequence
68
+ * 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
69
+ compass direction)
70
+ * 🛟 Save progress so you can safely resume if interrupted
71
+ * 🗜️ Optionally convert to WebP format for massive space savings
72
+
73
+ ## WebP Conversion
74
+
75
+ Use the `--webp` flag to convert images to WebP format after download:
76
+
77
+ ```bash
78
+ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --webp
79
+ ```
80
+
81
+ This reduces storage by approximately 70% while preserving all EXIF metadata
82
+ including GPS coordinates. Requires the `cwebp` binary to be installed:
83
+
84
+ ```bash
85
+ # Debian/Ubuntu
86
+ sudo apt install webp
87
+
88
+ # macOS
89
+ brew install webp
90
+ ```
69
91
 
70
92
  ## Development
71
93
 
@@ -88,5 +110,6 @@ WTFPL with one additional clause
88
110
 
89
111
  1. Don't blame me
90
112
 
91
- Do wtf you want, but don't blame me when it breaks.
113
+ Do wtf you want, but don't blame me if it makes jokes about the size of your
114
+ disk drive.
92
115
 
@@ -1,4 +1,4 @@
1
- # Mapillary Downloader
1
+ # 🗺️ Mapillary Downloader
2
2
 
3
3
  Download your Mapillary data before it's gone.
4
4
 
@@ -29,13 +29,35 @@ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --output ./down
29
29
  | `--output` | Output directory | `./mapillary_data` |
30
30
  | `--quality` | 256, 1024, 2048 or original | `original` |
31
31
  | `--bbox` | `west,south,east,north` | `None` |
32
+ | `--webp` | Convert to WebP (saves ~70% space) | `False` |
32
33
 
33
34
  The downloader will:
34
35
 
35
36
  * 💾 Fetch all your uploaded images from Mapillary
36
- - 📷 Download full-resolution images organized by sequence
37
- - 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps, compass direction)
38
- - 🛟 Save progress so you can safely resume if interrupted
37
+ * 📷 Download full-resolution images organized by sequence
38
+ * 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
39
+ compass direction)
40
+ * 🛟 Save progress so you can safely resume if interrupted
41
+ * 🗜️ Optionally convert to WebP format for massive space savings
42
+
43
+ ## WebP Conversion
44
+
45
+ Use the `--webp` flag to convert images to WebP format after download:
46
+
47
+ ```bash
48
+ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --webp
49
+ ```
50
+
51
+ This reduces storage by approximately 70% while preserving all EXIF metadata
52
+ including GPS coordinates. Requires the `cwebp` binary to be installed:
53
+
54
+ ```bash
55
+ # Debian/Ubuntu
56
+ sudo apt install webp
57
+
58
+ # macOS
59
+ brew install webp
60
+ ```
39
61
 
40
62
  ## Development
41
63
 
@@ -58,4 +80,5 @@ WTFPL with one additional clause
58
80
 
59
81
  1. Don't blame me
60
82
 
61
- Do wtf you want, but don't blame me when it breaks.
83
+ Do wtf you want, but don't blame me if it makes jokes about the size of your
84
+ disk drive.
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mapillary_downloader"
3
3
  description = "Download your Mapillary data before it's gone"
4
- version = "0.1.2"
4
+ version = "0.2.0"
5
5
  authors = [
6
6
  { name = "Gareth Davidson", email = "gaz@bitplane.net" }
7
7
  ]
@@ -0,0 +1,65 @@
1
+ """CLI entry point."""
2
+
3
+ import argparse
4
+ import sys
5
+ from mapillary_downloader.client import MapillaryClient
6
+ from mapillary_downloader.downloader import MapillaryDownloader
7
+ from mapillary_downloader.logging_config import setup_logging
8
+ from mapillary_downloader.webp_converter import check_cwebp_available
9
+
10
+
11
def _parse_bbox(text):
    """Parse a ``west,south,east,north`` string into four floats.

    Args:
        text: Comma-separated bounding box as given on the command line

    Returns:
        List of four floats, in the order they were given

    Raises:
        ValueError: If a part is not a number or there are not exactly four
    """
    values = [float(part) for part in text.split(",")]
    if len(values) != 4:
        raise ValueError(f"expected 4 values, got {len(values)}")
    return values


def main(argv=None):
    """Main CLI entry point.

    Args:
        argv: Argument list to parse. None (the default) makes argparse read
            sys.argv[1:], so existing callers of ``main()`` are unaffected.
    """
    # Set up logging
    logger = setup_logging()

    parser = argparse.ArgumentParser(description="Download your Mapillary data before it's gone")
    parser.add_argument("--token", required=True, help="Mapillary API access token")
    parser.add_argument("--username", required=True, help="Your Mapillary username")
    parser.add_argument("--output", default="./mapillary_data", help="Output directory (default: ./mapillary_data)")
    parser.add_argument(
        "--quality",
        choices=["256", "1024", "2048", "original"],
        default="original",
        help="Image quality to download (default: original)",
    )
    parser.add_argument("--bbox", help="Bounding box: west,south,east,north")
    parser.add_argument(
        "--webp",
        action="store_true",
        # "%%" because argparse applies %-formatting to help strings
        help="Convert images to WebP format (saves ~70%% disk space, requires cwebp binary)",
    )

    args = parser.parse_args(argv)

    bbox = None
    if args.bbox:
        try:
            bbox = _parse_bbox(args.bbox)
        except ValueError:
            logger.error("Error: bbox must be four comma-separated numbers")
            sys.exit(1)

    # Fail before any network traffic if WebP was requested but cannot work
    if args.webp:
        if not check_cwebp_available():
            logger.error("Error: cwebp binary not found. Install webp package (e.g., apt install webp)")
            sys.exit(1)
        logger.info("WebP conversion enabled - images will be converted after download")

    try:
        client = MapillaryClient(args.token)
        downloader = MapillaryDownloader(client, args.output)
        downloader.download_user_data(args.username, args.quality, bbox, convert_webp=args.webp)
    except KeyboardInterrupt:
        logger.info("\nInterrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report any failure as a one-line error
        logger.error(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -1,9 +1,12 @@
1
1
  """Mapillary API client."""
2
2
 
3
+ import logging
3
4
  import time
4
5
  import requests
5
6
  from requests.exceptions import RequestException
6
7
 
8
+ logger = logging.getLogger("mapillary_downloader")
9
+
7
10
 
8
11
  class MapillaryClient:
9
12
  """Client for interacting with Mapillary API v4."""
@@ -65,6 +68,7 @@ class MapillaryClient:
65
68
  params["bbox"] = ",".join(map(str, bbox))
66
69
 
67
70
  url = f"{self.base_url}/images"
71
+ total_fetched = 0
68
72
 
69
73
  while url:
70
74
  max_retries = 10
@@ -72,21 +76,24 @@ class MapillaryClient:
72
76
 
73
77
  for attempt in range(max_retries):
74
78
  try:
75
- response = self.session.get(url, params=params)
79
+ response = self.session.get(url, params=params, timeout=60)
76
80
  response.raise_for_status()
77
81
  break
78
82
  except RequestException as e:
79
83
  if attempt == max_retries - 1:
80
84
  raise
81
85
 
82
- delay = base_delay * (2 ** attempt)
83
- print(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
84
- print(f"Retrying in {delay:.1f} seconds...")
86
+ delay = base_delay * (2**attempt)
87
+ logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
88
+ logger.info(f"Retrying in {delay:.1f} seconds...")
85
89
  time.sleep(delay)
86
90
 
87
91
  data = response.json()
92
+ images = data.get("data", [])
93
+ total_fetched += len(images)
94
+ logger.info(f"Fetched metadata for {total_fetched:,} images...")
88
95
 
89
- for image in data.get("data", []):
96
+ for image in images:
90
97
  yield image
91
98
 
92
99
  # Get next page URL
@@ -111,7 +118,7 @@ class MapillaryClient:
111
118
 
112
119
  for attempt in range(max_retries):
113
120
  try:
114
- response = self.session.get(image_url, stream=True)
121
+ response = self.session.get(image_url, stream=True, timeout=60)
115
122
  response.raise_for_status()
116
123
 
117
124
  total_bytes = 0
@@ -123,10 +130,10 @@ class MapillaryClient:
123
130
  return total_bytes
124
131
  except RequestException as e:
125
132
  if attempt == max_retries - 1:
126
- print(f"Error downloading {image_url} after {max_retries} attempts: {e}")
133
+ logger.error(f"Error downloading {image_url} after {max_retries} attempts: {e}")
127
134
  return 0
128
135
 
129
- delay = base_delay * (2 ** attempt)
130
- print(f"Download failed (attempt {attempt + 1}/{max_retries}): {e}")
131
- print(f"Retrying in {delay:.1f} seconds...")
136
+ delay = base_delay * (2**attempt)
137
+ logger.warning(f"Download failed (attempt {attempt + 1}/{max_retries}): {e}")
138
+ logger.info(f"Retrying in {delay:.1f} seconds...")
132
139
  time.sleep(delay)
@@ -0,0 +1,206 @@
1
+ """Main downloader logic."""
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import time
7
+ from pathlib import Path
8
+ from collections import deque
9
+ from mapillary_downloader.exif_writer import write_exif_to_image
10
+ from mapillary_downloader.utils import format_size, format_time
11
+ from mapillary_downloader.webp_converter import convert_to_webp
12
+
13
+ logger = logging.getLogger("mapillary_downloader")
14
+
15
+
16
class MapillaryDownloader:
    """Handles downloading Mapillary data for a user."""

    def __init__(self, client, output_dir):
        """Initialize the downloader.

        Args:
            client: MapillaryClient instance
            output_dir: Directory to save downloads
        """
        self.client = client
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.metadata_file = self.output_dir / "metadata.jsonl"
        self.progress_file = self.output_dir / "progress.json"
        self.downloaded = self._load_progress()

    def _load_progress(self):
        """Load previously downloaded image IDs.

        Returns:
            Set of image IDs read from the progress file, or an empty set
            when no progress file exists yet
        """
        if self.progress_file.exists():
            with open(self.progress_file) as f:
                return set(json.load(f).get("downloaded", []))
        return set()

    def _save_progress(self):
        """Save progress to disk atomically."""
        # Write to a sibling temp file and rename over the real one so an
        # interrupted write never leaves a truncated progress file behind.
        temp_file = self.progress_file.with_suffix(".json.tmp")
        with open(temp_file, "w") as f:
            json.dump({"downloaded": list(self.downloaded)}, f)
            f.flush()
            os.fsync(f.fileno())
        temp_file.replace(self.progress_file)

    def _read_saved_metadata(self):
        """Yield image records stored in the metadata file.

        Blank lines are ignored. A line that is not valid JSON (for example
        one cut short when a previous run was killed mid-write) is logged and
        skipped instead of aborting the whole run.

        Yields:
            Parsed JSON value of each valid line
        """
        with open(self.metadata_file) as f:
            for line_number, line in enumerate(f, start=1):
                if not line.strip():
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    logger.warning(f"Skipping malformed line {line_number} in {self.metadata_file}")

    def _download_image(self, image, quality, convert_webp):
        """Download one image, write its EXIF data and optionally convert it.

        On success the image ID is added to ``self.downloaded``.

        Args:
            image: Image metadata dict (needs "id"; may have "sequence" and
                the ``thumb_<quality>_url`` field)
            quality: Image quality to download (256, 1024, 2048, original)
            convert_webp: Convert the image to WebP format after download

        Returns:
            Number of bytes downloaded, or 0 when the image has no URL for
            this quality or the download failed
        """
        image_id = image["id"]
        image_url = image.get(f"thumb_{quality}_url")
        if not image_url:
            logger.warning(f"No {quality} URL for image {image_id}")
            return 0

        # Use sequence ID for organization
        sequence_id = image.get("sequence")
        if sequence_id:
            img_dir = self.output_dir / sequence_id
            img_dir.mkdir(exist_ok=True)
        else:
            img_dir = self.output_dir

        output_path = img_dir / f"{image_id}.jpg"

        bytes_downloaded = self.client.download_image(image_url, output_path)
        if not bytes_downloaded:
            return 0

        # Write EXIF metadata to the downloaded image
        write_exif_to_image(output_path, image)

        # A failed conversion returns None; the image still counts as
        # downloaded, exactly as before.
        if convert_webp:
            convert_to_webp(output_path)

        self.downloaded.add(image_id)
        return bytes_downloaded

    def download_user_data(self, username, quality="original", bbox=None, convert_webp=False):
        """Download all images for a user.

        Images already listed in the metadata file are handled first without
        asking the API for them again; the API is then queried for images not
        yet recorded. Progress is written every 10 downloads and also when
        the run ends for any reason, including Ctrl-C or an error.

        Args:
            username: Mapillary username
            quality: Image quality to download (256, 1024, 2048, original)
            bbox: Optional bounding box [west, south, east, north]
            convert_webp: Convert images to WebP format after download
        """
        logger.info(f"Downloading images for user: {username}")
        logger.info(f"Output directory: {self.output_dir}")
        logger.info(f"Quality: {quality}")

        processed = 0
        downloaded_count = 0
        skipped = 0
        total_bytes = 0
        start_time = time.time()

        # Track which image IDs we've seen in metadata to avoid re-fetching
        seen_ids = set()

        def fetch(image):
            """Download one image and update the running totals."""
            nonlocal downloaded_count, total_bytes
            bytes_downloaded = self._download_image(image, quality, convert_webp)
            if not bytes_downloaded:
                return

            downloaded_count += 1
            total_bytes += bytes_downloaded
            logger.info(f"Processed: {processed}, Downloaded: {downloaded_count} ({format_size(total_bytes)})")

            # Save progress every 10 images
            if downloaded_count % 10 == 0:
                self._save_progress()

        try:
            # First, process any existing metadata without re-fetching from API
            if self.metadata_file.exists():
                logger.info("Processing existing metadata file...")
                for image in self._read_saved_metadata():
                    image_id = image["id"]
                    seen_ids.add(image_id)
                    processed += 1

                    if image_id in self.downloaded:
                        skipped += 1
                        continue

                    fetch(image)

            # Always check API for new images (will skip duplicates via seen_ids)
            logger.info("Checking for new images from API...")
            with open(self.metadata_file, "a") as meta_f:
                for image in self.client.get_user_images(username, bbox=bbox):
                    image_id = image["id"]

                    # Skip if we already have this in our metadata file
                    if image_id in seen_ids:
                        continue

                    seen_ids.add(image_id)
                    processed += 1

                    # Save new metadata
                    meta_f.write(json.dumps(image) + "\n")
                    meta_f.flush()

                    # Skip if already downloaded
                    if image_id in self.downloaded:
                        skipped += 1
                        continue

                    fetch(image)
        finally:
            # Runs on success, Ctrl-C and errors alike, so downloads made
            # since the last periodic save are not repeated on resume.
            self._save_progress()

        elapsed = time.time() - start_time
        logger.info(
            f"Complete! Processed {processed} images, downloaded {downloaded_count} ({format_size(total_bytes)}), skipped {skipped}"
        )
        logger.info(f"Total time: {format_time(elapsed)}")
@@ -0,0 +1,62 @@
1
+ """Logging configuration with colored output for TTY."""
2
+
3
+ import logging
4
+ import sys
5
+
6
+
7
class ColoredFormatter(logging.Formatter):
    """Formatter that adds color to log levels when output is a TTY."""

    # ANSI color codes
    COLORS = {
        "ERROR": "\033[91m",  # Red
        "WARNING": "\033[93m",  # Yellow
        "INFO": "\033[92m",  # Green
        "DEBUG": "\033[94m",  # Blue
        "RESET": "\033[0m",
    }

    def __init__(self, fmt=None, datefmt=None, use_color=True):
        """Initialize the formatter.

        Args:
            fmt: Log format string
            datefmt: Date format string
            use_color: Whether to use colored output (only honoured when
                stdout is a TTY)
        """
        super().__init__(fmt, datefmt)
        # sys.stdout can be None (e.g. no console attached); treat as "not a TTY"
        stream = sys.stdout
        is_tty = stream is not None and hasattr(stream, "isatty") and stream.isatty()
        self.use_color = bool(use_color and is_tty)

    def format(self, record):
        """Format the log record with colors if appropriate.

        The record itself is left unchanged, so other handlers that receive
        the same record do not see ANSI escape codes in its level name.

        Args:
            record: LogRecord to format

        Returns:
            Formatted log string
        """
        plain_level = record.levelname
        if not self.use_color or plain_level not in self.COLORS:
            return super().format(record)

        record.levelname = f"{self.COLORS[plain_level]}{plain_level}{self.COLORS['RESET']}"
        try:
            return super().format(record)
        finally:
            # Undo the temporary change even if formatting raises
            record.levelname = plain_level
45
+
46
+
47
def setup_logging(level=logging.INFO):
    """Set up logging with timestamps and colored output.

    Safe to call more than once: the stdout handler is attached only if the
    logger does not already have one using ColoredFormatter, so repeated
    calls do not duplicate every log line. The level is applied on each call.

    Args:
        level: Logging level to use

    Returns:
        The configured "mapillary_downloader" logger
    """
    logger = logging.getLogger("mapillary_downloader")
    logger.setLevel(level)

    already_configured = any(isinstance(h.formatter, ColoredFormatter) for h in logger.handlers)
    if not already_configured:
        formatter = ColoredFormatter(fmt="%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")

        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
@@ -0,0 +1,47 @@
1
+ """Utility functions for formatting and display."""
2
+
3
+
4
def format_size(bytes_count):
    """Render a byte count using decimal (power-of-1000) units.

    Args:
        bytes_count: Number of bytes

    Returns:
        String with two decimals and a unit for values of 1000 or more
        (e.g. "1.23 GB", "456.78 MB"), otherwise the plain count followed
        by " B"
    """
    # Largest unit first so the first threshold reached wins
    units = (
        (1_000_000_000, "GB"),
        (1_000_000, "MB"),
        (1_000, "KB"),
    )
    for threshold, label in units:
        if bytes_count >= threshold:
            return f"{bytes_count / threshold:.2f} {label}"
    return f"{bytes_count} B"
20
+
21
+
22
+ def format_time(seconds):
23
+ """Format seconds as human-readable time.
24
+
25
+ Args:
26
+ seconds: Number of seconds
27
+
28
+ Returns:
29
+ Formatted string (e.g. "2h 15m", "45m 30s", "30s")
30
+ """
31
+ if seconds < 60:
32
+ return f"{int(seconds)}s"
33
+
34
+ minutes = int(seconds / 60)
35
+ remaining_seconds = int(seconds % 60)
36
+
37
+ if minutes < 60:
38
+ if remaining_seconds > 0:
39
+ return f"{minutes}m {remaining_seconds}s"
40
+ return f"{minutes}m"
41
+
42
+ hours = int(minutes / 60)
43
+ remaining_minutes = minutes % 60
44
+
45
+ if remaining_minutes > 0:
46
+ return f"{hours}h {remaining_minutes}m"
47
+ return f"{hours}h"
@@ -0,0 +1,54 @@
1
+ """WebP image conversion utilities."""
2
+
3
+ import logging
4
+ import shutil
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ logger = logging.getLogger("mapillary_downloader")
9
+
10
+
11
def check_cwebp_available():
    """Report whether a ``cwebp`` executable can be found on the PATH.

    Returns:
        bool: True if cwebp is found, False otherwise
    """
    cwebp_location = shutil.which("cwebp")
    return cwebp_location is not None
18
+
19
+
20
def _remove_partial_output(path):
    """Best-effort removal of an incomplete output file left by a failed run."""
    try:
        path.unlink(missing_ok=True)
    except OSError as e:
        logger.warning(f"Could not remove incomplete file {path}: {e}")


def convert_to_webp(jpg_path):
    """Convert a JPG image to WebP format, preserving EXIF metadata.

    The original JPG is deleted only after cwebp exits successfully and a
    non-empty output file exists. On any failure the JPG is kept and any
    partially written WebP file is removed.

    Args:
        jpg_path: Path to the JPG file

    Returns:
        Path object to the new WebP file, or None if conversion failed
    """
    jpg_path = Path(jpg_path)
    webp_path = jpg_path.with_suffix(".webp")

    # If the input is already named *.webp, output == input: cwebp would
    # overwrite it and the unlink below would then delete the result.
    if webp_path == jpg_path:
        logger.error(f"Error converting {jpg_path} to WebP: input already has a .webp suffix")
        return None

    try:
        # Convert with cwebp, preserving all metadata
        result = subprocess.run(
            ["cwebp", "-metadata", "all", str(jpg_path), "-o", str(webp_path)],
            capture_output=True,
            text=True,
            timeout=60,
        )

        if result.returncode != 0:
            logger.error(f"cwebp conversion failed for {jpg_path}: {result.stderr}")
            _remove_partial_output(webp_path)
            return None

        # Never delete the source unless the converted file is really there
        if not webp_path.is_file() or webp_path.stat().st_size == 0:
            logger.error(f"cwebp conversion failed for {jpg_path}: no output file was written")
            _remove_partial_output(webp_path)
            return None

        # Delete original JPG after successful conversion
        jpg_path.unlink()
        return webp_path

    except subprocess.TimeoutExpired:
        logger.error(f"cwebp conversion timed out for {jpg_path}")
        _remove_partial_output(webp_path)
        return None
    except Exception as e:
        # Includes a missing cwebp binary; the JPG is left untouched
        logger.error(f"Error converting {jpg_path} to WebP: {e}")
        # Only discard the output while the source still exists
        if jpg_path.exists():
            _remove_partial_output(webp_path)
        return None
@@ -1,65 +0,0 @@
1
- """CLI entry point."""
2
-
3
- import argparse
4
- import sys
5
- from mapillary_downloader.client import MapillaryClient
6
- from mapillary_downloader.downloader import MapillaryDownloader
7
-
8
-
9
- def main():
10
- """Main CLI entry point."""
11
- parser = argparse.ArgumentParser(
12
- description="Download your Mapillary data before it's gone"
13
- )
14
- parser.add_argument(
15
- "--token",
16
- required=True,
17
- help="Mapillary API access token"
18
- )
19
- parser.add_argument(
20
- "--username",
21
- required=True,
22
- help="Your Mapillary username"
23
- )
24
- parser.add_argument(
25
- "--output",
26
- default="./mapillary_data",
27
- help="Output directory (default: ./mapillary_data)"
28
- )
29
- parser.add_argument(
30
- "--quality",
31
- choices=["256", "1024", "2048", "original"],
32
- default="original",
33
- help="Image quality to download (default: original)"
34
- )
35
- parser.add_argument(
36
- "--bbox",
37
- help="Bounding box: west,south,east,north"
38
- )
39
-
40
- args = parser.parse_args()
41
-
42
- bbox = None
43
- if args.bbox:
44
- try:
45
- bbox = [float(x) for x in args.bbox.split(",")]
46
- if len(bbox) != 4:
47
- raise ValueError
48
- except ValueError:
49
- print("Error: bbox must be four comma-separated numbers")
50
- sys.exit(1)
51
-
52
- try:
53
- client = MapillaryClient(args.token)
54
- downloader = MapillaryDownloader(client, args.output)
55
- downloader.download_user_data(args.username, args.quality, bbox)
56
- except KeyboardInterrupt:
57
- print("\nInterrupted by user")
58
- sys.exit(1)
59
- except Exception as e:
60
- print(f"Error: {e}")
61
- sys.exit(1)
62
-
63
-
64
- if __name__ == "__main__":
65
- main()
@@ -1,119 +0,0 @@
1
- """Main downloader logic."""
2
-
3
- import json
4
- import os
5
- from pathlib import Path
6
- from mapillary_downloader.exif_writer import write_exif_to_image
7
-
8
-
9
- def format_bytes(bytes_count):
10
- """Format bytes as human-readable string."""
11
- if bytes_count < 1024:
12
- return f"{bytes_count} B"
13
- if bytes_count < 1024 * 1024:
14
- return f"{bytes_count / 1024:.3f} KB"
15
- if bytes_count < 1024 * 1024 * 1024:
16
- return f"{bytes_count / (1024 * 1024):.3f} MB"
17
- return f"{bytes_count / (1024 * 1024 * 1024):.3f} GB"
18
-
19
-
20
- class MapillaryDownloader:
21
- """Handles downloading Mapillary data for a user."""
22
-
23
- def __init__(self, client, output_dir):
24
- """Initialize the downloader.
25
-
26
- Args:
27
- client: MapillaryClient instance
28
- output_dir: Directory to save downloads
29
- """
30
- self.client = client
31
- self.output_dir = Path(output_dir)
32
- self.output_dir.mkdir(parents=True, exist_ok=True)
33
-
34
- self.metadata_file = self.output_dir / "metadata.jsonl"
35
- self.progress_file = self.output_dir / "progress.json"
36
- self.downloaded = self._load_progress()
37
-
38
- def _load_progress(self):
39
- """Load previously downloaded image IDs."""
40
- if self.progress_file.exists():
41
- with open(self.progress_file) as f:
42
- return set(json.load(f).get("downloaded", []))
43
- return set()
44
-
45
- def _save_progress(self):
46
- """Save progress to disk atomically."""
47
- temp_file = self.progress_file.with_suffix(".json.tmp")
48
- with open(temp_file, "w") as f:
49
- json.dump({"downloaded": list(self.downloaded)}, f)
50
- f.flush()
51
- os.fsync(f.fileno())
52
- temp_file.replace(self.progress_file)
53
-
54
- def download_user_data(self, username, quality="original", bbox=None):
55
- """Download all images for a user.
56
-
57
- Args:
58
- username: Mapillary username
59
- quality: Image quality to download (256, 1024, 2048, original)
60
- bbox: Optional bounding box [west, south, east, north]
61
- """
62
- quality_field = f"thumb_{quality}_url"
63
-
64
- print(f"Downloading images for user: {username}")
65
- print(f"Output directory: {self.output_dir}")
66
- print(f"Quality: {quality}")
67
-
68
- processed = 0
69
- downloaded_count = 0
70
- skipped = 0
71
- total_bytes = 0
72
-
73
- with open(self.metadata_file, "a") as meta_f:
74
- for image in self.client.get_user_images(username, bbox=bbox):
75
- image_id = image["id"]
76
- processed += 1
77
-
78
- if image_id in self.downloaded:
79
- skipped += 1
80
- continue
81
-
82
- # Save metadata
83
- meta_f.write(json.dumps(image) + "\n")
84
- meta_f.flush()
85
-
86
- # Download image
87
- image_url = image.get(quality_field)
88
- if not image_url:
89
- print(f"No {quality} URL for image {image_id}")
90
- continue
91
-
92
- # Use sequence ID for organization
93
- sequence_id = image.get("sequence")
94
- if sequence_id:
95
- img_dir = self.output_dir / sequence_id
96
- img_dir.mkdir(exist_ok=True)
97
- else:
98
- img_dir = self.output_dir
99
-
100
- output_path = img_dir / f"{image_id}.jpg"
101
-
102
- bytes_downloaded = self.client.download_image(image_url, output_path)
103
- if bytes_downloaded:
104
- # Write EXIF metadata to the downloaded image
105
- write_exif_to_image(output_path, image)
106
-
107
- self.downloaded.add(image_id)
108
- downloaded_count += 1
109
- total_bytes += bytes_downloaded
110
- print(f"Processed: {processed}, Downloaded: {downloaded_count} ({format_bytes(total_bytes)})")
111
-
112
- # Save progress every 10 images
113
- if downloaded_count % 10 == 0:
114
- self._save_progress()
115
-
116
- self._save_progress()
117
- print(
118
- f"\nComplete! Processed {processed} images, downloaded {downloaded_count} ({format_bytes(total_bytes)}), skipped {skipped}"
119
- )