mapillary-downloader 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,38 +4,25 @@ import argparse
4
4
  import sys
5
5
  from mapillary_downloader.client import MapillaryClient
6
6
  from mapillary_downloader.downloader import MapillaryDownloader
7
+ from mapillary_downloader.logging_config import setup_logging
7
8
 
8
9
 
9
10
  def main():
10
11
  """Main CLI entry point."""
11
- parser = argparse.ArgumentParser(
12
- description="Download your Mapillary data before it's gone"
13
- )
14
- parser.add_argument(
15
- "--token",
16
- required=True,
17
- help="Mapillary API access token"
18
- )
19
- parser.add_argument(
20
- "--username",
21
- required=True,
22
- help="Your Mapillary username"
23
- )
24
- parser.add_argument(
25
- "--output",
26
- default="./mapillary_data",
27
- help="Output directory (default: ./mapillary_data)"
28
- )
12
+ # Set up logging
13
+ logger = setup_logging()
14
+
15
+ parser = argparse.ArgumentParser(description="Download your Mapillary data before it's gone")
16
+ parser.add_argument("--token", required=True, help="Mapillary API access token")
17
+ parser.add_argument("--username", required=True, help="Your Mapillary username")
18
+ parser.add_argument("--output", default="./mapillary_data", help="Output directory (default: ./mapillary_data)")
29
19
  parser.add_argument(
30
20
  "--quality",
31
21
  choices=["256", "1024", "2048", "original"],
32
22
  default="original",
33
- help="Image quality to download (default: original)"
34
- )
35
- parser.add_argument(
36
- "--bbox",
37
- help="Bounding box: west,south,east,north"
23
+ help="Image quality to download (default: original)",
38
24
  )
25
+ parser.add_argument("--bbox", help="Bounding box: west,south,east,north")
39
26
 
40
27
  args = parser.parse_args()
41
28
 
@@ -46,7 +33,7 @@ def main():
46
33
  if len(bbox) != 4:
47
34
  raise ValueError
48
35
  except ValueError:
49
- print("Error: bbox must be four comma-separated numbers")
36
+ logger.error("Error: bbox must be four comma-separated numbers")
50
37
  sys.exit(1)
51
38
 
52
39
  try:
@@ -54,10 +41,10 @@ def main():
54
41
  downloader = MapillaryDownloader(client, args.output)
55
42
  downloader.download_user_data(args.username, args.quality, bbox)
56
43
  except KeyboardInterrupt:
57
- print("\nInterrupted by user")
44
+ logger.info("\nInterrupted by user")
58
45
  sys.exit(1)
59
46
  except Exception as e:
60
- print(f"Error: {e}")
47
+ logger.error(f"Error: {e}")
61
48
  sys.exit(1)
62
49
 
63
50
 
@@ -1,9 +1,12 @@
1
1
  """Mapillary API client."""
2
2
 
3
+ import logging
3
4
  import time
4
5
  import requests
5
6
  from requests.exceptions import RequestException
6
7
 
8
+ logger = logging.getLogger("mapillary_downloader")
9
+
7
10
 
8
11
  class MapillaryClient:
9
12
  """Client for interacting with Mapillary API v4."""
@@ -65,6 +68,7 @@ class MapillaryClient:
65
68
  params["bbox"] = ",".join(map(str, bbox))
66
69
 
67
70
  url = f"{self.base_url}/images"
71
+ total_fetched = 0
68
72
 
69
73
  while url:
70
74
  max_retries = 10
@@ -72,21 +76,24 @@ class MapillaryClient:
72
76
 
73
77
  for attempt in range(max_retries):
74
78
  try:
75
- response = self.session.get(url, params=params)
79
+ response = self.session.get(url, params=params, timeout=60)
76
80
  response.raise_for_status()
77
81
  break
78
82
  except RequestException as e:
79
83
  if attempt == max_retries - 1:
80
84
  raise
81
85
 
82
- delay = base_delay * (2 ** attempt)
83
- print(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
84
- print(f"Retrying in {delay:.1f} seconds...")
86
+ delay = base_delay * (2**attempt)
87
+ logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
88
+ logger.info(f"Retrying in {delay:.1f} seconds...")
85
89
  time.sleep(delay)
86
90
 
87
91
  data = response.json()
92
+ images = data.get("data", [])
93
+ total_fetched += len(images)
94
+ logger.info(f"Fetched metadata for {total_fetched:,} images...")
88
95
 
89
- for image in data.get("data", []):
96
+ for image in images:
90
97
  yield image
91
98
 
92
99
  # Get next page URL
@@ -111,7 +118,7 @@ class MapillaryClient:
111
118
 
112
119
  for attempt in range(max_retries):
113
120
  try:
114
- response = self.session.get(image_url, stream=True)
121
+ response = self.session.get(image_url, stream=True, timeout=60)
115
122
  response.raise_for_status()
116
123
 
117
124
  total_bytes = 0
@@ -123,10 +130,10 @@ class MapillaryClient:
123
130
  return total_bytes
124
131
  except RequestException as e:
125
132
  if attempt == max_retries - 1:
126
- print(f"Error downloading {image_url} after {max_retries} attempts: {e}")
133
+ logger.error(f"Error downloading {image_url} after {max_retries} attempts: {e}")
127
134
  return 0
128
135
 
129
- delay = base_delay * (2 ** attempt)
130
- print(f"Download failed (attempt {attempt + 1}/{max_retries}): {e}")
131
- print(f"Retrying in {delay:.1f} seconds...")
136
+ delay = base_delay * (2**attempt)
137
+ logger.warning(f"Download failed (attempt {attempt + 1}/{max_retries}): {e}")
138
+ logger.info(f"Retrying in {delay:.1f} seconds...")
132
139
  time.sleep(delay)
@@ -1,20 +1,15 @@
1
1
  """Main downloader logic."""
2
2
 
3
3
  import json
4
+ import logging
4
5
  import os
6
+ import time
5
7
  from pathlib import Path
8
+ from collections import deque
6
9
  from mapillary_downloader.exif_writer import write_exif_to_image
10
+ from mapillary_downloader.utils import format_size, format_time
7
11
 
8
-
9
- def format_bytes(bytes_count):
10
- """Format bytes as human-readable string."""
11
- if bytes_count < 1024:
12
- return f"{bytes_count} B"
13
- if bytes_count < 1024 * 1024:
14
- return f"{bytes_count / 1024:.3f} KB"
15
- if bytes_count < 1024 * 1024 * 1024:
16
- return f"{bytes_count / (1024 * 1024):.3f} MB"
17
- return f"{bytes_count / (1024 * 1024 * 1024):.3f} GB"
12
+ logger = logging.getLogger("mapillary_downloader")
18
13
 
19
14
 
20
15
  class MapillaryDownloader:
@@ -61,32 +56,98 @@ class MapillaryDownloader:
61
56
  """
62
57
  quality_field = f"thumb_{quality}_url"
63
58
 
64
- print(f"Downloading images for user: {username}")
65
- print(f"Output directory: {self.output_dir}")
66
- print(f"Quality: {quality}")
59
+ logger.info(f"Downloading images for user: {username}")
60
+ logger.info(f"Output directory: {self.output_dir}")
61
+ logger.info(f"Quality: {quality}")
67
62
 
68
63
  processed = 0
69
64
  downloaded_count = 0
70
65
  skipped = 0
71
66
  total_bytes = 0
72
67
 
68
+ # Track download times for adaptive ETA (last 50 downloads)
69
+ download_times = deque(maxlen=50)
70
+ start_time = time.time()
71
+
72
+ # Track which image IDs we've seen in metadata to avoid re-fetching
73
+ seen_ids = set()
74
+
75
+ # First, process any existing metadata without re-fetching from API
76
+ if self.metadata_file.exists():
77
+ logger.info("Processing existing metadata file...")
78
+ with open(self.metadata_file) as f:
79
+ for line in f:
80
+ if line.strip():
81
+ image = json.loads(line)
82
+ image_id = image["id"]
83
+ seen_ids.add(image_id)
84
+ processed += 1
85
+
86
+ if image_id in self.downloaded:
87
+ skipped += 1
88
+ continue
89
+
90
+ # Download this un-downloaded image
91
+ image_url = image.get(quality_field)
92
+ if not image_url:
93
+ logger.warning(f"No {quality} URL for image {image_id}")
94
+ continue
95
+
96
+ sequence_id = image.get("sequence")
97
+ if sequence_id:
98
+ img_dir = self.output_dir / sequence_id
99
+ img_dir.mkdir(exist_ok=True)
100
+ else:
101
+ img_dir = self.output_dir
102
+
103
+ output_path = img_dir / f"{image_id}.jpg"
104
+
105
+ download_start = time.time()
106
+ bytes_downloaded = self.client.download_image(image_url, output_path)
107
+ if bytes_downloaded:
108
+ download_time = time.time() - download_start
109
+ download_times.append(download_time)
110
+
111
+ write_exif_to_image(output_path, image)
112
+
113
+ self.downloaded.add(image_id)
114
+ downloaded_count += 1
115
+ total_bytes += bytes_downloaded
116
+
117
+ progress_str = (
118
+ f"Processed: {processed}, Downloaded: {downloaded_count} ({format_size(total_bytes)})"
119
+ )
120
+ logger.info(progress_str)
121
+
122
+ if downloaded_count % 10 == 0:
123
+ self._save_progress()
124
+
125
+ # Always check API for new images (will skip duplicates via seen_ids)
126
+ logger.info("Checking for new images from API...")
73
127
  with open(self.metadata_file, "a") as meta_f:
74
128
  for image in self.client.get_user_images(username, bbox=bbox):
75
129
  image_id = image["id"]
76
- processed += 1
77
130
 
78
- if image_id in self.downloaded:
79
- skipped += 1
131
+ # Skip if we already have this in our metadata file
132
+ if image_id in seen_ids:
80
133
  continue
81
134
 
82
- # Save metadata
135
+ seen_ids.add(image_id)
136
+ processed += 1
137
+
138
+ # Save new metadata
83
139
  meta_f.write(json.dumps(image) + "\n")
84
140
  meta_f.flush()
85
141
 
142
+ # Skip if already downloaded
143
+ if image_id in self.downloaded:
144
+ skipped += 1
145
+ continue
146
+
86
147
  # Download image
87
148
  image_url = image.get(quality_field)
88
149
  if not image_url:
89
- print(f"No {quality} URL for image {image_id}")
150
+ logger.warning(f"No {quality} URL for image {image_id}")
90
151
  continue
91
152
 
92
153
  # Use sequence ID for organization
@@ -99,21 +160,33 @@ class MapillaryDownloader:
99
160
 
100
161
  output_path = img_dir / f"{image_id}.jpg"
101
162
 
163
+ download_start = time.time()
102
164
  bytes_downloaded = self.client.download_image(image_url, output_path)
103
165
  if bytes_downloaded:
166
+ download_time = time.time() - download_start
167
+ download_times.append(download_time)
168
+
104
169
  # Write EXIF metadata to the downloaded image
105
170
  write_exif_to_image(output_path, image)
106
171
 
107
172
  self.downloaded.add(image_id)
108
173
  downloaded_count += 1
109
174
  total_bytes += bytes_downloaded
110
- print(f"Processed: {processed}, Downloaded: {downloaded_count} ({format_bytes(total_bytes)})")
175
+
176
+ # Calculate progress
177
+ progress_str = (
178
+ f"Processed: {processed}, Downloaded: {downloaded_count} ({format_size(total_bytes)})"
179
+ )
180
+
181
+ logger.info(progress_str)
111
182
 
112
183
  # Save progress every 10 images
113
184
  if downloaded_count % 10 == 0:
114
185
  self._save_progress()
115
186
 
116
187
  self._save_progress()
117
- print(
118
- f"\nComplete! Processed {processed} images, downloaded {downloaded_count} ({format_bytes(total_bytes)}), skipped {skipped}"
188
+ elapsed = time.time() - start_time
189
+ logger.info(
190
+ f"Complete! Processed {processed} images, downloaded {downloaded_count} ({format_size(total_bytes)}), skipped {skipped}"
119
191
  )
192
+ logger.info(f"Total time: {format_time(elapsed)}")
@@ -0,0 +1,62 @@
1
+ """Logging configuration with colored output for TTY."""
2
+
3
+ import logging
4
+ import sys
5
+
6
+
7
class ColoredFormatter(logging.Formatter):
    """Formatter that adds color to log levels when output is a TTY.

    The level name is wrapped in ANSI escape codes only for the duration of
    a single ``format()`` call; the record itself is left unchanged so that
    other handlers attached to the same logger (e.g. a file handler) never
    see escape sequences.
    """

    # ANSI color codes
    COLORS = {
        "ERROR": "\033[91m",  # Red
        "WARNING": "\033[93m",  # Yellow
        "INFO": "\033[92m",  # Green
        "DEBUG": "\033[94m",  # Blue
        "RESET": "\033[0m",
    }

    def __init__(self, fmt=None, datefmt=None, use_color=True):
        """Initialize the formatter.

        Args:
            fmt: Log format string
            datefmt: Date format string
            use_color: Whether to use colored output (only honoured when
                stdout is a TTY)
        """
        super().__init__(fmt, datefmt)
        # sys.stdout can be None (e.g. pythonw) or a replacement object
        # without isatty(); treat both as "not a terminal".
        isatty = getattr(sys.stdout, "isatty", None)
        self.use_color = bool(use_color) and bool(isatty and isatty())

    def format(self, record):
        """Format the log record with colors if appropriate.

        Args:
            record: LogRecord to format

        Returns:
            Formatted log string
        """
        color = self.COLORS.get(record.levelname) if self.use_color else None
        if color is None:
            return super().format(record)

        plain_levelname = record.levelname
        record.levelname = f"{color}{plain_levelname}{self.COLORS['RESET']}"
        try:
            return super().format(record)
        finally:
            # The same LogRecord instance is passed to every handler, so the
            # original level name must be put back after formatting.
            record.levelname = plain_levelname
45
+
46
+
47
def setup_logging(level=logging.INFO):
    """Set up logging with timestamps and colored output.

    Safe to call more than once: the stdout handler is attached only on the
    first call, so repeated calls update the level without duplicating every
    log line.

    Args:
        level: Logging level to use

    Returns:
        The configured "mapillary_downloader" logger
    """
    logger = logging.getLogger("mapillary_downloader")
    logger.setLevel(level)

    # getLogger() returns the same object for the same name, so a handler
    # added by an earlier call is still attached here.
    already_configured = any(isinstance(h.formatter, ColoredFormatter) for h in logger.handlers)
    if not already_configured:
        formatter = ColoredFormatter(fmt="%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")

        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
@@ -0,0 +1,47 @@
1
+ """Utility functions for formatting and display."""
2
+
3
+
4
def format_size(bytes_count):
    """Format bytes as human-readable size.

    Uses decimal (SI) units: 1 KB = 1000 B. A value that would round up to
    "1000.00" of a unit is promoted to the next unit instead, so 999_999
    bytes is reported as "1.00 MB" rather than "1000.00 KB".

    Args:
        bytes_count: Number of bytes

    Returns:
        Formatted string (e.g. "1.23 GB", "456.78 MB")
    """
    if bytes_count < 1_000:
        return f"{bytes_count} B"

    for unit, divisor in (("KB", 1_000), ("MB", 1_000_000)):
        text = f"{bytes_count / divisor:.2f}"
        # Compare the *rounded* figure so boundary values move up a unit.
        if float(text) < 1000:
            return f"{text} {unit}"

    return f"{bytes_count / 1_000_000_000:.2f} GB"
20
+
21
+
22
def format_time(seconds):
    """Format seconds as human-readable time.

    Fractional seconds are truncated. Only the two most significant units
    are shown, and a zero-valued trailing unit is omitted.

    Args:
        seconds: Number of seconds

    Returns:
        Formatted string (e.g. "2h 15m", "45m 30s", "30s")
    """
    if seconds < 60:
        return f"{int(seconds)}s"

    # Truncate once, then split with integer arithmetic.
    minutes, remaining_seconds = divmod(int(seconds), 60)
    if minutes < 60:
        return f"{minutes}m {remaining_seconds}s" if remaining_seconds > 0 else f"{minutes}m"

    hours, remaining_minutes = divmod(minutes, 60)
    return f"{hours}h {remaining_minutes}m" if remaining_minutes > 0 else f"{hours}h"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mapillary_downloader
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Download your Mapillary data before it's gone
5
5
  Author-email: Gareth Davidson <gaz@bitplane.net>
6
6
  Requires-Python: >=3.10
@@ -28,7 +28,7 @@ Project-URL: Issues, https://github.com/bitplane/mapillary_downloader/issues
28
28
  Project-URL: Repository, https://github.com/bitplane/mapillary_downloader
29
29
  Provides-Extra: dev
30
30
 
31
- # Mapillary Downloader
31
+ # 🗺️ Mapillary Downloader
32
32
 
33
33
  Download your Mapillary data before it's gone.
34
34
 
@@ -49,28 +49,24 @@ make install
49
49
  First, get your Mapillary API access token from https://www.mapillary.com/dashboard/developers
50
50
 
51
51
  ```bash
52
- mapillary-download --token YOUR_TOKEN --username YOUR_USERNAME --output ./downloads
52
+ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --output ./downloads
53
53
  ```
54
54
 
55
- Options:
56
- - `--token`: Your Mapillary API access token (required)
57
- - `--username`: Your Mapillary username (required)
58
- - `--output`: Output directory (default: ./mapillary_data)
59
- - `--quality`: Image quality - 256, 1024, 2048, or original (default: original)
60
- - `--bbox`: Bounding box filter: west,south,east,north
55
+ | option | because | default |
56
+ | ------------- | ------------------------------------- | ------------------ |
57
+ | `--token` | Your Mapillary API access token | None (required) |
58
+ | `--username` | Your Mapillary username | None (required) |
59
+ | `--output` | Output directory | `./mapillary_data` |
60
+ | `--quality` | 256, 1024, 2048 or original | `original` |
61
+ | `--bbox` | `west,south,east,north` | `None` |
61
62
 
62
63
  The downloader will:
63
- - Fetch all your uploaded images from Mapillary
64
- - Download full-resolution images organized by sequence
65
- - Inject EXIF metadata (GPS coordinates, camera info, timestamps, compass direction)
66
- - Save progress so you can safely resume if interrupted
67
64
 
68
- ## Features
69
-
70
- - **Resume capability**: Interrupt and restart anytime - it tracks what's downloaded
71
- - **EXIF restoration**: Restores GPS, camera, and timestamp metadata that Mapillary stripped
72
- - **Atomic writes**: Progress tracking uses atomic file operations to prevent corruption
73
- - **Organized output**: Images organized by sequence ID with metadata in JSONL format
65
+ * 💾 Fetch all your uploaded images from Mapillary
66
+ * 📷 Download full-resolution images organized by sequence
67
+ * 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
68
+ compass direction)
69
+ * 🛟 Save progress so you can safely resume if interrupted
74
70
 
75
71
  ## Development
76
72
 
@@ -93,5 +89,6 @@ WTFPL with one additional clause
93
89
 
94
90
  1. Don't blame me
95
91
 
96
- Do wtf you want, but don't blame me when it breaks.
92
+ Do wtf you want, but don't blame me if it makes jokes about the size of your
93
+ disk drive.
97
94
 
@@ -0,0 +1,12 @@
1
+ mapillary_downloader/__init__.py,sha256=KEjiBRghXDeA7E15RJeLBfQm-yNJkowZarL59QOh_1w,120
2
+ mapillary_downloader/__main__.py,sha256=i6nqTnH-K5LYbjUo_tgFvO7JkepchQyyChfqi9tyAE4,1728
3
+ mapillary_downloader/client.py,sha256=O7JgshaM3QKUv0xXuBbe_uPqsTr4lgyuVUHYndvXTfA,4611
4
+ mapillary_downloader/downloader.py,sha256=_AT1U2Y3FmGYDP0aOTQYVO3U_JDEl2nfvZyrgwCMZv0,7276
5
+ mapillary_downloader/exif_writer.py,sha256=Bn1u3QULfHtae86FnUGcqN450NccJwtwW9wVaSRyx9E,4615
6
+ mapillary_downloader/logging_config.py,sha256=WgjXthS2Lpl0RPO5sb-Aj60W2maog1bbzJzhblMLs1c,1715
7
+ mapillary_downloader/utils.py,sha256=yzVgS1mwsklDAqrimaFafgTTXtRYQUbKP98Xgh9d2KA,1174
8
+ mapillary_downloader-0.1.3.dist-info/entry_points.txt,sha256=PdYtxOXHMJrUhmiPO4G-F98VuhUI4MN9D_T4KPrVZ5w,75
9
+ mapillary_downloader-0.1.3.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
10
+ mapillary_downloader-0.1.3.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
11
+ mapillary_downloader-0.1.3.dist-info/METADATA,sha256=I81NFr74xyJPKbZfC91sGOHzyIOalVmL6KNkq3YLL3o,3060
12
+ mapillary_downloader-0.1.3.dist-info/RECORD,,
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ mapillary-downloader=mapillary_downloader.__main__:main
3
+
@@ -1,10 +0,0 @@
1
- mapillary_downloader/__init__.py,sha256=KEjiBRghXDeA7E15RJeLBfQm-yNJkowZarL59QOh_1w,120
2
- mapillary_downloader/__main__.py,sha256=xKYhamK0HYXqx98fGb5CVOEw0syURWgX7jnFIdsK5Ao,1720
3
- mapillary_downloader/client.py,sha256=w936htOireWnKgfXJDYls-umYxOzr0FQ4yzklQ6fPM0,4315
4
- mapillary_downloader/downloader.py,sha256=n5Y7aAoin3vBa_H3et9hpTNoPrEarbU_LdnHT619c5Y,4216
5
- mapillary_downloader/exif_writer.py,sha256=Bn1u3QULfHtae86FnUGcqN450NccJwtwW9wVaSRyx9E,4615
6
- mapillary_downloader-0.1.1.dist-info/entry_points.txt,sha256=qJK_zi0at7Mi6uzrcQ2U8Q31bG9NhRfhKphvIytjiIY,73
7
- mapillary_downloader-0.1.1.dist-info/licenses/LICENSE.md,sha256=7_BIuQ-veOrsF-WarH8kTkm0-xrCLvJ1PFE1C4Ebs64,146
8
- mapillary_downloader-0.1.1.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
9
- mapillary_downloader-0.1.1.dist-info/METADATA,sha256=ZoxfQ9-RBDxfyl1ws9eGIprLwZMqX3kumH20ttzf8GU,3114
10
- mapillary_downloader-0.1.1.dist-info/RECORD,,
@@ -1,3 +0,0 @@
1
- [console_scripts]
2
- mapillary-download=mapillary_downloader.__main__:main
3
-