mapillary-downloader 0.1.2.tar.gz → 0.1.3.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/PKG-INFO +8 -6
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/README.md +7 -5
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/pyproject.toml +1 -1
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/__main__.py +13 -26
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/client.py +17 -10
- mapillary_downloader-0.1.3/src/mapillary_downloader/downloader.py +192 -0
- mapillary_downloader-0.1.3/src/mapillary_downloader/logging_config.py +62 -0
- mapillary_downloader-0.1.3/src/mapillary_downloader/utils.py +47 -0
- mapillary_downloader-0.1.2/src/mapillary_downloader/downloader.py +0 -119
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/LICENSE.md +0 -0
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/__init__.py +0 -0
- {mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/exif_writer.py +0 -0

{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mapillary_downloader
-Version: 0.1.2
+Version: 0.1.3
 Summary: Download your Mapillary data before it's gone
 Author-email: Gareth Davidson <gaz@bitplane.net>
 Requires-Python: >=3.10
@@ -28,7 +28,7 @@ Project-URL: Issues, https://github.com/bitplane/mapillary_downloader/issues
 Project-URL: Repository, https://github.com/bitplane/mapillary_downloader
 Provides-Extra: dev

-# Mapillary Downloader
+# 🗺️ Mapillary Downloader

 Download your Mapillary data before it's gone.

@@ -63,9 +63,10 @@ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --output ./down
 The downloader will:

 * 💾 Fetch all your uploaded images from Mapillary
-
-
-
+* 📷 Download full-resolution images organized by sequence
+* 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
+  compass direction)
+* 🛟 Save progress so you can safely resume if interrupted

 ## Development

@@ -88,5 +89,6 @@ WTFPL with one additional clause

 1. Don't blame me

-Do wtf you want, but don't blame me
+Do wtf you want, but don't blame me if it makes jokes about the size of your
+disk drive.


{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/README.md
RENAMED
@@ -1,4 +1,4 @@
-# Mapillary Downloader
+# 🗺️ Mapillary Downloader

 Download your Mapillary data before it's gone.

@@ -33,9 +33,10 @@ mapillary-downloader --token YOUR_TOKEN --username YOUR_USERNAME --output ./down
 The downloader will:

 * 💾 Fetch all your uploaded images from Mapillary
-
-
-
+* 📷 Download full-resolution images organized by sequence
+* 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
+  compass direction)
+* 🛟 Save progress so you can safely resume if interrupted

 ## Development

@@ -58,4 +59,5 @@ WTFPL with one additional clause

 1. Don't blame me

-Do wtf you want, but don't blame me
+Do wtf you want, but don't blame me if it makes jokes about the size of your
+disk drive.

{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/__main__.py
RENAMED
@@ -4,38 +4,25 @@ import argparse
 import sys
 from mapillary_downloader.client import MapillaryClient
 from mapillary_downloader.downloader import MapillaryDownloader
+from mapillary_downloader.logging_config import setup_logging


 def main():
     """Main CLI entry point."""
-    parser = argparse.ArgumentParser(
-        description="Download your Mapillary data before it's gone"
-    )
-    parser.add_argument(
-        "--token",
-        required=True,
-        help="Mapillary API access token"
-    )
-    parser.add_argument(
-        "--username",
-        required=True,
-        help="Your Mapillary username"
-    )
-    parser.add_argument(
-        "--output",
-        default="./mapillary_data",
-        help="Output directory (default: ./mapillary_data)"
-    )
+    # Set up logging
+    logger = setup_logging()
+
+    parser = argparse.ArgumentParser(description="Download your Mapillary data before it's gone")
+    parser.add_argument("--token", required=True, help="Mapillary API access token")
+    parser.add_argument("--username", required=True, help="Your Mapillary username")
+    parser.add_argument("--output", default="./mapillary_data", help="Output directory (default: ./mapillary_data)")
     parser.add_argument(
         "--quality",
         choices=["256", "1024", "2048", "original"],
         default="original",
-        help="Image quality to download (default: original)"
-    )
-    parser.add_argument(
-        "--bbox",
-        help="Bounding box: west,south,east,north"
+        help="Image quality to download (default: original)",
     )
+    parser.add_argument("--bbox", help="Bounding box: west,south,east,north")

     args = parser.parse_args()

@@ -46,7 +33,7 @@ def main():
             if len(bbox) != 4:
                 raise ValueError
         except ValueError:
-
+            logger.error("Error: bbox must be four comma-separated numbers")
            sys.exit(1)

    try:
@@ -54,10 +41,10 @@ def main():
         downloader = MapillaryDownloader(client, args.output)
         downloader.download_user_data(args.username, args.quality, bbox)
     except KeyboardInterrupt:
-
+        logger.info("\nInterrupted by user")
         sys.exit(1)
     except Exception as e:
-
+        logger.error(f"Error: {e}")
         sys.exit(1)

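
For context, `--bbox` takes `west,south,east,north`, and the CLI only checks that four comma-separated numbers were supplied. A minimal sketch of equivalent parsing (an illustrative helper, not the package's exact code):

```python
def parse_bbox(raw):
    """Split 'west,south,east,north' into four floats, mirroring the CLI's validation."""
    parts = [float(x) for x in raw.split(",")]
    if len(parts) != 4:
        raise ValueError("bbox must be four comma-separated numbers")
    return parts


# parse_bbox("-0.51,51.28,0.33,51.69") -> [-0.51, 51.28, 0.33, 51.69]
```
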
{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/client.py
RENAMED
@@ -1,9 +1,12 @@
 """Mapillary API client."""

+import logging
 import time
 import requests
 from requests.exceptions import RequestException

+logger = logging.getLogger("mapillary_downloader")
+

 class MapillaryClient:
     """Client for interacting with Mapillary API v4."""
@@ -65,6 +68,7 @@ class MapillaryClient:
             params["bbox"] = ",".join(map(str, bbox))

         url = f"{self.base_url}/images"
+        total_fetched = 0

         while url:
             max_retries = 10
@@ -72,21 +76,24 @@ class MapillaryClient:

             for attempt in range(max_retries):
                 try:
-                    response = self.session.get(url, params=params)
+                    response = self.session.get(url, params=params, timeout=60)
                     response.raise_for_status()
                     break
                 except RequestException as e:
                     if attempt == max_retries - 1:
                         raise

-                    delay = base_delay * (2 ** attempt)
-
-
+                    delay = base_delay * (2**attempt)
+                    logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {e}")
+                    logger.info(f"Retrying in {delay:.1f} seconds...")
                     time.sleep(delay)

             data = response.json()
+            images = data.get("data", [])
+            total_fetched += len(images)
+            logger.info(f"Fetched metadata for {total_fetched:,} images...")

-            for image in data.get("data", []):
+            for image in images:
                 yield image

             # Get next page URL
@@ -111,7 +118,7 @@ class MapillaryClient:

         for attempt in range(max_retries):
             try:
-                response = self.session.get(image_url, stream=True)
+                response = self.session.get(image_url, stream=True, timeout=60)
                 response.raise_for_status()

                 total_bytes = 0
@@ -123,10 +130,10 @@ class MapillaryClient:
                 return total_bytes
             except RequestException as e:
                 if attempt == max_retries - 1:
-
+                    logger.error(f"Error downloading {image_url} after {max_retries} attempts: {e}")
                     return 0

-                delay = base_delay * (2 ** attempt)
-
-
+                delay = base_delay * (2**attempt)
+                logger.warning(f"Download failed (attempt {attempt + 1}/{max_retries}): {e}")
+                logger.info(f"Retrying in {delay:.1f} seconds...")
                 time.sleep(delay)
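
Both retry loops above back off exponentially via `base_delay * (2**attempt)`; the metadata loop sets `max_retries = 10`. The value of `base_delay` isn't visible in this diff, so the sketch below assumes 1 second purely to illustrate the resulting wait times:

```python
# Illustration only: base_delay is assumed to be 1.0s here; its real value isn't shown in the diff.
base_delay = 1.0
max_retries = 10

delays = [base_delay * (2**attempt) for attempt in range(max_retries)]
print(delays)       # [1.0, 2.0, 4.0, 8.0, ..., 512.0]
print(sum(delays))  # 1023.0 seconds of cumulative waiting in the worst case (~17 minutes)
```
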

mapillary_downloader-0.1.3/src/mapillary_downloader/downloader.py
@@ -0,0 +1,192 @@
+"""Main downloader logic."""
+
+import json
+import logging
+import os
+import time
+from pathlib import Path
+from collections import deque
+from mapillary_downloader.exif_writer import write_exif_to_image
+from mapillary_downloader.utils import format_size, format_time
+
+logger = logging.getLogger("mapillary_downloader")
+
+
+class MapillaryDownloader:
+    """Handles downloading Mapillary data for a user."""
+
+    def __init__(self, client, output_dir):
+        """Initialize the downloader.
+
+        Args:
+            client: MapillaryClient instance
+            output_dir: Directory to save downloads
+        """
+        self.client = client
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        self.metadata_file = self.output_dir / "metadata.jsonl"
+        self.progress_file = self.output_dir / "progress.json"
+        self.downloaded = self._load_progress()
+
+    def _load_progress(self):
+        """Load previously downloaded image IDs."""
+        if self.progress_file.exists():
+            with open(self.progress_file) as f:
+                return set(json.load(f).get("downloaded", []))
+        return set()
+
+    def _save_progress(self):
+        """Save progress to disk atomically."""
+        temp_file = self.progress_file.with_suffix(".json.tmp")
+        with open(temp_file, "w") as f:
+            json.dump({"downloaded": list(self.downloaded)}, f)
+            f.flush()
+            os.fsync(f.fileno())
+        temp_file.replace(self.progress_file)
+
+    def download_user_data(self, username, quality="original", bbox=None):
+        """Download all images for a user.
+
+        Args:
+            username: Mapillary username
+            quality: Image quality to download (256, 1024, 2048, original)
+            bbox: Optional bounding box [west, south, east, north]
+        """
+        quality_field = f"thumb_{quality}_url"
+
+        logger.info(f"Downloading images for user: {username}")
+        logger.info(f"Output directory: {self.output_dir}")
+        logger.info(f"Quality: {quality}")
+
+        processed = 0
+        downloaded_count = 0
+        skipped = 0
+        total_bytes = 0
+
+        # Track download times for adaptive ETA (last 50 downloads)
+        download_times = deque(maxlen=50)
+        start_time = time.time()
+
+        # Track which image IDs we've seen in metadata to avoid re-fetching
+        seen_ids = set()
+
+        # First, process any existing metadata without re-fetching from API
+        if self.metadata_file.exists():
+            logger.info("Processing existing metadata file...")
+            with open(self.metadata_file) as f:
+                for line in f:
+                    if line.strip():
+                        image = json.loads(line)
+                        image_id = image["id"]
+                        seen_ids.add(image_id)
+                        processed += 1
+
+                        if image_id in self.downloaded:
+                            skipped += 1
+                            continue
+
+                        # Download this un-downloaded image
+                        image_url = image.get(quality_field)
+                        if not image_url:
+                            logger.warning(f"No {quality} URL for image {image_id}")
+                            continue
+
+                        sequence_id = image.get("sequence")
+                        if sequence_id:
+                            img_dir = self.output_dir / sequence_id
+                            img_dir.mkdir(exist_ok=True)
+                        else:
+                            img_dir = self.output_dir
+
+                        output_path = img_dir / f"{image_id}.jpg"
+
+                        download_start = time.time()
+                        bytes_downloaded = self.client.download_image(image_url, output_path)
+                        if bytes_downloaded:
+                            download_time = time.time() - download_start
+                            download_times.append(download_time)
+
+                            write_exif_to_image(output_path, image)
+
+                            self.downloaded.add(image_id)
+                            downloaded_count += 1
+                            total_bytes += bytes_downloaded
+
+                            progress_str = (
+                                f"Processed: {processed}, Downloaded: {downloaded_count} ({format_size(total_bytes)})"
+                            )
+                            logger.info(progress_str)
+
+                            if downloaded_count % 10 == 0:
+                                self._save_progress()
+
+        # Always check API for new images (will skip duplicates via seen_ids)
+        logger.info("Checking for new images from API...")
+        with open(self.metadata_file, "a") as meta_f:
+            for image in self.client.get_user_images(username, bbox=bbox):
+                image_id = image["id"]
+
+                # Skip if we already have this in our metadata file
+                if image_id in seen_ids:
+                    continue
+
+                seen_ids.add(image_id)
+                processed += 1
+
+                # Save new metadata
+                meta_f.write(json.dumps(image) + "\n")
+                meta_f.flush()
+
+                # Skip if already downloaded
+                if image_id in self.downloaded:
+                    skipped += 1
+                    continue
+
+                # Download image
+                image_url = image.get(quality_field)
+                if not image_url:
+                    logger.warning(f"No {quality} URL for image {image_id}")
+                    continue
+
+                # Use sequence ID for organization
+                sequence_id = image.get("sequence")
+                if sequence_id:
+                    img_dir = self.output_dir / sequence_id
+                    img_dir.mkdir(exist_ok=True)
+                else:
+                    img_dir = self.output_dir
+
+                output_path = img_dir / f"{image_id}.jpg"
+
+                download_start = time.time()
+                bytes_downloaded = self.client.download_image(image_url, output_path)
+                if bytes_downloaded:
+                    download_time = time.time() - download_start
+                    download_times.append(download_time)
+
+                    # Write EXIF metadata to the downloaded image
+                    write_exif_to_image(output_path, image)
+
+                    self.downloaded.add(image_id)
+                    downloaded_count += 1
+                    total_bytes += bytes_downloaded
+
+                    # Calculate progress
+                    progress_str = (
+                        f"Processed: {processed}, Downloaded: {downloaded_count} ({format_size(total_bytes)})"
+                    )
+
+                    logger.info(progress_str)
+
+                    # Save progress every 10 images
+                    if downloaded_count % 10 == 0:
+                        self._save_progress()
+
+        self._save_progress()
+        elapsed = time.time() - start_time
+        logger.info(
+            f"Complete! Processed {processed} images, downloaded {downloaded_count} ({format_size(total_bytes)}), skipped {skipped}"
+        )
+        logger.info(f"Total time: {format_time(elapsed)}")
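
The resume behaviour added here rests on two files in the output directory: `metadata.jsonl` (one JSON object per image, appended as metadata arrives) and `progress.json` (the IDs already downloaded, written atomically via a temp file, `fsync`, and rename). A rough sketch of inspecting that state outside the tool (file layout taken from the code above; the summary helper itself is hypothetical):

```python
import json
from pathlib import Path


def resume_summary(output_dir="./mapillary_data"):
    """Report how many images listed in metadata.jsonl still need downloading."""
    out = Path(output_dir)
    done = set()
    if (out / "progress.json").exists():
        done = set(json.loads((out / "progress.json").read_text()).get("downloaded", []))
    seen = []
    meta = out / "metadata.jsonl"
    if meta.exists():
        seen = [json.loads(line)["id"] for line in meta.read_text().splitlines() if line.strip()]
    return len(seen), len(done), len(set(seen) - done)


# total, downloaded, remaining = resume_summary()
```
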

mapillary_downloader-0.1.3/src/mapillary_downloader/logging_config.py
@@ -0,0 +1,62 @@
+"""Logging configuration with colored output for TTY."""
+
+import logging
+import sys
+
+
+class ColoredFormatter(logging.Formatter):
+    """Formatter that adds color to log levels when output is a TTY."""
+
+    # ANSI color codes
+    COLORS = {
+        "ERROR": "\033[91m",  # Red
+        "WARNING": "\033[93m",  # Yellow
+        "INFO": "\033[92m",  # Green
+        "DEBUG": "\033[94m",  # Blue
+        "RESET": "\033[0m",
+    }
+
+    def __init__(self, fmt=None, datefmt=None, use_color=True):
+        """Initialize the formatter.
+
+        Args:
+            fmt: Log format string
+            datefmt: Date format string
+            use_color: Whether to use colored output
+        """
+        super().__init__(fmt, datefmt)
+        self.use_color = use_color and sys.stdout.isatty()
+
+    def format(self, record):
+        """Format the log record with colors if appropriate.
+
+        Args:
+            record: LogRecord to format
+
+        Returns:
+            Formatted log string
+        """
+        if self.use_color:
+            levelname = record.levelname
+            if levelname in self.COLORS:
+                record.levelname = f"{self.COLORS[levelname]}{levelname}{self.COLORS['RESET']}"
+
+        return super().format(record)
+
+
+def setup_logging(level=logging.INFO):
+    """Set up logging with timestamps and colored output.
+
+    Args:
+        level: Logging level to use
+    """
+    formatter = ColoredFormatter(fmt="%(asctime)s [%(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S")
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(formatter)
+
+    logger = logging.getLogger("mapillary_downloader")
+    logger.setLevel(level)
+    logger.addHandler(handler)
+
+    return logger
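
Usage matches what `__main__.py` now does: call `setup_logging()` once and log through the returned logger. Records come out as `2025-01-01 12:00:00 [INFO] message` (timestamp here is illustrative), with the level name colorized only when stdout is a TTY:

```python
from mapillary_downloader.logging_config import setup_logging

logger = setup_logging()          # INFO level by default
logger.info("Starting download")  # -> "2025-01-01 12:00:00 [INFO] Starting download"
logger.warning("Slow response")   # [WARNING] rendered in yellow when stdout is a TTY
```
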

mapillary_downloader-0.1.3/src/mapillary_downloader/utils.py
@@ -0,0 +1,47 @@
+"""Utility functions for formatting and display."""
+
+
+def format_size(bytes_count):
+    """Format bytes as human-readable size.
+
+    Args:
+        bytes_count: Number of bytes
+
+    Returns:
+        Formatted string (e.g. "1.23 GB", "456.78 MB")
+    """
+    if bytes_count >= 1_000_000_000:
+        return f"{bytes_count / 1_000_000_000:.2f} GB"
+    if bytes_count >= 1_000_000:
+        return f"{bytes_count / 1_000_000:.2f} MB"
+    if bytes_count >= 1_000:
+        return f"{bytes_count / 1000:.2f} KB"
+    return f"{bytes_count} B"
+
+
+def format_time(seconds):
+    """Format seconds as human-readable time.
+
+    Args:
+        seconds: Number of seconds
+
+    Returns:
+        Formatted string (e.g. "2h 15m", "45m 30s", "30s")
+    """
+    if seconds < 60:
+        return f"{int(seconds)}s"
+
+    minutes = int(seconds / 60)
+    remaining_seconds = int(seconds % 60)
+
+    if minutes < 60:
+        if remaining_seconds > 0:
+            return f"{minutes}m {remaining_seconds}s"
+        return f"{minutes}m"
+
+    hours = int(minutes / 60)
+    remaining_minutes = minutes % 60
+
+    if remaining_minutes > 0:
+        return f"{hours}h {remaining_minutes}m"
+    return f"{hours}h"
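
Unlike the 1024-based `format_bytes` helper it replaces (removed below), `format_size` uses decimal units. A few expected outputs:

```python
from mapillary_downloader.utils import format_size, format_time

format_size(456_780_000)    # "456.78 MB"
format_size(1_230_000_000)  # "1.23 GB"
format_time(2730)           # "45m 30s"
format_time(8100)           # "2h 15m"
```
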

mapillary_downloader-0.1.2/src/mapillary_downloader/downloader.py
@@ -1,119 +0,0 @@
-"""Main downloader logic."""
-
-import json
-import os
-from pathlib import Path
-from mapillary_downloader.exif_writer import write_exif_to_image
-
-
-def format_bytes(bytes_count):
-    """Format bytes as human-readable string."""
-    if bytes_count < 1024:
-        return f"{bytes_count} B"
-    if bytes_count < 1024 * 1024:
-        return f"{bytes_count / 1024:.3f} KB"
-    if bytes_count < 1024 * 1024 * 1024:
-        return f"{bytes_count / (1024 * 1024):.3f} MB"
-    return f"{bytes_count / (1024 * 1024 * 1024):.3f} GB"
-
-
-class MapillaryDownloader:
-    """Handles downloading Mapillary data for a user."""
-
-    def __init__(self, client, output_dir):
-        """Initialize the downloader.
-
-        Args:
-            client: MapillaryClient instance
-            output_dir: Directory to save downloads
-        """
-        self.client = client
-        self.output_dir = Path(output_dir)
-        self.output_dir.mkdir(parents=True, exist_ok=True)
-
-        self.metadata_file = self.output_dir / "metadata.jsonl"
-        self.progress_file = self.output_dir / "progress.json"
-        self.downloaded = self._load_progress()
-
-    def _load_progress(self):
-        """Load previously downloaded image IDs."""
-        if self.progress_file.exists():
-            with open(self.progress_file) as f:
-                return set(json.load(f).get("downloaded", []))
-        return set()
-
-    def _save_progress(self):
-        """Save progress to disk atomically."""
-        temp_file = self.progress_file.with_suffix(".json.tmp")
-        with open(temp_file, "w") as f:
-            json.dump({"downloaded": list(self.downloaded)}, f)
-            f.flush()
-            os.fsync(f.fileno())
-        temp_file.replace(self.progress_file)
-
-    def download_user_data(self, username, quality="original", bbox=None):
-        """Download all images for a user.
-
-        Args:
-            username: Mapillary username
-            quality: Image quality to download (256, 1024, 2048, original)
-            bbox: Optional bounding box [west, south, east, north]
-        """
-        quality_field = f"thumb_{quality}_url"
-
-        print(f"Downloading images for user: {username}")
-        print(f"Output directory: {self.output_dir}")
-        print(f"Quality: {quality}")
-
-        processed = 0
-        downloaded_count = 0
-        skipped = 0
-        total_bytes = 0
-
-        with open(self.metadata_file, "a") as meta_f:
-            for image in self.client.get_user_images(username, bbox=bbox):
-                image_id = image["id"]
-                processed += 1
-
-                if image_id in self.downloaded:
-                    skipped += 1
-                    continue
-
-                # Save metadata
-                meta_f.write(json.dumps(image) + "\n")
-                meta_f.flush()
-
-                # Download image
-                image_url = image.get(quality_field)
-                if not image_url:
-                    print(f"No {quality} URL for image {image_id}")
-                    continue
-
-                # Use sequence ID for organization
-                sequence_id = image.get("sequence")
-                if sequence_id:
-                    img_dir = self.output_dir / sequence_id
-                    img_dir.mkdir(exist_ok=True)
-                else:
-                    img_dir = self.output_dir
-
-                output_path = img_dir / f"{image_id}.jpg"
-
-                bytes_downloaded = self.client.download_image(image_url, output_path)
-                if bytes_downloaded:
-                    # Write EXIF metadata to the downloaded image
-                    write_exif_to_image(output_path, image)
-
-                    self.downloaded.add(image_id)
-                    downloaded_count += 1
-                    total_bytes += bytes_downloaded
-                    print(f"Processed: {processed}, Downloaded: {downloaded_count} ({format_bytes(total_bytes)})")
-
-                # Save progress every 10 images
-                if downloaded_count % 10 == 0:
-                    self._save_progress()
-
-        self._save_progress()
-        print(
-            f"\nComplete! Processed {processed} images, downloaded {downloaded_count} ({format_bytes(total_bytes)}), skipped {skipped}"
-        )

{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/LICENSE.md
RENAMED
File without changes

{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/__init__.py
RENAMED
File without changes

{mapillary_downloader-0.1.2 → mapillary_downloader-0.1.3}/src/mapillary_downloader/exif_writer.py
RENAMED
File without changes