mapillary-dl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_dl-0.1.0/PKG-INFO +87 -0
- mapillary_dl-0.1.0/README.md +73 -0
- mapillary_dl-0.1.0/cli.py +488 -0
- mapillary_dl-0.1.0/config.py +117 -0
- mapillary_dl-0.1.0/database.py +115 -0
- mapillary_dl-0.1.0/downloader.py +407 -0
- mapillary_dl-0.1.0/mapillary_dl.egg-info/PKG-INFO +87 -0
- mapillary_dl-0.1.0/mapillary_dl.egg-info/SOURCES.txt +12 -0
- mapillary_dl-0.1.0/mapillary_dl.egg-info/dependency_links.txt +1 -0
- mapillary_dl-0.1.0/mapillary_dl.egg-info/entry_points.txt +2 -0
- mapillary_dl-0.1.0/mapillary_dl.egg-info/requires.txt +7 -0
- mapillary_dl-0.1.0/mapillary_dl.egg-info/top_level.txt +4 -0
- mapillary_dl-0.1.0/pyproject.toml +25 -0
- mapillary_dl-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mapillary-dl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI tool to bulk-download street-level imagery from Mapillary
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: folium>=0.17
|
|
8
|
+
Requires-Dist: mapillary>=1.0
|
|
9
|
+
Requires-Dist: piexif>=1.1
|
|
10
|
+
Requires-Dist: python-dotenv>=1.0
|
|
11
|
+
Requires-Dist: questionary>=2.0
|
|
12
|
+
Requires-Dist: requests>=2.31
|
|
13
|
+
Requires-Dist: tqdm>=4.66
|
|
14
|
+
|
|
15
|
+
# Mapillary Bulk Downloader
|
|
16
|
+
|
|
17
|
+
A CLI tool for downloading street-level imagery from [Mapillary](https://www.mapillary.com/) at city scale. Define a bounding box, discover every available image inside it, and download them all — with GPS coordinates embedded in EXIF, resumable downloads, and a SQLite-backed discovery cache that makes re-runs instant.
|
|
18
|
+
|
|
19
|
+
Built to collect training data for 3D city reconstruction (COLMAP + Gaussian Splatting), where you need tens of thousands of geo-tagged street photos covering contiguous areas.
|
|
20
|
+
|
|
21
|
+
## What it does
|
|
22
|
+
|
|
23
|
+
1. **Discover** — Splits a bounding box into a grid, queries every cell in parallel (30 workers), and recursively subdivides cells that hit the API limit. Finds every image Mapillary has in the area.
|
|
24
|
+
2. **Cache** — Stores all discovered image IDs and coordinates in a local SQLite database (`images.db`). Subsequent runs skip the API entirely unless you ask to re-discover.
|
|
25
|
+
3. **Download** — Pulls images at 2048px resolution with progress bars. Embeds GPS lat/lon into JPEG EXIF so each file is self-contained. Tracks what's been downloaded with atomic SQLite writes, so you can interrupt and resume at any time.
|
|
26
|
+
|
|
27
|
+
## Quick start
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
# Install dependencies
|
|
31
|
+
uv sync
|
|
32
|
+
|
|
33
|
+
# Set your Mapillary API token
|
|
34
|
+
echo 'MAPILLARY_CLIENT_TOKEN=MLY|...' > .env
|
|
35
|
+
|
|
36
|
+
# Interactive mode — pick a city, preview the area on a map, then download
|
|
37
|
+
uv run python3 cli.py
|
|
38
|
+
|
|
39
|
+
# Or go headless
|
|
40
|
+
uv run python3 cli.py --city "San Francisco"
|
|
41
|
+
uv run python3 cli.py --bbox "-122.52,37.70,-122.35,37.83" --limit 500
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Usage
|
|
45
|
+
|
|
46
|
+
```
|
|
47
|
+
uv run python3 cli.py [OPTIONS]
|
|
48
|
+
|
|
49
|
+
Options:
|
|
50
|
+
--city NAME Download from a predefined city
|
|
51
|
+
--bbox W,S,E,N Custom bounding box (overrides --city)
|
|
52
|
+
--limit N Cap the number of images to download
|
|
53
|
+
--output-dir PATH Output directory (default: data/<city>)
|
|
54
|
+
--preview Open an interactive map in the browser before downloading
|
|
55
|
+
--state STATE Discovery state when resuming: maintain | merge | rediscover
|
|
56
|
+
--no-save-discovery Don't persist discovered IDs to the database
|
|
57
|
+
--list-cities Show predefined cities and exit
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**No arguments** launches interactive mode: arrow-key city selection, optional map preview via [Folium](https://python-visualization.github.io/folium/), discovery summary, and a confirmation prompt before downloading.
|
|
61
|
+
|
|
62
|
+
### Discovery states
|
|
63
|
+
|
|
64
|
+
When an `images.db` already exists for a city:
|
|
65
|
+
|
|
66
|
+
| State | Behavior |
|
|
67
|
+
|-------|----------|
|
|
68
|
+
| `maintain` | Load from DB, skip API calls (default) |
|
|
69
|
+
| `merge` | Re-discover and add any new images to the existing DB |
|
|
70
|
+
| `rediscover` | Wipe the DB and run a full fresh discovery |
|
|
71
|
+
|
|
72
|
+
## Architecture
|
|
73
|
+
|
|
74
|
+
```
|
|
75
|
+
cli.py — CLI entry point: argparse, interactive prompts, map preview
|
|
76
|
+
downloader.py — MapillaryClient (API) + ImageDownloader (grid split, parallel discovery, download loop)
|
|
77
|
+
database.py — DiscoveryDB: SQLite cache with singleton pattern, tracks discovered/downloaded state
|
|
78
|
+
config.py — Dataclasses (MapillaryConfig, BoundingBox), env loading, predefined city bounding boxes
|
|
79
|
+
scripts/ — Standalone utilities (GPS coordinate enrichment)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Key design decisions
|
|
83
|
+
|
|
84
|
+
- **Adaptive grid splitting**: The API caps results at 2,000 per query. Dense urban areas easily exceed that. The downloader starts with coarse grid cells and recursively subdivides any cell that saturates the limit, down to a minimum cell size. This guarantees complete coverage without manual tuning.
|
|
85
|
+
- **SQLite over JSON**: Early versions used `download_metadata.json`. Switched to SQLite for atomic writes (no corruption on Ctrl+C), fast set-membership lookups on 100k+ image IDs, and clean separation of discovery vs. download state.
|
|
86
|
+
- **GPS in EXIF**: Coordinates are embedded directly into each JPEG at download time. This means images work standalone — no sidecar files, no separate metadata lookup. Precision is normalized to 7 decimal places (~1 cm) so DB and EXIF values match exactly.
|
|
87
|
+
- **Disk reconciliation**: On resume, the downloader checks what's actually on disk (not just what the DB says) and reconciles the two. Images on disk missing GPS get coordinates embedded; images in the DB but missing from disk get re-queued.
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# Mapillary Bulk Downloader
|
|
2
|
+
|
|
3
|
+
A CLI tool for downloading street-level imagery from [Mapillary](https://www.mapillary.com/) at city scale. Define a bounding box, discover every available image inside it, and download them all — with GPS coordinates embedded in EXIF, resumable downloads, and a SQLite-backed discovery cache that makes re-runs instant.
|
|
4
|
+
|
|
5
|
+
Built to collect training data for 3D city reconstruction (COLMAP + Gaussian Splatting), where you need tens of thousands of geo-tagged street photos covering contiguous areas.
|
|
6
|
+
|
|
7
|
+
## What it does
|
|
8
|
+
|
|
9
|
+
1. **Discover** — Splits a bounding box into a grid, queries every cell in parallel (30 workers), and recursively subdivides cells that hit the API limit. Finds every image Mapillary has in the area.
|
|
10
|
+
2. **Cache** — Stores all discovered image IDs and coordinates in a local SQLite database (`images.db`). Subsequent runs skip the API entirely unless you ask to re-discover.
|
|
11
|
+
3. **Download** — Pulls images at 2048px resolution with progress bars. Embeds GPS lat/lon into JPEG EXIF so each file is self-contained. Tracks what's been downloaded with atomic SQLite writes, so you can interrupt and resume at any time.
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
# Install dependencies
|
|
17
|
+
uv sync
|
|
18
|
+
|
|
19
|
+
# Set your Mapillary API token
|
|
20
|
+
echo 'MAPILLARY_CLIENT_TOKEN=MLY|...' > .env
|
|
21
|
+
|
|
22
|
+
# Interactive mode — pick a city, preview the area on a map, then download
|
|
23
|
+
uv run python3 cli.py
|
|
24
|
+
|
|
25
|
+
# Or go headless
|
|
26
|
+
uv run python3 cli.py --city "San Francisco"
|
|
27
|
+
uv run python3 cli.py --bbox "-122.52,37.70,-122.35,37.83" --limit 500
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Usage
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
uv run python3 cli.py [OPTIONS]
|
|
34
|
+
|
|
35
|
+
Options:
|
|
36
|
+
--city NAME Download from a predefined city
|
|
37
|
+
--bbox W,S,E,N Custom bounding box (overrides --city)
|
|
38
|
+
--limit N Cap the number of images to download
|
|
39
|
+
--output-dir PATH Output directory (default: data/<city>)
|
|
40
|
+
--preview Open an interactive map in the browser before downloading
|
|
41
|
+
--state STATE Discovery state when resuming: maintain | merge | rediscover
|
|
42
|
+
--no-save-discovery Don't persist discovered IDs to the database
|
|
43
|
+
--list-cities Show predefined cities and exit
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
**No arguments** launches interactive mode: arrow-key city selection, optional map preview via [Folium](https://python-visualization.github.io/folium/), discovery summary, and a confirmation prompt before downloading.
|
|
47
|
+
|
|
48
|
+
### Discovery states
|
|
49
|
+
|
|
50
|
+
When an `images.db` already exists for a city:
|
|
51
|
+
|
|
52
|
+
| State | Behavior |
|
|
53
|
+
|-------|----------|
|
|
54
|
+
| `maintain` | Load from DB, skip API calls (default) |
|
|
55
|
+
| `merge` | Re-discover and add any new images to the existing DB |
|
|
56
|
+
| `rediscover` | Wipe the DB and run a full fresh discovery |
|
|
57
|
+
|
|
58
|
+
## Architecture
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
cli.py — CLI entry point: argparse, interactive prompts, map preview
|
|
62
|
+
downloader.py — MapillaryClient (API) + ImageDownloader (grid split, parallel discovery, download loop)
|
|
63
|
+
database.py — DiscoveryDB: SQLite cache with singleton pattern, tracks discovered/downloaded state
|
|
64
|
+
config.py — Dataclasses (MapillaryConfig, BoundingBox), env loading, predefined city bounding boxes
|
|
65
|
+
scripts/ — Standalone utilities (GPS coordinate enrichment)
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Key design decisions
|
|
69
|
+
|
|
70
|
+
- **Adaptive grid splitting**: The API caps results at 2,000 per query. Dense urban areas easily exceed that. The downloader starts with coarse grid cells and recursively subdivides any cell that saturates the limit, down to a minimum cell size. This guarantees complete coverage without manual tuning.
|
|
71
|
+
- **SQLite over JSON**: Early versions used `download_metadata.json`. Switched to SQLite for atomic writes (no corruption on Ctrl+C), fast set-membership lookups on 100k+ image IDs, and clean separation of discovery vs. download state.
|
|
72
|
+
- **GPS in EXIF**: Coordinates are embedded directly into each JPEG at download time. This means images work standalone — no sidecar files, no separate metadata lookup. Precision is normalized to 7 decimal places (~1 cm) so DB and EXIF values match exactly.
|
|
73
|
+
- **Disk reconciliation**: On resume, the downloader checks what's actually on disk (not just what the DB says) and reconciles the two. Images on disk missing GPS get coordinates embedded; images in the DB but missing from disk get re-queued.
|
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CLI tool to download street-level imagery from Mapillary for any city.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
# Interactive mode (no arguments)
|
|
6
|
+
uv run python3 cli.py
|
|
7
|
+
|
|
8
|
+
# Non-interactive: specify city by name
|
|
9
|
+
uv run python3 cli.py --city "New York"
|
|
10
|
+
|
|
11
|
+
# Non-interactive: custom bounding box
|
|
12
|
+
uv run python3 cli.py --bbox "-122.52,37.70,-122.35,37.83"
|
|
13
|
+
|
|
14
|
+
# With image limit (for testing)
|
|
15
|
+
uv run python3 cli.py --city "San Francisco" --limit 100
|
|
16
|
+
|
|
17
|
+
# Show map preview (off by default)
|
|
18
|
+
uv run python3 cli.py --city "San Francisco" --preview
|
|
19
|
+
|
|
20
|
+
# Resume without re-hitting API (default when images.db exists)
|
|
21
|
+
uv run python3 cli.py --city "San Francisco" --state maintain
|
|
22
|
+
|
|
23
|
+
# Re-discover and merge new images into existing DB
|
|
24
|
+
uv run python3 cli.py --city "San Francisco" --state merge
|
|
25
|
+
|
|
26
|
+
# Wipe DB and discover fresh
|
|
27
|
+
uv run python3 cli.py --city "San Francisco" --state rediscover
|
|
28
|
+
|
|
29
|
+
# Fine-grained discovery (finds more images, much slower)
|
|
30
|
+
uv run python3 cli.py --city "San Francisco" --granularity 80
|
|
31
|
+
|
|
32
|
+
# Show available cities
|
|
33
|
+
uv run python3 cli.py --list-cities
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
import argparse
|
|
37
|
+
import atexit
|
|
38
|
+
import sys
|
|
39
|
+
import tempfile
|
|
40
|
+
import webbrowser
|
|
41
|
+
from datetime import datetime, timezone
|
|
42
|
+
from pathlib import Path
|
|
43
|
+
|
|
44
|
+
import folium
|
|
45
|
+
import folium.plugins
|
|
46
|
+
import questionary
|
|
47
|
+
|
|
48
|
+
from config import get_mapillary_config, BoundingBox, DATA_DIR, CITY_BBOXES, GRANULARITY_MIN, GRANULARITY_MAX, GRANULARITY_DEFAULT, granularity_to_grid_params
|
|
49
|
+
from downloader import MapillaryClient, ImageDownloader
|
|
50
|
+
from database import DiscoveryDB
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Warn the user when cached discovery data is at least this many days old.
DISCOVERY_STALENESS_DAYS = 21
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def ask_or_exit(question):
    """Run a questionary prompt and return its answer, exiting on cancel.

    questionary returns None when the user aborts with Ctrl+C, so a None
    result means "stop the program" rather than a real answer.
    """
    result = question.ask()
    if result is None:
        sys.exit(0)
    return result
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_bbox_for_city(city_name: str) -> BoundingBox:
    """Look up the bounding box for a predefined city (case-insensitive).

    Args:
        city_name: Name of the city

    Returns:
        BoundingBox object

    Exits with status 1 (after listing the known cities) when the name is
    not present in CITY_BBOXES.
    """
    key = city_name.lower()
    if key in CITY_BBOXES:
        return CITY_BBOXES[key]

    # Unknown city: show the user what is available, then bail out.
    print(f"\n⚠️ City '{city_name}' not found in predefined list.")
    print("\nAvailable cities:")
    for known in sorted(CITY_BBOXES):
        print(f" - {known.title()}")
    print("\nPlease use --bbox to specify custom coordinates.")
    sys.exit(1)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def generate_map_preview(
    bbox: BoundingBox,
    location_name: str,
    heat_coords: list[list[float]] | None = None,
) -> str:
    """Build an interactive folium HTML map of the download area.

    Draws the bounding box outline, a marker at its center, and optionally a
    heat map of discovered image positions.

    Args:
        bbox: Bounding box to visualize
        location_name: Name of the location for the map title
        heat_coords: Optional list of [lat, lon] pairs to render as heat map

    Returns:
        Path to the generated HTML file
    """
    mid_lat = (bbox.south + bbox.north) / 2
    mid_lon = (bbox.west + bbox.east) / 2

    fmap = folium.Map(
        location=[mid_lat, mid_lon],
        zoom_start=12,
        tiles="OpenStreetMap",
    )

    # Closed ring tracing the bbox corners (first point repeated to close it).
    outline = [
        [bbox.south, bbox.west],
        [bbox.south, bbox.east],
        [bbox.north, bbox.east],
        [bbox.north, bbox.west],
        [bbox.south, bbox.west],
    ]

    folium.PolyLine(
        outline,
        color="red",
        weight=3,
        opacity=0.8,
        popup=f"Download Area: {location_name}",
    ).add_to(fmap)

    folium.Marker(
        location=[mid_lat, mid_lon],
        popup=f"Center of {location_name}",
        tooltip="Download area center",
    ).add_to(fmap)

    if heat_coords:
        folium.plugins.HeatMap(heat_coords, radius=8, blur=10, min_opacity=0.3).add_to(fmap)

    # Write to a fixed temp-dir path and remove it when the process exits.
    out_path = Path(tempfile.gettempdir()) / "cityzero_preview.html"
    fmap.save(str(out_path))
    atexit.register(lambda: out_path.unlink(missing_ok=True))

    return str(out_path)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def warn_if_stale(db: DiscoveryDB) -> None:
    """Print a warning when the cached discovery data is old enough to refresh."""
    last_run = db.get_last_discovered_at()
    if last_run is None:
        # Never discovered (or no timestamp recorded) — nothing to compare.
        return
    elapsed = datetime.now(timezone.utc) - last_run
    if elapsed.days >= DISCOVERY_STALENESS_DAYS:
        print()
        print(f"⚠️ Discovery data is {elapsed.days} days old.")
        print(" Consider --state merge or --state rediscover to refresh.")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def prompt_discovery_state() -> str:
    """Ask the user how to treat an existing per-city database.

    Returns:
        One of 'maintain', 'merge', or 'rediscover'; falls back to
        'maintain' if the prompt yields a falsy value.
    """
    print()
    prompt = questionary.select(
        "An existing database for this city was found. Discovery state?",
        choices=[
            questionary.Choice(title="Maintain: load from DB, skip API discovery", value="maintain"),
            questionary.Choice(title="Merge: re-discover and add new images to existing DB", value="merge"),
            questionary.Choice(title="Rediscover: wipe DB and run a full fresh discovery", value="rediscover"),
        ],
    )
    return ask_or_exit(prompt) or "maintain"
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def show_download_summary(
    downloader: ImageDownloader,
    bbox: BoundingBox,
    location_name: str,
    db: DiscoveryDB,
    state: str,
    save_to_db: bool,
    max_images: int | None = None,
    is_interactive: bool = True,
    show_preview: bool = True,
) -> tuple[bool, list[dict]]:
    """Determine images to download and show summary before download.

    Args:
        downloader: Downloader used for discovery and disk reconciliation.
        bbox: Area to discover/download.
        location_name: Human-readable name for prompts and the map title.
        db: Discovery database for this city.
        state: 'maintain' | 'merge' | 'rediscover'
        save_to_db: Whether to persist discovered images to DB.
        max_images: Optional cap on the number of pending images.
        is_interactive: Accepted for interface compatibility.
            NOTE(review): currently unused — the confirm prompt below runs
            even in headless mode; verify whether that is intended.
        show_preview: Open a browser heat-map preview before confirming.

    Returns:
        (confirmed, pending_images)
    """
    print(f"\n📊 Analyzing {location_name}...")

    if state == "rediscover":
        db.wipe_images()

    # Record city/bbox metadata whenever a fresh discovery is about to run.
    if state in ("merge", "rediscover"):
        if save_to_db:
            db.set_meta("city", location_name)
            db.set_meta("bbox_west", str(bbox.west))
            db.set_meta("bbox_south", str(bbox.south))
            db.set_meta("bbox_east", str(bbox.east))
            db.set_meta("bbox_north", str(bbox.north))

    discovery_db = db if save_to_db else None
    discovered = downloader.discover_images(bbox, db=discovery_db)

    if save_to_db:
        db.set_meta("last_discovered_at", str(int(datetime.now(timezone.utc).timestamp())))

    # Pending set comes from the DB when discovery was persisted; otherwise
    # it is computed directly from the raw API results.
    if not save_to_db and state in ("merge", "rediscover"):
        downloaded_ids = db.get_downloaded_ids()
        pending_raw = [img for img in discovered if img.get("id") not in downloaded_ids]
    else:
        pending_raw = db.get_pending_images_metadata()

    if not pending_raw:
        if db.get_image_count() > 0:
            print("✓ All images already downloaded!")
        else:
            print("❌ No images found in existing database. Consider running with --state rediscover.")
        return False, []

    # Delete old disk images before reconcile so reconcile sees a clean slate
    if state == "rediscover":
        existing_images = list(downloader.output_dir.glob("*.jpg"))
        if existing_images and ask_or_exit(questionary.confirm(
            f"Found {len(existing_images):,} downloaded images on disk. Delete?",
            default=False,
        )):
            for img_path in existing_images:
                img_path.unlink()
            print(f"✓ Deleted {len(existing_images):,} existing images")

    # Reconcile disk state before applying --limit so the limit picks genuinely new images
    pending = downloader.reconcile_disk_images(pending_raw, db)

    if max_images and len(pending) > max_images:
        pending = pending[:max_images]

    # pending is either DB format {lat, lon} or raw API format {geometry.coordinates},
    # depending on whether --no-save-discovery was used
    heat_coords = []
    for img in pending:
        if "lat" in img:
            heat_coords.append([img["lat"], img["lon"]])
        else:
            coords = img.get("geometry", {}).get("coordinates", [])
            if len(coords) >= 2:
                # GeoJSON coordinates are [lon, lat]; folium expects [lat, lon].
                heat_coords.append([coords[1], coords[0]])
    if show_preview:
        print(f"\n📍 Generating coverage map...")
        coverage_map = generate_map_preview(bbox, location_name, heat_coords)
        print(f" Opening in browser: {coverage_map}")
        webbrowser.open(f"file://{coverage_map}")

    if save_to_db:
        total = db.get_image_count()
        downloaded_count = total - db.get_pending_count()
    else:
        total = len(discovered)
        downloaded_count = total - len(pending)
    print("\n📋 Discovery Summary:")
    print(f" {'Location:':<22} {location_name}")
    print(f" {'Total found:':<22} {total:,}")
    print(f" {'Already downloaded:':<22} {downloaded_count:,}")
    print(f" {'New to download:':<22} {len(pending):,}")

    proceed = ask_or_exit(questionary.confirm(
        f"Download {len(pending):,} new images?",
        default=True,
    ))

    return bool(proceed), pending
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def prompt_granularity() -> int:
    """Prompt user for discovery granularity (1–100) with guidance."""
    print(f"\n📐 Discovery granularity — how hard to look ({GRANULARITY_MIN}=fast, {GRANULARITY_MAX}=thorough)")
    print(f" Low values work best with smaller bounding boxes.")
    print(f" At 80+ for large areas, expect hours to days of discovery.")

    answer = ask_or_exit(questionary.text(
        f"Granularity ({GRANULARITY_MIN}–{GRANULARITY_MAX}):",
        default=str(GRANULARITY_DEFAULT),
        # Reject anything that is not an integer inside the allowed range.
        validate=lambda v: v.isdigit() and GRANULARITY_MIN <= int(v) <= GRANULARITY_MAX,
    ))
    return int(answer)
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def interactive_mode(show_preview: bool = True) -> tuple[BoundingBox, str]:
    """Run interactive mode: prompt user to select city and show map preview.

    Returns:
        Tuple of (BoundingBox, location_name)
    """
    print("\n" + "="*70)
    print("🗺️ CityZero Image Downloader")
    print("="*70)

    choices = [name.title() for name in sorted(CITY_BBOXES.keys())]
    choices.append("Custom bounding box...")

    picked = ask_or_exit(questionary.select(
        "Select a city or custom area:",
        choices=choices
    ))

    if picked == "Custom bounding box...":
        raw_bbox = ask_or_exit(questionary.text(
            "Enter bounding box (west,south,east,north):",
            default="-122.52,37.70,-122.35,37.83"
        ))

        bbox = BoundingBox.from_string(raw_bbox)
        if bbox is None:
            print(f"Invalid bbox format: '{raw_bbox}'. Expected: west,south,east,north")
            sys.exit(1)
        location_name = "Custom Area"
    else:
        location_name = picked
        bbox = get_bbox_for_city(picked)

    if show_preview:
        print(f"\n📍 Generating map preview for {location_name}...")
        map_file = generate_map_preview(bbox, location_name)
        print(f" Opening in browser: {map_file}")
        webbrowser.open(f"file://{map_file}")

    return bbox, location_name
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def main():
    """Main CLI entry point: parse args, resolve the area, discover, download."""
    parser = argparse.ArgumentParser(
        description="Download street-level imagery from Mapillary",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  Interactive mode (recommended):
    uv run python3 cli.py

  Non-interactive: specify city by name:
    uv run python3 cli.py --city "New York"

  Non-interactive: custom bounding box:
    uv run python3 cli.py --bbox "-74.05,40.68,-73.91,40.88"

  Limit download for testing:
    uv run python3 cli.py --city "San Francisco" --limit 50

  Specify output directory:
    uv run python3 cli.py --output-dir data/sf_images

  Show available cities:
    uv run python3 cli.py --list-cities
"""
    )

    parser.add_argument('--city', type=str, help='City name (enables non-interactive mode)')
    parser.add_argument('--bbox', type=str, help='Custom bounding box as "west,south,east,north" (overrides --city)')
    parser.add_argument('--limit', type=int, help='Maximum number of images to download (useful for testing)')
    parser.add_argument('--output-dir', type=Path, default=None, help=f'Output directory for images (default: {DATA_DIR}/<city>)')
    parser.add_argument('--list-cities', action='store_true', help='List available predefined cities and exit')
    parser.add_argument('--preview', action='store_true', help='Open browser map previews before downloading')
    parser.add_argument(
        '--state',
        choices=['maintain', 'merge', 'rediscover'],
        default=None,
        help='Discovery state when images.db exists: maintain (default) | merge | rediscover',
    )
    parser.add_argument(
        '--no-save-discovery',
        action='store_true',
        help='Skip saving discovered image IDs to images.db (headless only)',
    )
    parser.add_argument(
        '--granularity',
        type=int,
        default=GRANULARITY_DEFAULT,
        metavar='1-100',
        help=f'Discovery granularity: 1 = fast/coarse, 100 = slow/thorough (default: {GRANULARITY_DEFAULT})',
    )
    args = parser.parse_args()

    # Validate granularity up front, before any network or disk work.
    if not (GRANULARITY_MIN <= args.granularity <= GRANULARITY_MAX):
        print(f"❌ --granularity must be between {GRANULARITY_MIN} and {GRANULARITY_MAX}")
        sys.exit(1)

    if args.list_cities:
        print("\n📍 Available cities:")
        for city in sorted(CITY_BBOXES.keys()):
            bbox = CITY_BBOXES[city]
            print(f" {city.title():20} {bbox.to_tuple()}")
        return

    # No --city and no --bbox means fully interactive prompts.
    is_interactive = not (args.city or args.bbox)
    show_preview = is_interactive or args.preview

    # Resolve the bounding box and a display name for it.
    if is_interactive:
        bbox, location_name = interactive_mode(show_preview=show_preview)
    elif args.bbox:
        print(f"\n📍 Using custom bounding box")
        bbox = BoundingBox.from_string(args.bbox)
        if bbox is None:
            print(f"Invalid bbox format: '{args.bbox}'. Expected: west,south,east,north")
            print(" Example: -122.52,37.70,-122.35,37.83")
            sys.exit(1)
        location_name = "Custom Area"
    else:
        print(f"\n📍 Location: {args.city}")
        bbox = get_bbox_for_city(args.city)
        location_name = args.city

    # Headless preview: interactive_mode already opened a map, so this only
    # fires for --city/--bbox runs that also passed --preview.
    if not is_interactive and args.preview and show_preview:
        print(f"\n📍 Generating map preview...")
        map_file = generate_map_preview(bbox, location_name)
        print(f" Opening in browser: {map_file}")
        webbrowser.open(f"file://{map_file}")
        input("\nPress Enter to continue...")

    # Default output directory is derived from the location name.
    if args.output_dir is None:
        if location_name == "Custom Area":
            args.output_dir = DATA_DIR
        else:
            normalized = location_name.lower().replace(" ", "_")
            args.output_dir = DATA_DIR / normalized

    args.output_dir.mkdir(parents=True, exist_ok=True)
    print(f"📁 Output: {args.output_dir}")

    config = get_mapillary_config()
    if config is None:
        print("\n❌ MAPILLARY_CLIENT_TOKEN not set.")
        print("\nPlease ensure:")
        print("1. .env file exists in project root")
        print("2. MAPILLARY_CLIENT_TOKEN is set correctly")
        print("3. Token format: MLY|numeric_id|hex_string")
        sys.exit(1)

    client = MapillaryClient(config)
    downloader = ImageDownloader(client, output_dir=args.output_dir / "images")
    db = DiscoveryDB.get(args.output_dir / "images.db")

    db_has_data = db.get_image_count() > 0
    if db_has_data:
        if is_interactive:
            warn_if_stale(db)
            state = prompt_discovery_state()
        else:
            state = args.state or "maintain"
            if state == "maintain":
                warn_if_stale(db)
        # NOTE(review): with an existing DB, --no-save-discovery is ignored
        # (save_to_db is forced True); confirm whether that is intended.
        save_to_db = True
    else:
        state = "rediscover"
        save_to_db = not args.no_save_discovery
        if is_interactive:
            if not ask_or_exit(questionary.confirm("Proceed with discovery?", default=True)):
                sys.exit(0)

    # Grid parameters only matter when a fresh discovery will actually run.
    if state != "maintain":
        granularity = prompt_granularity() if is_interactive else args.granularity
        downloader.grid = granularity_to_grid_params(granularity)
        print(f"🔬 Granularity: {granularity}/{GRANULARITY_MAX} (grid={downloader.grid.grid_cell_size}°, min={downloader.grid.min_cell_size}°)")

    confirmed, pending_images = show_download_summary(
        downloader, bbox, location_name, db, state, save_to_db, args.limit, is_interactive, show_preview
    )
    if not confirmed:
        print("\nCancelled by user.")
        sys.exit(0)

    try:
        stats = downloader.download_images(
            bbox=bbox, db=db, max_images=args.limit, images=pending_images
        )

        # Non-zero exit status when any image failed, so scripts can detect it.
        sys.exit(1 if stats['failed'] > 0 else 0)

    except KeyboardInterrupt:
        print("\n\n⚠️ Download interrupted by user")
        print("Run the same command again to resume from where you left off.")
        sys.exit(130)
    except Exception as e:
        print(f"\n❌ Error during download: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
|