mapillary-downloader 0.7.7__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20) hide show
  1. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/PKG-INFO +23 -25
  2. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/README.md +22 -24
  3. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/pyproject.toml +1 -1
  4. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/exif_writer.py +4 -5
  5. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/worker.py +4 -0
  6. mapillary_downloader-0.8.0/src/mapillary_downloader/xmp_writer.py +154 -0
  7. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/LICENSE.md +0 -0
  8. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/__init__.py +0 -0
  9. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/__main__.py +0 -0
  10. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/client.py +0 -0
  11. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/downloader.py +0 -0
  12. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/ia_check.py +0 -0
  13. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/ia_meta.py +0 -0
  14. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/ia_stats.py +0 -0
  15. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/logging_config.py +0 -0
  16. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/metadata_reader.py +0 -0
  17. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/tar_sequences.py +0 -0
  18. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/utils.py +0 -0
  19. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/webp_converter.py +0 -0
  20. {mapillary_downloader-0.7.7 → mapillary_downloader-0.8.0}/src/mapillary_downloader/worker_pool.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mapillary_downloader
3
- Version: 0.7.7
3
+ Version: 0.8.0
4
4
  Summary: Archive user data from Mapillary
5
5
  Author-email: Gareth Davidson <gaz@bitplane.net>
6
6
  Requires-Python: >=3.10
@@ -32,7 +32,7 @@ Provides-Extra: dev
32
32
 
33
33
  Download your Mapillary data before it's gone.
34
34
 
35
- ## Installation
35
+ ## ▶️ Installation
36
36
 
37
37
  Installation is optional, you can prefix the command with `uvx` or `pipx` to
38
38
  download and run it. Or if you're oldskool you can do:
@@ -41,7 +41,7 @@ download and run it. Or if you're oldskool you can do:
41
41
  pip install mapillary-downloader
42
42
  ```
43
43
 
44
- ## Usage
44
+ ## Usage
45
45
 
46
46
  First, get your Mapillary API access token from
47
47
  [the developer dashboard](https://www.mapillary.com/dashboard/developers)
@@ -75,12 +75,14 @@ The downloader will:
75
75
  * 🏛️ Check Internet Archive to avoid duplicate downloads
76
76
  * 📷 Download multiple users' images organized by sequence
77
77
  * 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
78
- compass direction)
78
+ compass direction) and XMP data for panoramas.
79
79
  * 🗜️ Convert to WebP (by default) to save ~70% disk space
80
- * 🛟 Save progress so you can safely resume if interrupted
81
- * 📦 Tar sequence directories (by default) for faster uploads to Internet Archive
80
+ * 🛟 Save progress every 5 minutes so you can safely resume if interrupted
81
+ ()
82
+ * 📦 Tar sequence directories (by default) for faster uploads to Internet
83
+ Archive
82
84
 
83
- ## WebP Conversion
85
+ ## 🖼️ WebP Conversion
84
86
 
85
87
  You'll need the `cwebp` binary installed:
86
88
 
@@ -94,11 +96,7 @@ brew install webp
94
96
 
95
97
  To disable WebP conversion and keep original JPEGs, use `--no-webp`:
96
98
 
97
- ```bash
98
- mapillary-downloader --no-webp USERNAME
99
- ```
100
-
101
- ## Tarballs
99
+ ## 📦 Tarballs
102
100
 
103
101
  Images are organized by capture date (YYYY-MM-DD) for incremental archiving:
104
102
 
@@ -116,16 +114,20 @@ mapillary-username-quality/
116
114
  ```
117
115
 
118
116
  By default, these date directories are automatically tarred after download
119
- (resulting in `2024-01-15.tar`, `2024-01-16.tar`, etc.). This date-based
120
- organization enables:
117
+ (`2024-01-15.tar`, `2024-01-16.tar`, etc.). Reasons:
121
118
 
122
- - **Incremental uploads** - Upload each day's tar as soon as it's ready
123
- - **Manageable file counts** - ~365 days/year × 10 years = 3,650 tars max
124
- - **Chronological organization** - Natural sorting and progress tracking
119
+ * ⤴️ Incremental uploads. Add more to a collection. Well, eventually anyway.
120
+ This won't work yet unless you delete the jsonl file and start again.
121
+ * 📂 Fewer files - ~365 days/year × 10 years = 3,650 tars max. IA only want
122
+ 5k items per collection
123
+ * 🧨 Avoids blowing up IA's derive workers. We don't want Brewster's computers
124
+ to create thumbs for 2 billion images.
125
+ * 💾 I like to have a few inodes available for things other than this. I'm sure
126
+ you do too.
125
127
 
126
128
  To keep individual files instead of creating tars, use the `--no-tar` flag.
127
129
 
128
- ## Internet Archive upload
130
+ ## 🏛️ Internet Archive upload
129
131
 
130
132
  I've written a bash tool to rip media then tag, queue, and upload to The
131
133
  Internet Archive. The metadata is in the same format. If you symlink your
@@ -139,15 +141,11 @@ See inlay for details:
139
141
 
140
142
  To see overall project progress, or an estimate, use `--stats`
141
143
 
142
- ```bash
143
- mapillary-downloader --stats
144
- ```
145
-
146
144
  ## 🚧 Development
147
145
 
148
146
  ```bash
149
147
  make dev # Setup dev environment
150
- make test # Run tests
148
+ make test # Run tests. Note: requires `exiftool`
151
149
  make dist # Build the distribution
152
150
  make help # See other make options
153
151
  ```
@@ -160,12 +158,12 @@ make help # See other make options
160
158
  * [🐱 github](https://github.com/bitplane/mapillary_downloader)
161
159
  * [📀 rip](https://bitplane.net/dev/sh/rip)
162
160
 
163
- ## License
161
+ ## ⚖️ License
164
162
 
165
163
  WTFPL with one additional clause
166
164
 
167
165
  1. Don't blame me
168
166
 
169
167
  Do wtf you want, but don't blame me if it makes jokes about the size of your
170
- disk drive.
168
+ disk.
171
169
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Download your Mapillary data before it's gone.
4
4
 
5
- ## Installation
5
+ ## ▶️ Installation
6
6
 
7
7
  Installation is optional, you can prefix the command with `uvx` or `pipx` to
8
8
  download and run it. Or if you're oldskool you can do:
@@ -11,7 +11,7 @@ download and run it. Or if you're oldskool you can do:
11
11
  pip install mapillary-downloader
12
12
  ```
13
13
 
14
- ## Usage
14
+ ## Usage
15
15
 
16
16
  First, get your Mapillary API access token from
17
17
  [the developer dashboard](https://www.mapillary.com/dashboard/developers)
@@ -45,12 +45,14 @@ The downloader will:
45
45
  * 🏛️ Check Internet Archive to avoid duplicate downloads
46
46
  * 📷 Download multiple users' images organized by sequence
47
47
  * 📜 Inject EXIF metadata (GPS coordinates, camera info, timestamps,
48
- compass direction)
48
+ compass direction) and XMP data for panoramas.
49
49
  * 🗜️ Convert to WebP (by default) to save ~70% disk space
50
- * 🛟 Save progress so you can safely resume if interrupted
51
- * 📦 Tar sequence directories (by default) for faster uploads to Internet Archive
50
+ * 🛟 Save progress every 5 minutes so you can safely resume if interrupted
51
+ ()
52
+ * 📦 Tar sequence directories (by default) for faster uploads to Internet
53
+ Archive
52
54
 
53
- ## WebP Conversion
55
+ ## 🖼️ WebP Conversion
54
56
 
55
57
  You'll need the `cwebp` binary installed:
56
58
 
@@ -64,11 +66,7 @@ brew install webp
64
66
 
65
67
  To disable WebP conversion and keep original JPEGs, use `--no-webp`:
66
68
 
67
- ```bash
68
- mapillary-downloader --no-webp USERNAME
69
- ```
70
-
71
- ## Tarballs
69
+ ## 📦 Tarballs
72
70
 
73
71
  Images are organized by capture date (YYYY-MM-DD) for incremental archiving:
74
72
 
@@ -86,16 +84,20 @@ mapillary-username-quality/
86
84
  ```
87
85
 
88
86
  By default, these date directories are automatically tarred after download
89
- (resulting in `2024-01-15.tar`, `2024-01-16.tar`, etc.). This date-based
90
- organization enables:
87
+ (`2024-01-15.tar`, `2024-01-16.tar`, etc.). Reasons:
91
88
 
92
- - **Incremental uploads** - Upload each day's tar as soon as it's ready
93
- - **Manageable file counts** - ~365 days/year × 10 years = 3,650 tars max
94
- - **Chronological organization** - Natural sorting and progress tracking
89
+ * ⤴️ Incremental uploads. Add more to a collection. Well, eventually anyway.
90
+ This won't work yet unless you delete the jsonl file and start again.
91
+ * 📂 Fewer files - ~365 days/year × 10 years = 3,650 tars max. IA only want
92
+ 5k items per collection
93
+ * 🧨 Avoids blowing up IA's derive workers. We don't want Brewster's computers
94
+ to create thumbs for 2 billion images.
95
+ * 💾 I like to have a few inodes available for things other than this. I'm sure
96
+ you do too.
95
97
 
96
98
  To keep individual files instead of creating tars, use the `--no-tar` flag.
97
99
 
98
- ## Internet Archive upload
100
+ ## 🏛️ Internet Archive upload
99
101
 
100
102
  I've written a bash tool to rip media then tag, queue, and upload to The
101
103
  Internet Archive. The metadata is in the same format. If you symlink your
@@ -109,15 +111,11 @@ See inlay for details:
109
111
 
110
112
  To see overall project progress, or an estimate, use `--stats`
111
113
 
112
- ```bash
113
- mapillary-downloader --stats
114
- ```
115
-
116
114
  ## 🚧 Development
117
115
 
118
116
  ```bash
119
117
  make dev # Setup dev environment
120
- make test # Run tests
118
+ make test # Run tests. Note: requires `exiftool`
121
119
  make dist # Build the distribution
122
120
  make help # See other make options
123
121
  ```
@@ -130,11 +128,11 @@ make help # See other make options
130
128
  * [🐱 github](https://github.com/bitplane/mapillary_downloader)
131
129
  * [📀 rip](https://bitplane.net/dev/sh/rip)
132
130
 
133
- ## License
131
+ ## ⚖️ License
134
132
 
135
133
  WTFPL with one additional clause
136
134
 
137
135
  1. Don't blame me
138
136
 
139
137
  Do wtf you want, but don't blame me if it makes jokes about the size of your
140
- disk drive.
138
+ disk.
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "mapillary_downloader"
3
3
  description = "Archive user data from Mapillary"
4
- version = "0.7.7"
4
+ version = "0.8.0"
5
5
  authors = [
6
6
  { name = "Gareth Davidson", email = "gaz@bitplane.net" }
7
7
  ]
@@ -72,9 +72,6 @@ def write_exif_to_image(image_path, metadata):
72
72
  if "model" in metadata and metadata["model"]:
73
73
  exif_dict["0th"][piexif.ImageIFD.Model] = metadata["model"].encode("utf-8")
74
74
 
75
- if "exif_orientation" in metadata and metadata["exif_orientation"]:
76
- exif_dict["0th"][piexif.ImageIFD.Orientation] = metadata["exif_orientation"]
77
-
78
75
  if "width" in metadata and metadata["width"]:
79
76
  exif_dict["0th"][piexif.ImageIFD.ImageWidth] = metadata["width"]
80
77
 
@@ -88,6 +85,8 @@ def write_exif_to_image(image_path, metadata):
88
85
  exif_dict["0th"][piexif.ImageIFD.DateTime] = datetime_bytes
89
86
  exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = datetime_bytes
90
87
  exif_dict["Exif"][piexif.ExifIFD.DateTimeDigitized] = datetime_bytes
88
+ exif_dict["Exif"][piexif.ExifIFD.SubSecTimeOriginal] = ("000" + str(metadata["captured_at"] % 1000))[-3:]
89
+ exif_dict["Exif"][piexif.ExifIFD.SubSecTimeDigitized] = ("000" + str(metadata["captured_at"] % 1000))[-3:]
91
90
 
92
91
  # GPS data - prefer computed_geometry over geometry
93
92
  geometry = metadata.get("computed_geometry") or metadata.get("geometry")
@@ -102,8 +101,8 @@ def write_exif_to_image(image_path, metadata):
102
101
  exif_dict["GPS"][piexif.GPSIFD.GPSLongitude] = decimal_to_dms(lon)
103
102
  exif_dict["GPS"][piexif.GPSIFD.GPSLongitudeRef] = b"E" if lon >= 0 else b"W"
104
103
 
105
- # GPS Altitude - prefer computed_altitude over altitude
106
- altitude = metadata.get("computed_altitude") or metadata.get("altitude")
104
+ # GPS Altitude - prefer raw altitude (photogrammetry can't compute elevation)
105
+ altitude = metadata.get("altitude") or metadata.get("computed_altitude")
107
106
  if altitude is not None:
108
107
  altitude_val = int(abs(altitude) * 100)
109
108
  logger.debug(f"Raw altitude value: {altitude}, calculated: {altitude_val}")
@@ -7,6 +7,7 @@ from datetime import datetime
7
7
  from pathlib import Path
8
8
  import requests
9
9
  from mapillary_downloader.exif_writer import write_exif_to_image
10
+ from mapillary_downloader.xmp_writer import write_xmp_to_image
10
11
  from mapillary_downloader.webp_converter import convert_to_webp
11
12
  from mapillary_downloader.utils import http_get_with_retry
12
13
 
@@ -117,6 +118,9 @@ def download_and_convert_image(image_data, output_dir, quality, convert_webp, se
117
118
  # Write EXIF metadata
118
119
  write_exif_to_image(jpg_path, image_data)
119
120
 
121
+ # Write XMP metadata for panoramas
122
+ write_xmp_to_image(jpg_path, image_data)
123
+
120
124
  # Convert to WebP if requested
121
125
  if convert_webp:
122
126
  webp_path = convert_to_webp(jpg_path, output_path=final_path, delete_original=False)
@@ -0,0 +1,154 @@
1
+ """XMP metadata writer for panoramic Mapillary images."""
2
+
3
+ import logging
4
+
5
+ logger = logging.getLogger("mapillary_downloader")
6
+
7
+ # Null-terminated namespace signature that marks a JPEG APP1 segment as an XMP packet
8
+ XMP_NAMESPACE = b"http://ns.adobe.com/xap/1.0/\x00"
9
+
10
+ # XMP packet template for GPano metadata; {width}, {height} and {pose_heading} are filled in with str.format()
11
+ XMP_TEMPLATE = """<?xpacket begin="\ufeff" id="W5M0MpCehiHzreSzNTczkc9d"?>
12
+ <x:xmpmeta xmlns:x="adobe:ns:meta/">
13
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
14
+ <rdf:Description rdf:about=""
15
+ xmlns:GPano="http://ns.google.com/photos/1.0/panorama/"
16
+ GPano:ProjectionType="equirectangular"
17
+ GPano:UsePanoramaViewer="True"
18
+ GPano:FullPanoWidthPixels="{width}"
19
+ GPano:FullPanoHeightPixels="{height}"
20
+ GPano:CroppedAreaImageWidthPixels="{width}"
21
+ GPano:CroppedAreaImageHeightPixels="{height}"
22
+ GPano:CroppedAreaLeftPixels="0"
23
+ GPano:CroppedAreaTopPixels="0"{pose_heading}/>
24
+ </rdf:RDF>
25
+ </x:xmpmeta>
26
+ <?xpacket end="w"?>"""
27
+
28
+
29
def build_xmp_packet(metadata):
    """Build XMP packet with GPano metadata.

    Args:
        metadata: Dictionary with width, height, and optionally
            computed_compass_angle and/or compass_angle. The chosen angle is
            emitted as the GPano:PoseHeadingDegrees attribute.

    Returns:
        XMP XML string
    """
    width = metadata.get("width", 0)
    height = metadata.get("height", 0)

    # Prefer the computed compass angle, but test against None explicitly:
    # a heading of 0 is a valid value yet falsy, so `a or b` would either
    # replace it with the raw angle or drop the heading altogether.
    compass = metadata.get("computed_compass_angle")
    if compass is None:
        compass = metadata.get("compass_angle")

    # Build pose heading attribute if available
    if compass is not None:
        pose_heading = f'\n GPano:PoseHeadingDegrees="{compass:.1f}"'
    else:
        pose_heading = ""

    return XMP_TEMPLATE.format(
        width=width,
        height=height,
        pose_heading=pose_heading,
    )
55
+
56
+
57
def write_xmp_to_image(image_path, metadata):
    """Write XMP GPano metadata to a JPEG image for panoramas.

    Only writes metadata if is_pano is truthy in the metadata dict. An
    existing XMP APP1 segment is replaced in place; otherwise the new segment
    is inserted after the leading run of APP0/APP1 segments. The file is left
    untouched when one of those leading segments has an invalid length.

    Args:
        image_path: Path to the JPEG image file
        metadata: Dictionary of metadata from Mapillary API

    Returns:
        True if XMP was written, False if skipped or failed
    """
    # Only write XMP for panoramas
    if not metadata.get("is_pano"):
        return False

    # Need dimensions to write meaningful GPano data
    if not metadata.get("width") or not metadata.get("height"):
        logger.warning(f"Skipping XMP for {image_path}: missing dimensions")
        return False

    try:
        # Read the JPEG file
        with open(image_path, "rb") as f:
            data = f.read()

        # Verify JPEG signature (SOI marker)
        if data[:2] != b"\xff\xd8":
            logger.warning(f"Skipping XMP for {image_path}: not a valid JPEG")
            return False

        # APP1 payload: XMP namespace signature followed by the packet itself
        payload = XMP_NAMESPACE + build_xmp_packet(metadata).encode("utf-8")
        segment_length = len(payload) + 2  # +2: the length field counts itself

        if segment_length > 65535:
            logger.warning(f"Skipping XMP for {image_path}: XMP too large")
            return False

        # APP1 marker (0xFFE1) + length + data
        full_segment = b"\xff\xe1" + segment_length.to_bytes(2, byteorder="big") + payload

        # Walk the leading APP0/APP1 segments. `pos` ends up at the splice
        # start; `replace_end` is set only when an existing XMP segment has
        # to be overwritten rather than a new one inserted.
        pos = 2  # Skip SOI
        replace_end = None

        while pos < len(data) - 1:
            if data[pos] != 0xFF:
                break

            marker = data[pos + 1]

            # Anything other than APP0 (JFIF) / APP1 (EXIF or XMP) ends the
            # scan, including SOS and non-marker data: insert XMP here.
            if marker not in (0xE0, 0xE1):
                break

            seg_len = int.from_bytes(data[pos + 2 : pos + 4], byteorder="big")
            seg_end = pos + 2 + seg_len

            # A length below 2 or one running past EOF means the header is
            # truncated or corrupt; splicing at such an offset would write a
            # damaged file, so give up before touching it.
            if seg_len < 2 or seg_end > len(data):
                logger.warning(f"Skipping XMP for {image_path}: corrupt JPEG segment")
                return False

            if marker == 0xE1 and data[pos + 4 : seg_end].startswith(XMP_NAMESPACE):
                # XMP already exists, replace it
                replace_end = seg_end
                break

            # Skip this segment (APP0, or an APP1 that is probably EXIF)
            pos = seg_end

        if replace_end is None:
            new_data = data[:pos] + full_segment + data[pos:]
        else:
            new_data = data[:pos] + full_segment + data[replace_end:]

        with open(image_path, "wb") as f:
            f.write(new_data)

        if replace_end is None:
            logger.debug(f"Wrote XMP GPano metadata to {image_path}")
        else:
            logger.debug(f"Replaced XMP in {image_path}")
        return True

    except Exception as e:
        # Best-effort step: a failure here is reported but must not abort the
        # caller's download pipeline.
        logger.warning(f"Failed to write XMP to {image_path}: {e}")
        return False