mapillary-tools 0.10.2a0__py3-none-any.whl → 0.10.3a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/commands/process.py +4 -3
- mapillary_tools/exceptions.py +4 -0
- mapillary_tools/exif_read.py +543 -65
- mapillary_tools/exiftool_read.py +406 -0
- mapillary_tools/exiftool_read_video.py +360 -0
- mapillary_tools/geo.py +10 -2
- mapillary_tools/geotag/geotag_from_generic.py +13 -2
- mapillary_tools/geotag/{geotag_from_exif.py → geotag_images_from_exif.py} +51 -67
- mapillary_tools/geotag/geotag_images_from_exiftool.py +123 -0
- mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +81 -0
- mapillary_tools/geotag/{geotag_from_gpx.py → geotag_images_from_gpx.py} +16 -13
- mapillary_tools/geotag/{geotag_from_gpx_file.py → geotag_images_from_gpx_file.py} +52 -36
- mapillary_tools/geotag/{geotag_from_nmea_file.py → geotag_images_from_nmea_file.py} +4 -5
- mapillary_tools/geotag/geotag_images_from_video.py +87 -0
- mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +105 -0
- mapillary_tools/geotag/geotag_videos_from_video.py +175 -0
- mapillary_tools/process_geotag_properties.py +65 -31
- mapillary_tools/sample_video.py +19 -6
- mapillary_tools/types.py +2 -0
- mapillary_tools/utils.py +24 -2
- {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/METADATA +1 -1
- {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/RECORD +27 -24
- mapillary_tools/geotag/geotag_from_blackvue.py +0 -93
- mapillary_tools/geotag/geotag_from_camm.py +0 -94
- mapillary_tools/geotag/geotag_from_gopro.py +0 -96
- mapillary_tools/geotag/geotag_from_video.py +0 -145
- {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/LICENSE +0 -0
- {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/WHEEL +0 -0
- {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,360 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import typing as T
|
|
3
|
+
import xml.etree.ElementTree as ET
|
|
4
|
+
|
|
5
|
+
from . import exif_read, geo
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Highest QuickTime track index that gets a "Track<N>" namespace entry below.
MAX_TRACK_ID = 10

# Maps the prefix of a "Prefix:Tag" name to the ExifTool XML namespace URI
# used to build the fully-qualified "{uri}Tag" element name.
EXIFTOOL_NAMESPACES: T.Dict[str, str] = {
    "Keys": "http://ns.exiftool.org/QuickTime/Keys/1.0/",
    "IFD0": "http://ns.exiftool.org/EXIF/IFD0/1.0/",
    "QuickTime": "http://ns.exiftool.org/QuickTime/QuickTime/1.0/",
    "UserData": "http://ns.exiftool.org/QuickTime/UserData/1.0/",
    "Insta360": "http://ns.exiftool.org/Trailer/Insta360/1.0/",
    "GoPro": "http://ns.exiftool.org/QuickTime/GoPro/1.0/",
}
# Register Track1 .. Track<MAX_TRACK_ID> after the fixed prefixes.
EXIFTOOL_NAMESPACES.update(
    (f"Track{track_id}", f"http://ns.exiftool.org/QuickTime/Track{track_id}/1.0/")
    for track_id in range(1, MAX_TRACK_ID + 1)
)

LOG = logging.getLogger(__name__)

# Result types supported by _extract_alternative_fields.
_FIELD_TYPE = T.TypeVar("_FIELD_TYPE", int, float, str, T.List[str])
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _maybe_float(text: T.Optional[str]) -> T.Optional[float]:
    """Convert *text* to a float, or return None when it is missing or unparsable."""
    if text is not None:
        try:
            return float(text)
        except (ValueError, TypeError):
            # Not a number: fall through to the shared None result.
            pass
    return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _expand_tag(ns_tag: str) -> str:
    """Turn a "Prefix:Tag" name into the "{namespace-uri}Tag" element name.

    Raises ValueError when *ns_tag* does not consist of exactly two
    colon-separated parts, and KeyError when the prefix is not registered
    in EXIFTOOL_NAMESPACES.
    """
    pieces = ns_tag.split(":", maxsplit=2)
    if len(pieces) != 2:
        raise ValueError(f"Invalid tag {ns_tag}")
    prefix, local_name = pieces
    return f"{{{EXIFTOOL_NAMESPACES[prefix]}}}{local_name}"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _index_text_by_tag(elements: T.Iterable[ET.Element]) -> T.Dict[str, T.List[str]]:
    """Group element texts by element tag, preserving iteration order.

    Elements whose text is None are ignored, so a tag that only occurs on
    such elements does not appear in the result.
    """
    index: T.Dict[str, T.List[str]] = {}
    for node in elements:
        text = node.text
        if text is None:
            continue
        bucket = index.get(node.tag)
        if bucket is None:
            index[node.tag] = [text]
        else:
            bucket.append(text)
    return index
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _extract_alternative_fields(
    texts_by_tag: T.Dict[str, T.List[str]],
    fields: T.Sequence[str],
    field_type: T.Type[_FIELD_TYPE],
) -> T.Optional[_FIELD_TYPE]:
    """Return the value of the first usable field among *fields*.

    Each "Prefix:Tag" name in *fields* is tried in order. For ``list`` the
    whole list of texts stored for the tag is returned; for ``int``,
    ``float`` and ``str`` the first text is converted, and a failed
    conversion moves on to the next field. Returns None when no field
    yields a value. Raises ValueError for any other *field_type*, but only
    once a field that is present has been found.
    """
    for field in fields:
        candidates = texts_by_tag.get(_expand_tag(field))
        if candidates is None:
            continue

        if field_type is list:
            return T.cast(_FIELD_TYPE, candidates)

        if not (field_type is int or field_type is float or field_type is str):
            raise ValueError(f"Invalid field type {field_type}")

        try:
            # field_type is one of int/float/str here and doubles as the converter
            return T.cast(_FIELD_TYPE, field_type(candidates[0]))
        except (ValueError, TypeError):
            continue
    return None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _aggregate_gps_track(
    texts_by_tag: T.Dict[str, T.List[str]],
    time_tag: T.Optional[str],
    lon_tag: str,
    lat_tag: str,
    alt_tag: T.Optional[str] = None,
    direction_tag: T.Optional[str] = None,
    speed_tag: T.Optional[str] = None,
) -> T.List[geo.Point]:
    """Zip the per-tag text lists into a sorted, de-duplicated list of points.

    Longitudes and latitudes are required and must have the same count;
    when *time_tag* is given, timestamps must match that count too. A
    count mismatch logs a warning and returns an empty list. Without
    *time_tag* every point gets time 0.0. Altitude and direction are
    optional and padded with None. Speed values are read but not stored
    on the points.

    Rows with an unparsable timestamp, longitude or latitude are dropped.
    The result is sorted by time, times are made relative to the first
    point, and consecutive points with the same (time, lon, lat) are
    collapsed to the first one.
    """

    def _texts_of(tag: str) -> T.List[str]:
        # all raw texts stored for the tag, or an empty list if absent
        return _extract_alternative_fields(texts_by_tag, [tag], list) or []

    def _optional_floats(tag: T.Optional[str]) -> T.List[T.Optional[float]]:
        # parsed values for an optional tag, padded with None up to point_count
        column: T.List[T.Optional[float]] = []
        if tag is not None:
            column = [_maybe_float(text) for text in _texts_of(tag)]
        shortfall = point_count - len(column)
        if shortfall > 0:
            column.extend([None] * shortfall)
        return column

    # coordinates (required)
    lons = [_maybe_float(text) for text in _texts_of(lon_tag)]
    lats = [_maybe_float(text) for text in _texts_of(lat_tag)]
    if len(lons) != len(lats):
        # the two lists cannot be paired up reliably
        LOG.warning(
            "Found different number of longitudes %d and latitudes %d",
            len(lons),
            len(lats),
        )
        return []

    point_count = len(lats)

    # timestamps (optional)
    if time_tag is None:
        timestamps = [0.0] * point_count
    else:
        parsed = [exif_read.parse_gps_datetime(text) for text in _texts_of(time_tag)]
        timestamps = [None if dt is None else geo.as_unix_time(dt) for dt in parsed]
        if len(timestamps) != point_count:
            # timestamps cannot be paired with coordinates reliably
            LOG.warning(
                "Found different number of timestamps %d and coordinates %d",
                len(timestamps),
                point_count,
            )
            return []

    assert len(timestamps) == point_count

    # altitudes, directions and speeds (all optional)
    alts = _optional_floats(alt_tag)
    directions = _optional_floats(direction_tag)
    speeds = _optional_floats(speed_tag)

    # assemble the points, skipping rows that lack a required value
    rows = zip(timestamps, lons, lats, alts, directions, speeds)
    track = [
        geo.Point(time=ts, lon=lon, lat=lat, alt=alt, angle=angle)
        for ts, lon, lat, alt, angle, _speed in rows
        if not (ts is None or lon is None or lat is None)
    ]
    track.sort(key=lambda point: point.time)

    # make all times relative to the earliest point
    if track:
        origin = track[0].time
        for point in track:
            point.time -= origin

    # collapse consecutive points that share time and position
    deduplicated_track = []
    last_key = None
    for point in track:
        key = (point.time, point.lon, point.lat)
        if key != last_key:
            deduplicated_track.append(point)
        last_key = key

    return deduplicated_track
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _aggregate_samples(
    elements: T.Iterable[ET.Element],
    sample_time_tag: str,
    sample_duration_tag: str,
) -> T.Generator[T.Tuple[float, float, T.List[ET.Element]], None, None]:
    """Split a flat element stream into timed samples.

    A sample starts at an element tagged *sample_time_tag* and runs until
    the next such element (or the end of the stream). Its duration comes
    from the element tagged *sample_duration_tag* inside the sample; all
    other elements in between are collected as the sample's payload.

    Yields ``(sample_time, sample_duration, payload_elements)`` for every
    sample whose time and duration both parse as floats. Samples missing
    either value are skipped together with their payload. Elements seen
    before the first sample-time element belong to no sample and are
    discarded.

    Both tag arguments are "Prefix:Tag" names; see _expand_tag for the
    errors raised on malformed or unknown names.
    """
    expanded_sample_time_tag = _expand_tag(sample_time_tag)
    expanded_sample_duration_tag = _expand_tag(sample_duration_tag)

    accumulated_elements: T.List[ET.Element] = []
    sample_time = None
    sample_duration = None
    for element in elements:
        if element.tag == expanded_sample_time_tag:
            if sample_time is not None and sample_duration is not None:
                yield (sample_time, sample_duration, accumulated_elements)
            # Start every sample from a clean slate, even when the previous
            # one was incomplete: otherwise its payload would leak into this
            # sample, and its duration would be reused if this sample has none.
            accumulated_elements = []
            sample_time = _maybe_float(element.text)
            sample_duration = None
        elif element.tag == expanded_sample_duration_tag:
            sample_duration = _maybe_float(element.text)
        else:
            accumulated_elements.append(element)

    # flush the trailing sample
    if sample_time is not None and sample_duration is not None:
        yield (sample_time, sample_duration, accumulated_elements)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def _aggregate_gps_track_by_sample_time(
    sample_iterator: T.Iterable[T.Tuple[float, float, T.List[ET.Element]]],
    lon_tag: str,
    lat_tag: str,
    alt_tag: T.Optional[str] = None,
    direction_tag: T.Optional[str] = None,
    speed_tag: T.Optional[str] = None,
) -> T.List[geo.Point]:
    """Build a GPS track whose point times are derived from sample timing.

    For every ``(sample_time, sample_duration, elements)`` triple the
    points found in *elements* are spread evenly over the sample: point
    ``i`` of ``n`` gets time ``sample_time + i * sample_duration / n``.
    The points of all samples are returned as one list sorted by time.
    """
    merged: T.List[geo.Point] = []

    for start, duration, sample_elements in sample_iterator:
        sample_points = _aggregate_gps_track(
            _index_text_by_tag(sample_elements),
            time_tag=None,
            lon_tag=lon_tag,
            lat_tag=lat_tag,
            alt_tag=alt_tag,
            direction_tag=direction_tag,
            speed_tag=speed_tag,
        )
        if not sample_points:
            continue
        step = duration / len(sample_points)
        for offset, point in enumerate(sample_points):
            point.time = start + offset * step
        merged += sample_points

    merged.sort(key=lambda point: point.time)

    return merged
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
class ExifToolReadVideo:
    """Read GPS track, make and model from an ExifTool XML element tree.

    The direct children of the tree's root are indexed by tag once at
    construction time; the extract_* methods answer from that index.
    """

    def __init__(
        self,
        etree: ET.ElementTree,
    ) -> None:
        self.etree = etree
        # texts of the root's child elements, grouped by fully-qualified tag
        self._texts_by_tag = _index_text_by_tag(self.etree.getroot())
        self._all_tags = set(self._texts_by_tag)

    def extract_gps_track(self) -> T.List[geo.Point]:
        """Return the first non-empty track found, or an empty list.

        Sources are tried in this order: QuickTime GPS tags, Insta360 GPS
        tags, then the per-track (Track1..Track<MAX_TRACK_ID>) samples.
        """
        sources = (
            self._extract_gps_track_from_quicktime,
            lambda: self._extract_gps_track_from_quicktime(namespace="Insta360"),
            self._extract_gps_track_from_track,
        )
        for read_track in sources:
            track = read_track()
            if track:
                return track
        return []

    def extract_make(self) -> T.Optional[str]:
        """Return the stripped camera make, or None when no make tag is present."""
        make = self._extract_alternative_fields(
            ["IFD0:Make", "Keys:Make", "UserData:Make", "Insta360:Make", "GoPro:Make"],
            str,
        )
        return None if make is None else make.strip()

    def extract_model(self) -> T.Optional[str]:
        """Return the stripped camera model, or None when no model tag is present."""
        model = self._extract_alternative_fields(
            [
                "IFD0:Model",
                "Keys:Model",
                "UserData:Model",
                "Insta360:Model",
                "GoPro:Model",
            ],
            str,
        )
        return None if model is None else model.strip()

    def _extract_gps_track_from_track(self) -> T.List[geo.Point]:
        """Return the track of the first TrackN namespace that yields points."""
        for track_id in range(1, MAX_TRACK_ID + 1):
            prefix = f"Track{track_id}"
            time_tag = f"{prefix}:SampleTime"
            duration_tag = f"{prefix}:SampleDuration"
            lon_tag = f"{prefix}:GPSLongitude"
            lat_tag = f"{prefix}:GPSLatitude"

            required = {
                _expand_tag(time_tag),
                _expand_tag(duration_tag),
                _expand_tag(lon_tag),
                _expand_tag(lat_tag),
            }
            if not self._all_tags_exists(required):
                continue

            samples = _aggregate_samples(
                self.etree.getroot(),
                time_tag,
                duration_tag,
            )
            track = _aggregate_gps_track_by_sample_time(
                samples,
                lon_tag=lon_tag,
                lat_tag=lat_tag,
                alt_tag=f"{prefix}:GPSAltitude",
                direction_tag=f"{prefix}:GPSTrack",
            )
            if track:
                return track
        return []

    def _extract_alternative_fields(
        self,
        fields: T.Sequence[str],
        field_type: T.Type[_FIELD_TYPE],
    ) -> T.Optional[_FIELD_TYPE]:
        """Look up *fields* in this video's tag index (see the module-level helper)."""
        return _extract_alternative_fields(self._texts_by_tag, fields, field_type)

    def _all_tags_exists(self, tags: T.Set[str]) -> bool:
        """Return True when every fully-qualified tag in *tags* is in the index."""
        return self._all_tags.issuperset(tags)

    def _extract_gps_track_from_quicktime(
        self, namespace: str = "QuickTime"
    ) -> T.List[geo.Point]:
        """Return the track built from the GPS tags of *namespace*.

        Returns an empty list unless GPSDateTime, GPSLongitude and
        GPSLatitude are all present in that namespace.
        """
        time_tag = f"{namespace}:GPSDateTime"
        lon_tag = f"{namespace}:GPSLongitude"
        lat_tag = f"{namespace}:GPSLatitude"

        required = {
            _expand_tag(time_tag),
            _expand_tag(lon_tag),
            _expand_tag(lat_tag),
        }
        if not self._all_tags_exists(required):
            return []

        return _aggregate_gps_track(
            self._texts_by_tag,
            time_tag=time_tag,
            lon_tag=lon_tag,
            lat_tag=lat_tag,
            alt_tag=f"{namespace}:GPSAltitude",
            direction_tag=f"{namespace}:GPSTrack",
        )
|
mapillary_tools/geo.py
CHANGED
|
@@ -175,8 +175,16 @@ def as_unix_time(dt: T.Union[datetime.datetime, int, float]) -> float:
|
|
|
175
175
|
if isinstance(dt, (int, float)):
|
|
176
176
|
return dt
|
|
177
177
|
else:
|
|
178
|
-
|
|
179
|
-
|
|
178
|
+
try:
|
|
179
|
+
# if dt is naive, assume it's in local timezone
|
|
180
|
+
return dt.timestamp()
|
|
181
|
+
except ValueError:
|
|
182
|
+
# Some datetimes can't be converted to timestamp
|
|
183
|
+
# e.g. 0001-01-01 00:00:00 will throw ValueError: year 0 is out of range
|
|
184
|
+
try:
|
|
185
|
+
return dt.replace(year=1970).timestamp()
|
|
186
|
+
except ValueError:
|
|
187
|
+
return 0.0
|
|
180
188
|
|
|
181
189
|
|
|
182
190
|
def _interpolate_segment(start: Point, end: Point, t: float) -> Point:
|
|
@@ -1,11 +1,22 @@
|
|
|
1
|
+
import abc
|
|
1
2
|
import typing as T
|
|
2
3
|
|
|
3
4
|
from .. import types
|
|
4
5
|
|
|
5
6
|
|
|
6
|
-
class
|
|
7
|
+
class GeotagImagesFromGeneric(abc.ABC):
    """Abstract base for geotaggers that produce image metadata."""

    def __init__(self) -> None:
        """Nothing to initialise; present so subclasses can call super().__init__()."""

    @abc.abstractmethod
    def to_description(self) -> T.List[types.ImageMetadataOrError]:
        """Return one metadata-or-error entry per processed image."""
        raise NotImplementedError
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class GeotagVideosFromGeneric(abc.ABC):
    """Abstract base for geotaggers that produce video metadata."""

    def __init__(self) -> None:
        """Nothing to initialise; present so subclasses can call super().__init__()."""

    @abc.abstractmethod
    def to_description(self) -> T.List[types.VideoMetadataOrError]:
        """Return one metadata-or-error entry per processed video."""
        raise NotImplementedError
|
|
@@ -4,96 +4,58 @@ import typing as T
|
|
|
4
4
|
from multiprocessing import Pool
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
-
import piexif
|
|
8
|
-
|
|
9
7
|
from tqdm import tqdm
|
|
10
8
|
|
|
11
|
-
from .. import exif_write, geo, types
|
|
12
|
-
from ..
|
|
13
|
-
from
|
|
14
|
-
|
|
15
|
-
from .geotag_from_generic import GeotagFromGeneric
|
|
9
|
+
from .. import exceptions, exif_write, geo, types
|
|
10
|
+
from ..exif_read import ExifRead, ExifReadABC
|
|
11
|
+
from .geotag_from_generic import GeotagImagesFromGeneric
|
|
16
12
|
|
|
17
13
|
LOG = logging.getLogger(__name__)
|
|
18
14
|
|
|
19
15
|
|
|
20
16
|
def verify_image_exif_write(
    metadata: types.ImageMetadata,
    image_bytes: T.Optional[bytes] = None,
) -> None:
    """Dry-run writing *metadata* into the image's EXIF, raising on failure.

    The image is taken from *image_bytes* when given, otherwise it is
    loaded from ``metadata.filename``. Nothing is returned; any exception
    from building or dumping the EXIF propagates to the caller.
    """
    source = metadata.filename if image_bytes is None else image_bytes
    edit = exif_write.ExifEdit(source)

    # The cast is to fix the type error in Python3.6:
    # Argument 1 to "add_image_description" of "ExifEdit" has incompatible type "ImageDescription"; expected "Dict[str, Any]"
    description = T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
    edit.add_image_description(description)

    # Possible errors thrown here:
    # - struct.error: 'H' format requires 0 <= number <= 65535
    # - piexif.InvalidImageDataError
    edit.dump_image_bytes()
|
|
54
35
|
|
|
55
|
-
|
|
36
|
+
|
|
37
|
+
class GeotagImagesFromEXIF(GeotagImagesFromGeneric):
|
|
56
38
|
def __init__(self, image_paths: T.Sequence[Path]):
|
|
57
39
|
self.image_paths = image_paths
|
|
58
40
|
super().__init__()
|
|
59
41
|
|
|
60
42
|
@staticmethod
|
|
61
|
-
def
|
|
62
|
-
image_path: Path, skip_lonlat_error: bool = False
|
|
63
|
-
) -> types.
|
|
64
|
-
with image_path.open("rb") as fp:
|
|
65
|
-
image_data = fp.read()
|
|
66
|
-
image_bytesio = io.BytesIO(image_data)
|
|
67
|
-
|
|
68
|
-
try:
|
|
69
|
-
exif = ExifRead(image_bytesio)
|
|
70
|
-
except Exception as ex:
|
|
71
|
-
LOG.warning(
|
|
72
|
-
"Unknown error reading EXIF from image %s",
|
|
73
|
-
image_path,
|
|
74
|
-
exc_info=True,
|
|
75
|
-
)
|
|
76
|
-
return types.describe_error_metadata(
|
|
77
|
-
ex, image_path, filetype=types.FileType.IMAGE
|
|
78
|
-
)
|
|
79
|
-
|
|
43
|
+
def build_image_metadata(
|
|
44
|
+
image_path: Path, exif: ExifReadABC, skip_lonlat_error: bool = False
|
|
45
|
+
) -> types.ImageMetadata:
|
|
80
46
|
lonlat = exif.extract_lon_lat()
|
|
81
47
|
if lonlat is None:
|
|
82
48
|
if not skip_lonlat_error:
|
|
83
|
-
|
|
49
|
+
raise exceptions.MapillaryGeoTaggingError(
|
|
84
50
|
"Unable to extract GPS Longitude or GPS Latitude from the image"
|
|
85
51
|
)
|
|
86
|
-
return types.describe_error_metadata(
|
|
87
|
-
exc, image_path, filetype=types.FileType.IMAGE
|
|
88
|
-
)
|
|
89
52
|
lonlat = (0.0, 0.0)
|
|
90
53
|
lon, lat = lonlat
|
|
91
54
|
|
|
92
55
|
capture_time = exif.extract_capture_time()
|
|
93
56
|
if capture_time is None:
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
exc, image_path, filetype=types.FileType.IMAGE
|
|
57
|
+
raise exceptions.MapillaryGeoTaggingError(
|
|
58
|
+
"Unable to extract timestamp from the image"
|
|
97
59
|
)
|
|
98
60
|
|
|
99
61
|
image_metadata = types.ImageMetadata(
|
|
@@ -111,26 +73,48 @@ class GeotagFromEXIF(GeotagFromGeneric):
|
|
|
111
73
|
MAPDeviceModel=exif.extract_model(),
|
|
112
74
|
)
|
|
113
75
|
|
|
114
|
-
|
|
115
|
-
|
|
76
|
+
return image_metadata
|
|
77
|
+
|
|
78
|
+
@staticmethod
|
|
79
|
+
def geotag_image(
|
|
80
|
+
image_path: Path, skip_lonlat_error: bool = False
|
|
81
|
+
) -> types.ImageMetadataOrError:
|
|
82
|
+
try:
|
|
83
|
+
# load the image bytes into memory to avoid reading it multiple times
|
|
84
|
+
with image_path.open("rb") as fp:
|
|
85
|
+
image_bytesio = io.BytesIO(fp.read())
|
|
86
|
+
|
|
87
|
+
image_bytesio.seek(0, io.SEEK_SET)
|
|
88
|
+
exif = ExifRead(image_bytesio)
|
|
89
|
+
|
|
90
|
+
image_metadata = GeotagImagesFromEXIF.build_image_metadata(
|
|
91
|
+
image_path, exif, skip_lonlat_error=skip_lonlat_error
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
image_bytesio.seek(0, io.SEEK_SET)
|
|
95
|
+
verify_image_exif_write(
|
|
96
|
+
image_metadata,
|
|
97
|
+
image_bytes=image_bytesio.read(),
|
|
98
|
+
)
|
|
99
|
+
except Exception as ex:
|
|
100
|
+
return types.describe_error_metadata(
|
|
101
|
+
ex, image_path, filetype=types.FileType.IMAGE
|
|
102
|
+
)
|
|
116
103
|
|
|
117
104
|
image_bytesio.seek(0, io.SEEK_SET)
|
|
118
|
-
|
|
119
|
-
image_metadata,
|
|
120
|
-
image_data=image_bytesio.read(),
|
|
121
|
-
)
|
|
105
|
+
image_metadata.update_md5sum(image_bytesio)
|
|
122
106
|
|
|
123
|
-
return
|
|
107
|
+
return image_metadata
|
|
124
108
|
|
|
125
109
|
def to_description(self) -> T.List[types.ImageMetadataOrError]:
|
|
126
110
|
with Pool() as pool:
|
|
127
|
-
|
|
128
|
-
|
|
111
|
+
image_metadatas_iter = pool.imap(
|
|
112
|
+
GeotagImagesFromEXIF.geotag_image,
|
|
129
113
|
self.image_paths,
|
|
130
114
|
)
|
|
131
115
|
return list(
|
|
132
116
|
tqdm(
|
|
133
|
-
|
|
117
|
+
image_metadatas_iter,
|
|
134
118
|
desc="Extracting geotags from images",
|
|
135
119
|
unit="images",
|
|
136
120
|
disable=LOG.getEffectiveLevel() <= logging.DEBUG,
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import logging
|
|
3
|
+
import typing as T
|
|
4
|
+
import xml.etree.ElementTree as ET
|
|
5
|
+
from multiprocessing import Pool
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from .. import exceptions, types, utils
|
|
11
|
+
from ..exiftool_read import EXIFTOOL_NAMESPACES, ExifToolRead
|
|
12
|
+
from .geotag_from_generic import GeotagImagesFromGeneric
|
|
13
|
+
from .geotag_images_from_exif import GeotagImagesFromEXIF, verify_image_exif_write
|
|
14
|
+
|
|
15
|
+
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# Prefixed name of the XML element searched for in each parsed ExifTool XML
# file; its "about" attribute is read as the path of the described file.
_DESCRIPTION_TAG = "rdf:Description"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def canonical_path(path: Path) -> str:
    """Return the resolved form of *path* as a forward-slash string.

    Used as a dictionary key so that different spellings of the same
    location compare equal.
    """
    resolved = path.resolve()
    return resolved.as_posix()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def find_rdf_description_path(element: ET.Element) -> T.Optional[Path]:
    """Return the element's rdf "about" attribute as a Path, or None if unset."""
    about_attribute = "{%s}about" % EXIFTOOL_NAMESPACES["rdf"]
    about = element.get(about_attribute)
    return Path(about) if about is not None else None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def index_rdf_description_by_path(
    xml_paths: T.Sequence[Path],
) -> T.Dict[str, ET.Element]:
    """Index the description elements of the given XML files by canonical path.

    Every file returned by ``utils.find_xml_files(xml_paths)`` is parsed.
    Files that fail to parse are logged as warnings and skipped. For each
    description element that carries a path, the element is stored under
    ``canonical_path(path)``; a later element with the same key replaces
    an earlier one.
    """
    indexed: T.Dict[str, ET.Element] = {}

    for xml_path in utils.find_xml_files(xml_paths):
        try:
            etree = ET.parse(xml_path)
        except ET.ParseError as ex:
            # at debug level log the traceback, otherwise just the error text
            verbose = LOG.getEffectiveLevel() <= logging.DEBUG
            if verbose:
                message = f"Failed to parse {xml_path}"
            else:
                message = f"Failed to parse {xml_path}: {ex}"
            LOG.warning(message, exc_info=verbose)
            continue

        for description in etree.iterfind(
            _DESCRIPTION_TAG, namespaces=EXIFTOOL_NAMESPACES
        ):
            path = find_rdf_description_path(description)
            if path is None:
                continue
            indexed[canonical_path(path)] = description

    return indexed
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class GeotagImagesFromExifTool(GeotagImagesFromGeneric):
    """Geotag images from the descriptions found in ExifTool XML output."""

    def __init__(self, image_paths: T.Sequence[Path], xml_path: Path):
        """Remember the images to geotag and where to look for the XML."""
        self.image_paths = image_paths
        self.xml_path = xml_path
        super().__init__()

    @staticmethod
    def geotag_image(element: ET.Element) -> types.ImageMetadataOrError:
        """Build the metadata for the image described by *element*.

        The image path is read from the element itself. Any exception
        raised while reading the EXIF values, loading the image or
        test-writing the EXIF is converted into an error metadata entry
        instead of propagating.
        """
        image_path = find_rdf_description_path(element)
        assert image_path is not None, "must find the path from the element"

        try:
            exif = ExifToolRead(ET.ElementTree(element))
            image_metadata = GeotagImagesFromEXIF.build_image_metadata(
                image_path, exif, skip_lonlat_error=False
            )
            # load the image bytes into memory to avoid reading it multiple times
            with image_path.open("rb") as fp:
                image_bytesio = io.BytesIO(fp.read())
            image_bytesio.seek(0, io.SEEK_SET)
            verify_image_exif_write(
                image_metadata,
                image_bytes=image_bytesio.read(),
            )
        except Exception as ex:
            return types.describe_error_metadata(
                ex, image_path, filetype=types.FileType.IMAGE
            )

        # rewind: the buffer was consumed by the verification above
        image_bytesio.seek(0, io.SEEK_SET)
        image_metadata.update_md5sum(image_bytesio)

        return image_metadata

    def to_description(self) -> T.List[types.ImageMetadataOrError]:
        """Return metadata or an error entry for every image path.

        Images without a matching description element get an error entry
        up front; the remaining ones are processed in a process pool. The
        returned list holds those up-front errors first, followed by the
        pool results in description order.
        """
        rdf_description_by_path = index_rdf_description_by_path([self.xml_path])

        error_metadatas: T.List[types.ErrorMetadata] = []
        rdf_descriptions: T.List[ET.Element] = []
        for path in self.image_paths:
            rdf_description = rdf_description_by_path.get(canonical_path(path))
            if rdf_description is None:
                exc = exceptions.MapillaryEXIFNotFoundError(
                    f"The {_DESCRIPTION_TAG} XML element for the image not found"
                )
                error_metadatas.append(
                    types.describe_error_metadata(
                        exc, path, filetype=types.FileType.IMAGE
                    )
                )
            else:
                rdf_descriptions.append(rdf_description)

        with Pool() as pool:
            image_metadatas_iter = pool.imap(
                GeotagImagesFromExifTool.geotag_image,
                rdf_descriptions,
            )
            image_metadata_or_errors = list(
                tqdm(
                    image_metadatas_iter,
                    desc="Extracting geotags from ExifTool XML",
                    unit="images",
                    disable=LOG.getEffectiveLevel() <= logging.DEBUG,
                    # Only images with a description pass through the pool, so
                    # that count (not len(self.image_paths)) is the number of
                    # iterations the bar will actually see.
                    total=len(rdf_descriptions),
                )
            )

        return error_metadatas + image_metadata_or_errors
|