mapillary-tools 0.10.2a0__py3-none-any.whl → 0.10.3a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/commands/process.py +4 -3
  3. mapillary_tools/exceptions.py +4 -0
  4. mapillary_tools/exif_read.py +543 -65
  5. mapillary_tools/exiftool_read.py +406 -0
  6. mapillary_tools/exiftool_read_video.py +360 -0
  7. mapillary_tools/geo.py +10 -2
  8. mapillary_tools/geotag/geotag_from_generic.py +13 -2
  9. mapillary_tools/geotag/{geotag_from_exif.py → geotag_images_from_exif.py} +51 -67
  10. mapillary_tools/geotag/geotag_images_from_exiftool.py +123 -0
  11. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +81 -0
  12. mapillary_tools/geotag/{geotag_from_gpx.py → geotag_images_from_gpx.py} +16 -13
  13. mapillary_tools/geotag/{geotag_from_gpx_file.py → geotag_images_from_gpx_file.py} +52 -36
  14. mapillary_tools/geotag/{geotag_from_nmea_file.py → geotag_images_from_nmea_file.py} +4 -5
  15. mapillary_tools/geotag/geotag_images_from_video.py +87 -0
  16. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +105 -0
  17. mapillary_tools/geotag/geotag_videos_from_video.py +175 -0
  18. mapillary_tools/process_geotag_properties.py +65 -31
  19. mapillary_tools/sample_video.py +19 -6
  20. mapillary_tools/types.py +2 -0
  21. mapillary_tools/utils.py +24 -2
  22. {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/METADATA +1 -1
  23. {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/RECORD +27 -24
  24. mapillary_tools/geotag/geotag_from_blackvue.py +0 -93
  25. mapillary_tools/geotag/geotag_from_camm.py +0 -94
  26. mapillary_tools/geotag/geotag_from_gopro.py +0 -96
  27. mapillary_tools/geotag/geotag_from_video.py +0 -145
  28. {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/LICENSE +0 -0
  29. {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/WHEEL +0 -0
  30. {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/entry_points.txt +0 -0
  31. {mapillary_tools-0.10.2a0.dist-info → mapillary_tools-0.10.3a1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,360 @@
1
+ import logging
2
+ import typing as T
3
+ import xml.etree.ElementTree as ET
4
+
5
+ from . import exif_read, geo
6
+
7
+
8
# Highest QuickTime track number for which a "TrackN" namespace is registered.
MAX_TRACK_ID = 10

# Maps the namespace prefix used in "Prefix:Tag" names to its ExifTool XML
# namespace URI.
EXIFTOOL_NAMESPACES: T.Dict[str, str] = {
    "Keys": "http://ns.exiftool.org/QuickTime/Keys/1.0/",
    "IFD0": "http://ns.exiftool.org/EXIF/IFD0/1.0/",
    "QuickTime": "http://ns.exiftool.org/QuickTime/QuickTime/1.0/",
    "UserData": "http://ns.exiftool.org/QuickTime/UserData/1.0/",
    "Insta360": "http://ns.exiftool.org/Trailer/Insta360/1.0/",
    "GoPro": "http://ns.exiftool.org/QuickTime/GoPro/1.0/",
}
# Append Track1 .. Track<MAX_TRACK_ID> after the fixed namespaces.
EXIFTOOL_NAMESPACES.update(
    (
        f"Track{track_id}",
        f"http://ns.exiftool.org/QuickTime/Track{track_id}/1.0/",
    )
    for track_id in range(1, MAX_TRACK_ID + 1)
)

LOG = logging.getLogger(__name__)

# The value types that _extract_alternative_fields can be asked to return.
_FIELD_TYPE = T.TypeVar("_FIELD_TYPE", int, float, str, T.List[str])
23
+
24
+
25
+ def _maybe_float(text: T.Optional[str]) -> T.Optional[float]:
26
+ if text is None:
27
+ return None
28
+ try:
29
+ return float(text)
30
+ except (ValueError, TypeError):
31
+ return None
32
+
33
+
34
+ def _expand_tag(ns_tag: str) -> str:
35
+ try:
36
+ ns, tag = ns_tag.split(":", maxsplit=2)
37
+ except ValueError:
38
+ raise ValueError(f"Invalid tag {ns_tag}")
39
+ return "{" + EXIFTOOL_NAMESPACES[ns] + "}" + tag
40
+
41
+
42
+ def _index_text_by_tag(elements: T.Iterable[ET.Element]) -> T.Dict[str, T.List[str]]:
43
+ texts_by_tag: T.Dict[str, T.List[str]] = {}
44
+ for element in elements:
45
+ tag = element.tag
46
+ if element.text is not None:
47
+ texts_by_tag.setdefault(tag, []).append(element.text)
48
+ return texts_by_tag
49
+
50
+
51
def _extract_alternative_fields(
    texts_by_tag: T.Dict[str, T.List[str]],
    fields: T.Sequence[str],
    field_type: T.Type[_FIELD_TYPE],
) -> T.Optional[_FIELD_TYPE]:
    """Return the value of the first usable field among *fields*.

    Fields are tried in order. A field is skipped when its expanded tag is
    absent from *texts_by_tag*, or when its first text cannot be converted to
    the requested scalar type.

    Args:
        texts_by_tag: texts indexed by expanded tag name.
        fields: candidate ``"Namespace:Tag"`` names, highest priority first.
        field_type: ``int``, ``float`` or ``str`` to convert the first text
            of the field, or ``list`` to get all texts of the field as-is.

    Returns:
        The converted value, or None when no field yields one.

    Raises:
        ValueError: if a field is present and *field_type* is unsupported.
    """
    for field in fields:
        values = texts_by_tag.get(_expand_tag(field))
        if values is None:
            continue

        if field_type is list:
            return T.cast(_FIELD_TYPE, values)

        is_scalar = field_type is int or field_type is float or field_type is str
        if not is_scalar:
            raise ValueError(f"Invalid field type {field_type}")

        first = values[0]
        try:
            return T.cast(_FIELD_TYPE, field_type(first))
        except (ValueError, TypeError):
            # Unconvertible text: fall through to the next alternative.
            continue
    return None
83
+
84
+
85
def _aggregate_gps_track(
    texts_by_tag: T.Dict[str, T.List[str]],
    time_tag: T.Optional[str],
    lon_tag: str,
    lat_tag: str,
    alt_tag: T.Optional[str] = None,
    direction_tag: T.Optional[str] = None,
    speed_tag: T.Optional[str] = None,
) -> T.List[geo.Point]:
    """Zip parallel per-tag value lists into a sorted, deduplicated GPS track.

    Longitudes and latitudes are required and must have the same count. When
    *time_tag* is given, the timestamp count must match as well. Otherwise
    every point gets time 0.0. Altitudes and directions are optional and are
    padded with None. Speeds are read but not stored on the points.

    Entries with a missing timestamp, longitude or latitude are dropped. The
    remaining points are sorted by time, shifted so the first point is at
    time 0, and consecutive points with the same (time, lon, lat) are
    collapsed to the first one.

    Returns:
        The resulting points, or an empty list when the counts disagree.
    """

    def _floats(tag: str) -> T.List[T.Optional[float]]:
        # All texts of the tag as floats; unparsable texts become None.
        raw = _extract_alternative_fields(texts_by_tag, [tag], list)
        return [_maybe_float(text) for text in raw or []]

    def _padded_floats(tag: T.Optional[str], size: int) -> T.List[T.Optional[float]]:
        # Like _floats, but tolerates a missing tag and pads up to `size`.
        values = _floats(tag) if tag is not None else []
        missing = size - len(values)
        if missing > 0:
            values.extend([None] * missing)
        return values

    # coordinates (required)
    lons = _floats(lon_tag)
    lats = _floats(lat_tag)
    if len(lons) != len(lats):
        # no sensible way to pair them up
        LOG.warning(
            "Found different number of longitudes %d and latitudes %d",
            len(lons),
            len(lats),
        )
        return []

    expected_length = len(lats)

    # timestamps (optional)
    if time_tag is None:
        timestamps = [0.0] * expected_length
    else:
        raw_times = _extract_alternative_fields(texts_by_tag, [time_tag], list) or []
        parsed = [exif_read.parse_gps_datetime(text) for text in raw_times]
        timestamps = [
            None if dt is None else geo.as_unix_time(dt) for dt in parsed
        ]
        if expected_length != len(timestamps):
            # no sensible way to pair them up
            LOG.warning(
                "Found different number of timestamps %d and coordinates %d",
                len(timestamps),
                expected_length,
            )
            return []

    assert len(timestamps) == expected_length

    # altitudes, directions and speeds (all optional)
    alts = _padded_floats(alt_tag, expected_length)
    directions = _padded_floats(direction_tag, expected_length)
    speeds = _padded_floats(speed_tag, expected_length)

    # build the track, skipping incomplete entries
    rows = zip(timestamps, lons, lats, alts, directions, speeds)
    track = [
        geo.Point(time=timestamp, lon=lon, lat=lat, alt=alt, angle=direction)
        for timestamp, lon, lat, alt, direction, _speed in rows
        if not (timestamp is None or lon is None or lat is None)
    ]
    track.sort(key=lambda point: point.time)

    if not track:
        return []

    # make times relative to the first point
    origin = track[0].time
    for point in track:
        point.time = point.time - origin

    # drop points that repeat the previous point's time and position
    unique = [track[0]]
    for previous, current in zip(track, track[1:]):
        previous_key = (previous.time, previous.lon, previous.lat)
        current_key = (current.time, current.lon, current.lat)
        if current_key != previous_key:
            unique.append(current)

    return unique
198
+
199
+
200
def _aggregate_samples(
    elements: T.Iterable[ET.Element],
    sample_time_tag: str,
    sample_duration_tag: str,
) -> T.Generator[T.Tuple[float, float, T.List[ET.Element]], None, None]:
    """Split a flat element stream into samples delimited by sample-time tags.

    Yields ``(sample_time, sample_duration, elements)`` each time a new
    sample-time element is seen while a complete time/duration pair is
    pending, and once more at the end of the stream.

    Elements that are neither time nor duration tags are collected into the
    current sample. They keep accumulating until a sample is actually
    yielded. The duration is not cleared between samples.
    """
    time_key = _expand_tag(sample_time_tag)
    duration_key = _expand_tag(sample_duration_tag)

    pending: T.List[ET.Element] = []
    current_time = None
    current_duration = None

    for node in elements:
        tag = node.tag
        if tag != time_key and tag != duration_key:
            pending.append(node)
            continue

        if tag == time_key:
            # A new sample starts: flush the previous one if it is complete.
            if current_time is not None and current_duration is not None:
                yield (current_time, current_duration, pending)
                pending = []
            current_time = _maybe_float(node.text)
        else:
            current_duration = _maybe_float(node.text)

    if current_time is not None and current_duration is not None:
        yield (current_time, current_duration, pending)
223
+
224
+
225
def _aggregate_gps_track_by_sample_time(
    sample_iterator: T.Iterable[T.Tuple[float, float, T.List[ET.Element]]],
    lon_tag: str,
    lat_tag: str,
    alt_tag: T.Optional[str] = None,
    direction_tag: T.Optional[str] = None,
    speed_tag: T.Optional[str] = None,
) -> T.List[geo.Point]:
    """Build one GPS track from samples, timing points by their sample.

    The points of each sample are extracted without their own timestamps.
    The i-th point of a sample is then placed at
    ``sample_time + i * sample_duration / number_of_points``.

    Returns:
        All points of all samples, sorted by time.
    """
    merged: T.List[geo.Point] = []

    for sample_time, sample_duration, elements in sample_iterator:
        sample_points = _aggregate_gps_track(
            _index_text_by_tag(elements),
            time_tag=None,
            lon_tag=lon_tag,
            lat_tag=lat_tag,
            alt_tag=alt_tag,
            direction_tag=direction_tag,
            speed_tag=speed_tag,
        )
        if not sample_points:
            continue
        # Spread the points evenly over the sample's duration.
        step = sample_duration / len(sample_points)
        for offset, point in enumerate(sample_points):
            point.time = sample_time + offset * step
        merged.extend(sample_points)

    merged.sort(key=lambda point: point.time)

    return merged
254
+
255
+
256
class ExifToolReadVideo:
    """Read GPS track and camera make/model from an ExifTool XML tree of a video."""

    def __init__(
        self,
        etree: ET.ElementTree,
    ) -> None:
        """Index the texts of the root's direct children by tag."""
        self.etree = etree
        self._texts_by_tag = _index_text_by_tag(self.etree.getroot())
        self._all_tags = set(self._texts_by_tag)

    def _all_tags_exists(self, tags: T.Set[str]) -> bool:
        """Tell whether every expanded tag in *tags* occurs in the tree."""
        return all(tag in self._all_tags for tag in tags)

    def _extract_alternative_fields(
        self,
        fields: T.Sequence[str],
        field_type: T.Type[_FIELD_TYPE],
    ) -> T.Optional[_FIELD_TYPE]:
        """Look up the first usable field among *fields* in this tree's index."""
        return _extract_alternative_fields(self._texts_by_tag, fields, field_type)

    def extract_gps_track(self) -> T.List[geo.Point]:
        """Return the first non-empty GPS track found.

        Sources are tried in this order: the QuickTime namespace, the
        Insta360 namespace, then the per-track namespaces. An empty list is
        returned when none of them yields any point.
        """
        extractors = (
            self._extract_gps_track_from_quicktime,
            lambda: self._extract_gps_track_from_quicktime(namespace="Insta360"),
            self._extract_gps_track_from_track,
        )
        for extract in extractors:
            track = extract()
            if track:
                return track
        return []

    def extract_make(self) -> T.Optional[str]:
        """Return the stripped camera make, or None when no make tag is usable."""
        make = self._extract_alternative_fields(
            ["IFD0:Make", "Keys:Make", "UserData:Make", "Insta360:Make", "GoPro:Make"],
            str,
        )
        return None if make is None else make.strip()

    def extract_model(self) -> T.Optional[str]:
        """Return the stripped camera model, or None when no model tag is usable."""
        model = self._extract_alternative_fields(
            [
                "IFD0:Model",
                "Keys:Model",
                "UserData:Model",
                "Insta360:Model",
                "GoPro:Model",
            ],
            str,
        )
        return None if model is None else model.strip()

    def _extract_gps_track_from_quicktime(
        self, namespace: str = "QuickTime"
    ) -> T.List[geo.Point]:
        """Build a track from the GPSDateTime/GPSLongitude/GPSLatitude tags of *namespace*.

        Returns an empty list unless all three tags occur in the tree.
        """
        time_tag = f"{namespace}:GPSDateTime"
        lon_tag = f"{namespace}:GPSLongitude"
        lat_tag = f"{namespace}:GPSLatitude"

        required = {_expand_tag(tag) for tag in (time_tag, lon_tag, lat_tag)}
        if not self._all_tags_exists(required):
            return []

        return _aggregate_gps_track(
            self._texts_by_tag,
            time_tag=time_tag,
            lon_tag=lon_tag,
            lat_tag=lat_tag,
            alt_tag=f"{namespace}:GPSAltitude",
            direction_tag=f"{namespace}:GPSTrack",
        )

    def _extract_gps_track_from_track(self) -> T.List[geo.Point]:
        """Build a track from the first TrackN namespace that yields points.

        Track1 through Track<MAX_TRACK_ID> are tried in order. A namespace is
        considered only when its SampleTime, SampleDuration, GPSLongitude and
        GPSLatitude tags all occur in the tree.
        """
        for track_id in range(1, MAX_TRACK_ID + 1):
            track_ns = f"Track{track_id}"
            sample_time_tag = f"{track_ns}:SampleTime"
            sample_duration_tag = f"{track_ns}:SampleDuration"
            lon_tag = f"{track_ns}:GPSLongitude"
            lat_tag = f"{track_ns}:GPSLatitude"

            required = {
                _expand_tag(tag)
                for tag in (sample_time_tag, sample_duration_tag, lon_tag, lat_tag)
            }
            if not self._all_tags_exists(required):
                continue

            samples = _aggregate_samples(
                self.etree.getroot(),
                sample_time_tag,
                sample_duration_tag,
            )
            track = _aggregate_gps_track_by_sample_time(
                samples,
                lon_tag=lon_tag,
                lat_tag=lat_tag,
                alt_tag=f"{track_ns}:GPSAltitude",
                direction_tag=f"{track_ns}:GPSTrack",
            )
            if track:
                return track
        return []
mapillary_tools/geo.py CHANGED
@@ -175,8 +175,16 @@ def as_unix_time(dt: T.Union[datetime.datetime, int, float]) -> float:
175
175
  if isinstance(dt, (int, float)):
176
176
  return dt
177
177
  else:
178
- # if dt is naive, assume it's in local timezone
179
- return dt.timestamp()
178
+ try:
179
+ # if dt is naive, assume it's in local timezone
180
+ return dt.timestamp()
181
+ except ValueError:
182
+ # Some datetimes can't be converted to timestamp
183
+ # e.g. 0001-01-01 00:00:00 will throw ValueError: year 0 is out of range
184
+ try:
185
+ return dt.replace(year=1970).timestamp()
186
+ except ValueError:
187
+ return 0.0
180
188
 
181
189
 
182
190
  def _interpolate_segment(start: Point, end: Point, t: float) -> Point:
@@ -1,11 +1,22 @@
1
+ import abc
1
2
  import typing as T
2
3
 
3
4
  from .. import types
4
5
 
5
6
 
6
- class GeotagFromGeneric:
7
+ class GeotagImagesFromGeneric(abc.ABC):
7
8
  def __init__(self) -> None:
8
9
  pass
9
10
 
11
+ @abc.abstractmethod
10
12
  def to_description(self) -> T.List[types.ImageMetadataOrError]:
11
- return []
13
+ raise NotImplementedError
14
+
15
+
16
class GeotagVideosFromGeneric(abc.ABC):
    """Abstract interface for geotaggers that describe videos."""

    def __init__(self) -> None:
        """No state to set up."""
        return None

    @abc.abstractmethod
    def to_description(self) -> T.List[types.VideoMetadataOrError]:
        """Return one metadata-or-error entry per video; subclasses must override."""
        raise NotImplementedError()
@@ -4,96 +4,58 @@ import typing as T
4
4
  from multiprocessing import Pool
5
5
  from pathlib import Path
6
6
 
7
- import piexif
8
-
9
7
  from tqdm import tqdm
10
8
 
11
- from .. import exif_write, geo, types
12
- from ..exceptions import MapillaryGeoTaggingError
13
- from ..exif_read import ExifRead
14
-
15
- from .geotag_from_generic import GeotagFromGeneric
9
+ from .. import exceptions, exif_write, geo, types
10
+ from ..exif_read import ExifRead, ExifReadABC
11
+ from .geotag_from_generic import GeotagImagesFromGeneric
16
12
 
17
13
  LOG = logging.getLogger(__name__)
18
14
 
19
15
 
20
16
  def verify_image_exif_write(
21
17
  metadata: types.ImageMetadata,
22
- image_data: T.Optional[bytes] = None,
23
- ) -> types.ImageMetadataOrError:
24
- if image_data is None:
18
+ image_bytes: T.Optional[bytes] = None,
19
+ ) -> None:
20
+ if image_bytes is None:
25
21
  edit = exif_write.ExifEdit(metadata.filename)
26
22
  else:
27
- edit = exif_write.ExifEdit(image_data)
23
+ edit = exif_write.ExifEdit(image_bytes)
28
24
 
29
25
  # The cast is to fix the type error in Python3.6:
30
26
  # Argument 1 to "add_image_description" of "ExifEdit" has incompatible type "ImageDescription"; expected "Dict[str, Any]"
31
27
  edit.add_image_description(
32
28
  T.cast(T.Dict, types.desc_file_to_exif(types.as_desc(metadata)))
33
29
  )
34
- try:
35
- edit.dump_image_bytes()
36
- except piexif.InvalidImageDataError as exc:
37
- return types.describe_error_metadata(
38
- exc,
39
- metadata.filename,
40
- filetype=types.FileType.IMAGE,
41
- )
42
- except Exception as exc:
43
- # possible error here: struct.error: 'H' format requires 0 <= number <= 65535
44
- LOG.warning(
45
- "Unknown error test writing image %s", metadata.filename, exc_info=True
46
- )
47
- return types.describe_error_metadata(
48
- exc,
49
- metadata.filename,
50
- filetype=types.FileType.IMAGE,
51
- )
52
- return metadata
53
30
 
31
+ # Possible errors thrown here:
32
+ # - struct.error: 'H' format requires 0 <= number <= 65535
33
+ # - piexif.InvalidImageDataError
34
+ edit.dump_image_bytes()
54
35
 
55
- class GeotagFromEXIF(GeotagFromGeneric):
36
+
37
+ class GeotagImagesFromEXIF(GeotagImagesFromGeneric):
56
38
  def __init__(self, image_paths: T.Sequence[Path]):
57
39
  self.image_paths = image_paths
58
40
  super().__init__()
59
41
 
60
42
  @staticmethod
61
- def geotag_image(
62
- image_path: Path, skip_lonlat_error: bool = False
63
- ) -> types.ImageMetadataOrError:
64
- with image_path.open("rb") as fp:
65
- image_data = fp.read()
66
- image_bytesio = io.BytesIO(image_data)
67
-
68
- try:
69
- exif = ExifRead(image_bytesio)
70
- except Exception as ex:
71
- LOG.warning(
72
- "Unknown error reading EXIF from image %s",
73
- image_path,
74
- exc_info=True,
75
- )
76
- return types.describe_error_metadata(
77
- ex, image_path, filetype=types.FileType.IMAGE
78
- )
79
-
43
+ def build_image_metadata(
44
+ image_path: Path, exif: ExifReadABC, skip_lonlat_error: bool = False
45
+ ) -> types.ImageMetadata:
80
46
  lonlat = exif.extract_lon_lat()
81
47
  if lonlat is None:
82
48
  if not skip_lonlat_error:
83
- exc = MapillaryGeoTaggingError(
49
+ raise exceptions.MapillaryGeoTaggingError(
84
50
  "Unable to extract GPS Longitude or GPS Latitude from the image"
85
51
  )
86
- return types.describe_error_metadata(
87
- exc, image_path, filetype=types.FileType.IMAGE
88
- )
89
52
  lonlat = (0.0, 0.0)
90
53
  lon, lat = lonlat
91
54
 
92
55
  capture_time = exif.extract_capture_time()
93
56
  if capture_time is None:
94
- exc = MapillaryGeoTaggingError("Unable to extract timestamp from the image")
95
- return types.describe_error_metadata(
96
- exc, image_path, filetype=types.FileType.IMAGE
57
+ raise exceptions.MapillaryGeoTaggingError(
58
+ "Unable to extract timestamp from the image"
97
59
  )
98
60
 
99
61
  image_metadata = types.ImageMetadata(
@@ -111,26 +73,48 @@ class GeotagFromEXIF(GeotagFromGeneric):
111
73
  MAPDeviceModel=exif.extract_model(),
112
74
  )
113
75
 
114
- image_bytesio.seek(0, io.SEEK_SET)
115
- image_metadata.update_md5sum(image_bytesio)
76
+ return image_metadata
77
+
78
+ @staticmethod
79
+ def geotag_image(
80
+ image_path: Path, skip_lonlat_error: bool = False
81
+ ) -> types.ImageMetadataOrError:
82
+ try:
83
+ # load the image bytes into memory to avoid reading it multiple times
84
+ with image_path.open("rb") as fp:
85
+ image_bytesio = io.BytesIO(fp.read())
86
+
87
+ image_bytesio.seek(0, io.SEEK_SET)
88
+ exif = ExifRead(image_bytesio)
89
+
90
+ image_metadata = GeotagImagesFromEXIF.build_image_metadata(
91
+ image_path, exif, skip_lonlat_error=skip_lonlat_error
92
+ )
93
+
94
+ image_bytesio.seek(0, io.SEEK_SET)
95
+ verify_image_exif_write(
96
+ image_metadata,
97
+ image_bytes=image_bytesio.read(),
98
+ )
99
+ except Exception as ex:
100
+ return types.describe_error_metadata(
101
+ ex, image_path, filetype=types.FileType.IMAGE
102
+ )
116
103
 
117
104
  image_bytesio.seek(0, io.SEEK_SET)
118
- image_metadata_or_error = verify_image_exif_write(
119
- image_metadata,
120
- image_data=image_bytesio.read(),
121
- )
105
+ image_metadata.update_md5sum(image_bytesio)
122
106
 
123
- return image_metadata_or_error
107
+ return image_metadata
124
108
 
125
109
  def to_description(self) -> T.List[types.ImageMetadataOrError]:
126
110
  with Pool() as pool:
127
- image_metadatas = pool.imap(
128
- GeotagFromEXIF.geotag_image,
111
+ image_metadatas_iter = pool.imap(
112
+ GeotagImagesFromEXIF.geotag_image,
129
113
  self.image_paths,
130
114
  )
131
115
  return list(
132
116
  tqdm(
133
- image_metadatas,
117
+ image_metadatas_iter,
134
118
  desc="Extracting geotags from images",
135
119
  unit="images",
136
120
  disable=LOG.getEffectiveLevel() <= logging.DEBUG,
@@ -0,0 +1,123 @@
1
+ import io
2
+ import logging
3
+ import typing as T
4
+ import xml.etree.ElementTree as ET
5
+ from multiprocessing import Pool
6
+ from pathlib import Path
7
+
8
+ from tqdm import tqdm
9
+
10
+ from .. import exceptions, types, utils
11
+ from ..exiftool_read import EXIFTOOL_NAMESPACES, ExifToolRead
12
+ from .geotag_from_generic import GeotagImagesFromGeneric
13
+ from .geotag_images_from_exif import GeotagImagesFromEXIF, verify_image_exif_write
14
+
15
+ LOG = logging.getLogger(__name__)
16
+ _DESCRIPTION_TAG = "rdf:Description"
17
+
18
+
19
def canonical_path(path: Path) -> str:
    """Return *path* resolved to an absolute path, as a forward-slash string."""
    resolved = path.resolve()
    return resolved.as_posix()
21
+
22
+
23
def find_rdf_description_path(element: ET.Element) -> T.Optional[Path]:
    """Return the element's ``rdf:about`` attribute as a Path, or None if absent."""
    about_attr = f"{{{EXIFTOOL_NAMESPACES['rdf']}}}about"
    about = element.get(about_attr)
    return None if about is None else Path(about)
28
+
29
+
30
def index_rdf_description_by_path(
    xml_paths: T.Sequence[Path],
) -> T.Dict[str, ET.Element]:
    """Index the ``rdf:Description`` elements of ExifTool XML files by file path.

    Keys are the canonical form of each element's ``rdf:about`` path.
    Elements without that attribute are skipped. When several elements
    share a path, the last one seen wins. XML files that fail to parse are
    logged and skipped.

    Args:
        xml_paths: passed to ``utils.find_xml_files`` to obtain the XML files
            to read (presumably files and/or directories — confirm against
            that helper).
    """
    index: T.Dict[str, ET.Element] = {}

    for xml_path in utils.find_xml_files(xml_paths):
        try:
            etree = ET.parse(xml_path)
        except ET.ParseError as ex:
            # Traceback when debugging; otherwise a one-line reason.
            if LOG.getEffectiveLevel() <= logging.DEBUG:
                LOG.warning(f"Failed to parse {xml_path}", exc_info=True)
            else:
                LOG.warning(f"Failed to parse {xml_path}: {ex}", exc_info=False)
            continue

        for description in etree.iterfind(
            _DESCRIPTION_TAG, namespaces=EXIFTOOL_NAMESPACES
        ):
            about_path = find_rdf_description_path(description)
            if about_path is None:
                continue
            index[canonical_path(about_path)] = description

    return index
53
+
54
+
55
class GeotagImagesFromExifTool(GeotagImagesFromGeneric):
    """Geotag images from the ``rdf:Description`` elements of ExifTool XML output."""

    def __init__(self, image_paths: T.Sequence[Path], xml_path: Path):
        """Remember the images to geotag and where their ExifTool XML lives.

        Args:
            image_paths: the images to produce metadata for.
            xml_path: the location passed to index_rdf_description_by_path.
        """
        self.image_paths = image_paths
        self.xml_path = xml_path
        super().__init__()

    @staticmethod
    def geotag_image(element: ET.Element) -> types.ImageMetadataOrError:
        """Build the metadata of the image that *element* describes.

        The image path is taken from the element's ``rdf:about`` attribute,
        which must be present. Any exception raised while reading the EXIF
        data, loading the image or test-writing the description is converted
        into an error entry via ``types.describe_error_metadata``.
        """
        image_path = find_rdf_description_path(element)
        assert image_path is not None, "must find the path from the element"

        try:
            exif = ExifToolRead(ET.ElementTree(element))
            image_metadata = GeotagImagesFromEXIF.build_image_metadata(
                image_path, exif, skip_lonlat_error=False
            )
            # load the image bytes into memory to avoid reading it multiple times
            with image_path.open("rb") as fp:
                image_bytesio = io.BytesIO(fp.read())
            image_bytesio.seek(0, io.SEEK_SET)
            verify_image_exif_write(
                image_metadata,
                image_bytes=image_bytesio.read(),
            )
        except Exception as ex:
            return types.describe_error_metadata(
                ex, image_path, filetype=types.FileType.IMAGE
            )

        # Rewind so the checksum covers the whole image.
        image_bytesio.seek(0, io.SEEK_SET)
        image_metadata.update_md5sum(image_bytesio)

        return image_metadata

    def to_description(self) -> T.List[types.ImageMetadataOrError]:
        """Return one metadata-or-error entry per image.

        Images with no matching ``rdf:Description`` element get an error
        entry. The others are geotagged in a process pool. The error entries
        for missing descriptions come first in the returned list.
        """
        rdf_description_by_path = index_rdf_description_by_path([self.xml_path])

        error_metadatas: T.List[types.ErrorMetadata] = []
        rdf_descriptions: T.List[ET.Element] = []
        for path in self.image_paths:
            rdf_description = rdf_description_by_path.get(canonical_path(path))
            if rdf_description is None:
                exc = exceptions.MapillaryEXIFNotFoundError(
                    f"The {_DESCRIPTION_TAG} XML element for the image not found"
                )
                error_metadatas.append(
                    types.describe_error_metadata(
                        exc, path, filetype=types.FileType.IMAGE
                    )
                )
            else:
                rdf_descriptions.append(rdf_description)

        with Pool() as pool:
            image_metadatas_iter = pool.imap(
                GeotagImagesFromExifTool.geotag_image,
                rdf_descriptions,
            )
            image_metadata_or_errors = list(
                tqdm(
                    image_metadatas_iter,
                    desc="Extracting geotags from ExifTool XML",
                    unit="images",
                    disable=LOG.getEffectiveLevel() <= logging.DEBUG,
                    # Only images with a description are iterated, so the
                    # total must count those, not all requested images.
                    total=len(rdf_descriptions),
                )
            )

        return error_metadatas + image_metadata_or_errors