mapillary-tools 0.13.3a1__py3-none-any.whl → 0.14.0a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mapillary_tools/__init__.py +1 -1
- mapillary_tools/api_v4.py +237 -16
- mapillary_tools/authenticate.py +325 -64
- mapillary_tools/{geotag/blackvue_parser.py → blackvue_parser.py} +74 -54
- mapillary_tools/camm/camm_builder.py +55 -97
- mapillary_tools/camm/camm_parser.py +429 -181
- mapillary_tools/commands/__main__.py +12 -6
- mapillary_tools/commands/authenticate.py +8 -1
- mapillary_tools/commands/process.py +27 -51
- mapillary_tools/commands/process_and_upload.py +19 -5
- mapillary_tools/commands/sample_video.py +2 -3
- mapillary_tools/commands/upload.py +18 -9
- mapillary_tools/commands/video_process_and_upload.py +19 -5
- mapillary_tools/config.py +31 -13
- mapillary_tools/constants.py +47 -6
- mapillary_tools/exceptions.py +34 -35
- mapillary_tools/exif_read.py +221 -116
- mapillary_tools/exif_write.py +7 -7
- mapillary_tools/exiftool_read.py +33 -42
- mapillary_tools/exiftool_read_video.py +46 -33
- mapillary_tools/exiftool_runner.py +77 -0
- mapillary_tools/ffmpeg.py +24 -23
- mapillary_tools/geo.py +144 -120
- mapillary_tools/geotag/base.py +147 -0
- mapillary_tools/geotag/factory.py +291 -0
- mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
- mapillary_tools/geotag/geotag_images_from_exiftool.py +126 -82
- mapillary_tools/geotag/geotag_images_from_gpx.py +53 -118
- mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
- mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
- mapillary_tools/geotag/geotag_images_from_video.py +53 -51
- mapillary_tools/geotag/geotag_videos_from_exiftool.py +97 -0
- mapillary_tools/geotag/geotag_videos_from_gpx.py +39 -0
- mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
- mapillary_tools/geotag/image_extractors/base.py +18 -0
- mapillary_tools/geotag/image_extractors/exif.py +60 -0
- mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
- mapillary_tools/geotag/options.py +160 -0
- mapillary_tools/geotag/utils.py +52 -16
- mapillary_tools/geotag/video_extractors/base.py +18 -0
- mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
- mapillary_tools/{video_data_extraction/extractors/gpx_parser.py → geotag/video_extractors/gpx.py} +57 -39
- mapillary_tools/geotag/video_extractors/native.py +157 -0
- mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
- mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
- mapillary_tools/history.py +7 -13
- mapillary_tools/mp4/construct_mp4_parser.py +9 -8
- mapillary_tools/mp4/io_utils.py +0 -1
- mapillary_tools/mp4/mp4_sample_parser.py +36 -28
- mapillary_tools/mp4/simple_mp4_builder.py +10 -9
- mapillary_tools/mp4/simple_mp4_parser.py +13 -22
- mapillary_tools/process_geotag_properties.py +155 -392
- mapillary_tools/process_sequence_properties.py +562 -208
- mapillary_tools/sample_video.py +13 -20
- mapillary_tools/telemetry.py +26 -13
- mapillary_tools/types.py +111 -58
- mapillary_tools/upload.py +316 -298
- mapillary_tools/upload_api_v4.py +55 -122
- mapillary_tools/uploader.py +396 -254
- mapillary_tools/utils.py +42 -18
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/METADATA +3 -2
- mapillary_tools-0.14.0a2.dist-info/RECORD +72 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/WHEEL +1 -1
- mapillary_tools/geotag/__init__.py +0 -1
- mapillary_tools/geotag/geotag_from_generic.py +0 -22
- mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
- mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
- mapillary_tools/video_data_extraction/cli_options.py +0 -22
- mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
- mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
- mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
- mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
- mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
- mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
- mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
- mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
- mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
- mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
- mapillary_tools-0.13.3a1.dist-info/RECORD +0 -75
- /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/entry_points.txt +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info/licenses}/LICENSE +0 -0
- {mapillary_tools-0.13.3a1.dist-info → mapillary_tools-0.14.0a2.dist-info}/top_level.txt +0 -0
mapillary_tools/exif_read.py
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import abc
|
|
2
4
|
import datetime
|
|
5
|
+
import io
|
|
3
6
|
import logging
|
|
4
7
|
import re
|
|
8
|
+
import struct
|
|
5
9
|
import typing as T
|
|
6
10
|
import xml.etree.ElementTree as et
|
|
7
11
|
from fractions import Fraction
|
|
@@ -11,6 +15,7 @@ import exifread
|
|
|
11
15
|
from exifread.utils import Ratio
|
|
12
16
|
|
|
13
17
|
|
|
18
|
+
LOG = logging.getLogger(__name__)
|
|
14
19
|
XMP_NAMESPACES = {
|
|
15
20
|
"exif": "http://ns.adobe.com/exif/1.0/",
|
|
16
21
|
"tiff": "http://ns.adobe.com/tiff/1.0/",
|
|
@@ -31,7 +36,7 @@ def eval_frac(value: Ratio) -> float:
|
|
|
31
36
|
return float(value.num) / float(value.den)
|
|
32
37
|
|
|
33
38
|
|
|
34
|
-
def gps_to_decimal(values:
|
|
39
|
+
def gps_to_decimal(values: tuple[Ratio, Ratio, Ratio]) -> float | None:
|
|
35
40
|
try:
|
|
36
41
|
deg, min, sec, *_ = values
|
|
37
42
|
except (TypeError, ValueError):
|
|
@@ -51,14 +56,14 @@ def gps_to_decimal(values: T.Tuple[Ratio, Ratio, Ratio]) -> T.Optional[float]:
|
|
|
51
56
|
return degrees + minutes / 60 + seconds / 3600
|
|
52
57
|
|
|
53
58
|
|
|
54
|
-
def _parse_coord_numeric(coord: str, ref:
|
|
59
|
+
def _parse_coord_numeric(coord: str, ref: str | None) -> float | None:
    """
    Parse a plain decimal coordinate string and apply the sign looked up for ref.

    Returns None when coord is not a valid float literal or when ref is not
    a key of SIGN_BY_DIRECTION.
    """
    try:
        magnitude = float(coord)
        sign = SIGN_BY_DIRECTION[ref]
    except (ValueError, KeyError):
        return None
    return magnitude * sign
|
|
59
64
|
|
|
60
65
|
|
|
61
|
-
def _parse_coord_adobe(coord: str) ->
|
|
66
|
+
def _parse_coord_adobe(coord: str) -> float | None:
|
|
62
67
|
"""
|
|
63
68
|
Parse Adobe coordinate format: <degrees,fractionalminutes[NSEW]>
|
|
64
69
|
"""
|
|
@@ -74,7 +79,7 @@ def _parse_coord_adobe(coord: str) -> T.Optional[float]:
|
|
|
74
79
|
return None
|
|
75
80
|
|
|
76
81
|
|
|
77
|
-
def _parse_coord(coord:
|
|
82
|
+
def _parse_coord(coord: str | None, ref: str | None) -> float | None:
|
|
78
83
|
if coord is None:
|
|
79
84
|
return None
|
|
80
85
|
parsed = _parse_coord_numeric(coord, ref)
|
|
@@ -83,7 +88,7 @@ def _parse_coord(coord: T.Optional[str], ref: T.Optional[str]) -> T.Optional[flo
|
|
|
83
88
|
return parsed
|
|
84
89
|
|
|
85
90
|
|
|
86
|
-
def _parse_iso(dtstr: str) ->
|
|
91
|
+
def _parse_iso(dtstr: str) -> datetime.datetime | None:
|
|
87
92
|
try:
|
|
88
93
|
return datetime.datetime.fromisoformat(dtstr)
|
|
89
94
|
except ValueError:
|
|
@@ -94,8 +99,8 @@ def _parse_iso(dtstr: str) -> T.Optional[datetime.datetime]:
|
|
|
94
99
|
|
|
95
100
|
|
|
96
101
|
def strptime_alternative_formats(
|
|
97
|
-
dtstr: str, formats:
|
|
98
|
-
) ->
|
|
102
|
+
dtstr: str, formats: list[str]
|
|
103
|
+
) -> datetime.datetime | None:
|
|
99
104
|
for format in formats:
|
|
100
105
|
if format == "ISO":
|
|
101
106
|
dt = _parse_iso(dtstr)
|
|
@@ -109,7 +114,7 @@ def strptime_alternative_formats(
|
|
|
109
114
|
return None
|
|
110
115
|
|
|
111
116
|
|
|
112
|
-
def parse_timestr_as_timedelta(timestr: str) ->
|
|
117
|
+
def parse_timestr_as_timedelta(timestr: str) -> datetime.timedelta | None:
|
|
113
118
|
timestr = timestr.strip()
|
|
114
119
|
parts = timestr.strip().split(":")
|
|
115
120
|
try:
|
|
@@ -128,8 +133,8 @@ def parse_timestr_as_timedelta(timestr: str) -> T.Optional[datetime.timedelta]:
|
|
|
128
133
|
|
|
129
134
|
|
|
130
135
|
def parse_time_ratios_as_timedelta(
|
|
131
|
-
time_tuple:
|
|
132
|
-
) ->
|
|
136
|
+
time_tuple: list[Ratio],
|
|
137
|
+
) -> datetime.timedelta | None:
|
|
133
138
|
try:
|
|
134
139
|
hours, minutes, seconds, *_ = time_tuple
|
|
135
140
|
except (ValueError, TypeError):
|
|
@@ -151,8 +156,8 @@ def parse_time_ratios_as_timedelta(
|
|
|
151
156
|
|
|
152
157
|
def parse_gps_datetime(
|
|
153
158
|
dtstr: str,
|
|
154
|
-
default_tz:
|
|
155
|
-
) ->
|
|
159
|
+
default_tz: datetime.timezone | None = datetime.timezone.utc,
|
|
160
|
+
) -> datetime.datetime | None:
|
|
156
161
|
dtstr = dtstr.strip()
|
|
157
162
|
|
|
158
163
|
dt = strptime_alternative_formats(dtstr, ["ISO"])
|
|
@@ -171,8 +176,8 @@ def parse_gps_datetime(
|
|
|
171
176
|
def parse_gps_datetime_separately(
|
|
172
177
|
datestr: str,
|
|
173
178
|
timestr: str,
|
|
174
|
-
default_tz:
|
|
175
|
-
) ->
|
|
179
|
+
default_tz: datetime.timezone | None = datetime.timezone.utc,
|
|
180
|
+
) -> datetime.datetime | None:
|
|
176
181
|
"""
|
|
177
182
|
Parse GPSDateStamp and GPSTimeStamp and return the corresponding datetime object in GMT.
|
|
178
183
|
|
|
@@ -227,8 +232,8 @@ def parse_gps_datetime_separately(
|
|
|
227
232
|
|
|
228
233
|
|
|
229
234
|
def parse_datetimestr_with_subsec_and_offset(
|
|
230
|
-
dtstr: str, subsec:
|
|
231
|
-
) ->
|
|
235
|
+
dtstr: str, subsec: str | None = None, tz_offset: str | None = None
|
|
236
|
+
) -> datetime.datetime | None:
|
|
232
237
|
"""
|
|
233
238
|
Convert dtstr "YYYY:mm:dd HH:MM:SS[.sss]" to a datetime object.
|
|
234
239
|
It handles time "24:00:00" as "00:00:00" of the next day.
|
|
@@ -289,35 +294,35 @@ _FIELD_TYPE = T.TypeVar("_FIELD_TYPE", int, float, str)
|
|
|
289
294
|
|
|
290
295
|
class ExifReadABC(abc.ABC):
|
|
291
296
|
@abc.abstractmethod
|
|
292
|
-
def extract_altitude(self) ->
|
|
297
|
+
def extract_altitude(self) -> float | None:
|
|
293
298
|
raise NotImplementedError
|
|
294
299
|
|
|
295
300
|
@abc.abstractmethod
|
|
296
|
-
def extract_capture_time(self) ->
|
|
301
|
+
def extract_capture_time(self) -> datetime.datetime | None:
|
|
297
302
|
raise NotImplementedError
|
|
298
303
|
|
|
299
304
|
@abc.abstractmethod
|
|
300
|
-
def extract_direction(self) ->
|
|
305
|
+
def extract_direction(self) -> float | None:
|
|
301
306
|
raise NotImplementedError
|
|
302
307
|
|
|
303
308
|
@abc.abstractmethod
|
|
304
|
-
def extract_lon_lat(self) ->
|
|
309
|
+
def extract_lon_lat(self) -> tuple[float, float] | None:
|
|
305
310
|
raise NotImplementedError
|
|
306
311
|
|
|
307
312
|
@abc.abstractmethod
|
|
308
|
-
def extract_make(self) ->
|
|
313
|
+
def extract_make(self) -> str | None:
|
|
309
314
|
raise NotImplementedError
|
|
310
315
|
|
|
311
316
|
@abc.abstractmethod
|
|
312
|
-
def extract_model(self) ->
|
|
317
|
+
def extract_model(self) -> str | None:
|
|
313
318
|
raise NotImplementedError
|
|
314
319
|
|
|
315
320
|
@abc.abstractmethod
|
|
316
|
-
def extract_width(self) ->
|
|
321
|
+
def extract_width(self) -> int | None:
|
|
317
322
|
raise NotImplementedError
|
|
318
323
|
|
|
319
324
|
@abc.abstractmethod
|
|
320
|
-
def extract_height(self) ->
|
|
325
|
+
def extract_height(self) -> int | None:
|
|
321
326
|
raise NotImplementedError
|
|
322
327
|
|
|
323
328
|
@abc.abstractmethod
|
|
@@ -328,7 +333,7 @@ class ExifReadABC(abc.ABC):
|
|
|
328
333
|
class ExifReadFromXMP(ExifReadABC):
|
|
329
334
|
def __init__(self, etree: et.ElementTree):
|
|
330
335
|
self.etree = etree
|
|
331
|
-
self._tags_or_attrs:
|
|
336
|
+
self._tags_or_attrs: dict[str, str] = {}
|
|
332
337
|
for description in self.etree.iterfind(
|
|
333
338
|
".//rdf:Description", namespaces=XMP_NAMESPACES
|
|
334
339
|
):
|
|
@@ -338,12 +343,12 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
338
343
|
if child.text is not None:
|
|
339
344
|
self._tags_or_attrs[child.tag] = child.text
|
|
340
345
|
|
|
341
|
-
def extract_altitude(self) ->
|
|
346
|
+
def extract_altitude(self) -> float | None:
|
|
342
347
|
return self._extract_alternative_fields(["exif:GPSAltitude"], float)
|
|
343
348
|
|
|
344
349
|
def _extract_exif_datetime(
|
|
345
350
|
self, dt_tag: str, subsec_tag: str, offset_tag: str
|
|
346
|
-
) ->
|
|
351
|
+
) -> datetime.datetime | None:
|
|
347
352
|
dtstr = self._extract_alternative_fields([dt_tag], str)
|
|
348
353
|
if dtstr is None:
|
|
349
354
|
return None
|
|
@@ -358,7 +363,7 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
358
363
|
return None
|
|
359
364
|
return dt
|
|
360
365
|
|
|
361
|
-
def extract_exif_datetime(self) ->
|
|
366
|
+
def extract_exif_datetime(self) -> datetime.datetime | None:
|
|
362
367
|
dt = self._extract_exif_datetime(
|
|
363
368
|
"exif:DateTimeOriginal",
|
|
364
369
|
"exif:SubsecTimeOriginal",
|
|
@@ -377,7 +382,7 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
377
382
|
|
|
378
383
|
return None
|
|
379
384
|
|
|
380
|
-
def extract_gps_datetime(self) ->
|
|
385
|
+
def extract_gps_datetime(self) -> datetime.datetime | None:
|
|
381
386
|
"""
|
|
382
387
|
Extract timestamp from GPS field.
|
|
383
388
|
"""
|
|
@@ -397,7 +402,7 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
397
402
|
# handle: exif:GPSTimeStamp="17:22:05.999000"
|
|
398
403
|
return parse_gps_datetime_separately(datestr, timestr)
|
|
399
404
|
|
|
400
|
-
def extract_capture_time(self) ->
|
|
405
|
+
def extract_capture_time(self) -> datetime.datetime | None:
|
|
401
406
|
dt = self.extract_gps_datetime()
|
|
402
407
|
if dt is not None and dt.date() != datetime.date(1970, 1, 1):
|
|
403
408
|
return dt
|
|
@@ -408,22 +413,22 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
408
413
|
|
|
409
414
|
return None
|
|
410
415
|
|
|
411
|
-
def extract_direction(self) ->
|
|
416
|
+
def extract_direction(self) -> float | None:
|
|
412
417
|
return self._extract_alternative_fields(
|
|
413
418
|
["exif:GPSImgDirection", "exif:GPSTrack"], float
|
|
414
419
|
)
|
|
415
420
|
|
|
416
|
-
def extract_lon_lat(self) ->
|
|
421
|
+
def extract_lon_lat(self) -> tuple[float, float] | None:
|
|
417
422
|
lat_ref = self._extract_alternative_fields(["exif:GPSLatitudeRef"], str)
|
|
418
|
-
lat_str:
|
|
423
|
+
lat_str: str | None = self._extract_alternative_fields(
|
|
419
424
|
["exif:GPSLatitude"], str
|
|
420
425
|
)
|
|
421
|
-
lat:
|
|
426
|
+
lat: float | None = _parse_coord(lat_str, lat_ref)
|
|
422
427
|
if lat is None:
|
|
423
428
|
return None
|
|
424
429
|
|
|
425
430
|
lon_ref = self._extract_alternative_fields(["exif:GPSLongitudeRef"], str)
|
|
426
|
-
lon_str:
|
|
431
|
+
lon_str: str | None = self._extract_alternative_fields(
|
|
427
432
|
["exif:GPSLongitude"], str
|
|
428
433
|
)
|
|
429
434
|
lon = _parse_coord(lon_str, lon_ref)
|
|
@@ -432,13 +437,13 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
432
437
|
|
|
433
438
|
return lon, lat
|
|
434
439
|
|
|
435
|
-
def extract_make(self) ->
|
|
440
|
+
def extract_make(self) -> str | None:
|
|
436
441
|
make = self._extract_alternative_fields(["tiff:Make", "exifEX:LensMake"], str)
|
|
437
442
|
if make is None:
|
|
438
443
|
return None
|
|
439
444
|
return make.strip()
|
|
440
445
|
|
|
441
|
-
def extract_model(self) ->
|
|
446
|
+
def extract_model(self) -> str | None:
|
|
442
447
|
model = self._extract_alternative_fields(
|
|
443
448
|
["tiff:Model", "exifEX:LensModel"], str
|
|
444
449
|
)
|
|
@@ -446,7 +451,7 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
446
451
|
return None
|
|
447
452
|
return model.strip()
|
|
448
453
|
|
|
449
|
-
def extract_width(self) ->
|
|
454
|
+
def extract_width(self) -> int | None:
|
|
450
455
|
return self._extract_alternative_fields(
|
|
451
456
|
[
|
|
452
457
|
"exif:PixelXDimension",
|
|
@@ -456,7 +461,7 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
456
461
|
int,
|
|
457
462
|
)
|
|
458
463
|
|
|
459
|
-
def extract_height(self) ->
|
|
464
|
+
def extract_height(self) -> int | None:
|
|
460
465
|
return self._extract_alternative_fields(
|
|
461
466
|
[
|
|
462
467
|
"exif:PixelYDimension",
|
|
@@ -474,9 +479,9 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
474
479
|
|
|
475
480
|
def _extract_alternative_fields(
|
|
476
481
|
self,
|
|
477
|
-
fields: T.
|
|
478
|
-
field_type:
|
|
479
|
-
) ->
|
|
482
|
+
fields: T.Iterable[str],
|
|
483
|
+
field_type: type[_FIELD_TYPE],
|
|
484
|
+
) -> _FIELD_TYPE | None:
|
|
480
485
|
"""
|
|
481
486
|
Extract a value for a list of ordered fields.
|
|
482
487
|
Return the value of the first existed field in the list
|
|
@@ -508,31 +513,115 @@ class ExifReadFromXMP(ExifReadABC):
|
|
|
508
513
|
return None
|
|
509
514
|
|
|
510
515
|
|
|
516
|
+
def extract_xmp_efficiently(fp) -> str | None:
    """
    Extract XMP metadata from a JPEG stream by reading only the segment headers
    and the XMP payload itself; all other segments are skipped with relative seeks.

    Args:
        fp: Seekable binary stream positioned at the start of the JPEG data

    Returns:
        str: The XMP text, trimmed to the <x:xmpmeta> element when that tag is present,
        or None if the stream is not a JPEG or no XMP segment precedes the image data.
        Bytes that are not valid UTF-8 are replaced with U+FFFD instead of raising.
    """
    # JPEG markers
    SOI_MARKER = b"\xff\xd8"  # Start of Image
    APP1_MARKER = b"\xff\xe1"  # Application Segment 1 (where XMP usually lives)
    SOS_MARKER = b"\xff\xda"  # Start of Scan (entropy-coded image data follows)
    EOI_MARKER = b"\xff\xd9"  # End of Image
    XMP_IDENTIFIER = b"http://ns.adobe.com/xap/1.0/\x00"
    XMP_META_TAG_BEGIN = b"<x:xmpmeta"
    XMP_META_TAG_END = b"</x:xmpmeta>"

    # Check for JPEG signature (SOI marker)
    if fp.read(2) != SOI_MARKER:
        return None

    while True:
        marker_bytes = fp.read(2)
        if len(marker_bytes) < 2:
            # End of file
            break

        # Everything after SOS is compressed image data, not length-prefixed
        # segments, so walking it as headers would only produce garbage.
        # Likewise stop as soon as the bytes do not look like a marker at all.
        if marker_bytes[0] != 0xFF or marker_bytes in (SOS_MARKER, EOI_MARKER):
            break

        # Read length field (includes the length bytes themselves)
        length_bytes = fp.read(2)
        if len(length_bytes) < 2:
            break

        (length,) = struct.unpack(">H", length_bytes)
        if length < 2:
            # Malformed segment: a smaller value would turn the skip below
            # into a backward seek and the payload size into a negative number
            break
        segment_data_length = length - 2

        # If not APP1, skip this segment
        if marker_bytes != APP1_MARKER:
            fp.seek(segment_data_length, io.SEEK_CUR)
            continue

        # Never probe past the end of this segment: a short APP1 cannot hold XMP
        probe_length = min(segment_data_length, len(XMP_IDENTIFIER))
        identifier_check = fp.read(probe_length)
        if len(identifier_check) < probe_length:
            # Truncated file
            break

        if identifier_check != XMP_IDENTIFIER:
            # Not XMP data (e.g. the EXIF APP1) - skip the rest of this segment
            fp.seek(segment_data_length - probe_length, io.SEEK_CUR)
            continue

        # We found XMP data - read the rest of the segment.
        # The size is non-negative here (the full identifier fit in the segment)
        # and bounded by the 16-bit length field, so no extra size cap is needed.
        xmp_data = fp.read(segment_data_length - len(XMP_IDENTIFIER))

        # Trim the packet wrapper and padding around the <x:xmpmeta> element
        begin_idx = xmp_data.find(XMP_META_TAG_BEGIN)
        if begin_idx >= 0:
            end_idx = xmp_data.rfind(XMP_META_TAG_END, begin_idx)
            if end_idx >= 0:
                xmp_data = xmp_data[begin_idx : end_idx + len(XMP_META_TAG_END)]
            else:
                xmp_data = xmp_data[begin_idx:]

        # Best effort: a few corrupt bytes should not abort metadata extraction
        return xmp_data.decode("utf-8", errors="replace")

    # If we reach here, no XMP data was found
    return None
|
|
594
|
+
|
|
595
|
+
|
|
511
596
|
class ExifReadFromEXIF(ExifReadABC):
|
|
512
597
|
"""
|
|
513
598
|
EXIF class for reading exif from an image
|
|
514
599
|
"""
|
|
515
600
|
|
|
516
|
-
def __init__(self, path_or_stream:
|
|
601
|
+
def __init__(self, path_or_stream: Path | T.BinaryIO) -> None:
|
|
517
602
|
"""
|
|
518
603
|
Initialize EXIF object with FILE as filename or fileobj
|
|
519
604
|
"""
|
|
520
605
|
if isinstance(path_or_stream, Path):
|
|
521
606
|
with path_or_stream.open("rb") as fp:
|
|
522
607
|
try:
|
|
523
|
-
|
|
524
|
-
|
|
608
|
+
# Turn off details and debug for performance reasons
|
|
609
|
+
self.tags = exifread.process_file(fp, details=False, debug=False)
|
|
610
|
+
except Exception as ex:
|
|
611
|
+
LOG.warning("Error reading EXIF from %s: %s", path_or_stream, ex)
|
|
525
612
|
self.tags = {}
|
|
526
613
|
|
|
527
614
|
else:
|
|
528
615
|
try:
|
|
616
|
+
# Turn off details and debug for performance reasons
|
|
529
617
|
self.tags = exifread.process_file(
|
|
530
|
-
path_or_stream, details=
|
|
618
|
+
path_or_stream, details=False, debug=False
|
|
531
619
|
)
|
|
532
|
-
except Exception:
|
|
620
|
+
except Exception as ex:
|
|
621
|
+
LOG.warning("Error reading EXIF: %s", ex)
|
|
533
622
|
self.tags = {}
|
|
534
623
|
|
|
535
|
-
def extract_altitude(self) ->
|
|
624
|
+
def extract_altitude(self) -> float | None:
|
|
536
625
|
"""
|
|
537
626
|
Extract altitude
|
|
538
627
|
"""
|
|
@@ -545,7 +634,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
545
634
|
altitude_ref = {0: 1, 1: -1}
|
|
546
635
|
return altitude * altitude_ref.get(ref, 1)
|
|
547
636
|
|
|
548
|
-
def extract_gps_datetime(self) ->
|
|
637
|
+
def extract_gps_datetime(self) -> datetime.datetime | None:
|
|
549
638
|
"""
|
|
550
639
|
Extract timestamp from GPS field.
|
|
551
640
|
"""
|
|
@@ -573,7 +662,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
573
662
|
|
|
574
663
|
def _extract_exif_datetime(
|
|
575
664
|
self, dt_tag: str, subsec_tag: str, offset_tag: str
|
|
576
|
-
) ->
|
|
665
|
+
) -> datetime.datetime | None:
|
|
577
666
|
dtstr = self._extract_alternative_fields([dt_tag], field_type=str)
|
|
578
667
|
if dtstr is None:
|
|
579
668
|
return None
|
|
@@ -588,7 +677,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
588
677
|
return None
|
|
589
678
|
return dt
|
|
590
679
|
|
|
591
|
-
def extract_exif_datetime(self) ->
|
|
680
|
+
def extract_exif_datetime(self) -> datetime.datetime | None:
|
|
592
681
|
# EXIF DateTimeOriginal: 0x9003 (date/time when original image was taken)
|
|
593
682
|
# EXIF SubSecTimeOriginal: 0x9291 (fractional seconds for DateTimeOriginal)
|
|
594
683
|
# EXIF OffsetTimeOriginal: 0x9011 (time zone for DateTimeOriginal)
|
|
@@ -622,7 +711,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
622
711
|
|
|
623
712
|
return None
|
|
624
713
|
|
|
625
|
-
def extract_capture_time(self) ->
|
|
714
|
+
def extract_capture_time(self) -> datetime.datetime | None:
|
|
626
715
|
"""
|
|
627
716
|
Extract capture time from EXIF DateTime tags
|
|
628
717
|
"""
|
|
@@ -641,7 +730,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
641
730
|
|
|
642
731
|
return None
|
|
643
732
|
|
|
644
|
-
def extract_direction(self) ->
|
|
733
|
+
def extract_direction(self) -> float | None:
|
|
645
734
|
"""
|
|
646
735
|
Extract image direction (i.e. compass, heading, bearing)
|
|
647
736
|
"""
|
|
@@ -649,19 +738,9 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
649
738
|
"GPS GPSImgDirection",
|
|
650
739
|
"GPS GPSTrack",
|
|
651
740
|
]
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
# fix negative value wrongly parsed in exifread
|
|
656
|
-
# -360 degree -> 4294966935 when converting from hex
|
|
657
|
-
bearing1 = bin(int(direction))[2:]
|
|
658
|
-
bearing2 = "".join([str(int(int(a) == 0)) for a in bearing1])
|
|
659
|
-
direction = -float(int(bearing2, 2))
|
|
660
|
-
direction %= 360
|
|
661
|
-
|
|
662
|
-
return direction
|
|
663
|
-
|
|
664
|
-
def extract_lon_lat(self) -> T.Optional[T.Tuple[float, float]]:
|
|
741
|
+
return self._extract_alternative_fields(fields, float)
|
|
742
|
+
|
|
743
|
+
def extract_lon_lat(self) -> tuple[float, float] | None:
|
|
665
744
|
lat_tag = self.tags.get("GPS GPSLatitude")
|
|
666
745
|
lon_tag = self.tags.get("GPS GPSLongitude")
|
|
667
746
|
if lat_tag and lon_tag:
|
|
@@ -683,25 +762,29 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
683
762
|
|
|
684
763
|
return None
|
|
685
764
|
|
|
686
|
-
def extract_make(self) ->
|
|
765
|
+
def extract_make(self) -> str | None:
|
|
687
766
|
"""
|
|
688
767
|
Extract camera make
|
|
689
768
|
"""
|
|
690
|
-
make = self._extract_alternative_fields(
|
|
769
|
+
make = self._extract_alternative_fields(
|
|
770
|
+
["Image Make", "EXIF Make", "EXIF LensMake"], str
|
|
771
|
+
)
|
|
691
772
|
if make is None:
|
|
692
773
|
return None
|
|
693
774
|
return make.strip()
|
|
694
775
|
|
|
695
|
-
def extract_model(self) ->
|
|
776
|
+
def extract_model(self) -> str | None:
|
|
696
777
|
"""
|
|
697
778
|
Extract camera model
|
|
698
779
|
"""
|
|
699
|
-
model = self._extract_alternative_fields(
|
|
780
|
+
model = self._extract_alternative_fields(
|
|
781
|
+
["Image Model", "EXIF Model", "EXIF LensModel"], str
|
|
782
|
+
)
|
|
700
783
|
if model is None:
|
|
701
784
|
return None
|
|
702
785
|
return model.strip()
|
|
703
786
|
|
|
704
|
-
def extract_width(self) ->
|
|
787
|
+
def extract_width(self) -> int | None:
|
|
705
788
|
"""
|
|
706
789
|
Extract image width in pixels
|
|
707
790
|
"""
|
|
@@ -709,7 +792,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
709
792
|
["Image ImageWidth", "EXIF ExifImageWidth"], int
|
|
710
793
|
)
|
|
711
794
|
|
|
712
|
-
def extract_height(self) ->
|
|
795
|
+
def extract_height(self) -> int | None:
|
|
713
796
|
"""
|
|
714
797
|
Extract image height in pixels
|
|
715
798
|
"""
|
|
@@ -730,9 +813,9 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
730
813
|
|
|
731
814
|
def _extract_alternative_fields(
|
|
732
815
|
self,
|
|
733
|
-
fields: T.
|
|
734
|
-
field_type:
|
|
735
|
-
) ->
|
|
816
|
+
fields: T.Iterable[str],
|
|
817
|
+
field_type: type[_FIELD_TYPE],
|
|
818
|
+
) -> _FIELD_TYPE | None:
|
|
736
819
|
"""
|
|
737
820
|
Extract a value for a list of ordered fields.
|
|
738
821
|
Return the value of the first existed field in the list
|
|
@@ -764,7 +847,7 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
764
847
|
raise ValueError(f"Invalid field type {field_type}")
|
|
765
848
|
return None
|
|
766
849
|
|
|
767
|
-
def extract_application_notes(self) ->
|
|
850
|
+
def extract_application_notes(self) -> str | None:
|
|
768
851
|
xmp = self.tags.get("Image ApplicationNotes")
|
|
769
852
|
if xmp is None:
|
|
770
853
|
return None
|
|
@@ -775,104 +858,126 @@ class ExifReadFromEXIF(ExifReadABC):
|
|
|
775
858
|
|
|
776
859
|
|
|
777
860
|
class ExifRead(ExifReadFromEXIF):
    """
    Extract properties from EXIF first and then XMP

    NOTE: For performance reasons, XMP is only extracted if EXIF does not contain the required fields

    NOTE(review): extract_direction is not overridden in this class, so it is answered
    by the EXIF reader alone -- confirm that skipping the XMP fallback is intentional
    """

    def __init__(self, path_or_stream: Path | T.BinaryIO) -> None:
        """
        Read the EXIF tags immediately and remember the source for a later, lazy XMP read
        """
        super().__init__(path_or_stream)
        self._path_or_stream = path_or_stream
        # A separate flag is needed because "extracted, nothing found" is cached as None
        self._xml_extracted: bool = False
        self._cached_xml: ExifReadFromXMP | None = None

    def _xmp_with_reason(self, reason: str) -> ExifReadFromXMP | None:
        """
        Return the XMP reader, extracting it on first use.

        reason names the field that triggered the extraction; it is only used in the debug log
        """
        if not self._xml_extracted:
            LOG.debug('Extracting XMP for "%s"', reason)
            self._cached_xml = self._extract_xmp()
            self._xml_extracted = True

        return self._cached_xml

    def _extract_xmp(self) -> ExifReadFromXMP | None:
        """
        Build an XMP reader from the EXIF ApplicationNotes tag, falling back to
        scanning the JPEG segments of the original path or stream.

        Return None if no XMP is found or it is not well-formed XML
        """
        xml_str = self.extract_application_notes()
        if xml_str is None:
            if isinstance(self._path_or_stream, Path):
                with self._path_or_stream.open("rb") as fp:
                    xml_str = extract_xmp_efficiently(fp)
            else:
                # The stream was already consumed by the EXIF reader; start over
                self._path_or_stream.seek(0, io.SEEK_SET)
                xml_str = extract_xmp_efficiently(self._path_or_stream)

        if xml_str is None:
            return None

        try:
            e = et.fromstring(xml_str)
        except et.ParseError as ex:
            LOG.warning("Error parsing XMP XML: %s: %s", ex, xml_str)
            return None

        return ExifReadFromXMP(et.ElementTree(e))

    def extract_altitude(self) -> float | None:
        """
        Extract altitude from EXIF, falling back to XMP
        """
        val = super().extract_altitude()
        if val is not None:
            return val
        xmp = self._xmp_with_reason("altitude")
        if xmp is None:
            return None
        return xmp.extract_altitude()

    def extract_capture_time(self) -> datetime.datetime | None:
        """
        Extract capture time from EXIF, falling back to XMP
        """
        val = super().extract_capture_time()
        if val is not None:
            return val
        xmp = self._xmp_with_reason("capture_time")
        if xmp is None:
            return None
        return xmp.extract_capture_time()

    def extract_lon_lat(self) -> tuple[float, float] | None:
        """
        Extract (longitude, latitude) from EXIF, falling back to XMP
        """
        val = super().extract_lon_lat()
        if val is not None:
            return val
        xmp = self._xmp_with_reason("lon_lat")
        if xmp is None:
            return None
        return xmp.extract_lon_lat()

    def extract_make(self) -> str | None:
        """
        Extract camera make from EXIF, falling back to XMP
        """
        val = super().extract_make()
        if val is not None:
            return val
        xmp = self._xmp_with_reason("make")
        if xmp is None:
            return None
        return xmp.extract_make()

    def extract_model(self) -> str | None:
        """
        Extract camera model from EXIF, falling back to XMP
        """
        val = super().extract_model()
        if val is not None:
            return val
        xmp = self._xmp_with_reason("model")
        if xmp is None:
            return None
        return xmp.extract_model()

    def extract_width(self) -> int | None:
        """
        Extract image width in pixels from EXIF, falling back to XMP
        """
        val = super().extract_width()
        if val is not None:
            return val
        xmp = self._xmp_with_reason("width")
        if xmp is None:
            return None
        return xmp.extract_width()

    def extract_height(self) -> int | None:
        """
        Extract image height in pixels from EXIF, falling back to XMP
        """
        val = super().extract_height()
        if val is not None:
            return val
        # The reason is only logged; it previously said "width" here by mistake
        xmp = self._xmp_with_reason("height")
        if xmp is None:
            return None
        return xmp.extract_height()
|