mapillary-tools 0.13.3__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff shows the content of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
Files changed (87)
  1. mapillary_tools/__init__.py +1 -1
  2. mapillary_tools/api_v4.py +198 -55
  3. mapillary_tools/authenticate.py +326 -64
  4. mapillary_tools/blackvue_parser.py +195 -0
  5. mapillary_tools/camm/camm_builder.py +55 -97
  6. mapillary_tools/camm/camm_parser.py +429 -181
  7. mapillary_tools/commands/__main__.py +10 -6
  8. mapillary_tools/commands/authenticate.py +8 -1
  9. mapillary_tools/commands/process.py +27 -51
  10. mapillary_tools/commands/process_and_upload.py +18 -5
  11. mapillary_tools/commands/sample_video.py +2 -3
  12. mapillary_tools/commands/upload.py +44 -13
  13. mapillary_tools/commands/video_process_and_upload.py +19 -5
  14. mapillary_tools/config.py +65 -26
  15. mapillary_tools/constants.py +141 -18
  16. mapillary_tools/exceptions.py +37 -34
  17. mapillary_tools/exif_read.py +221 -116
  18. mapillary_tools/exif_write.py +10 -8
  19. mapillary_tools/exiftool_read.py +33 -42
  20. mapillary_tools/exiftool_read_video.py +97 -47
  21. mapillary_tools/exiftool_runner.py +57 -0
  22. mapillary_tools/ffmpeg.py +417 -242
  23. mapillary_tools/geo.py +158 -118
  24. mapillary_tools/geotag/__init__.py +0 -1
  25. mapillary_tools/geotag/base.py +147 -0
  26. mapillary_tools/geotag/factory.py +307 -0
  27. mapillary_tools/geotag/geotag_images_from_exif.py +14 -131
  28. mapillary_tools/geotag/geotag_images_from_exiftool.py +136 -85
  29. mapillary_tools/geotag/geotag_images_from_gpx.py +60 -124
  30. mapillary_tools/geotag/geotag_images_from_gpx_file.py +13 -126
  31. mapillary_tools/geotag/geotag_images_from_nmea_file.py +4 -5
  32. mapillary_tools/geotag/geotag_images_from_video.py +88 -51
  33. mapillary_tools/geotag/geotag_videos_from_exiftool.py +123 -0
  34. mapillary_tools/geotag/geotag_videos_from_gpx.py +52 -0
  35. mapillary_tools/geotag/geotag_videos_from_video.py +20 -185
  36. mapillary_tools/geotag/image_extractors/base.py +18 -0
  37. mapillary_tools/geotag/image_extractors/exif.py +60 -0
  38. mapillary_tools/geotag/image_extractors/exiftool.py +18 -0
  39. mapillary_tools/geotag/options.py +182 -0
  40. mapillary_tools/geotag/utils.py +52 -16
  41. mapillary_tools/geotag/video_extractors/base.py +18 -0
  42. mapillary_tools/geotag/video_extractors/exiftool.py +70 -0
  43. mapillary_tools/geotag/video_extractors/gpx.py +116 -0
  44. mapillary_tools/geotag/video_extractors/native.py +160 -0
  45. mapillary_tools/{geotag → gpmf}/gpmf_parser.py +205 -182
  46. mapillary_tools/{geotag → gpmf}/gps_filter.py +5 -3
  47. mapillary_tools/history.py +134 -20
  48. mapillary_tools/mp4/construct_mp4_parser.py +17 -10
  49. mapillary_tools/mp4/io_utils.py +0 -1
  50. mapillary_tools/mp4/mp4_sample_parser.py +36 -28
  51. mapillary_tools/mp4/simple_mp4_builder.py +10 -9
  52. mapillary_tools/mp4/simple_mp4_parser.py +13 -22
  53. mapillary_tools/process_geotag_properties.py +184 -414
  54. mapillary_tools/process_sequence_properties.py +594 -225
  55. mapillary_tools/sample_video.py +20 -26
  56. mapillary_tools/serializer/description.py +587 -0
  57. mapillary_tools/serializer/gpx.py +132 -0
  58. mapillary_tools/telemetry.py +26 -13
  59. mapillary_tools/types.py +98 -611
  60. mapillary_tools/upload.py +411 -387
  61. mapillary_tools/upload_api_v4.py +167 -142
  62. mapillary_tools/uploader.py +804 -284
  63. mapillary_tools/utils.py +49 -18
  64. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/METADATA +93 -35
  65. mapillary_tools-0.14.0.dist-info/RECORD +75 -0
  66. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/WHEEL +1 -1
  67. mapillary_tools/geotag/blackvue_parser.py +0 -118
  68. mapillary_tools/geotag/geotag_from_generic.py +0 -22
  69. mapillary_tools/geotag/geotag_images_from_exiftool_both_image_and_video.py +0 -93
  70. mapillary_tools/geotag/geotag_videos_from_exiftool_video.py +0 -145
  71. mapillary_tools/video_data_extraction/cli_options.py +0 -22
  72. mapillary_tools/video_data_extraction/extract_video_data.py +0 -176
  73. mapillary_tools/video_data_extraction/extractors/base_parser.py +0 -75
  74. mapillary_tools/video_data_extraction/extractors/blackvue_parser.py +0 -34
  75. mapillary_tools/video_data_extraction/extractors/camm_parser.py +0 -38
  76. mapillary_tools/video_data_extraction/extractors/exiftool_runtime_parser.py +0 -71
  77. mapillary_tools/video_data_extraction/extractors/exiftool_xml_parser.py +0 -53
  78. mapillary_tools/video_data_extraction/extractors/generic_video_parser.py +0 -52
  79. mapillary_tools/video_data_extraction/extractors/gopro_parser.py +0 -43
  80. mapillary_tools/video_data_extraction/extractors/gpx_parser.py +0 -108
  81. mapillary_tools/video_data_extraction/extractors/nmea_parser.py +0 -24
  82. mapillary_tools/video_data_extraction/video_data_parser_factory.py +0 -39
  83. mapillary_tools-0.13.3.dist-info/RECORD +0 -75
  84. /mapillary_tools/{geotag → gpmf}/gpmf_gps_filter.py +0 -0
  85. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/entry_points.txt +0 -0
  86. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info/licenses}/LICENSE +0 -0
  87. {mapillary_tools-0.13.3.dist-info → mapillary_tools-0.14.0.dist-info}/top_level.txt +0 -0
mapillary_tools/history.py
@@ -1,22 +1,29 @@
+ from __future__ import annotations
+
+ import contextlib
+ import dbm
  import json
  import logging
- import os
  import string
+ import threading
+ import time
  import typing as T
  from pathlib import Path

+ # dbm modules are dynamically imported, so here we explicitly import dbm.sqlite3 to make sure pyinstaller include it
+ # Otherwise you will see: ImportError: no dbm clone found; tried ['dbm.sqlite3', 'dbm.gnu', 'dbm.ndbm', 'dbm.dumb']
+ try:
+     import dbm.sqlite3  # type: ignore
+ except ImportError:
+     pass
+
+
  from . import constants, types
+ from .serializer.description import DescriptionJSONSerializer

  JSONDict = T.Dict[str, T.Union[str, int, float, None]]

  LOG = logging.getLogger(__name__)
- MAPILLARY_UPLOAD_HISTORY_PATH = os.getenv(
-     "MAPILLARY_UPLOAD_HISTORY_PATH",
-     os.path.join(
-         constants.USER_DATA_DIR,
-         "upload_history",
-     ),
- )


  def _validate_hexdigits(md5sum: str):
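Note: the module-level lookup removed above moves into constants — the next hunk reads constants.MAPILLARY_UPLOAD_HISTORY_PATH instead. A minimal sketch of the same env-var override pattern, assuming constants.py carries the removed logic over unchanged (the fallback directory below is a hypothetical stand-in for constants.USER_DATA_DIR):

    import os

    # hypothetical stand-in for the platform-specific user data dir
    USER_DATA_DIR = os.path.expanduser("~/.local/share/mapillary_tools")

    MAPILLARY_UPLOAD_HISTORY_PATH = os.getenv(
        "MAPILLARY_UPLOAD_HISTORY_PATH",
        os.path.join(USER_DATA_DIR, "upload_history"),
    )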
@@ -35,34 +42,141 @@ def history_desc_path(md5sum: str) -> Path:
      basename = md5sum[2:]
      assert basename, f"Invalid md5sum {md5sum}"
      return (
-         Path(MAPILLARY_UPLOAD_HISTORY_PATH)
+         Path(constants.MAPILLARY_UPLOAD_HISTORY_PATH)
          .joinpath(subfolder)
          .joinpath(f"{basename}.json")
      )


- def is_uploaded(md5sum: str) -> bool:
-     if not MAPILLARY_UPLOAD_HISTORY_PATH:
-         return False
-     return history_desc_path(md5sum).is_file()
+ def read_history_record(md5sum: str) -> None | T.Dict[str, T.Any]:
+     if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
+         return None
+
+     path = history_desc_path(md5sum)
+
+     if not path.is_file():
+         return None
+
+     with path.open("r") as fp:
+         try:
+             return json.load(fp)
+         except json.JSONDecodeError as ex:
+             LOG.error(f"Failed to read upload history {path}: {ex}")
+             return None


  def write_history(
      md5sum: str,
      params: JSONDict,
      summary: JSONDict,
-     metadatas: T.Optional[T.Sequence[types.Metadata]] = None,
+     metadatas: T.Sequence[types.Metadata] | None = None,
  ) -> None:
-     if not MAPILLARY_UPLOAD_HISTORY_PATH:
+     if not constants.MAPILLARY_UPLOAD_HISTORY_PATH:
          return
      path = history_desc_path(md5sum)
      LOG.debug("Writing upload history: %s", path)
      path.resolve().parent.mkdir(parents=True, exist_ok=True)
-     history: T.Dict[str, T.Any] = {
-         "params": params,
-         "summary": summary,
-     }
+     history: dict[str, T.Any] = {"params": params, "summary": summary}
      if metadatas is not None:
-         history["descs"] = [types.as_desc(metadata) for metadata in metadatas]
+         history["descs"] = [
+             DescriptionJSONSerializer.as_desc(metadata) for metadata in metadatas
+         ]
      with open(path, "w") as fp:
          fp.write(json.dumps(history))
+
+
+ class PersistentCache:
+     _lock: contextlib.nullcontext | threading.Lock
+
+     def __init__(self, file: str):
+         # SQLite3 backend supports concurrent access without a lock
+         if dbm.whichdb(file) == "dbm.sqlite3":
+             self._lock = contextlib.nullcontext()
+         else:
+             self._lock = threading.Lock()
+         self._file = file
+
+     def get(self, key: str) -> str | None:
+         s = time.perf_counter()
+
+         with self._lock:
+             with dbm.open(self._file, flag="c") as db:
+                 value: bytes | None = db.get(key)
+
+         if value is None:
+             return None
+
+         payload = self._decode(value)
+
+         if self._is_expired(payload):
+             return None
+
+         file_handle = payload.get("file_handle")
+
+         LOG.debug(
+             f"Found file handle for {key} in cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
+         )
+
+         return T.cast(str, file_handle)
+
+     def set(self, key: str, file_handle: str, expires_in: int = 3600 * 24 * 2) -> None:
+         s = time.perf_counter()
+
+         payload = {
+             "expires_at": time.time() + expires_in,
+             "file_handle": file_handle,
+         }
+
+         value: bytes = json.dumps(payload).encode("utf-8")
+
+         with self._lock:
+             with dbm.open(self._file, flag="c") as db:
+                 db[key] = value
+
+         LOG.debug(
+             f"Cached file handle for {key} ({(time.perf_counter() - s) * 1000:.0f} ms)"
+         )
+
+     def clear_expired(self) -> list[str]:
+         s = time.perf_counter()
+
+         expired_keys: list[str] = []
+
+         with self._lock:
+             with dbm.open(self._file, flag="c") as db:
+                 if hasattr(db, "items"):
+                     items: T.Iterable[tuple[str | bytes, bytes]] = db.items()
+                 else:
+                     items = ((key, db[key]) for key in db.keys())
+
+                 for key, value in items:
+                     payload = self._decode(value)
+                     if self._is_expired(payload):
+                         del db[key]
+                         expired_keys.append(T.cast(str, key))
+
+         if expired_keys:
+             LOG.debug(
+                 f"Cleared {len(expired_keys)} expired entries from the cache ({(time.perf_counter() - s) * 1000:.0f} ms)"
+             )
+
+         return expired_keys
+
+     def _is_expired(self, payload: JSONDict) -> bool:
+         expires_at = payload.get("expires_at")
+         if isinstance(expires_at, (int, float)):
+             return expires_at is None or expires_at <= time.time()
+         return False
+
+     def _decode(self, value: bytes) -> JSONDict:
+         try:
+             payload = json.loads(value.decode("utf-8"))
+         except json.JSONDecodeError as ex:
+             LOG.warning(f"Failed to decode cache value: {ex}")
+             return {}
+
+         if not isinstance(payload, dict):
+             LOG.warning(f"Invalid cache value format: {payload}")
+             return {}
+
+         return payload
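The new PersistentCache wraps the stdlib dbm module as a small key-value store for upload file handles; each value is a JSON payload carrying the handle plus an expiry timestamp, and entries default to a two-day lifetime. A usage sketch, assuming the class is importable from mapillary_tools.history and given a writable path (both the import path and the file name here are illustrative):

    from mapillary_tools.history import PersistentCache

    cache = PersistentCache("/tmp/mly_file_handles.db")  # hypothetical cache file

    # store a file handle; expires_in defaults to 3600 * 24 * 2 seconds (two days)
    cache.set("0a1b2c-md5sum", "upload-file-handle")

    # returns the handle, or None when missing, expired, or undecodable
    handle = cache.get("0a1b2c-md5sum")

    # drops expired entries and returns their keys
    removed = cache.clear_expired()

dbm.open(..., flag="c") creates the database file on first use, and with the dbm.sqlite3 backend (available since Python 3.13) the class substitutes a contextlib.nullcontext for its threading.Lock, since SQLite serializes concurrent access itself.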
mapillary_tools/mp4/construct_mp4_parser.py
@@ -1,4 +1,5 @@
  # pyre-ignore-all-errors[5, 16, 21, 58]
+ from __future__ import annotations

  import typing as T

@@ -42,7 +43,7 @@ BoxType = T.Literal[

  class BoxDict(T.TypedDict, total=True):
      type: BoxType
-     data: T.Union[T.Sequence["BoxDict"], T.Dict[str, T.Any], bytes]
+     data: T.Sequence["BoxDict"] | dict[str, T.Any] | bytes


  _UNITY_MATRIX = [0x10000, 0, 0, 0, 0x10000, 0, 0, 0, 0x40000000]
@@ -369,6 +370,10 @@ BoxHeader64 = C.Struct(
  SwitchMapType = T.Dict[BoxType, T.Union[C.Construct, "SwitchMapType"]]


+ class BoxNotFoundError(Exception):
+     pass
+
+
  class Box64ConstructBuilder:
      """
      Build a box struct that **parses** MP4 boxes with both 32-bit and 64-bit sizes.
@@ -376,7 +381,7 @@ class Box64ConstructBuilder:
      NOTE: Do not build data with this struct. For building, use Box32StructBuilder instead.
      """

-     _box: T.Optional[C.Construct]
+     _box: C.Construct | None

      def __init__(
          self,
@@ -438,7 +443,7 @@ class Box64ConstructBuilder:
      def parse_box(self, data: bytes) -> BoxDict:
          return T.cast(BoxDict, self.Box.parse(data))

-     def parse_boxlist(self, data: bytes) -> T.List[BoxDict]:
+     def parse_boxlist(self, data: bytes) -> list[BoxDict]:
          return T.cast(T.List[BoxDict], self.BoxList.parse(data))


@@ -464,7 +469,7 @@ class Box32ConstructBuilder(Box64ConstructBuilder):
      def parse_box(self, data: bytes) -> BoxDict:
          raise NotImplementedError("Box32ConstructBuilder does not support parsing")

-     def parse_boxlist(self, data: bytes) -> T.List[BoxDict]:
+     def parse_boxlist(self, data: bytes) -> list[BoxDict]:
          raise NotImplementedError("Box32ConstructBuilder does not support parsing")

      def build_box(self, box: BoxDict) -> bytes:
@@ -566,7 +571,9 @@ def _new_cmap_without_boxes(
  # pyre-ignore[9]: pyre does not support recursive type SwitchMapType
  MP4_WITHOUT_STBL_CMAP: SwitchMapType = {
      # pyre-ignore[6]: pyre does not support recursive type SwitchMapType
-     b"moov": _new_cmap_without_boxes(CMAP[b"moov"], [b"stbl"]),
+     b"moov": _new_cmap_without_boxes(
+         CMAP[b"moov"], T.cast(T.Sequence[BoxType], [b"stbl"])
+     ),
  }

  # for parsing mp4 only
@@ -584,17 +591,17 @@ MOOVWithoutSTBLBuilderConstruct = Box32ConstructBuilder(


  def find_box_at_pathx(
-     box: T.Union[T.Sequence[BoxDict], BoxDict], path: T.Sequence[bytes]
+     box: T.Sequence[BoxDict] | BoxDict, path: T.Sequence[bytes]
  ) -> BoxDict:
      found = find_box_at_path(box, path)
      if found is None:
-         raise ValueError(f"box at path {path} not found")
+         raise BoxNotFoundError(f"box at path {path} not found")
      return found


  def find_box_at_path(
-     box: T.Union[T.Sequence[BoxDict], BoxDict], path: T.Sequence[bytes]
- ) -> T.Optional[BoxDict]:
+     box: T.Sequence[BoxDict] | BoxDict, path: T.Sequence[bytes]
+ ) -> BoxDict | None:
      if not path:
          return None

@@ -608,7 +615,7 @@ def find_box_at_path(
      if box["type"] == path[0]:
          if len(path) == 1:
              return box
-         box_data = T.cast(T.Sequence[BoxDict], box["data"])
+         box_data = T.cast(T.List[BoxDict], box["data"])
          # ListContainer from construct is not sequence
          assert isinstance(box_data, T.Sequence), (
              f"expect a list of boxes but got {type(box_data)} at path {path}"
mapillary_tools/mp4/io_utils.py
@@ -3,7 +3,6 @@ import typing as T


  class ChainedIO(io.IOBase):
-     # is the chained stream seekable?
      _streams: T.Sequence[io.IOBase]
      # the beginning offset of the current stream
      _begin_offset: int
mapillary_tools/mp4/mp4_sample_parser.py
@@ -1,3 +1,5 @@
+ from __future__ import annotations
+
  import datetime
  import typing as T
  from pathlib import Path
@@ -42,16 +44,16 @@ class Sample(T.NamedTuple):
      exact_timedelta: float

      # reference to the sample description
-     description: T.Dict
+     description: dict


  def _extract_raw_samples(
      sizes: T.Sequence[int],
-     chunk_entries: T.Sequence[T.Dict],
+     chunk_entries: T.Sequence[dict],
      chunk_offsets: T.Sequence[int],
      timedeltas: T.Sequence[int],
-     composition_offsets: T.Optional[T.Sequence[int]],
-     syncs: T.Optional[T.Set[int]],
+     composition_offsets: list[int] | None,
+     syncs: set[int] | None,
  ) -> T.Generator[RawSample, None, None]:
      if not sizes:
          return
@@ -128,7 +130,7 @@ def _extract_raw_samples(

  def _extract_samples(
      raw_samples: T.Iterator[RawSample],
-     descriptions: T.List,
+     descriptions: list,
      timescale: int,
  ) -> T.Generator[Sample, None, None]:
      acc_delta = 0
@@ -152,21 +154,21 @@ STBLBoxlistConstruct = cparser.Box64ConstructBuilder(

  def extract_raw_samples_from_stbl_data(
      stbl: bytes,
- ) -> T.Tuple[T.List[T.Dict], T.Generator[RawSample, None, None]]:
-     descriptions = []
-     sizes = []
-     chunk_offsets = []
-     chunk_entries = []
-     timedeltas: T.List[int] = []
-     composition_offsets: T.Optional[T.List[int]] = None
-     syncs: T.Optional[T.Set[int]] = None
+ ) -> tuple[list[dict], T.Generator[RawSample, None, None]]:
+     descriptions: list[dict] = []
+     sizes: list[int] = []
+     chunk_offsets: list[int] = []
+     chunk_entries: list[dict] = []
+     timedeltas: list[int] = []
+     composition_offsets: list[int] | None = None
+     syncs: set[int] | None = None

      stbl_children = T.cast(
          T.Sequence[cparser.BoxDict], STBLBoxlistConstruct.parse(stbl)
      )

      for box in stbl_children:
-         data: T.Dict = T.cast(T.Dict, box["data"])
+         data: dict = T.cast(dict, box["data"])

          if box["type"] == b"stsd":
              descriptions = list(data["entries"])
@@ -225,32 +227,32 @@ class TrackBoxParser:
          )
          self.stbl_data = T.cast(bytes, stbl["data"])

-     def extract_tkhd_boxdata(self) -> T.Dict:
+     def extract_tkhd_boxdata(self) -> dict:
          return T.cast(
-             T.Dict, cparser.find_box_at_pathx(self.trak_children, [b"tkhd"])["data"]
+             dict, cparser.find_box_at_pathx(self.trak_children, [b"tkhd"])["data"]
          )

      def is_video_track(self) -> bool:
          hdlr = cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"hdlr"])
          return T.cast(T.Dict[str, T.Any], hdlr["data"])["handler_type"] == b"vide"

-     def extract_sample_descriptions(self) -> T.List[T.Dict]:
+     def extract_sample_descriptions(self) -> list[dict]:
          # TODO: return [] if parsing fail
          boxes = _STSDBoxListConstruct.parse(self.stbl_data)
          stsd = cparser.find_box_at_pathx(
              T.cast(T.Sequence[cparser.BoxDict], boxes), [b"stsd"]
          )
-         return T.cast(T.List[T.Dict], T.cast(T.Dict, stsd["data"])["entries"])
+         return T.cast(T.List[dict], T.cast(dict, stsd["data"])["entries"])

-     def extract_elst_boxdata(self) -> T.Optional[T.Dict]:
+     def extract_elst_boxdata(self) -> dict | None:
          box = cparser.find_box_at_path(self.trak_children, [b"edts", b"elst"])
          if box is None:
              return None
-         return T.cast(T.Dict, box["data"])
+         return T.cast(dict, box["data"])

-     def extract_mdhd_boxdata(self) -> T.Dict:
+     def extract_mdhd_boxdata(self) -> dict:
          box = cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"mdhd"])
-         return T.cast(T.Dict, box["data"])
+         return T.cast(dict, box["data"])

      def extract_raw_samples(self) -> T.Generator[RawSample, None, None]:
          _, raw_samples = extract_raw_samples_from_stbl_data(self.stbl_data)
@@ -259,7 +261,7 @@ class TrackBoxParser:
      def extract_samples(self) -> T.Generator[Sample, None, None]:
          descriptions, raw_samples = extract_raw_samples_from_stbl_data(self.stbl_data)
          mdhd = T.cast(
-             T.Dict,
+             dict,
              cparser.find_box_at_pathx(self.trak_children, [b"mdia", b"mdhd"])["data"],
          )
          yield from _extract_samples(raw_samples, descriptions, mdhd["timescale"])
@@ -278,16 +280,22 @@ class MovieBoxParser:
      def parse_file(cls, video_path: Path) -> "MovieBoxParser":
          with video_path.open("rb") as fp:
              moov = sparser.parse_box_data_firstx(fp, [b"moov"])
-         return MovieBoxParser(moov)
+         return cls(moov)

      @classmethod
      def parse_stream(cls, stream: T.BinaryIO) -> "MovieBoxParser":
          moov = sparser.parse_box_data_firstx(stream, [b"moov"])
-         return MovieBoxParser(moov)
+         return cls(moov)

-     def extract_mvhd_boxdata(self) -> T.Dict:
+     def extract_mvhd_boxdata(self) -> dict:
          mvhd = cparser.find_box_at_pathx(self.moov_children, [b"mvhd"])
-         return T.cast(T.Dict, mvhd["data"])
+         return T.cast(dict, mvhd["data"])
+
+     def extract_udta_boxdata(self) -> dict | None:
+         box = cparser.find_box_at_path(self.moov_children, [b"udta"])
+         if box is None:
+             return None
+         return T.cast(dict, box["data"])

      def extract_tracks(self) -> T.Generator[TrackBoxParser, None, None]:
          for box in self.moov_children:
@@ -312,7 +320,7 @@ class MovieBoxParser:
          return TrackBoxParser(trak_children)


- _DT_1904 = datetime.datetime.utcfromtimestamp(0).replace(year=1904)
+ _DT_1904 = datetime.datetime.fromtimestamp(0, datetime.timezone.utc).replace(year=1904)


  def to_datetime(seconds_since_1904: int) -> datetime.datetime:
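The _DT_1904 change swaps datetime.utcfromtimestamp, deprecated since Python 3.12, for a timezone-aware equivalent: both evaluate to midnight 1904-01-01, but the new constant carries tzinfo=UTC. MP4 timestamps count seconds from that epoch, so to_datetime presumably just offsets it; a self-contained sketch with an assumed function body (the diff shows only the signature):

    import datetime

    # MP4 epoch: 1904-01-01 00:00:00, now timezone-aware UTC
    _DT_1904 = datetime.datetime.fromtimestamp(0, datetime.timezone.utc).replace(year=1904)

    def to_datetime(seconds_since_1904: int) -> datetime.datetime:
        # assumed body: shift the 1904 epoch by the stored duration
        return _DT_1904 + datetime.timedelta(seconds=seconds_since_1904)

    print(to_datetime(3600))  # 1904-01-01 01:00:00+00:00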
mapillary_tools/mp4/simple_mp4_builder.py
@@ -1,3 +1,5 @@
+ from __future__ import annotations
+
  import dataclasses
  import io
  import typing as T
@@ -64,8 +66,8 @@ class _SampleChunk:
      offset: int


- def _build_chunks(raw_samples: T.Iterable[RawSample]) -> T.List[_SampleChunk]:
-     chunks: T.List[_SampleChunk] = []
+ def _build_chunks(raw_samples: T.Iterable[RawSample]) -> list[_SampleChunk]:
+     chunks: list[_SampleChunk] = []
      prev_raw_sample = None

      for raw_sample in raw_samples:
@@ -120,7 +122,7 @@ class _CompressedSampleDelta:

  def _build_stts(sample_deltas: T.Iterable[int]) -> BoxDict:
      # compress deltas
-     compressed: T.List[_CompressedSampleDelta] = []
+     compressed: list[_CompressedSampleDelta] = []
      for delta in sample_deltas:
          if compressed and delta == compressed[-1].sample_delta:
              compressed[-1].sample_count += 1
@@ -146,7 +148,7 @@ class _CompressedSampleCompositionOffset:

  def _build_ctts(sample_composition_offsets: T.Iterable[int]) -> BoxDict:
      # compress offsets
-     compressed: T.List[_CompressedSampleCompositionOffset] = []
+     compressed: list[_CompressedSampleCompositionOffset] = []
      for offset in sample_composition_offsets:
          if compressed and offset == compressed[-1].sample_offset:
              compressed[-1].sample_count += 1
@@ -182,7 +184,7 @@ def _build_stss(is_syncs: T.Iterable[bool]) -> BoxDict:

  def build_stbl_from_raw_samples(
      descriptions: T.Sequence[T.Any], raw_samples: T.Iterable[RawSample]
- ) -> T.List[BoxDict]:
+ ) -> list[BoxDict]:
      # raw_samples could be iterator so convert to list
      raw_samples = list(raw_samples)
      # It is recommended that the boxes within the Sample Table Box be in the following order:
@@ -329,9 +331,8 @@ _MOOVChildrenParserConstruct = cparser.Box64ConstructBuilder(

  def transform_mp4(
      src_fp: T.BinaryIO,
-     sample_generator: T.Optional[
-         T.Callable[[T.BinaryIO, T.List[BoxDict]], T.Iterator[io.IOBase]]
-     ] = None,
+     sample_generator: T.Callable[[T.BinaryIO, list[BoxDict]], T.Iterator[io.IOBase]]
+     | None = None,
  ) -> io_utils.ChainedIO:
      # extract ftyp
      src_fp.seek(0)
@@ -347,7 +348,7 @@ def transform_mp4(

      # extract video samples
      source_samples = list(iterate_samples(moov_children))
-     sample_readers: T.List[io.IOBase] = [
+     sample_readers: list[io.IOBase] = [
          io_utils.SlicedIO(src_fp, sample.offset, sample.size)
          for sample in source_samples
      ]
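transform_mp4 returns a ChainedIO that stitches the ftyp box, a rebuilt moov, and the original sample bytes into one lazily-read stream; with sample_generator=None the samples pass through unchanged. A hedged sketch of that identity use, assuming ChainedIO supports read() like the io.IOBase streams it chains, with placeholder file paths:

    from pathlib import Path

    from mapillary_tools.mp4.simple_mp4_builder import transform_mp4

    src, dst = Path("in.mp4"), Path("out.mp4")  # placeholder paths

    with src.open("rb") as src_fp, dst.open("wb") as dst_fp:
        chained = transform_mp4(src_fp)  # no generator: samples are copied as-is
        while chunk := chained.read(1 << 20):  # stream out in 1 MiB chunks
            dst_fp.write(chunk)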
mapillary_tools/mp4/simple_mp4_parser.py
@@ -1,4 +1,5 @@
  # pyre-ignore-all-errors[5, 16, 21, 24, 58]
+ from __future__ import annotations

  import io
  import typing as T
@@ -130,8 +131,8 @@ def parse_boxes_recursive(
      stream: T.BinaryIO,
      maxsize: int = -1,
      depth: int = 0,
-     box_list_types: T.Optional[T.Set[bytes]] = None,
- ) -> T.Generator[T.Tuple[Header, int, T.BinaryIO], None, None]:
+     box_list_types: set[bytes] | None = None,
+ ) -> T.Generator[tuple[Header, int, T.BinaryIO], None, None]:
      assert maxsize == -1 or 0 <= maxsize

      if box_list_types is None:
@@ -152,10 +153,10 @@

  def parse_path(
      stream: T.BinaryIO,
-     path: T.Sequence[T.Union[bytes, T.Sequence[bytes]]],
+     path: T.Sequence[bytes | T.Sequence[bytes]],
      maxsize: int = -1,
      depth: int = 0,
- ) -> T.Generator[T.Tuple[Header, T.BinaryIO], None, None]:
+ ) -> T.Generator[tuple[Header, T.BinaryIO], None, None]:
      if not path:
          return

@@ -172,8 +173,8 @@


  def _parse_path_first(
-     stream: T.BinaryIO, path: T.List[bytes], maxsize: int = -1, depth: int = 0
- ) -> T.Optional[T.Tuple[Header, T.BinaryIO]]:
+     stream: T.BinaryIO, path: list[bytes], maxsize: int = -1, depth: int = 0
+ ) -> tuple[Header, T.BinaryIO] | None:
      if not path:
          return None
      for h, s in parse_boxes(stream, maxsize=maxsize, extend_eof=depth == 0):
@@ -187,19 +188,9 @@
      return None


- def parse_box_path_firstx(
-     stream: T.BinaryIO, path: T.List[bytes], maxsize: int = -1
- ) -> T.Tuple[Header, T.BinaryIO]:
-     # depth=1 will disable EoF extension
-     parsed = _parse_path_first(stream, path, maxsize=maxsize, depth=1)
-     if parsed is None:
-         raise BoxNotFoundError(f"unable find box at path {path}")
-     return parsed
-
-
  def parse_mp4_data_first(
-     stream: T.BinaryIO, path: T.List[bytes], maxsize: int = -1
- ) -> T.Optional[bytes]:
+     stream: T.BinaryIO, path: list[bytes], maxsize: int = -1
+ ) -> bytes | None:
      # depth=0 will enable EoF extension
      parsed = _parse_path_first(stream, path, maxsize=maxsize, depth=0)
      if parsed is None:
@@ -209,7 +200,7 @@ def parse_mp4_data_first(


  def parse_mp4_data_firstx(
-     stream: T.BinaryIO, path: T.List[bytes], maxsize: int = -1
+     stream: T.BinaryIO, path: list[bytes], maxsize: int = -1
  ) -> bytes:
      data = parse_mp4_data_first(stream, path, maxsize=maxsize)
      if data is None:
@@ -218,8 +209,8 @@ def parse_mp4_data_firstx(


  def parse_box_data_first(
-     stream: T.BinaryIO, path: T.List[bytes], maxsize: int = -1
- ) -> T.Optional[bytes]:
+     stream: T.BinaryIO, path: list[bytes], maxsize: int = -1
+ ) -> bytes | None:
      # depth=1 will disable EoF extension
      parsed = _parse_path_first(stream, path, maxsize=maxsize, depth=1)
      if parsed is None:
@@ -229,7 +220,7 @@ def parse_box_data_first(


  def parse_box_data_firstx(
-     stream: T.BinaryIO, path: T.List[bytes], maxsize: int = -1
+     stream: T.BinaryIO, path: list[bytes], maxsize: int = -1
  ) -> bytes:
      data = parse_box_data_first(stream, path, maxsize=maxsize)
      if data is None:
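The surviving helpers differ only in how _parse_path_first walks the stream: parse_mp4_data_first passes depth=0 (EoF extension enabled, presumably the ISO-BMFF rule that a size-0 final box runs to end of file) while parse_box_data_first passes depth=1 (extension disabled); the unused parse_box_path_firstx wrapper is deleted outright. A short example pulling the raw moov payload, mirroring MovieBoxParser.parse_file above (the video path is a placeholder):

    from pathlib import Path

    from mapillary_tools.mp4 import simple_mp4_parser as sparser

    with Path("video.mp4").open("rb") as fp:  # placeholder path
        # the *x variants raise instead of returning None when the box is absent
        moov_data: bytes = sparser.parse_box_data_firstx(fp, [b"moov"])

    print(f"moov payload: {len(moov_data)} bytes")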