mvdata 0.9.2__tar.gz → 0.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mvdata-0.9.2 → mvdata-0.9.3}/PKG-INFO +19 -1
- mvdata-0.9.3/README.md +24 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/downloader.py +5 -2
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/multivideo.py +63 -2
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/multivideo_slicer.py +284 -64
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/stash_utils.py +126 -25
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/writer_base.py +17 -2
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata.egg-info/PKG-INFO +19 -1
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata.egg-info/SOURCES.txt +1 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/pyproject.toml +1 -1
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_dataset_base_defaults.py +3 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_multivideo_bit_depth.py +48 -0
- mvdata-0.9.3/tests/test_multivideo_slicer.py +521 -0
- mvdata-0.9.3/tests/test_release_script.py +220 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_s3_downloader.py +40 -0
- mvdata-0.9.2/README.md +0 -6
- mvdata-0.9.2/tests/test_multivideo_slicer.py +0 -223
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/__init__.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/cloud_storage.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/__init__.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/_imports.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/decode.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/encode.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/frames.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/native_yuv.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/probe.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/codec/select.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/dataset_base.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/gpu_policy.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/gpu_support.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/image_metrics.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/legacy_writer.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/multivideo_writer.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/nvdec_parallel.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/nvenc_codec.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/per_frame.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/ranged.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/ranged_writer.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/utils.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/video_stream_reader.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata/write_progress.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata.egg-info/dependency_links.txt +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata.egg-info/requires.txt +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/mvdata.egg-info/top_level.txt +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/setup.cfg +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_gpu_policy.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_gpu_support.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_image_metrics.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_nvdec_parallel.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_per_camera.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_ranged_mixed_streams.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_ranged_nvenc_roundtrip.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_ranged_resume.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_ranged_stream_discovery.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_roundtrip.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_stash_bit_depth.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_stash_comprehensive.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_stash_policy.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_stash_regenerate.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_video_stream_reader.py +0 -0
- {mvdata-0.9.2 → mvdata-0.9.3}/tests/test_write_progress.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mvdata
|
|
3
|
-
Version: 0.9.2
|
|
3
|
+
Version: 0.9.3
|
|
4
4
|
Summary: Gracia Dataset Convention - Python library for working with multi-view video datasets
|
|
5
5
|
Author: Gracia Team
|
|
6
6
|
License: MIT
|
|
@@ -50,3 +50,21 @@ Python library for working with Gracia multi-view video datasets.
|
|
|
50
50
|
|
|
51
51
|
The package provides readers, writers, conversion tools, and GPU-aware video
|
|
52
52
|
decode helpers for the dataset layouts documented in the `docs` directory.
|
|
53
|
+
|
|
54
|
+
## Release
|
|
55
|
+
|
|
56
|
+
Prepare the next release with the local helper script:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
|
|
60
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
|
|
64
|
+
explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
|
|
65
|
+
`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
|
|
66
|
+
commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
|
|
67
|
+
`--push` is set.
|
|
68
|
+
|
|
69
|
+
Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
|
|
70
|
+
Release workflow.
|
mvdata-0.9.3/README.md
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# mvdata
|
|
2
|
+
|
|
3
|
+
Python library for working with Gracia multi-view video datasets.
|
|
4
|
+
|
|
5
|
+
The package provides readers, writers, conversion tools, and GPU-aware video
|
|
6
|
+
decode helpers for the dataset layouts documented in the `docs` directory.
|
|
7
|
+
|
|
8
|
+
## Release
|
|
9
|
+
|
|
10
|
+
Prepare the next release with the local helper script:
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --dry-run
|
|
14
|
+
UV_PROJECT_ENVIRONMENT=.venv312 uv run python scripts/release.py patch --push
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
Use `minor` or `major` instead of `patch` for larger version bumps, or pass an
|
|
18
|
+
explicit version with `--version 1.2.3`. The script updates `pyproject.toml` and
|
|
19
|
+
`uv.lock` through `uv`, runs the tests and build, creates a `Release vX.Y.Z`
|
|
20
|
+
commit, creates an annotated `vX.Y.Z` tag, and pushes the branch and tag when
|
|
21
|
+
`--push` is set.
|
|
22
|
+
|
|
23
|
+
Pushing the tag triggers the GitHub Actions build, PyPI publish, and GitHub
|
|
24
|
+
Release workflow.
|
|
@@ -116,8 +116,11 @@ class DatasetDownloader(ABC):
|
|
|
116
116
|
return None
|
|
117
117
|
|
|
118
118
|
|
|
119
|
+
# Lower-cased file suffixes checked by RangedDatasetDownloader when it lists
# remote objects: a file whose suffix is NOT in this tuple is queued for
# download right away. NOTE(review): presumably the listed suffixes are the
# per-stream media files that get additional filtering afterwards -- confirm
# against the code that follows the suffix check.
RANGED_STREAM_EXTENSIONS = (".avif", ".mp4")
|
|
120
|
+
|
|
121
|
+
|
|
119
122
|
class RangedDatasetDownloader(DatasetDownloader):
|
|
120
|
-
"""Downloader for ranged
|
|
123
|
+
"""Downloader for ranged dataset format."""
|
|
121
124
|
|
|
122
125
|
def _parse_range_folder(self, folder_name: str) -> Optional[Tuple[int, int]]:
|
|
123
126
|
"""Parse range folder name to extract start and end frame numbers."""
|
|
@@ -260,7 +263,7 @@ class RangedDatasetDownloader(DatasetDownloader):
|
|
|
260
263
|
if is_system_file(rel_path_wrapped):
|
|
261
264
|
continue
|
|
262
265
|
|
|
263
|
-
if rel_path_wrapped.suffix.lower()
|
|
266
|
+
if rel_path_wrapped.suffix.lower() not in RANGED_STREAM_EXTENSIONS:
|
|
264
267
|
files_to_download.append((obj_key, local_file))
|
|
265
268
|
continue
|
|
266
269
|
|
|
@@ -20,6 +20,59 @@ SUPPORTED_VIDEO_EXTENSIONS = ['.mov', '.mp4', '.avi']
|
|
|
20
20
|
SUPPORTED_CODECS = ['h264', 'hevc', 'av1']
|
|
21
21
|
|
|
22
22
|
|
|
23
|
+
def _is_content_packet(packet: Any) -> bool:
|
|
24
|
+
return getattr(packet, "size", 0) > 0
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _count_visible_video_packets_if_discard_preroll_detected(
    container: Any,
    video_stream: Any,
    *,
    probe_packets: int = 64,
) -> Optional[int]:
    """Count non-discard content packets if an early discard packet is found.

    The stream is demuxed and only content packets (see
    ``_is_content_packet``) are considered. The first ``probe_packets``
    content packets are probed for a packet whose ``is_discard`` flag is set.

    Args:
        container: Opened container; must expose a callable ``demux``.
        video_stream: Stream passed to ``container.demux``.
        probe_packets: Number of content packets to examine before giving up
            on finding a discard packet.

    Returns:
        ``None`` when ``_may_include_discard_preroll`` rejects the stream,
        when the container has no callable ``demux``, or when
        ``probe_packets`` content packets were examined without meeting a
        discard packet. Otherwise the number of content packets that are not
        flagged as discard; if the stream ends before the probe limit without
        any discard packet, that is simply the number of content packets.

    Note:
        The container is not rewound; it is left wherever demuxing stopped.
    """
    if not _may_include_discard_preroll(video_stream):
        return None

    demux = getattr(container, "demux", None)
    if not callable(demux):
        return None

    # Pin ONE iterator for both loops below. The second loop must resume
    # where the probe loop stopped; without iter(), a re-iterable demux()
    # result (e.g. a list) would restart from the beginning and the packets
    # seen before the first discard would be counted twice.
    packets = iter(demux(video_stream))
    visible_packets = 0
    content_packets = 0
    found_discard = False

    # Probe phase: look for the first discard packet within the budget.
    for packet in packets:
        if not _is_content_packet(packet):
            continue
        content_packets += 1
        if packet.is_discard:
            found_discard = True
            break
        visible_packets += 1
        if content_packets >= probe_packets:
            # No discard within the probe budget: report "not detected".
            return None

    if not found_discard:
        # Stream ended before the probe budget was exhausted.
        return visible_packets

    # Counting phase: consume the rest of the stream, skipping discards.
    for packet in packets:
        if _is_content_packet(packet) and not packet.is_discard:
            visible_packets += 1

    return visible_packets
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _may_include_discard_preroll(video_stream: Any) -> bool:
|
|
67
|
+
if getattr(video_stream, "frames", 0) <= 0:
|
|
68
|
+
return False
|
|
69
|
+
codec_context = getattr(video_stream, "codec_context", None)
|
|
70
|
+
if bool(getattr(codec_context, "has_b_frames", False)):
|
|
71
|
+
return True
|
|
72
|
+
start_time = getattr(video_stream, "start_time", None)
|
|
73
|
+
return start_time not in (None, 0)
|
|
74
|
+
|
|
75
|
+
|
|
23
76
|
def _check_av_available():
|
|
24
77
|
if not HAS_AV:
|
|
25
78
|
raise ImportError(
|
|
@@ -120,9 +173,17 @@ class MultiVideoDataset(Dataset):
|
|
|
120
173
|
video_stream = container.streams.video[0]
|
|
121
174
|
|
|
122
175
|
frame_count = video_stream.frames
|
|
123
|
-
|
|
176
|
+
visible_packet_count = _count_visible_video_packets_if_discard_preroll_detected(
|
|
177
|
+
container,
|
|
178
|
+
video_stream,
|
|
179
|
+
)
|
|
180
|
+
if visible_packet_count is not None and visible_packet_count > 0:
|
|
181
|
+
frame_count = visible_packet_count
|
|
182
|
+
elif frame_count == 0:
|
|
183
|
+
seek = getattr(container, "seek", None)
|
|
184
|
+
if callable(seek):
|
|
185
|
+
seek(0)
|
|
124
186
|
frame_count = sum(1 for _ in container.decode(video=0))
|
|
125
|
-
container.seek(0)
|
|
126
187
|
|
|
127
188
|
fps = float(video_stream.average_rate) if video_stream.average_rate else 30.0
|
|
128
189
|
duration = float(video_stream.duration * video_stream.time_base) if video_stream.duration else 0.0
|
|
@@ -35,6 +35,7 @@ class MultiVideoStreamSliceInfo:
|
|
|
35
35
|
width: int
|
|
36
36
|
height: int
|
|
37
37
|
packet_count: int
|
|
38
|
+
visible_frame_count: int
|
|
38
39
|
keyframe_indices: tuple[int, ...]
|
|
39
40
|
packet_duration_ticks: Optional[int]
|
|
40
41
|
|
|
@@ -350,6 +351,175 @@ def _stream_from_template(output_container: Any, input_stream: Any) -> Any:
|
|
|
350
351
|
return output_container.add_stream(template=input_stream)
|
|
351
352
|
|
|
352
353
|
|
|
354
|
+
@dataclass(frozen=True)
|
|
355
|
+
class _VideoPacketRecord:
|
|
356
|
+
packet_index: int
|
|
357
|
+
pts: int
|
|
358
|
+
dts: int
|
|
359
|
+
is_keyframe: bool
|
|
360
|
+
is_discard: bool
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
@dataclass(frozen=True)
|
|
364
|
+
class _PacketCopyPlan:
|
|
365
|
+
packet_start: int
|
|
366
|
+
packet_end: int
|
|
367
|
+
timestamp_offset: int
|
|
368
|
+
packet_indices: frozenset[int]
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _is_content_packet(packet: Any) -> bool:
|
|
372
|
+
return getattr(packet, "size", 0) > 0
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _record_packet(packet: Any, packet_index: int) -> _VideoPacketRecord:
    """Snapshot *packet* into a ``_VideoPacketRecord``.

    Args:
        packet: Demuxed packet; its ``pts`` and ``dts`` must be convertible
            with ``int()`` (callers check for ``None`` beforehand).
        packet_index: Index to store on the record.

    Returns:
        A record with integer timestamps and boolean keyframe/discard flags.
    """
    presentation_ts = int(packet.pts)
    decode_ts = int(packet.dts)
    keyframe_flag = bool(packet.is_keyframe)
    discard_flag = bool(packet.is_discard)
    return _VideoPacketRecord(
        packet_index=packet_index,
        pts=presentation_ts,
        dts=decode_ts,
        is_keyframe=keyframe_flag,
        is_discard=discard_flag,
    )
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _presentation_order(records: list[_VideoPacketRecord]) -> list[_VideoPacketRecord]:
|
|
386
|
+
return sorted(
|
|
387
|
+
(record for record in records if not record.is_discard),
|
|
388
|
+
key=lambda record: (record.pts, record.dts, record.packet_index),
|
|
389
|
+
)
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
def _count_visible_video_packets(video_path: Path) -> int:
    """Count the non-discard content packets of the first video stream.

    Args:
        video_path: Path of the video file to open.

    Returns:
        Number of demuxed packets that are content packets and are not
        flagged as discard.

    Raises:
        MultiVideoSliceError: If the file contains no video stream.
    """
    container = av.open(str(video_path))
    try:
        if not container.streams.video:
            raise MultiVideoSliceError(f"No video stream in sliced output: {video_path}")

        first_video_stream = container.streams.video[0]
        visible_total = 0
        for packet in container.demux(first_video_stream):
            if _is_content_packet(packet) and not packet.is_discard:
                visible_total += 1
        return visible_total
    finally:
        # Always release the container, including on the error path.
        container.close()
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _read_video_packet_records(video_path: Path) -> list[_VideoPacketRecord]:
    """Open *video_path* and return packet records for its first video stream.

    Args:
        video_path: Path of the video file to scan.

    Returns:
        The non-empty list produced by ``_scan_packet_records``.

    Raises:
        MultiVideoSliceError: If the file has no video stream, or if the scan
            yields no records. Errors raised by ``_scan_packet_records``
            propagate unchanged.
    """
    container = av.open(str(video_path))
    try:
        if not container.streams.video:
            raise MultiVideoSliceError(f"No video stream: {video_path}")

        scanned = _scan_packet_records(container, container.streams.video[0])
        if scanned:
            return scanned

        raise MultiVideoSliceError(f"No video packets: {video_path}")
    finally:
        # Always release the container, including on the error paths.
        container.close()
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def _scan_packet_records(container: Any, video_stream: Any) -> list[_VideoPacketRecord]:
    """Demux *video_stream* and record every content packet.

    Non-content packets are skipped and do not consume an index. Each content
    packet gets the next consecutive index, whether or not it has timestamps.

    Args:
        container: Opened container exposing ``demux``.
        video_stream: Stream passed to ``container.demux``.

    Returns:
        One record per content packet, in demux order.

    Raises:
        MultiVideoSliceError: If any content packet lacks ``pts`` or ``dts``.
            The whole stream is still demuxed before the error is raised.
    """
    collected: list[_VideoPacketRecord] = []
    saw_missing_timestamp = False

    content_packets = (
        candidate
        for candidate in container.demux(video_stream)
        if _is_content_packet(candidate)
    )
    for index, packet in enumerate(content_packets):
        if packet.pts is None or packet.dts is None:
            saw_missing_timestamp = True
            continue
        collected.append(_record_packet(packet, index))

    if saw_missing_timestamp:
        raise MultiVideoSliceError(
            "Video is not eligible for frame-based packet-copy slicing: "
            "video packets must have presentation and decode timestamps"
        )

    return collected
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
def _copy_packet_for_mux(packet: Any) -> Any:
    """Build an independent ``av.Packet`` duplicating *packet*.

    The new packet is created from the bytes of *packet* and then receives
    its ``pts``, ``dts``, ``duration``, ``time_base`` (only when not
    ``None``), keyframe/corrupt flags, ``opaque`` value and all side data.
    The slicer uses this when one source packet has to be muxed into more
    than one output.
    """
    duplicate = av.Packet(bytes(packet))

    for attribute in ("pts", "dts", "duration"):
        setattr(duplicate, attribute, getattr(packet, attribute))

    source_time_base = packet.time_base
    if source_time_base is not None:
        duplicate.time_base = source_time_base

    for flag in ("is_keyframe", "is_corrupt"):
        setattr(duplicate, flag, bool(getattr(packet, flag)))

    duplicate.opaque = packet.opaque

    for entry in packet.iter_sidedata():
        duplicate.set_sidedata(entry)

    return duplicate
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def _packet_copy_plans(
    video_path: Path,
    ranges: tuple[MultiVideoSliceRange, ...],
    stream_id: int,
) -> dict[MultiVideoSliceRange, _PacketCopyPlan]:
    """Scan *video_path* and compute a packet-copy plan for every range.

    Thin wrapper: reads the packet records of the file and delegates the
    planning to ``_packet_copy_plans_from_records``. Errors from either step
    propagate unchanged.
    """
    packet_records = _read_video_packet_records(video_path)
    return _packet_copy_plans_from_records(packet_records, ranges, stream_id)
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _packet_copy_plans_from_records(
    records: list[_VideoPacketRecord],
    ranges: tuple[MultiVideoSliceRange, ...],
    stream_id: int,
) -> dict[MultiVideoSliceRange, _PacketCopyPlan]:
    """Compute, for every range, which source packets must be copied.

    Frame numbers in *ranges* refer to visible frames, i.e. positions in the
    presentation-ordered list of non-discard records.

    Args:
        records: Packet records of one stream, indexed in demux order.
        ranges: Ranges to plan; each provides ``source_start_frame``,
            ``source_end_frame`` and ``output_name``.
        stream_id: Stream identifier, used only in error messages.

    Returns:
        A mapping from each range to its ``_PacketCopyPlan``.

    Raises:
        MultiVideoSliceError: If a range starts before frame 0, ends beyond
            the available visible frames, or starts on a non-keyframe.
    """
    presentation = _presentation_order(records)
    visible_frame_by_packet = {
        record.packet_index: frame_index
        for frame_index, record in enumerate(presentation)
    }

    plans: dict[MultiVideoSliceRange, _PacketCopyPlan] = {}
    for range_info in ranges:
        start_frame = range_info.source_start_frame
        end_frame = range_info.source_end_frame

        # A negative start would silently wrap around through Python's
        # negative indexing and plan frames from the END of the stream.
        if start_frame < 0:
            raise MultiVideoSliceError(
                f"Stream {stream_id} range {range_info.output_name} starts at "
                f"invalid frame {start_frame}"
            )

        if end_frame >= len(presentation):
            raise MultiVideoSliceError(
                f"Stream {stream_id} range {range_info.output_name} ends at frame "
                f"{end_frame}, but only {len(presentation)} visible "
                "frames are available"
            )

        start_record = presentation[start_frame]
        if not start_record.is_keyframe:
            raise MultiVideoSliceError(
                f"Stream {stream_id} range {range_info.output_name} starts at visible "
                f"frame {start_frame}, which is not a keyframe"
            )

        packet_start = start_record.packet_index
        # Presentation order can differ from demux order, so the last packet
        # needed is the highest packet index among the range's frames, not
        # necessarily the packet of the last frame.
        packet_end = max(
            (record.packet_index for record in presentation[start_frame:end_frame + 1]),
            default=packet_start,
        )

        # Within the demux span keep every visible packet whose frame is at
        # or after the range start. Discard packets and frames that precede
        # the range start are left out; frames after the range end that fall
        # inside the span are kept.
        packet_indices = frozenset(
            packet_index
            for packet_index in range(packet_start, packet_end + 1)
            if visible_frame_by_packet.get(packet_index, -1) >= start_frame
        )

        plans[range_info] = _PacketCopyPlan(
            packet_start=packet_start,
            packet_end=packet_end,
            # Subtracting the first frame's pts rebases the range to zero.
            timestamp_offset=start_record.pts,
            packet_indices=packet_indices,
        )

    return plans
|
|
521
|
+
|
|
522
|
+
|
|
353
523
|
def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStreamSliceInfo:
|
|
354
524
|
_check_av_available()
|
|
355
525
|
|
|
@@ -370,18 +540,19 @@ def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStrea
|
|
|
370
540
|
)
|
|
371
541
|
|
|
372
542
|
codec = _normalize_codec_name(video_stream.codec_context.name)
|
|
373
|
-
keyframe_indices: list[int] = []
|
|
374
543
|
packet_count = 0
|
|
375
544
|
first_duration: Optional[int] = None
|
|
545
|
+
records: list[_VideoPacketRecord] = []
|
|
376
546
|
missing_duration = False
|
|
547
|
+
missing_timestamps = False
|
|
377
548
|
variable_duration = False
|
|
378
549
|
|
|
379
550
|
for packet in container.demux(video_stream):
|
|
380
|
-
if
|
|
551
|
+
if not _is_content_packet(packet):
|
|
381
552
|
continue
|
|
382
553
|
|
|
383
|
-
if packet.
|
|
384
|
-
|
|
554
|
+
if packet.pts is None or packet.dts is None:
|
|
555
|
+
missing_timestamps = True
|
|
385
556
|
|
|
386
557
|
duration = packet.duration
|
|
387
558
|
if duration is None or duration <= 0:
|
|
@@ -391,16 +562,34 @@ def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStrea
|
|
|
391
562
|
elif int(duration) != first_duration:
|
|
392
563
|
variable_duration = True
|
|
393
564
|
|
|
565
|
+
if packet.pts is not None and packet.dts is not None:
|
|
566
|
+
records.append(_record_packet(packet, packet_count))
|
|
394
567
|
packet_count += 1
|
|
395
568
|
|
|
396
569
|
if packet_count == 0:
|
|
397
570
|
raise MultiVideoSliceError(f"Stream {stream_id} has no video packets: {video_path}")
|
|
571
|
+
if missing_timestamps:
|
|
572
|
+
raise MultiVideoSliceError(
|
|
573
|
+
f"Stream {stream_id} is not eligible for frame-based packet-copy slicing: "
|
|
574
|
+
"video packets must have presentation and decode timestamps"
|
|
575
|
+
)
|
|
398
576
|
if missing_duration or variable_duration:
|
|
399
577
|
raise MultiVideoSliceError(
|
|
400
578
|
f"Stream {stream_id} is not eligible for frame-based slicing: "
|
|
401
579
|
"video packets must have a constant non-zero duration"
|
|
402
580
|
)
|
|
403
581
|
|
|
582
|
+
presentation = _presentation_order(records)
|
|
583
|
+
if not presentation:
|
|
584
|
+
raise MultiVideoSliceError(
|
|
585
|
+
f"Stream {stream_id} has no visible video frames after discards: {video_path}"
|
|
586
|
+
)
|
|
587
|
+
keyframe_indices = tuple(
|
|
588
|
+
frame_index
|
|
589
|
+
for frame_index, record in enumerate(presentation)
|
|
590
|
+
if record.is_keyframe
|
|
591
|
+
)
|
|
592
|
+
|
|
404
593
|
return MultiVideoStreamSliceInfo(
|
|
405
594
|
stream_id=stream_id,
|
|
406
595
|
path=str(video_path),
|
|
@@ -409,7 +598,8 @@ def _scan_video_for_slicing(stream_id: int, video_path: Path) -> MultiVideoStrea
|
|
|
409
598
|
width=int(video_stream.width),
|
|
410
599
|
height=int(video_stream.height),
|
|
411
600
|
packet_count=packet_count,
|
|
412
|
-
|
|
601
|
+
visible_frame_count=len(presentation),
|
|
602
|
+
keyframe_indices=keyframe_indices,
|
|
413
603
|
packet_duration_ticks=first_duration,
|
|
414
604
|
)
|
|
415
605
|
finally:
|
|
@@ -447,73 +637,100 @@ def _slice_video_to_ranged_mp4s(
|
|
|
447
637
|
if not ranges:
|
|
448
638
|
return
|
|
449
639
|
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
expected = current_range.frame_count
|
|
462
|
-
if packets_written != expected:
|
|
463
|
-
raise MultiVideoSliceError(
|
|
464
|
-
f"Expected {expected} packets for stream {stream_id} range "
|
|
465
|
-
f"{current_range.output_name}, wrote {packets_written}"
|
|
466
|
-
)
|
|
467
|
-
output_container = None
|
|
468
|
-
packets_written = 0
|
|
640
|
+
packet_plans = _packet_copy_plans(video_path, ranges, stream_id)
|
|
641
|
+
packet_targets: dict[int, list[MultiVideoSliceRange]] = {}
|
|
642
|
+
for range_info in ranges:
|
|
643
|
+
for packet_index in packet_plans[range_info].packet_indices:
|
|
644
|
+
packet_targets.setdefault(packet_index, []).append(range_info)
|
|
645
|
+
|
|
646
|
+
last_packet_end = max(packet_plans[range_info].packet_end for range_info in ranges)
|
|
647
|
+
packets_written = {range_info: 0 for range_info in ranges}
|
|
648
|
+
output_files: dict[MultiVideoSliceRange, Path] = {}
|
|
649
|
+
output_containers: dict[MultiVideoSliceRange, Any] = {}
|
|
650
|
+
output_streams: dict[MultiVideoSliceRange, Any] = {}
|
|
469
651
|
|
|
652
|
+
input_container = av.open(str(video_path))
|
|
470
653
|
try:
|
|
471
654
|
if not input_container.streams.video:
|
|
472
655
|
raise MultiVideoSliceError(f"Stream {stream_id} has no video stream: {video_path}")
|
|
473
656
|
|
|
474
657
|
input_stream = input_container.streams.video[0]
|
|
475
|
-
output_stream = None
|
|
476
|
-
packet_index = 0
|
|
477
658
|
|
|
659
|
+
def open_output(range_info: MultiVideoSliceRange) -> tuple[Any, Any]:
|
|
660
|
+
if range_info not in output_containers:
|
|
661
|
+
range_dir = output_path / range_info.output_name / "rgb"
|
|
662
|
+
range_dir.mkdir(parents=True, exist_ok=True)
|
|
663
|
+
output_file = range_dir / _format_stream_filename(stream_id)
|
|
664
|
+
output_container = av.open(str(output_file), mode="w", format="mp4")
|
|
665
|
+
output_containers[range_info] = output_container
|
|
666
|
+
output_streams[range_info] = _stream_from_template(output_container, input_stream)
|
|
667
|
+
output_files[range_info] = output_file
|
|
668
|
+
return output_containers[range_info], output_streams[range_info]
|
|
669
|
+
|
|
670
|
+
def close_finished_outputs(packet_index: int) -> None:
|
|
671
|
+
for range_info in list(output_containers):
|
|
672
|
+
if packet_index > packet_plans[range_info].packet_end:
|
|
673
|
+
output_containers.pop(range_info).close()
|
|
674
|
+
output_streams.pop(range_info, None)
|
|
675
|
+
|
|
676
|
+
def mux_packet(range_info: MultiVideoSliceRange, packet: Any) -> None:
|
|
677
|
+
output_container, output_stream = open_output(range_info)
|
|
678
|
+
packet_plan = packet_plans[range_info]
|
|
679
|
+
original_pts = packet.pts
|
|
680
|
+
original_dts = packet.dts
|
|
681
|
+
original_stream = packet.stream
|
|
682
|
+
try:
|
|
683
|
+
packet.pts = packet.pts - packet_plan.timestamp_offset
|
|
684
|
+
packet.dts = packet.dts - packet_plan.timestamp_offset
|
|
685
|
+
packet.stream = output_stream
|
|
686
|
+
output_container.mux(packet)
|
|
687
|
+
finally:
|
|
688
|
+
packet.pts = original_pts
|
|
689
|
+
packet.dts = original_dts
|
|
690
|
+
if original_stream is not None:
|
|
691
|
+
packet.stream = original_stream
|
|
692
|
+
|
|
693
|
+
packet_index = 0
|
|
478
694
|
for packet in input_container.demux(input_stream):
|
|
479
|
-
if
|
|
695
|
+
if not _is_content_packet(packet):
|
|
480
696
|
continue
|
|
481
697
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
if current_range_index >= len(ranges):
|
|
486
|
-
return
|
|
487
|
-
current_range = ranges[current_range_index]
|
|
488
|
-
output_stream = None
|
|
698
|
+
if packet_index > last_packet_end:
|
|
699
|
+
break
|
|
700
|
+
close_finished_outputs(packet_index)
|
|
489
701
|
|
|
490
|
-
|
|
702
|
+
targets = packet_targets.get(packet_index)
|
|
703
|
+
if not targets:
|
|
491
704
|
packet_index += 1
|
|
492
705
|
continue
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
packet.stream = output_stream
|
|
502
|
-
output_container.mux(packet)
|
|
503
|
-
packets_written += 1
|
|
706
|
+
target_packets = (
|
|
707
|
+
[_copy_packet_for_mux(packet) for _ in targets]
|
|
708
|
+
if len(targets) > 1
|
|
709
|
+
else [packet]
|
|
710
|
+
)
|
|
711
|
+
for range_info, target_packet in zip(targets, target_packets):
|
|
712
|
+
mux_packet(range_info, target_packet)
|
|
713
|
+
packets_written[range_info] += 1
|
|
504
714
|
packet_index += 1
|
|
505
715
|
finally:
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
input_container.close()
|
|
716
|
+
for output_container in output_containers.values():
|
|
717
|
+
output_container.close()
|
|
718
|
+
input_container.close()
|
|
510
719
|
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
720
|
+
for range_info in ranges:
|
|
721
|
+
expected = range_info.frame_count
|
|
722
|
+
if packets_written[range_info] < expected:
|
|
723
|
+
raise MultiVideoSliceError(
|
|
724
|
+
f"Expected at least {expected} packets for stream {stream_id} range "
|
|
725
|
+
f"{range_info.output_name}, wrote {packets_written[range_info]}"
|
|
726
|
+
)
|
|
727
|
+
output_file = output_files[range_info]
|
|
728
|
+
visible_packets = _count_visible_video_packets(output_file)
|
|
729
|
+
if visible_packets < expected:
|
|
730
|
+
raise MultiVideoSliceError(
|
|
731
|
+
f"Expected at least {expected} visible packets for stream {stream_id} range "
|
|
732
|
+
f"{range_info.output_name}, found {visible_packets}."
|
|
733
|
+
)
|
|
517
734
|
|
|
518
735
|
|
|
519
736
|
class MultiVideoToRangedSlicer(DatasetWriter):
|
|
@@ -750,12 +967,12 @@ class MultiVideoToRangedSlicer(DatasetWriter):
|
|
|
750
967
|
if video.fps != reference_fps:
|
|
751
968
|
_record_stream_error(video.stream_id, message)
|
|
752
969
|
|
|
753
|
-
min_frames = min(video.
|
|
754
|
-
max_frames = max(video.
|
|
970
|
+
min_frames = min(video.visible_frame_count for video in videos)
|
|
971
|
+
max_frames = max(video.visible_frame_count for video in videos)
|
|
755
972
|
tail_spread = max_frames - min_frames
|
|
756
973
|
if tail_spread > self.tail_tolerance_frames:
|
|
757
974
|
counts = ", ".join(
|
|
758
|
-
f"{video.stream_id}={video.
|
|
975
|
+
f"{video.stream_id}={video.visible_frame_count}"
|
|
759
976
|
for video in sorted(videos, key=lambda v: v.stream_id)
|
|
760
977
|
)
|
|
761
978
|
message = (
|
|
@@ -764,7 +981,7 @@ class MultiVideoToRangedSlicer(DatasetWriter):
|
|
|
764
981
|
)
|
|
765
982
|
errors.append(message)
|
|
766
983
|
for video in videos:
|
|
767
|
-
if video.
|
|
984
|
+
if video.visible_frame_count != min_frames:
|
|
768
985
|
_record_stream_error(video.stream_id, message)
|
|
769
986
|
elif tail_spread:
|
|
770
987
|
warnings.append(
|
|
@@ -853,9 +1070,9 @@ class MultiVideoToRangedSlicer(DatasetWriter):
|
|
|
853
1070
|
)
|
|
854
1071
|
|
|
855
1072
|
discarded_tail_frames = {
|
|
856
|
-
video.stream_id: video.
|
|
1073
|
+
video.stream_id: video.visible_frame_count - min_frames
|
|
857
1074
|
for video in videos
|
|
858
|
-
if video.
|
|
1075
|
+
if video.visible_frame_count > min_frames
|
|
859
1076
|
}
|
|
860
1077
|
plan = MultiVideoSlicePlan(
|
|
861
1078
|
version=1,
|
|
@@ -909,6 +1126,8 @@ class MultiVideoToRangedSlicer(DatasetWriter):
|
|
|
909
1126
|
if self.copy_meta:
|
|
910
1127
|
self._copy_meta_folder()
|
|
911
1128
|
|
|
1129
|
+
ranged_dataset = RangedDataset(self.output_path, max_workers=self.max_workers)
|
|
1130
|
+
|
|
912
1131
|
if self.stash_policy == "copy":
|
|
913
1132
|
self._copy_stash_folder(
|
|
914
1133
|
stream_ids=list(plan.stream_ids),
|
|
@@ -917,7 +1136,8 @@ class MultiVideoToRangedSlicer(DatasetWriter):
|
|
|
917
1136
|
maintain_frame_numbers=True,
|
|
918
1137
|
)
|
|
919
1138
|
elif self.stash_policy == "generate":
|
|
920
|
-
self.
|
|
1139
|
+
self._generate_stash_from_dataset(
|
|
1140
|
+
source=ranged_dataset,
|
|
921
1141
|
stream_ids=list(plan.stream_ids),
|
|
922
1142
|
start_frame=start_frame,
|
|
923
1143
|
end_frame=end_frame,
|
|
@@ -927,4 +1147,4 @@ class MultiVideoToRangedSlicer(DatasetWriter):
|
|
|
927
1147
|
if self.verbose:
|
|
928
1148
|
print(f"Successfully created sliced Ranged dataset at {self.output_path}")
|
|
929
1149
|
|
|
930
|
-
return
|
|
1150
|
+
return ranged_dataset
|