skeletrack 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. skeletrack-0.1.0/.gitignore +42 -0
  2. skeletrack-0.1.0/LICENSE +21 -0
  3. skeletrack-0.1.0/PKG-INFO +151 -0
  4. skeletrack-0.1.0/README.md +111 -0
  5. skeletrack-0.1.0/pyproject.toml +42 -0
  6. skeletrack-0.1.0/src/skeletrack/__init__.py +87 -0
  7. skeletrack-0.1.0/src/skeletrack/_utils/__init__.py +0 -0
  8. skeletrack-0.1.0/src/skeletrack/_version.py +1 -0
  9. skeletrack-0.1.0/src/skeletrack/core/__init__.py +5 -0
  10. skeletrack-0.1.0/src/skeletrack/core/config.py +38 -0
  11. skeletrack-0.1.0/src/skeletrack/core/pipeline.py +178 -0
  12. skeletrack-0.1.0/src/skeletrack/core/video.py +115 -0
  13. skeletrack-0.1.0/src/skeletrack/data/__init__.py +6 -0
  14. skeletrack-0.1.0/src/skeletrack/data/bbox.py +42 -0
  15. skeletrack-0.1.0/src/skeletrack/data/collection.py +135 -0
  16. skeletrack-0.1.0/src/skeletrack/data/skeleton.py +79 -0
  17. skeletrack-0.1.0/src/skeletrack/data/track.py +108 -0
  18. skeletrack-0.1.0/src/skeletrack/detection/__init__.py +0 -0
  19. skeletrack-0.1.0/src/skeletrack/detection/base.py +36 -0
  20. skeletrack-0.1.0/src/skeletrack/detection/registry.py +45 -0
  21. skeletrack-0.1.0/src/skeletrack/detection/yolo.py +72 -0
  22. skeletrack-0.1.0/src/skeletrack/filters/__init__.py +0 -0
  23. skeletrack-0.1.0/src/skeletrack/filters/scene.py +22 -0
  24. skeletrack-0.1.0/src/skeletrack/io/__init__.py +0 -0
  25. skeletrack-0.1.0/src/skeletrack/io/coco.py +71 -0
  26. skeletrack-0.1.0/src/skeletrack/io/dataframe.py +48 -0
  27. skeletrack-0.1.0/src/skeletrack/io/npy.py +131 -0
  28. skeletrack-0.1.0/src/skeletrack/pose/__init__.py +0 -0
  29. skeletrack-0.1.0/src/skeletrack/pose/base.py +35 -0
  30. skeletrack-0.1.0/src/skeletrack/pose/mediapipe_backend.py +88 -0
  31. skeletrack-0.1.0/src/skeletrack/pose/registry.py +36 -0
  32. skeletrack-0.1.0/src/skeletrack/repair/__init__.py +0 -0
  33. skeletrack-0.1.0/src/skeletrack/tracking/__init__.py +0 -0
  34. skeletrack-0.1.0/src/skeletrack/tracking/base.py +24 -0
  35. skeletrack-0.1.0/src/skeletrack/viz/__init__.py +0 -0
  36. skeletrack-0.1.0/tests/__init__.py +0 -0
  37. skeletrack-0.1.0/tests/test_bbox.py +81 -0
  38. skeletrack-0.1.0/tests/test_collection.py +123 -0
  39. skeletrack-0.1.0/tests/test_io_npy.py +130 -0
  40. skeletrack-0.1.0/tests/test_registry.py +74 -0
  41. skeletrack-0.1.0/tests/test_scene.py +35 -0
  42. skeletrack-0.1.0/tests/test_skeleton.py +84 -0
  43. skeletrack-0.1.0/tests/test_track.py +96 -0
@@ -0,0 +1,42 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+
9
+ # Virtual environments
10
+ .venv/
11
+ venv/
12
+
13
+ # IDE
14
+ .idea/
15
+ .vscode/
16
+ *.swp
17
+ *.swo
18
+
19
+ # Claude Code
20
+ .claude/
21
+
22
+ # OS
23
+ .DS_Store
24
+ Thumbs.db
25
+
26
+ # Testing / Coverage
27
+ .pytest_cache/
28
+ htmlcov/
29
+ .coverage
30
+
31
+ # YOLO model weights
32
+ *.pt
33
+
34
+ # Directories not yet ready
35
+ docs/
36
+ examples/
37
+
38
+ # Output data
39
+ *.npy
40
+
41
+ # Others
42
+ CLAUDE.md
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Steven Tse
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,151 @@
1
+ Metadata-Version: 2.4
2
+ Name: skeletrack
3
+ Version: 0.1.0
4
+ Summary: Extract multi-person skeleton trajectories from videos with one line of code.
5
+ Project-URL: Homepage, https://github.com/StevenUST/skeletrack
6
+ Author: Steven Tse
7
+ License-Expression: MIT
8
+ License-File: LICENSE
9
+ Keywords: keypoints,pose,skeleton,tracking,video
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: >=3.9
16
+ Requires-Dist: numpy>=1.20
17
+ Requires-Dist: opencv-python>=4.5
18
+ Requires-Dist: tqdm>=4.0
19
+ Provides-Extra: age
20
+ Requires-Dist: torch>=1.9; extra == 'age'
21
+ Requires-Dist: torchvision>=0.10; extra == 'age'
22
+ Provides-Extra: dev
23
+ Requires-Dist: mypy; extra == 'dev'
24
+ Requires-Dist: pytest; extra == 'dev'
25
+ Requires-Dist: pytest-cov; extra == 'dev'
26
+ Requires-Dist: ruff; extra == 'dev'
27
+ Provides-Extra: full
28
+ Requires-Dist: mediapipe>=0.10; extra == 'full'
29
+ Requires-Dist: pandas>=1.3; extra == 'full'
30
+ Requires-Dist: torch>=1.9; extra == 'full'
31
+ Requires-Dist: torchvision>=0.10; extra == 'full'
32
+ Requires-Dist: ultralytics>=8.0; extra == 'full'
33
+ Provides-Extra: mediapipe
34
+ Requires-Dist: mediapipe>=0.10; extra == 'mediapipe'
35
+ Provides-Extra: pandas
36
+ Requires-Dist: pandas>=1.3; extra == 'pandas'
37
+ Provides-Extra: yolo
38
+ Requires-Dist: ultralytics>=8.0; extra == 'yolo'
39
+ Description-Content-Type: text/markdown
40
+
41
+ # Skeletrack
42
+
43
+ Extract multi-person skeleton trajectories from videos with one line of code.
44
+
45
+ ```python
46
+ import skeletrack
47
+
48
+ tracks = skeletrack.extract("video.mp4")
49
+ tracks.filter(min_duration=2.0).save("output.npy")
50
+ ```
51
+
52
+ ## How It Works
53
+
54
+ ```
55
+ Video → Person Detection (YOLO) → Multi-Person Tracking (ByteTrack) → Pose Estimation (MediaPipe) → Skeleton Trajectories
56
+ ```
57
+
58
+ Pose estimation runs **after** tracking and filtering, so compute is only spent on valid tracks.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ pip install skeletrack
64
+ pip install skeletrack[yolo,mediapipe] # with detection + pose backends
65
+ ```
66
+
67
+ ### Optional dependencies
68
+
69
+ | Extra | Packages | Purpose |
70
+ |-------|----------|---------|
71
+ | `yolo` | ultralytics | Person detection + tracking |
72
+ | `mediapipe` | mediapipe | Pose estimation |
73
+ | `pandas` | pandas | DataFrame export |
74
+ | `full` | all of the above | Everything |
75
+
76
+ ## Quick Start
77
+
78
+ ```python
79
+ import skeletrack
80
+
81
+ # Extract with default settings
82
+ tracks = skeletrack.extract("video.mp4")
83
+
84
+ # Extract with options
85
+ tracks = skeletrack.extract(
86
+ "video.mp4",
87
+ device="cuda",
88
+ detector="yolo:yolov8s.pt",
89
+ frame_skip=3,
90
+ min_duration=1.0,
91
+ )
92
+
93
+ # Filter + save
94
+ tracks.filter(min_duration=2.0, min_frames=10).save("output.npy")
95
+
96
+ # Access skeleton data
97
+ for track in tracks:
98
+ pose = track.skeleton_array("pose") # shape: (T, 33, 4)
99
+ print(f"Track {track.track_id}: {track.duration:.1f}s, {track.num_frames} frames")
100
+
101
+ # Load saved tracks
102
+ tracks = skeletrack.load("output.npy")
103
+
104
+ # Export to DataFrame
105
+ df = tracks.to_dataframe()
106
+ ```
107
+
108
+ ## Reusable Pipeline
109
+
110
+ For processing multiple videos, create a `Pipeline` to avoid reloading models:
111
+
112
+ ```python
113
+ from skeletrack import Pipeline
114
+
115
+ pipeline = Pipeline(device="cuda")
116
+ for video in video_list:
117
+ tracks = pipeline.run(video)
118
+ tracks.save(f"{video}.npy")
119
+ pipeline.close()
120
+ ```
121
+
122
+ ## Output Format
123
+
124
+ Each track contains:
125
+
126
+ | Field | Type | Description |
127
+ |-------|------|-------------|
128
+ | `track_id` | int | Unique person ID |
129
+ | `frames` | list[int] | Frame indices |
130
+ | `timestamps` | list[float] | Timestamps (seconds) |
131
+ | `bboxes` | list[BBox] | Bounding boxes (x, y, w, h) |
132
+ | `skeletons` | list[Skeleton] | Keypoints per frame |
133
+
134
+ Skeleton keypoint groups (via MediaPipe Holistic):
135
+
136
+ | Group | Keypoints | Dimensions |
137
+ |-------|-----------|------------|
138
+ | `pose` | 33 | x, y, z, visibility |
139
+ | `left_hand` | 21 | x, y, z, visibility |
140
+ | `right_hand` | 21 | x, y, z, visibility |
141
+ | `face` | 468 | x, y, z |
142
+
143
+ ## Requirements
144
+
145
+ - Python >= 3.9
146
+ - numpy >= 1.20
147
+ - opencv-python >= 4.5
148
+
149
+ ## License
150
+
151
+ MIT
@@ -0,0 +1,111 @@
1
+ # Skeletrack
2
+
3
+ Extract multi-person skeleton trajectories from videos with one line of code.
4
+
5
+ ```python
6
+ import skeletrack
7
+
8
+ tracks = skeletrack.extract("video.mp4")
9
+ tracks.filter(min_duration=2.0).save("output.npy")
10
+ ```
11
+
12
+ ## How It Works
13
+
14
+ ```
15
+ Video → Person Detection (YOLO) → Multi-Person Tracking (ByteTrack) → Pose Estimation (MediaPipe) → Skeleton Trajectories
16
+ ```
17
+
18
+ Pose estimation runs **after** tracking and filtering, so compute is only spent on valid tracks.
19
+
20
+ ## Installation
21
+
22
+ ```bash
23
+ pip install skeletrack
24
+ pip install "skeletrack[yolo,mediapipe]"  # with detection + pose backends (quoted for zsh)
25
+ ```
26
+
27
+ ### Optional dependencies
28
+
29
+ | Extra | Packages | Purpose |
30
+ |-------|----------|---------|
31
+ | `yolo` | ultralytics | Person detection + tracking |
32
+ | `mediapipe` | mediapipe | Pose estimation |
33
+ | `pandas` | pandas | DataFrame export |
34
+ | `full` | all of the above | Everything |
35
+
36
+ ## Quick Start
37
+
38
+ ```python
39
+ import skeletrack
40
+
41
+ # Extract with default settings
42
+ tracks = skeletrack.extract("video.mp4")
43
+
44
+ # Extract with options
45
+ tracks = skeletrack.extract(
46
+ "video.mp4",
47
+ device="cuda",
48
+ detector="yolo:yolov8s.pt",
49
+ frame_skip=3,
50
+ min_duration=1.0,
51
+ )
52
+
53
+ # Filter + save
54
+ tracks.filter(min_duration=2.0, min_frames=10).save("output.npy")
55
+
56
+ # Access skeleton data
57
+ for track in tracks:
58
+ pose = track.skeleton_array("pose") # shape: (T, 33, 4)
59
+ print(f"Track {track.track_id}: {track.duration:.1f}s, {track.num_frames} frames")
60
+
61
+ # Load saved tracks
62
+ tracks = skeletrack.load("output.npy")
63
+
64
+ # Export to DataFrame
65
+ df = tracks.to_dataframe()
66
+ ```
67
+
68
+ ## Reusable Pipeline
69
+
70
+ For processing multiple videos, create a `Pipeline` to avoid reloading models:
71
+
72
+ ```python
73
+ from skeletrack import Pipeline
74
+
75
+ pipeline = Pipeline(device="cuda")
76
+ for video in video_list:
77
+ tracks = pipeline.run(video)
78
+ tracks.save(f"{video}.npy")
79
+ pipeline.close()
80
+ ```
81
+
82
+ ## Output Format
83
+
84
+ Each track contains:
85
+
86
+ | Field | Type | Description |
87
+ |-------|------|-------------|
88
+ | `track_id` | int | Unique person ID |
89
+ | `frames` | list[int] | Frame indices |
90
+ | `timestamps` | list[float] | Timestamps (seconds) |
91
+ | `bboxes` | list[BBox] | Bounding boxes (x, y, w, h) |
92
+ | `skeletons` | list[Skeleton] | Keypoints per frame |
93
+
94
+ Skeleton keypoint groups (via MediaPipe Holistic):
95
+
96
+ | Group | Keypoints | Dimensions |
97
+ |-------|-----------|------------|
98
+ | `pose` | 33 | x, y, z, visibility |
99
+ | `left_hand` | 21 | x, y, z, visibility |
100
+ | `right_hand` | 21 | x, y, z, visibility |
101
+ | `face` | 468 | x, y, z |
102
+
103
+ ## Requirements
104
+
105
+ - Python >= 3.9
106
+ - numpy >= 1.20
107
+ - opencv-python >= 4.5
108
+
109
+ ## License
110
+
111
+ MIT
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "skeletrack"
7
+ version = "0.1.0"
8
+ description = "Extract multi-person skeleton trajectories from videos with one line of code."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.9"
12
+ authors = [{ name = "Steven Tse" }]
13
+ keywords = ["skeleton", "pose", "tracking", "video", "keypoints"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Science/Research",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
20
+ ]
21
+ dependencies = [
22
+ "numpy>=1.20",
23
+ "opencv-python>=4.5",
24
+ "tqdm>=4.0",
25
+ ]
26
+
27
+ [project.optional-dependencies]
28
+ yolo = ["ultralytics>=8.0"]
29
+ mediapipe = ["mediapipe>=0.10"]
30
+ age = ["torch>=1.9", "torchvision>=0.10"]
31
+ pandas = ["pandas>=1.3"]
32
+ full = ["skeletrack[yolo,mediapipe,age,pandas]"]
33
+ dev = ["pytest", "pytest-cov", "ruff", "mypy"]
34
+
35
+ [project.urls]
36
+ Homepage = "https://github.com/StevenUST/skeletrack"
37
+
38
+ [tool.hatch.build.targets.wheel]
39
+ packages = ["src/skeletrack"]
40
+
41
+ [tool.ruff]
42
+ line-length = 100
@@ -0,0 +1,87 @@
1
+ """Skeletrack: Extract multi-person skeleton trajectories from videos.
2
+
3
+ Usage::
4
+
5
+ import skeletrack
6
+
7
+ # One-liner
8
+ tracks = skeletrack.extract("video.mp4")
9
+
10
+ # With options
11
+ tracks = skeletrack.extract("video.mp4", device="cuda", detector="yolo:yolov8s.pt")
12
+
13
+ # Filter + save
14
+ tracks.filter(min_duration=2.0).save("output.npy")
15
+
16
+ # Load previously saved tracks
17
+ tracks = skeletrack.load("output.npy")
18
+ """
19
+
20
+ from ._version import __version__
21
+ from .core.config import PipelineConfig
22
+ from .core.pipeline import Pipeline
23
+ from .data.collection import TrackCollection
24
+ from .data.skeleton import Skeleton
25
+ from .data.track import Track
26
+
27
+ __all__ = [
28
+ "__version__",
29
+ "extract",
30
+ "load",
31
+ "Pipeline",
32
+ "PipelineConfig",
33
+ "Track",
34
+ "TrackCollection",
35
+ "Skeleton",
36
+ ]
37
+
38
+
39
def extract(
    source: str,
    *,
    device: str = "cpu",
    detector: str = "yolo",
    pose: str = "mediapipe",
    frame_skip: int = 3,
    min_duration: float = 1.0,
    show_progress: bool = True,
    **kwargs,
) -> TrackCollection:
    """Run the full skeleton-extraction pipeline on a single video.

    Convenience wrapper around :class:`Pipeline` for one-off use; when
    processing many videos, build a :class:`Pipeline` yourself so the
    backends are loaded only once.

    Args:
        source: Path to a video file.
        device: Compute device, "cpu" or "cuda".
        detector: Detector backend name (default: "yolo"); a model variant
            can be appended, e.g. "yolo:yolov8s.pt".
        pose: Pose backend name (default: "mediapipe").
        frame_skip: Run detection on every N-th frame (default: 3).
        min_duration: Drop tracks shorter than this many seconds (default: 1.0).
        show_progress: Display a tqdm progress bar (default: True).
        **kwargs: Extra keyword arguments forwarded to PipelineConfig.

    Returns:
        TrackCollection holding every detected person's skeleton trajectory.
    """
    cfg = PipelineConfig(
        device=device,
        frame_skip=frame_skip,
        min_track_duration=min_duration,
        **kwargs,
    )
    pipeline = Pipeline(config=cfg, detector=detector, pose=pose)
    try:
        return pipeline.run(source, show_progress=show_progress)
    finally:
        # Always release backend resources, even if run() raises.
        pipeline.close()
80
+
81
+
82
def load(path: str) -> TrackCollection:
    """Read a previously saved TrackCollection back from disk.

    Accepts .npy files written by skeletrack as well as the legacy
    VideoScreener format.
    """
    return TrackCollection.load(path)
File without changes
@@ -0,0 +1 @@
1
+ __version__ = "0.1.0"
@@ -0,0 +1,5 @@
1
+ from .config import PipelineConfig
2
+ from .pipeline import Pipeline
3
+ from .video import VideoReader
4
+
5
+ __all__ = ["Pipeline", "PipelineConfig", "VideoReader"]
@@ -0,0 +1,38 @@
1
+ """Pipeline configuration."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+
9
@dataclass
class PipelineConfig:
    """Settings that control every stage of the extraction pipeline.

    Each field ships with a reasonable default, so ``PipelineConfig()``
    is a valid starting point.

    Attributes:
        device: Compute device, "cpu" or "cuda".
        frame_skip: Run detection/tracking on every N-th frame only.
        scene_change_threshold: A histogram correlation below this value is
            treated as a scene cut and resets tracker state.
        max_missing_frames: A track is expired after this many consecutive
            frames without a matching detection.
        min_confidence: Detections below this confidence are ignored.
        min_track_duration: Tracks shorter than this many seconds are dropped.
        motion_threshold: Minimum std-dev of the bbox center (pixels) required
            to keep a track; None disables the motion filter entirely.
        pose_min_detection_conf: Forwarded as MediaPipe min_detection_confidence.
        pose_min_tracking_conf: Forwarded as MediaPipe min_tracking_confidence.
    """

    device: str = "cpu"
    frame_skip: int = 3
    scene_change_threshold: float = 0.4
    max_missing_frames: int = 60
    min_confidence: float = 0.4
    min_track_duration: float = 1.0
    motion_threshold: float | None = 5.0
    pose_min_detection_conf: float = 0.5
    pose_min_tracking_conf: float = 0.5
@@ -0,0 +1,178 @@
1
+ """Main extraction pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ from tqdm import tqdm
9
+
10
+ from ..data.bbox import BBox
11
+ from ..data.collection import TrackCollection
12
+ from ..data.track import Track
13
+ from ..detection.base import DetectorBackend
14
+ from ..detection.registry import get_detector
15
+ from ..filters.scene import scene_changed
16
+ from ..pose.base import PoseBackend
17
+ from ..pose.registry import get_pose_backend
18
+ from .config import PipelineConfig
19
+ from .video import VideoReader
20
+
21
+
22
class Pipeline:
    """Reusable extraction pipeline.

    Backends are created lazily, so constructing a Pipeline is cheap; the
    detector/pose models load on first use and are reused across run() calls.

    Usage::

        pipeline = Pipeline(device="cuda")
        tracks = pipeline.run("video.mp4")
    """

    def __init__(
        self,
        config: PipelineConfig | None = None,
        *,
        detector: str | DetectorBackend = "yolo",
        pose: str | PoseBackend = "mediapipe",
        device: str | None = None,
        **kwargs: Any,
    ):
        """Create a pipeline.

        Args:
            config: Full configuration; a default PipelineConfig is built
                when omitted.
            detector: Detector backend name (or an already-built backend).
            pose: Pose backend name (or an already-built backend).
            device: Convenience override for config.device.
            **kwargs: Extra backend options; currently only "model_name"
                is forwarded to the detector (see _get_detector).
        """
        self.config = config or PipelineConfig()
        # device=... wins over whatever the config object carries.
        if device is not None:
            self.config.device = device

        # Resolve backends (lazy — only instantiated when needed)
        self._detector_spec = detector
        self._pose_spec = pose
        self._extra_kwargs = kwargs
        self._detector: DetectorBackend | None = None
        self._pose: PoseBackend | None = None

    def _get_detector(self) -> DetectorBackend:
        """Instantiate (once) and return the detector backend."""
        if self._detector is None:
            self._detector = get_detector(
                self._detector_spec,
                device=self.config.device,
                min_confidence=self.config.min_confidence,
                # NOTE(review): only "model_name" is forwarded from
                # **kwargs; any other extras are silently dropped here —
                # confirm that is intended.
                **{k: v for k, v in self._extra_kwargs.items()
                   if k in ("model_name",)},
            )
        return self._detector

    def _get_pose(self) -> PoseBackend:
        """Instantiate (once) and return the pose-estimation backend."""
        if self._pose is None:
            self._pose = get_pose_backend(
                self._pose_spec,
                min_detection_confidence=self.config.pose_min_detection_conf,
                min_tracking_confidence=self.config.pose_min_tracking_conf,
            )
        return self._pose

    def run(self, source: str, *, show_progress: bool = True) -> TrackCollection:
        """Extract skeleton trajectories from a video.

        Runs three phases: (1) detection + tracking on a frame-skipped pass,
        (2) duration/motion filtering of the raw tracks, (3) pose estimation
        only on the frames of the surviving tracks.

        Args:
            source: Path to video file.
            show_progress: Show a tqdm progress bar.

        Returns:
            TrackCollection with all detected person tracks and their skeletons.
        """
        cfg = self.config
        detector = self._get_detector()

        # -- Phase 1: Detection + Tracking ------------------------------------
        tracks_by_id: dict[int, Track] = {}
        yolo_to_track: dict[int, int] = {}  # yolo_track_id -> track_id
        active_yolo: dict[int, int] = {}  # yolo_track_id -> last_frame_idx
        next_track_id = 1
        prev_frame: np.ndarray | None = None

        with VideoReader(source, frame_skip=cfg.frame_skip) as reader:
            video_meta = reader.metadata()
            # NOTE(review): fps is read but never used in this method.
            fps = reader.fps

            frames_iter = reader
            if show_progress:
                frames_iter = tqdm(
                    reader,
                    # Approximate: ignores any remainder frames.
                    total=reader.total_frames // cfg.frame_skip,
                    desc="Tracking",
                    unit="frame",
                )

            for info in frames_iter:
                # Scene change detection: a cut invalidates tracker identity,
                # so drop all yolo-id associations and reset the detector.
                if prev_frame is not None:
                    if scene_changed(prev_frame, info.frame, cfg.scene_change_threshold):
                        detector.reset()
                        yolo_to_track.clear()
                        active_yolo.clear()

                prev_frame = info.frame.copy()

                # Detect + track
                detections = detector.detect(info.frame, info.frame_idx)
                current_yolo_ids = set()

                for det in detections:
                    yolo_id = det.track_id
                    # Detections without a tracker id cannot be associated.
                    if yolo_id is None:
                        continue

                    current_yolo_ids.add(yolo_id)

                    # First sighting of this yolo id: open a new Track with
                    # our own monotonically increasing track_id.
                    if yolo_id not in yolo_to_track:
                        tid = next_track_id
                        next_track_id += 1
                        yolo_to_track[yolo_id] = tid
                        tracks_by_id[tid] = Track(track_id=tid)

                    tid = yolo_to_track[yolo_id]
                    tracks_by_id[tid].add_frame(info.frame_idx, info.timestamp, det.bbox)
                    active_yolo[yolo_id] = info.frame_idx

                # Expire stale tracks: yolo ids unseen for too long lose their
                # mapping, so a reappearing id starts a fresh Track. The
                # finished Track itself stays in tracks_by_id.
                stale = [
                    yid for yid, last_f in active_yolo.items()
                    if info.frame_idx - last_f > cfg.max_missing_frames
                    and yid not in current_yolo_ids
                ]
                for yid in stale:
                    active_yolo.pop(yid, None)
                    yolo_to_track.pop(yid, None)

        all_tracks = list(tracks_by_id.values())

        # -- Phase 2: Filter --------------------------------------------------
        if cfg.min_track_duration > 0:
            all_tracks = [t for t in all_tracks if t.duration >= cfg.min_track_duration]

        if cfg.motion_threshold is not None:
            kept = []
            for t in all_tracks:
                # Tracks with fewer than 3 frames are dropped outright: too
                # few samples for a meaningful center std-dev.
                if t.num_frames < 3:
                    continue
                centers = np.array([b.center for b in t.bboxes], dtype=np.float32)
                # Mean of per-axis std-devs of the bbox centers, in pixels.
                std = float(np.std(centers, axis=0).mean())
                if std >= cfg.motion_threshold:
                    kept.append(t)
            all_tracks = kept

        # -- Phase 3: Pose Estimation -----------------------------------------
        pose_backend = self._get_pose()

        with VideoReader(source, frame_skip=1) as reader:
            for track in tqdm(all_tracks, desc="Pose estimation", disable=not show_progress):
                for i, (frame_idx, bbox) in enumerate(zip(track.frames, track.bboxes)):
                    # NOTE(review): seeks once per (track, frame); frames
                    # shared by several tracks are decoded repeatedly.
                    for info in reader.read_range(frame_idx, frame_idx):
                        skeleton = pose_backend.estimate(info.frame, bbox)
                        # Assumes Track pre-allocates skeletons alongside
                        # frames/bboxes — TODO confirm in Track.add_frame.
                        track.skeletons[i] = skeleton

        return TrackCollection(tracks=all_tracks, video_metadata=video_meta)

    def close(self) -> None:
        """Release backend resources; safe to call if nothing was loaded."""
        if self._detector is not None:
            self._detector.close()
        if self._pose is not None:
            self._pose.close()
+ self._pose.close()