skeletrack 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skeletrack-0.1.0/.gitignore +42 -0
- skeletrack-0.1.0/LICENSE +21 -0
- skeletrack-0.1.0/PKG-INFO +151 -0
- skeletrack-0.1.0/README.md +111 -0
- skeletrack-0.1.0/pyproject.toml +42 -0
- skeletrack-0.1.0/src/skeletrack/__init__.py +87 -0
- skeletrack-0.1.0/src/skeletrack/_utils/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/_version.py +1 -0
- skeletrack-0.1.0/src/skeletrack/core/__init__.py +5 -0
- skeletrack-0.1.0/src/skeletrack/core/config.py +38 -0
- skeletrack-0.1.0/src/skeletrack/core/pipeline.py +178 -0
- skeletrack-0.1.0/src/skeletrack/core/video.py +115 -0
- skeletrack-0.1.0/src/skeletrack/data/__init__.py +6 -0
- skeletrack-0.1.0/src/skeletrack/data/bbox.py +42 -0
- skeletrack-0.1.0/src/skeletrack/data/collection.py +135 -0
- skeletrack-0.1.0/src/skeletrack/data/skeleton.py +79 -0
- skeletrack-0.1.0/src/skeletrack/data/track.py +108 -0
- skeletrack-0.1.0/src/skeletrack/detection/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/detection/base.py +36 -0
- skeletrack-0.1.0/src/skeletrack/detection/registry.py +45 -0
- skeletrack-0.1.0/src/skeletrack/detection/yolo.py +72 -0
- skeletrack-0.1.0/src/skeletrack/filters/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/filters/scene.py +22 -0
- skeletrack-0.1.0/src/skeletrack/io/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/io/coco.py +71 -0
- skeletrack-0.1.0/src/skeletrack/io/dataframe.py +48 -0
- skeletrack-0.1.0/src/skeletrack/io/npy.py +131 -0
- skeletrack-0.1.0/src/skeletrack/pose/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/pose/base.py +35 -0
- skeletrack-0.1.0/src/skeletrack/pose/mediapipe_backend.py +88 -0
- skeletrack-0.1.0/src/skeletrack/pose/registry.py +36 -0
- skeletrack-0.1.0/src/skeletrack/repair/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/tracking/__init__.py +0 -0
- skeletrack-0.1.0/src/skeletrack/tracking/base.py +24 -0
- skeletrack-0.1.0/src/skeletrack/viz/__init__.py +0 -0
- skeletrack-0.1.0/tests/__init__.py +0 -0
- skeletrack-0.1.0/tests/test_bbox.py +81 -0
- skeletrack-0.1.0/tests/test_collection.py +123 -0
- skeletrack-0.1.0/tests/test_io_npy.py +130 -0
- skeletrack-0.1.0/tests/test_registry.py +74 -0
- skeletrack-0.1.0/tests/test_scene.py +35 -0
- skeletrack-0.1.0/tests/test_skeleton.py +84 -0
- skeletrack-0.1.0/tests/test_track.py +96 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
*.egg
|
|
8
|
+
|
|
9
|
+
# Virtual environments
|
|
10
|
+
.venv/
|
|
11
|
+
venv/
|
|
12
|
+
|
|
13
|
+
# IDE
|
|
14
|
+
.idea/
|
|
15
|
+
.vscode/
|
|
16
|
+
*.swp
|
|
17
|
+
*.swo
|
|
18
|
+
|
|
19
|
+
# Claude Code
|
|
20
|
+
.claude/
|
|
21
|
+
|
|
22
|
+
# OS
|
|
23
|
+
.DS_Store
|
|
24
|
+
Thumbs.db
|
|
25
|
+
|
|
26
|
+
# Testing / Coverage
|
|
27
|
+
.pytest_cache/
|
|
28
|
+
htmlcov/
|
|
29
|
+
.coverage
|
|
30
|
+
|
|
31
|
+
# YOLO model weights
|
|
32
|
+
*.pt
|
|
33
|
+
|
|
34
|
+
# Directories not yet ready
|
|
35
|
+
docs/
|
|
36
|
+
examples/
|
|
37
|
+
|
|
38
|
+
# Output data
|
|
39
|
+
*.npy
|
|
40
|
+
|
|
41
|
+
# Others
|
|
42
|
+
CLAUDE.md
|
skeletrack-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Steven Tse
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: skeletrack
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extract multi-person skeleton trajectories from videos with one line of code.
|
|
5
|
+
Project-URL: Homepage, https://github.com/StevenUST/skeletrack
|
|
6
|
+
Author: Steven Tse
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Keywords: keypoints,pose,skeleton,tracking,video
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: >=3.9
|
|
16
|
+
Requires-Dist: numpy>=1.20
|
|
17
|
+
Requires-Dist: opencv-python>=4.5
|
|
18
|
+
Requires-Dist: tqdm>=4.0
|
|
19
|
+
Provides-Extra: age
|
|
20
|
+
Requires-Dist: torch>=1.9; extra == 'age'
|
|
21
|
+
Requires-Dist: torchvision>=0.10; extra == 'age'
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
27
|
+
Provides-Extra: full
|
|
28
|
+
Requires-Dist: mediapipe>=0.10; extra == 'full'
|
|
29
|
+
Requires-Dist: pandas>=1.3; extra == 'full'
|
|
30
|
+
Requires-Dist: torch>=1.9; extra == 'full'
|
|
31
|
+
Requires-Dist: torchvision>=0.10; extra == 'full'
|
|
32
|
+
Requires-Dist: ultralytics>=8.0; extra == 'full'
|
|
33
|
+
Provides-Extra: mediapipe
|
|
34
|
+
Requires-Dist: mediapipe>=0.10; extra == 'mediapipe'
|
|
35
|
+
Provides-Extra: pandas
|
|
36
|
+
Requires-Dist: pandas>=1.3; extra == 'pandas'
|
|
37
|
+
Provides-Extra: yolo
|
|
38
|
+
Requires-Dist: ultralytics>=8.0; extra == 'yolo'
|
|
39
|
+
Description-Content-Type: text/markdown
|
|
40
|
+
|
|
41
|
+
# Skeletrack
|
|
42
|
+
|
|
43
|
+
Extract multi-person skeleton trajectories from videos with one line of code.
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import skeletrack
|
|
47
|
+
|
|
48
|
+
tracks = skeletrack.extract("video.mp4")
|
|
49
|
+
tracks.filter(min_duration=2.0).save("output.npy")
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## How It Works
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Video → Person Detection (YOLO) → Multi-Person Tracking (ByteTrack) → Pose Estimation (MediaPipe) → Skeleton Trajectories
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Pose estimation runs **after** tracking and filtering, so compute is only spent on valid tracks.
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install skeletrack
|
|
64
|
+
pip install "skeletrack[yolo,mediapipe]"  # with detection + pose backends (quoted: zsh expands unquoted brackets)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Optional dependencies
|
|
68
|
+
|
|
69
|
+
| Extra | Packages | Purpose |
|
|
70
|
+
|-------|----------|---------|
|
|
71
|
+
| `yolo` | ultralytics | Person detection + tracking |
|
|
72
|
+
| `mediapipe` | mediapipe | Pose estimation |
|
|
73
|
+
| `pandas` | pandas | DataFrame export |
|
|
74
|
+
| `full` | all of the above | Everything |
|
|
75
|
+
|
|
76
|
+
## Quick Start
|
|
77
|
+
|
|
78
|
+
```python
|
|
79
|
+
import skeletrack
|
|
80
|
+
|
|
81
|
+
# Extract with default settings
|
|
82
|
+
tracks = skeletrack.extract("video.mp4")
|
|
83
|
+
|
|
84
|
+
# Extract with options
|
|
85
|
+
tracks = skeletrack.extract(
|
|
86
|
+
"video.mp4",
|
|
87
|
+
device="cuda",
|
|
88
|
+
detector="yolo:yolov8s.pt",
|
|
89
|
+
frame_skip=3,
|
|
90
|
+
min_duration=1.0,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
# Filter + save
|
|
94
|
+
tracks.filter(min_duration=2.0, min_frames=10).save("output.npy")
|
|
95
|
+
|
|
96
|
+
# Access skeleton data
|
|
97
|
+
for track in tracks:
|
|
98
|
+
pose = track.skeleton_array("pose") # shape: (T, 33, 4)
|
|
99
|
+
print(f"Track {track.track_id}: {track.duration:.1f}s, {track.num_frames} frames")
|
|
100
|
+
|
|
101
|
+
# Load saved tracks
|
|
102
|
+
tracks = skeletrack.load("output.npy")
|
|
103
|
+
|
|
104
|
+
# Export to DataFrame
|
|
105
|
+
df = tracks.to_dataframe()
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Reusable Pipeline
|
|
109
|
+
|
|
110
|
+
For processing multiple videos, create a `Pipeline` to avoid reloading models:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
from skeletrack import Pipeline
|
|
114
|
+
|
|
115
|
+
pipeline = Pipeline(device="cuda")
|
|
116
|
+
for video in video_list:
|
|
117
|
+
tracks = pipeline.run(video)
|
|
118
|
+
tracks.save(f"{video}.npy")
|
|
119
|
+
pipeline.close()
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Output Format
|
|
123
|
+
|
|
124
|
+
Each track contains:
|
|
125
|
+
|
|
126
|
+
| Field | Type | Description |
|
|
127
|
+
|-------|------|-------------|
|
|
128
|
+
| `track_id` | int | Unique person ID |
|
|
129
|
+
| `frames` | list[int] | Frame indices |
|
|
130
|
+
| `timestamps` | list[float] | Timestamps (seconds) |
|
|
131
|
+
| `bboxes` | list[BBox] | Bounding boxes (x, y, w, h) |
|
|
132
|
+
| `skeletons` | list[Skeleton] | Keypoints per frame |
|
|
133
|
+
|
|
134
|
+
Skeleton keypoint groups (via MediaPipe Holistic):
|
|
135
|
+
|
|
136
|
+
| Group | Keypoints | Dimensions |
|
|
137
|
+
|-------|-----------|------------|
|
|
138
|
+
| `pose` | 33 | x, y, z, visibility |
|
|
139
|
+
| `left_hand` | 21 | x, y, z, visibility |
|
|
140
|
+
| `right_hand` | 21 | x, y, z, visibility |
|
|
141
|
+
| `face` | 468 | x, y, z |
|
|
142
|
+
|
|
143
|
+
## Requirements
|
|
144
|
+
|
|
145
|
+
- Python >= 3.9
|
|
146
|
+
- numpy >= 1.20
|
|
147
|
+
- opencv-python >= 4.5
|
|
148
|
+
|
|
149
|
+
## License
|
|
150
|
+
|
|
151
|
+
MIT
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# Skeletrack
|
|
2
|
+
|
|
3
|
+
Extract multi-person skeleton trajectories from videos with one line of code.
|
|
4
|
+
|
|
5
|
+
```python
|
|
6
|
+
import skeletrack
|
|
7
|
+
|
|
8
|
+
tracks = skeletrack.extract("video.mp4")
|
|
9
|
+
tracks.filter(min_duration=2.0).save("output.npy")
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## How It Works
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
Video → Person Detection (YOLO) → Multi-Person Tracking (ByteTrack) → Pose Estimation (MediaPipe) → Skeleton Trajectories
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
Pose estimation runs **after** tracking and filtering, so compute is only spent on valid tracks.
|
|
19
|
+
|
|
20
|
+
## Installation
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install skeletrack
|
|
24
|
+
pip install "skeletrack[yolo,mediapipe]"  # with detection + pose backends (quoted: zsh expands unquoted brackets)
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Optional dependencies
|
|
28
|
+
|
|
29
|
+
| Extra | Packages | Purpose |
|
|
30
|
+
|-------|----------|---------|
|
|
31
|
+
| `yolo` | ultralytics | Person detection + tracking |
|
|
32
|
+
| `mediapipe` | mediapipe | Pose estimation |
|
|
33
|
+
| `pandas` | pandas | DataFrame export |
|
|
34
|
+
| `full` | all of the above | Everything |
|
|
35
|
+
|
|
36
|
+
## Quick Start
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
import skeletrack
|
|
40
|
+
|
|
41
|
+
# Extract with default settings
|
|
42
|
+
tracks = skeletrack.extract("video.mp4")
|
|
43
|
+
|
|
44
|
+
# Extract with options
|
|
45
|
+
tracks = skeletrack.extract(
|
|
46
|
+
"video.mp4",
|
|
47
|
+
device="cuda",
|
|
48
|
+
detector="yolo:yolov8s.pt",
|
|
49
|
+
frame_skip=3,
|
|
50
|
+
min_duration=1.0,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
# Filter + save
|
|
54
|
+
tracks.filter(min_duration=2.0, min_frames=10).save("output.npy")
|
|
55
|
+
|
|
56
|
+
# Access skeleton data
|
|
57
|
+
for track in tracks:
|
|
58
|
+
pose = track.skeleton_array("pose") # shape: (T, 33, 4)
|
|
59
|
+
print(f"Track {track.track_id}: {track.duration:.1f}s, {track.num_frames} frames")
|
|
60
|
+
|
|
61
|
+
# Load saved tracks
|
|
62
|
+
tracks = skeletrack.load("output.npy")
|
|
63
|
+
|
|
64
|
+
# Export to DataFrame
|
|
65
|
+
df = tracks.to_dataframe()
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Reusable Pipeline
|
|
69
|
+
|
|
70
|
+
For processing multiple videos, create a `Pipeline` to avoid reloading models:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
from skeletrack import Pipeline
|
|
74
|
+
|
|
75
|
+
pipeline = Pipeline(device="cuda")
|
|
76
|
+
for video in video_list:
|
|
77
|
+
tracks = pipeline.run(video)
|
|
78
|
+
tracks.save(f"{video}.npy")
|
|
79
|
+
pipeline.close()
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Output Format
|
|
83
|
+
|
|
84
|
+
Each track contains:
|
|
85
|
+
|
|
86
|
+
| Field | Type | Description |
|
|
87
|
+
|-------|------|-------------|
|
|
88
|
+
| `track_id` | int | Unique person ID |
|
|
89
|
+
| `frames` | list[int] | Frame indices |
|
|
90
|
+
| `timestamps` | list[float] | Timestamps (seconds) |
|
|
91
|
+
| `bboxes` | list[BBox] | Bounding boxes (x, y, w, h) |
|
|
92
|
+
| `skeletons` | list[Skeleton] | Keypoints per frame |
|
|
93
|
+
|
|
94
|
+
Skeleton keypoint groups (via MediaPipe Holistic):
|
|
95
|
+
|
|
96
|
+
| Group | Keypoints | Dimensions |
|
|
97
|
+
|-------|-----------|------------|
|
|
98
|
+
| `pose` | 33 | x, y, z, visibility |
|
|
99
|
+
| `left_hand` | 21 | x, y, z, visibility |
|
|
100
|
+
| `right_hand` | 21 | x, y, z, visibility |
|
|
101
|
+
| `face` | 468 | x, y, z |
|
|
102
|
+
|
|
103
|
+
## Requirements
|
|
104
|
+
|
|
105
|
+
- Python >= 3.9
|
|
106
|
+
- numpy >= 1.20
|
|
107
|
+
- opencv-python >= 4.5
|
|
108
|
+
|
|
109
|
+
## License
|
|
110
|
+
|
|
111
|
+
MIT
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "skeletrack"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Extract multi-person skeleton trajectories from videos with one line of code."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
authors = [{ name = "Steven Tse" }]
|
|
13
|
+
keywords = ["skeleton", "pose", "tracking", "video", "keypoints"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Science/Research",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
20
|
+
]
|
|
21
|
+
dependencies = [
|
|
22
|
+
"numpy>=1.20",
|
|
23
|
+
"opencv-python>=4.5",
|
|
24
|
+
"tqdm>=4.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
yolo = ["ultralytics>=8.0"]
|
|
29
|
+
mediapipe = ["mediapipe>=0.10"]
|
|
30
|
+
age = ["torch>=1.9", "torchvision>=0.10"]
|
|
31
|
+
pandas = ["pandas>=1.3"]
|
|
32
|
+
full = ["skeletrack[yolo,mediapipe,age,pandas]"]
|
|
33
|
+
dev = ["pytest", "pytest-cov", "ruff", "mypy"]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/StevenUST/skeletrack"
|
|
37
|
+
|
|
38
|
+
[tool.hatch.build.targets.wheel]
|
|
39
|
+
packages = ["src/skeletrack"]
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
line-length = 100
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Skeletrack: Extract multi-person skeleton trajectories from videos.
|
|
2
|
+
|
|
3
|
+
Usage::
|
|
4
|
+
|
|
5
|
+
import skeletrack
|
|
6
|
+
|
|
7
|
+
# One-liner
|
|
8
|
+
tracks = skeletrack.extract("video.mp4")
|
|
9
|
+
|
|
10
|
+
# With options
|
|
11
|
+
tracks = skeletrack.extract("video.mp4", device="cuda", detector="yolo:yolov8s.pt")
|
|
12
|
+
|
|
13
|
+
# Filter + save
|
|
14
|
+
tracks.filter(min_duration=2.0).save("output.npy")
|
|
15
|
+
|
|
16
|
+
# Load previously saved tracks
|
|
17
|
+
tracks = skeletrack.load("output.npy")
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from ._version import __version__
|
|
21
|
+
from .core.config import PipelineConfig
|
|
22
|
+
from .core.pipeline import Pipeline
|
|
23
|
+
from .data.collection import TrackCollection
|
|
24
|
+
from .data.skeleton import Skeleton
|
|
25
|
+
from .data.track import Track
|
|
26
|
+
|
|
27
|
+
# Public API: the names exported by ``from skeletrack import *`` and the
# only symbols callers should rely on importing from the package root.
__all__ = [
    "__version__",
    "extract",
    "load",
    "Pipeline",
    "PipelineConfig",
    "Track",
    "TrackCollection",
    "Skeleton",
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def extract(
    source: str,
    *,
    device: str = "cpu",
    detector: str = "yolo",
    pose: str = "mediapipe",
    frame_skip: int = 3,
    min_duration: float = 1.0,
    show_progress: bool = True,
    **kwargs,
) -> TrackCollection:
    """One-shot extraction of skeleton trajectories from a single video.

    Builds a throwaway :class:`Pipeline`, runs it on *source*, and always
    releases backend resources afterwards.  When several videos must be
    processed, construct one :class:`Pipeline` yourself and reuse it so the
    models are loaded only once.

    Args:
        source: Path to the video file to process.
        device: Compute device, "cpu" or "cuda".
        detector: Detector backend spec (default "yolo"); use
            "yolo:yolov8s.pt" to select a specific model variant.
        pose: Pose-estimation backend name (default "mediapipe").
        frame_skip: Run detection only on every N-th frame (default 3).
        min_duration: Drop tracks shorter than this many seconds (default 1.0).
        show_progress: Display tqdm progress bars (default True).
        **kwargs: Forwarded verbatim to :class:`PipelineConfig`.

    Returns:
        TrackCollection holding one entry per detected person trajectory.
    """
    cfg = PipelineConfig(
        device=device,
        frame_skip=frame_skip,
        min_track_duration=min_duration,
        **kwargs,
    )
    pipe = Pipeline(config=cfg, detector=detector, pose=pose)
    try:
        return pipe.run(source, show_progress=show_progress)
    finally:
        # Release model/backend resources even when run() raises.
        pipe.close()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def load(path: str) -> TrackCollection:
    """Load previously saved tracks from file.

    Supports .npy files (both skeletrack and legacy VideoScreener format).

    Args:
        path: Path to a .npy file, e.g. one produced by
            ``TrackCollection.save``.

    Returns:
        The deserialized :class:`TrackCollection`.
    """
    return TrackCollection.load(path)
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Single source of truth for the package version; must stay in sync with
# the ``version`` field in pyproject.toml.
__version__ = "0.1.0"
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Pipeline configuration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class PipelineConfig:
    """Tunable knobs for the extraction pipeline.

    Every field carries a usable default, so ``PipelineConfig()`` is a valid
    starting point; override only the values you need.
    """

    # Compute device for the backends: "cpu" or "cuda".
    device: str = "cpu"
    # Detection/tracking runs only on every N-th frame.
    frame_skip: int = 3
    # Histogram correlation below this value is treated as a scene cut.
    scene_change_threshold: float = 0.4
    # A track dies after this many consecutive frames without a detection.
    max_missing_frames: int = 60
    # Detections below this confidence are discarded.
    min_confidence: float = 0.4
    # Tracks shorter than this many seconds are dropped.
    min_track_duration: float = 1.0
    # Minimum std-dev of the bbox center (pixels) required to keep a track;
    # set to None to disable motion filtering entirely.
    motion_threshold: float | None = 5.0
    # Forwarded to MediaPipe as min_detection_confidence.
    pose_min_detection_conf: float = 0.5
    # Forwarded to MediaPipe as min_tracking_confidence.
    pose_min_tracking_conf: float = 0.5
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Main extraction pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from tqdm import tqdm
|
|
9
|
+
|
|
10
|
+
from ..data.bbox import BBox
|
|
11
|
+
from ..data.collection import TrackCollection
|
|
12
|
+
from ..data.track import Track
|
|
13
|
+
from ..detection.base import DetectorBackend
|
|
14
|
+
from ..detection.registry import get_detector
|
|
15
|
+
from ..filters.scene import scene_changed
|
|
16
|
+
from ..pose.base import PoseBackend
|
|
17
|
+
from ..pose.registry import get_pose_backend
|
|
18
|
+
from .config import PipelineConfig
|
|
19
|
+
from .video import VideoReader
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Pipeline:
    """Reusable extraction pipeline.

    Runs three sequential phases over a video:

    1. Detection + tracking on every ``frame_skip``-th frame.
    2. Track filtering by duration and (optionally) motion.
    3. Pose estimation, only on frames belonging to surviving tracks.

    Backends are created lazily, so constructing a Pipeline is cheap; reuse
    one instance across many videos to avoid reloading models, and call
    :meth:`close` when finished.

    Usage::

        pipeline = Pipeline(device="cuda")
        tracks = pipeline.run("video.mp4")
    """

    def __init__(
        self,
        config: PipelineConfig | None = None,
        *,
        detector: str | DetectorBackend = "yolo",
        pose: str | PoseBackend = "mediapipe",
        device: str | None = None,
        **kwargs: Any,
    ):
        """Create a pipeline.

        Args:
            config: Full configuration; a default :class:`PipelineConfig`
                is used when omitted.
            detector: Detector backend name/spec, or an instance.
            pose: Pose backend name, or an instance.
            device: Convenience override for ``config.device``.
            **kwargs: Extra backend options; currently only ``model_name``
                is forwarded to the detector factory.
        """
        self.config = config or PipelineConfig()
        if device is not None:
            # An explicit device argument wins over the config's value.
            self.config.device = device

        # Resolve backends lazily — only instantiated when first needed,
        # so building a Pipeline never triggers a model download/load.
        self._detector_spec = detector
        self._pose_spec = pose
        self._extra_kwargs = kwargs
        self._detector: DetectorBackend | None = None
        self._pose: PoseBackend | None = None

    def _get_detector(self) -> DetectorBackend:
        """Return the detector backend, instantiating it on first use."""
        if self._detector is None:
            self._detector = get_detector(
                self._detector_spec,
                device=self.config.device,
                min_confidence=self.config.min_confidence,
                # Forward only the whitelisted extra kwargs to the factory.
                **{k: v for k, v in self._extra_kwargs.items()
                   if k in ("model_name",)},
            )
        return self._detector

    def _get_pose(self) -> PoseBackend:
        """Return the pose backend, instantiating it on first use."""
        if self._pose is None:
            self._pose = get_pose_backend(
                self._pose_spec,
                min_detection_confidence=self.config.pose_min_detection_conf,
                min_tracking_confidence=self.config.pose_min_tracking_conf,
            )
        return self._pose

    def run(self, source: str, *, show_progress: bool = True) -> TrackCollection:
        """Extract skeleton trajectories from a video.

        Args:
            source: Path to video file.
            show_progress: Show a tqdm progress bar.

        Returns:
            TrackCollection with all detected person tracks and their skeletons.
        """
        cfg = self.config
        detector = self._get_detector()

        # -- Phase 1: Detection + Tracking ------------------------------------
        tracks_by_id: dict[int, Track] = {}
        yolo_to_track: dict[int, int] = {}  # yolo_track_id -> track_id
        active_yolo: dict[int, int] = {}  # yolo_track_id -> last_frame_idx
        next_track_id = 1
        prev_frame: np.ndarray | None = None

        with VideoReader(source, frame_skip=cfg.frame_skip) as reader:
            video_meta = reader.metadata()

            frames_iter = reader
            if show_progress:
                frames_iter = tqdm(
                    reader,
                    total=reader.total_frames // cfg.frame_skip,
                    desc="Tracking",
                    unit="frame",
                )

            for info in frames_iter:
                # On a scene cut, drop all tracker state: identities cannot
                # be trusted across the cut, so YOLO ids are remapped fresh.
                if prev_frame is not None and scene_changed(
                    prev_frame, info.frame, cfg.scene_change_threshold
                ):
                    detector.reset()
                    yolo_to_track.clear()
                    active_yolo.clear()

                prev_frame = info.frame.copy()

                # Detect + track
                detections = detector.detect(info.frame, info.frame_idx)
                current_yolo_ids: set[int] = set()

                for det in detections:
                    yolo_id = det.track_id
                    if yolo_id is None:
                        # Untracked detection — cannot be assigned to a track.
                        continue

                    current_yolo_ids.add(yolo_id)

                    if yolo_id not in yolo_to_track:
                        # First sighting of this YOLO id: open a new track.
                        tid = next_track_id
                        next_track_id += 1
                        yolo_to_track[yolo_id] = tid
                        tracks_by_id[tid] = Track(track_id=tid)

                    tid = yolo_to_track[yolo_id]
                    tracks_by_id[tid].add_frame(info.frame_idx, info.timestamp, det.bbox)
                    active_yolo[yolo_id] = info.frame_idx

                # Expire YOLO ids unseen for too long so a recycled id starts
                # a fresh Track instead of extending a stale one.
                stale = [
                    yid for yid, last_f in active_yolo.items()
                    if info.frame_idx - last_f > cfg.max_missing_frames
                    and yid not in current_yolo_ids
                ]
                for yid in stale:
                    active_yolo.pop(yid, None)
                    yolo_to_track.pop(yid, None)

        all_tracks = list(tracks_by_id.values())

        # -- Phase 2: Filter --------------------------------------------------
        if cfg.min_track_duration > 0:
            all_tracks = [t for t in all_tracks if t.duration >= cfg.min_track_duration]

        if cfg.motion_threshold is not None:
            # Drop near-static tracks (e.g. posters, mannequins): keep only
            # tracks whose bbox center moves at least motion_threshold pixels
            # (mean of x/y std-dev). Tracks with < 3 frames are always dropped
            # here since a spread estimate over 1-2 points is meaningless.
            kept = []
            for t in all_tracks:
                if t.num_frames < 3:
                    continue
                centers = np.array([b.center for b in t.bboxes], dtype=np.float32)
                std = float(np.std(centers, axis=0).mean())
                if std >= cfg.motion_threshold:
                    kept.append(t)
            all_tracks = kept

        # -- Phase 3: Pose Estimation -----------------------------------------
        pose_backend = self._get_pose()

        # NOTE(review): this reads one frame per (track, frame) pair via
        # read_range, which re-seeks the video constantly and decodes shared
        # frames once per track. Kept as-is because the per-track visiting
        # order presumably matters to a stateful pose backend (MediaPipe's
        # min_tracking_confidence implies temporal tracking) — confirm before
        # restructuring to a single frame-major pass.
        with VideoReader(source, frame_skip=1) as reader:
            for track in tqdm(all_tracks, desc="Pose estimation", disable=not show_progress):
                for i, (frame_idx, bbox) in enumerate(zip(track.frames, track.bboxes)):
                    for info in reader.read_range(frame_idx, frame_idx):
                        skeleton = pose_backend.estimate(info.frame, bbox)
                        # Assumes Track.add_frame pre-sizes `skeletons` in
                        # step with `frames` — TODO confirm in data/track.py.
                        track.skeletons[i] = skeleton

        return TrackCollection(tracks=all_tracks, video_metadata=video_meta)

    def close(self) -> None:
        """Release backend resources. Safe to call even if never run."""
        if self._detector is not None:
            self._detector.close()
        if self._pose is not None:
            self._pose.close()
|