rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PoseTracker - tracks poses across frames with cached detections
|
|
3
|
+
* Reduces detection frequency for better performance
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { Wholebody } from './wholebody';
|
|
7
|
+
import { BBox } from '../types/index';
|
|
8
|
+
|
|
9
|
+
/**
 * One person box remembered by the tracker between detector runs.
 */
interface TrackedBox {
  /** Padded, image-clamped box that is handed to the pose model. */
  bbox: BBox;
  /** Reset to 0 whenever the box is created or re-matched. */
  age: number;
  /** Value of the tracker's frame counter when the box was last created or re-matched. */
  lastSeen: number;
  /** Identifier assigned once, when the box is first created. */
  id: number;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Runs a Wholebody pipeline over a stream of frames while re-using person
 * boxes between detector runs.
 *
 * The full pipeline (detector + pose model) runs whenever the frame counter is
 * a multiple of `detFrequency` or when no boxes are cached. On every other
 * frame only the pose model runs, on the cached boxes.
 */
export class PoseTracker {
  /** Keypoints at or below this score do not contribute to a tracked box. */
  private static readonly KEYPOINT_SCORE_THRESHOLD = 0.3;
  /** A fresh box continues an existing track when their IoU exceeds this value. */
  private static readonly MATCH_IOU_THRESHOLD = 0.3;
  /** Fractional growth applied to the keypoint extent (0.25 => 25% larger). */
  private static readonly BOX_PADDING = 0.25;
  /** Keypoints per person assumed when the caller does not say otherwise. */
  private static readonly DEFAULT_KEYPOINTS_PER_INSTANCE = 17;

  private wholebody: Wholebody;
  private detFrequency: number;
  private keypointsPerInstance: number;
  private cachedBoxes: TrackedBox[] = [];
  private frameCount: number = 0;
  private nextId: number = 0;

  /**
   * @param WholebodyClass Pipeline class to instantiate. It is constructed
   *   with `null` model paths so that `mode` selects the models.
   * @param detFrequency Run the detector every this many frames. Values that
   *   are not greater than 0 (including NaN) are treated as 1.
   * @param toOpenpose Forwarded to the pipeline constructor.
   * @param mode Model preset forwarded to the pipeline constructor.
   * @param backend Inference backend forwarded to the pipeline constructor.
   * @param device Device string forwarded to the pipeline constructor.
   * @param keypointsPerInstance Number of consecutive entries of the flat
   *   keypoint/score arrays that belong to one person. Defaults to 17; values
   *   that are not positive integers fall back to 17.
   *   NOTE(review): whole-body models presumably emit more than 17 keypoints
   *   per person — confirm the pose model's output layout and pass the real
   *   count here if so.
   */
  constructor(
    WholebodyClass: typeof Wholebody,
    detFrequency: number = 7,
    toOpenpose: boolean = false,
    mode: 'performance' | 'lightweight' | 'balanced' = 'balanced',
    backend: 'onnxruntime' = 'onnxruntime',
    device: string = 'cpu',
    keypointsPerInstance: number = 17
  ) {
    // `frame % 0` is NaN and a non-positive frequency yields a non-positive
    // maxAge in cleanupCachedBoxes(); both degrade into detecting every frame,
    // so normalise such values to exactly that.
    this.detFrequency = detFrequency > 0 ? detFrequency : 1;

    // A zero or fractional stride would stall or mis-slice the loop in
    // updateCachedBoxes().
    this.keypointsPerInstance =
      Number.isInteger(keypointsPerInstance) && keypointsPerInstance > 0
        ? keypointsPerInstance
        : PoseTracker.DEFAULT_KEYPOINTS_PER_INSTANCE;

    this.wholebody = new WholebodyClass(
      null,
      [640, 640],
      null,
      [288, 384],
      mode,
      toOpenpose,
      backend,
      device
    );
  }

  /** Initialises the underlying pipeline; await this before the first `call`. */
  async init(): Promise<void> {
    await this.wholebody.init();
  }

  /**
   * Processes one frame.
   *
   * @param image Raw pixel buffer, forwarded unchanged to the pipeline.
   * @param imgWidth Frame width in pixels; also the upper clamp for box x.
   * @param imgHeight Frame height in pixels; also the upper clamp for box y.
   * @returns The pipeline result for this frame.
   */
  async call(
    image: Uint8Array,
    imgWidth: number,
    imgHeight: number
  ): Promise<{ keypoints: number[][]; scores: number[] }> {
    this.frameCount++;

    const detectionDue =
      this.frameCount % this.detFrequency === 0 || this.cachedBoxes.length === 0;

    if (detectionDue) {
      const detected = await this.wholebody.call(image, imgWidth, imgHeight);
      this.updateCachedBoxes(detected.keypoints, detected.scores, imgWidth, imgHeight);

      if (this.cachedBoxes.length === 0) {
        // Nothing trackable was found. Calling the pipeline again with an
        // empty box list would make it run the detector and pose model a
        // second time on the same frame, so return what we already have.
        return detected;
      }
    }

    // Pose estimation on the cached boxes.
    const bboxes = this.cachedBoxes.map((tb) => tb.bbox);
    const result = await this.wholebody.call(image, imgWidth, imgHeight, bboxes);

    this.cleanupCachedBoxes();

    return result;
  }

  /**
   * Rebuilds the box cache from a fresh pipeline result.
   *
   * The flat `keypoints` / `scores` arrays are cut into consecutive groups of
   * `keypointsPerInstance` entries. Each group with a non-degenerate extent
   * becomes a padded, image-clamped box. A box continues an existing track
   * (keeping its id) when it overlaps a cached box that no earlier group has
   * already claimed; otherwise it starts a new track. Cached boxes that are
   * not re-matched are dropped.
   */
  private updateCachedBoxes(
    keypoints: number[][],
    scores: number[],
    imgWidth: number,
    imgHeight: number
  ): void {
    const stride = this.keypointsPerInstance;
    const newBoxes: TrackedBox[] = [];
    // Ids of cached boxes already re-used in this update. Without this, two
    // overlapping people would both match the same cached box and the same
    // object would be stored (and later inferred on) twice.
    const claimedIds = new Set<number>();

    for (let start = 0; start < keypoints.length; start += stride) {
      const instanceKeypoints = keypoints.slice(start, start + stride);
      const instanceScores = scores.slice(start, start + stride);

      // Extent of the confident keypoints. The starting values mean the
      // extent is never left of 0 / right of imgWidth on its own.
      let minX = imgWidth;
      let minY = imgHeight;
      let maxX = 0;
      let maxY = 0;

      for (let j = 0; j < instanceKeypoints.length; j++) {
        if (instanceScores[j] > PoseTracker.KEYPOINT_SCORE_THRESHOLD) {
          const [x, y] = instanceKeypoints[j];
          minX = Math.min(minX, x);
          minY = Math.min(minY, y);
          maxX = Math.max(maxX, x);
          maxY = Math.max(maxY, y);
        }
      }

      // No confident keypoints, or all of them on one line: no usable box.
      if (!(maxX > minX && maxY > minY)) {
        continue;
      }

      const paddedWidth = (maxX - minX) * (1 + PoseTracker.BOX_PADDING);
      const paddedHeight = (maxY - minY) * (1 + PoseTracker.BOX_PADDING);
      const centerX = (minX + maxX) / 2;
      const centerY = (minY + maxY) / 2;

      const x1 = Math.max(0, centerX - paddedWidth / 2);
      const y1 = Math.max(0, centerY - paddedHeight / 2);
      const x2 = Math.min(imgWidth, centerX + paddedWidth / 2);
      const y2 = Math.min(imgHeight, centerY + paddedHeight / 2);

      // First unclaimed cached box that overlaps enough continues the track.
      const match = this.cachedBoxes.find(
        (cached) =>
          !claimedIds.has(cached.id) &&
          this.calculateIoU(
            [x1, y1, x2, y2],
            [cached.bbox.x1, cached.bbox.y1, cached.bbox.x2, cached.bbox.y2]
          ) > PoseTracker.MATCH_IOU_THRESHOLD
      );

      if (match) {
        match.bbox = { x1, y1, x2, y2 };
        match.age = 0;
        match.lastSeen = this.frameCount;
        claimedIds.add(match.id);
        newBoxes.push(match);
      } else {
        newBoxes.push({
          bbox: { x1, y1, x2, y2 },
          age: 0,
          lastSeen: this.frameCount,
          id: this.nextId++,
        });
      }
    }

    this.cachedBoxes = newBoxes;
  }

  /**
   * Intersection-over-union of two boxes given as `[x1, y1, x2, y2]`.
   * Returns 0 when the union has no area.
   */
  private calculateIoU(box1: number[], box2: number[]): number {
    const x1 = Math.max(box1[0], box2[0]);
    const y1 = Math.max(box1[1], box2[1]);
    const x2 = Math.min(box1[2], box2[2]);
    const y2 = Math.min(box1[3], box2[3]);

    // Clamp at 0 so disjoint boxes give an empty intersection, not a negative one.
    const interWidth = Math.max(0, x2 - x1);
    const interHeight = Math.max(0, y2 - y1);
    const interArea = interWidth * interHeight;

    const box1Area = (box1[2] - box1[0]) * (box1[3] - box1[1]);
    const box2Area = (box2[2] - box2[0]) * (box2[3] - box2[1]);

    const unionArea = box1Area + box2Area - interArea;
    return unionArea > 0 ? interArea / unionArea : 0;
  }

  /** Drops cached boxes that were last seen `detFrequency * 3` or more frames ago. */
  private cleanupCachedBoxes(): void {
    const maxAge = this.detFrequency * 3;
    this.cachedBoxes = this.cachedBoxes.filter(
      (box) => this.frameCount - box.lastSeen < maxAge
    );
  }
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wholebody solution - combines detection and pose estimation
|
|
3
|
+
* Based on rtmlib Wholebody class
|
|
4
|
+
* Supports YOLO12 and YOLOX detectors with RTMW pose model
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { YOLOX } from '../models/yolox';
|
|
8
|
+
import { YOLO12 } from '../models/yolo12';
|
|
9
|
+
import { RTMPose } from '../models/rtmpose';
|
|
10
|
+
import { BBox, ModeType, ModelConfig } from '../types';
|
|
11
|
+
|
|
12
|
+
/**
 * Two-stage whole-body pipeline: a person detector (YOLOX or YOLO12) followed
 * by an RTMPose pose model run on the detected boxes.
 */
export class Wholebody {
  private detModel: YOLOX | YOLO12;
  private poseModel: RTMPose;
  private detectorType: 'yolox' | 'yolo12';

  /**
   * Default model locations and input sizes per mode. Used for the detector
   * when `det` is null and for the pose model when `pose` is null.
   *
   * NOTE(review): the pose sizes here are ordered like the checkpoint names
   * (e.g. "384x288" -> [384, 288]); confirm which of width/height RTMPose
   * expects first.
   */
  private static readonly MODE: Record<ModeType, ModelConfig> = {
    performance: {
      det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_m_8xb8-300e_humanart-c2c7a14a.zip',
      detInputSize: [640, 640],
      pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmw/onnx_sdk/rtmw-dw-x-l_simcc-cocktail14_270e-384x288_20231122.zip',
      poseInputSize: [384, 288],
    },
    lightweight: {
      det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_tiny_8xb8-300e_humanart-6f3252f9.zip',
      detInputSize: [416, 416],
      pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmw/onnx_sdk/rtmw-dw-l-m_simcc-cocktail14_270e-256x192_20231122.zip',
      poseInputSize: [256, 192],
    },
    balanced: {
      det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_m_8xb8-300e_humanart-c2c7a14a.zip',
      detInputSize: [640, 640],
      pose: 'https://download.openmmlab.com/mmpose/v1/projects/rtmw/onnx_sdk/rtmw-dw-x-l_simcc-cocktail14_270e-256x192_20231122.zip',
      poseInputSize: [256, 192],
    },
  };

  /**
   * @param det Detector model location, or null to take it (and its input
   *   size) from the `mode` preset.
   * @param detInputSize Detector input size; ignored when `det` is null.
   * @param pose Pose model location, or null to take it (and its input size)
   *   from the `mode` preset.
   * @param poseInputSize Pose model input size; ignored when `pose` is null.
   * @param mode Preset consulted for whichever of `det` / `pose` is null.
   * @param toOpenpose Forwarded to the RTMPose constructor.
   * @param backend Forwarded to the detector and pose model constructors.
   * @param device Accepted but not used by this constructor.
   * @param detectorType Which detector class to instantiate.
   *   NOTE(review): with `det` null and 'yolo12', the preset's YOLOX weights
   *   are handed to YOLO12 — confirm that is intended.
   */
  constructor(
    det: string | null = null,
    detInputSize: [number, number] = [640, 640],
    pose: string | null = null,
    poseInputSize: [number, number] = [384, 288],
    mode: ModeType = 'balanced',
    toOpenpose: boolean = false,
    backend: 'onnxruntime' = 'onnxruntime',
    device: string = 'cpu',
    detectorType: 'yolox' | 'yolo12' = 'yolox'
  ) {
    this.detectorType = detectorType;

    // A null model location pulls both the location and its input size from
    // the preset; an explicit location keeps the caller's input size.
    const preset = Wholebody.MODE[mode];
    const detSource = det === null ? preset.det : det;
    const detSize = det === null ? preset.detInputSize : detInputSize;
    const poseSource = pose === null ? preset.pose : pose;
    const poseSize = pose === null ? preset.poseInputSize : poseInputSize;

    // The two numeric arguments differ per detector (0.45/0.5 vs 0.45/0.7);
    // presumably NMS and score thresholds — confirm against the model classes.
    this.detModel =
      detectorType === 'yolo12'
        ? new YOLO12(detSource, detSize, 0.45, 0.5, backend)
        : new YOLOX(detSource, detSize, 0.45, 0.7, backend);

    this.poseModel = new RTMPose(poseSource, poseSize, toOpenpose, backend);
  }

  /** Initialises the detector, then the pose model. */
  async init(): Promise<void> {
    await this.detModel.init();
    await this.poseModel.init();
  }

  /**
   * Estimates poses for one frame.
   *
   * @param image Raw pixel buffer, forwarded to the models.
   * @param imgWidth Frame width in pixels.
   * @param imgHeight Frame height in pixels.
   * @param bboxes Person boxes to use. When omitted or empty, the detector is
   *   run to obtain them.
   * @returns Whatever the pose model returns for the chosen boxes.
   */
  async call(
    image: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    bboxes?: BBox[]
  ): Promise<{ keypoints: number[][]; scores: number[] }> {
    let personBoxes: BBox[];

    if (bboxes && bboxes.length !== 0) {
      personBoxes = bboxes;
    } else {
      const detections: any[] = await this.detModel.call(image, imgWidth, imgHeight);
      // Detector output may be wrapped ({ bbox, ... }) or already a plain
      // box; wrapped entries are reduced to their four coordinates.
      personBoxes = detections.map((d: any) =>
        'bbox' in d
          ? { x1: d.bbox.x1, y1: d.bbox.y1, x2: d.bbox.x2, y2: d.bbox.y2 }
          : d
      );
    }

    return await this.poseModel.call(image, imgWidth, imgHeight, personBoxes);
  }
}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Wholebody3D solution - combines detection and 3D pose estimation
|
|
3
|
+
* Based on rtmlib Wholebody3d class
|
|
4
|
+
* Uses YOLOX detector with RTMW3D pose model
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { YOLOX } from '../models/yolox';
|
|
8
|
+
import { RTMPose3D } from '../models/rtmpose3d';
|
|
9
|
+
import { BBox, ModeType, ModelConfig, BackendType } from '../types';
|
|
10
|
+
|
|
11
|
+
/**
 * Value returned by `Wholebody3D.call`, passed through from the 3D pose model.
 *
 * NOTE(review): the axis meaning of each array (presumably person ->
 * keypoint -> coordinate) is defined by RTMPose3D — confirm there.
 */
export interface Wholebody3DResult {
  /** Three-level numeric array of keypoints. */
  keypoints: number[][][];
  /** Two-level numeric array of scores. */
  scores: number[][];
  /** Three-level numeric array; presumably keypoints in SimCC space — confirm. */
  keypointsSimcc: number[][][];
  /** Three-level numeric array; presumably the 2D projection of the keypoints — confirm. */
  keypoints2d: number[][][];
}
|
|
17
|
+
|
|
18
|
+
/**
 * Two-stage 3D whole-body pipeline: a YOLOX person detector followed by an
 * RTMPose3D pose model run on the detected boxes.
 */
export class Wholebody3D {
  private detModel: YOLOX;
  private poseModel: RTMPose3D;

  /**
   * Default model locations and input sizes per mode. Used for the detector
   * when `det` is null and for the pose model when `pose` is null.
   *
   * NOTE(review): all three modes point at the same 384x288 pose checkpoint,
   * yet `lightweight` pairs it with a 192x256 input size — confirm the model
   * accepts that resolution.
   */
  private static readonly MODE: Record<ModeType, ModelConfig> = {
    performance: {
      det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_m_8xb8-300e_humanart-c2c7a14a.zip',
      detInputSize: [640, 640],
      pose: 'https://huggingface.co/Soykaf/RTMW3D-x/resolve/main/onnx/rtmw3d-x_8xb64_cocktail14-384x288-b0a0eab7_20240626.onnx',
      // [width=288, height=384] - creates tensor [1,3,384,288]
      poseInputSize: [288, 384],
    },
    lightweight: {
      det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_tiny_8xb8-300e_humanart-6f3252f9.zip',
      detInputSize: [416, 416],
      pose: 'https://huggingface.co/Soykaf/RTMW3D-x/resolve/main/onnx/rtmw3d-x_8xb64_cocktail14-384x288-b0a0eab7_20240626.onnx',
      // [width=192, height=256]
      poseInputSize: [192, 256],
    },
    balanced: {
      det: 'https://download.openmmlab.com/mmpose/v1/projects/rtmposev1/onnx_sdk/yolox_m_8xb8-300e_humanart-c2c7a14a.zip',
      detInputSize: [640, 640],
      pose: 'https://huggingface.co/Soykaf/RTMW3D-x/resolve/main/onnx/rtmw3d-x_8xb64_cocktail14-384x288-b0a0eab7_20240626.onnx',
      // [width=288, height=384] - creates tensor [1,3,384,288]
      poseInputSize: [288, 384],
    },
  };

  /**
   * @param det Detector model location, or null to take it (and its input
   *   size) from the `mode` preset.
   * @param detInputSize Detector input size; ignored when `det` is null.
   * @param pose Pose model location, or null to take it (and its input size)
   *   from the `mode` preset.
   * @param poseInputSize Pose model input size as [width, height]; ignored
   *   when `pose` is null.
   * @param mode Preset consulted for whichever of `det` / `pose` is null.
   * @param toOpenpose Forwarded to the RTMPose3D constructor.
   * @param backend Forwarded to both model constructors.
   */
  constructor(
    det: string | null = null,
    detInputSize: [number, number] = [640, 640],
    pose: string | null = null,
    poseInputSize: [number, number] = [288, 384], // [width=288, height=384]
    mode: ModeType = 'balanced',
    toOpenpose: boolean = false,
    backend: BackendType = 'webgpu'
  ) {
    // A null model location pulls both the location and its input size from
    // the preset; an explicit location keeps the caller's input size.
    const preset = Wholebody3D.MODE[mode];
    const detSource = det === null ? preset.det : det;
    const detSize = det === null ? preset.detInputSize : detInputSize;
    const poseSource = pose === null ? preset.pose : pose;
    const poseSize = pose === null ? preset.poseInputSize : poseInputSize;

    // 0.45 / 0.7 are presumably NMS and score thresholds — confirm against YOLOX.
    this.detModel = new YOLOX(detSource, detSize, 0.45, 0.7, backend);
    this.poseModel = new RTMPose3D(poseSource, poseSize, toOpenpose, backend);
  }

  /** Initialises the detector, then the pose model. */
  async init(): Promise<void> {
    await this.detModel.init();
    await this.poseModel.init();
  }

  /**
   * Estimates 3D poses for one frame.
   *
   * @param image Raw pixel buffer, forwarded to the models.
   * @param imgWidth Frame width in pixels.
   * @param imgHeight Frame height in pixels.
   * @param bboxes Person boxes to use. When omitted or empty, the detector is
   *   run to obtain them.
   * @returns Whatever the 3D pose model returns for the chosen boxes.
   */
  async call(
    image: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    bboxes?: BBox[]
  ): Promise<Wholebody3DResult> {
    let personBoxes: BBox[];

    if (bboxes && bboxes.length !== 0) {
      personBoxes = bboxes;
    } else {
      const detections: any[] = await this.detModel.call(image, imgWidth, imgHeight);
      // Detector output may be wrapped ({ bbox, ... }) or already a plain
      // box; wrapped entries are reduced to their four coordinates.
      personBoxes = detections.map((d: any) =>
        'bbox' in d
          ? { x1: d.bbox.x1, y1: d.bbox.y1, x2: d.bbox.x2, y2: d.bbox.y2 }
          : d
      );
    }

    return await this.poseModel.call(image, imgWidth, imgHeight, personBoxes);
  }
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Basic types for rtmlib-ts
|
|
3
|
+
* Based on rtmlib Python library
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/** A single scored 2D keypoint. */
export interface Keypoint {
  /** Horizontal coordinate. */
  x: number;
  /** Vertical coordinate. */
  y: number;
  /** Score attached to this keypoint. */
  score: number;
  /** Numeric identifier; presumably the keypoint's index in its skeleton — confirm with producers. */
  id: number;
}
|
|
12
|
+
|
|
13
|
+
/** Body keypoints of one person together with aggregate counters. */
export interface BodyResult {
  /** Keypoint slots; a slot may be null. */
  keypoints: (Keypoint | null)[];
  /** Aggregate score; presumably summed over the non-null keypoints — confirm with producers. */
  totalScore: number;
  /** Part count; presumably the number of non-null keypoints — confirm with producers. */
  totalParts: number;
}
|
|
18
|
+
|
|
19
|
+
/** Keypoints of one hand. */
export type HandResult = Array<Keypoint>;

/** Keypoints of one face. */
export type FaceResult = Array<Keypoint>;
|
|
21
|
+
|
|
22
|
+
/** Pose of one person, split into body, hands and face. */
export interface PoseResult {
  /** Body keypoints; always present. */
  body: BodyResult;
  /** Left-hand keypoints, or null when there are none. */
  leftHand: HandResult | null;
  /** Right-hand keypoints, or null when there are none. */
  rightHand: HandResult | null;
  /** Face keypoints, or null when there are none. */
  face: FaceResult | null;
}
|
|
28
|
+
|
|
29
|
+
/** Axis-aligned box described by two corner points, (x1, y1) and (x2, y2). */
export interface BBox {
  /** x of the first corner. */
  x1: number;
  /** y of the first corner. */
  y1: number;
  /** x of the second corner. */
  x2: number;
  /** y of the second corner. */
  y2: number;
}
|
|
35
|
+
|
|
36
|
+
/** One detector hit: a box plus its score and class. */
export interface Detection {
  /** Location of the detected object. */
  bbox: BBox;
  /** Score attached to the detection. */
  score: number;
  /** Numeric class identifier; the id-to-label mapping is defined by the detector. */
  classId: number;
}
|
|
41
|
+
|
|
42
|
+
/** Model locations and input sizes for one detector + pose model pairing. */
export interface ModelConfig {
  /** Location (path or URL) of the detector model. */
  det: string;
  /** Detector input size as a pair of numbers. */
  detInputSize: [number, number];
  /** Location (path or URL) of the pose model. */
  pose: string;
  /** Pose model input size as a pair of numbers; NOTE(review): confirm width/height order with the consuming model class. */
  poseInputSize: [number, number];
}
|
|
48
|
+
|
|
49
|
+
/** Names of the built-in model presets. */
export type ModeType =
  | 'performance'
  | 'lightweight'
  | 'balanced';

/** Names of the inference backends a model may be asked to use. */
export type BackendType =
  | 'opencv'
  | 'onnxruntime'
  | 'openvino'
  | 'wasm'
  | 'webgpu';

/**
 * Device selector. The listed literals document common values; because the
 * union also contains `string`, any string is accepted.
 */
export type DeviceType =
  | 'cpu'
  | 'cuda'
  | 'mps'
  | string;
|
|
53
|
+
|
|
54
|
+
/** Raw image buffer with its dimensions. */
export interface ImageData {
  /** Pixel bytes. */
  data: Uint8Array;
  /** Image width. */
  width: number;
  /** Image height. */
  height: number;
  /** Number of channels per pixel. */
  channels: number;
}
|
|
60
|
+
|
|
61
|
+
/** Alias of ImageData; the name signals RGB channel order but the type does not enforce it. */
export type RGBImage = ImageData;

/** Alias of ImageData; the name signals BGR channel order but the type does not enforce it. */
export type BGRImage = ImageData;
|