rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,890 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pose3DDetector - 3D Pose Estimation API
|
|
3
|
+
* Combines YOLOX detector with RTMW3D 3D pose model
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* ```typescript
|
|
7
|
+
* // Initialize with default models
|
|
8
|
+
* const detector = new Pose3DDetector();
|
|
9
|
+
* await detector.init();
|
|
10
|
+
*
|
|
11
|
+
* // From canvas
|
|
12
|
+
* const result = await detector.detectFromCanvas(canvas);
|
|
13
|
+
* console.log(result.keypoints[0][0]); // [x, y, z] - 3D coordinates
|
|
14
|
+
*
|
|
15
|
+
* // With custom models
|
|
16
|
+
* const detector2 = new Pose3DDetector({
|
|
17
|
+
* detModel: 'path/to/yolox.onnx',
|
|
18
|
+
* poseModel: 'path/to/rtmw3d.onnx',
|
|
19
|
+
* });
|
|
20
|
+
* ```
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import * as ort from 'onnxruntime-web';
|
|
24
|
+
import { getCachedModel, isModelCached } from '../core/modelCache';
|
|
25
|
+
import { Wholebody3DResult } from './wholebody3d';
|
|
26
|
+
|
|
27
|
+
// Configure ONNX Runtime Web
// NOTE(review): wasmPaths is pinned to 1.23.0 — keep in sync with the
// onnxruntime-web version declared in package.json, or the WASM binaries
// fetched from the CDN may mismatch the JS API.
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
ort.env.wasm.simd = true;   // enable SIMD-accelerated WASM kernels where supported
ort.env.wasm.proxy = false; // run inference on the calling thread (no worker proxy)
|
|
31
|
+
|
|
32
|
+
/**
 * Configuration options for Pose3DDetector.
 *
 * All fields are optional; unspecified values fall back to DEFAULT_CONFIG
 * (except `cache`, which the constructor forces to false when omitted,
 * because the 3D pose model is large).
 */
export interface Pose3DDetectorConfig {
  /** URL/path of the person-detection ONNX model (optional - uses default if not specified) */
  detModel?: string;
  /** URL/path of the RTMW3D 3D pose estimation ONNX model (optional - uses default if not specified) */
  poseModel?: string;
  /** Detection input size as [width, height] (default: [640, 640]) */
  detInputSize?: [number, number];
  /** Pose input size as [width, height] (default: [288, 384], i.e. a 384-high, 288-wide tensor) */
  poseInputSize?: [number, number];
  /** Detection confidence threshold in [0, 1] (default: 0.45) */
  detConfidence?: number;
  /** NMS IoU threshold in [0, 1] (default: 0.7) */
  nmsThreshold?: number;
  /** Pose keypoint confidence threshold in [0, 1] (default: 0.3) */
  poseConfidence?: number;
  /** ONNX Runtime execution backend (default: 'webgpu') */
  backend?: 'wasm' | 'webgpu';
  /** Enable persistent model caching (default here: false — see constructor) */
  cache?: boolean;
  /** Z-axis range in meters used when decoding depth (default: 2.1744869) */
  zRange?: number;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Single-person result with 3D keypoints.
 */
export interface Person3D {
  /** Bounding box in original-image pixel coordinates */
  bbox: {
    x1: number;
    y1: number;
    x2: number;
    y2: number;
    /** Detector confidence in [0, 1] */
    confidence: number;
  };
  /**
   * 3D keypoints as [x, y, z] (z in meters).
   * NOTE(review): documented as the 17 COCO-body points (see KEYPOINT_NAMES_3D),
   * but the default pose model is a whole-body RTMW3D export — confirm the
   * actual keypoint count against the model output.
   */
  keypoints: number[][];
  /** Per-keypoint confidence scores (0-1), parallel to `keypoints` */
  scores: number[];
  /** 2D projection of the keypoints in image coordinates */
  keypoints2d: number[][];
  /** Normalized SimCC coordinates as produced by the model heads */
  keypointsSimcc: number[][];
}
|
|
79
|
+
|
|
80
|
+
/**
 * Per-call timing/detection statistics, attached to results as `(result as any).stats`.
 */
export interface Pose3DStats {
  /** Number of people that passed detection and were pose-estimated */
  personCount: number;
  /** Person-detection inference time, rounded to whole ms */
  detTime: number;
  /** Total 3D pose estimation time across all people, rounded to whole ms */
  poseTime: number;
  /** End-to-end processing time, rounded to whole ms */
  totalTime: number;
}
|
|
93
|
+
|
|
94
|
+
/**
 * COCO17 body keypoint names, listed in model output order
 * (index i of this array labels keypoint i in a result).
 */
const KEYPOINT_NAMES_3D = [
  'nose',
  'left_eye',
  'right_eye',
  'left_ear',
  'right_ear',
  'left_shoulder',
  'right_shoulder',
  'left_elbow',
  'right_elbow',
  'left_wrist',
  'right_wrist',
  'left_hip',
  'right_hip',
  'left_knee',
  'right_knee',
  'left_ankle',
  'right_ankle',
];
|
|
116
|
+
|
|
117
|
+
/**
 * Default configuration - models are downloaded from HuggingFace on first init.
 */
const DEFAULT_CONFIG: Required<Pose3DDetectorConfig> = {
  // NOTE(review): the class doc says "YOLOX detector" but this default is a
  // YOLOv12 export; postprocessing assumes rows of [x1,y1,x2,y2,conf,class] —
  // confirm the exported model's output layout matches.
  detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
  poseModel: 'https://huggingface.co/Soykaf/RTMW3D-x/resolve/main/onnx/rtmw3d-x_8xb64_cocktail14-384x288-b0a0eab7_20240626.onnx',
  detInputSize: [640, 640],   // [width, height]
  poseInputSize: [288, 384], // [width=288, height=384] - creates tensor [1,3,384,288]
  detConfidence: 0.45,
  nmsThreshold: 0.7,
  poseConfidence: 0.3,
  backend: 'webgpu', // Default to WebGPU for better performance
  cache: true,       // overridden to false in the constructor when not set explicitly
  zRange: 2.1744869, // depth decode range in meters
};
|
|
132
|
+
|
|
133
|
+
export class Pose3DDetector {
|
|
134
|
+
  // Effective configuration (defaults merged with user overrides).
  private config: Required<Pose3DDetectorConfig>;
  // ONNX Runtime sessions; null until init() succeeds.
  private detSession: ort.InferenceSession | null = null;
  private poseSession: ort.InferenceSession | null = null;
  private initialized = false;
  // Ensures pose-model output names/shapes are logged only once.
  private outputNamesLogged = false;

  // Pre-allocated buffers for better performance
  private canvas: HTMLCanvasElement | null = null;      // detection letterbox canvas
  private ctx: CanvasRenderingContext2D | null = null;
  private poseCanvas: HTMLCanvasElement | null = null;  // per-person crop canvas
  private poseCtx: CanvasRenderingContext2D | null = null;
  private poseTensorBuffer: Float32Array | null = null; // CHW float buffer for the pose input
  private detInputSize: [number, number] = [640, 640];  // [width, height]
  private poseInputSize: [number, number] = [288, 384]; // [width=288, height=384]

  // Pre-allocated source canvas for pose cropping (avoid recreation)
  private srcPoseCanvas: HTMLCanvasElement | null = null;
  private srcPoseCtx: CanvasRenderingContext2D | null = null;
|
|
152
|
+
|
|
153
|
+
constructor(config: Pose3DDetectorConfig = {}) {
|
|
154
|
+
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
155
|
+
// Disable caching for large 3D models by default
|
|
156
|
+
if (config.cache === undefined) {
|
|
157
|
+
this.config.cache = false;
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
/**
|
|
162
|
+
* Initialize both detection and 3D pose models
|
|
163
|
+
*/
|
|
164
|
+
async init(): Promise<void> {
|
|
165
|
+
if (this.initialized) return;
|
|
166
|
+
|
|
167
|
+
try {
|
|
168
|
+
// Load detection model
|
|
169
|
+
console.log(`[Pose3DDetector] Loading detection model from: ${this.config.detModel}`);
|
|
170
|
+
let detBuffer: ArrayBuffer;
|
|
171
|
+
|
|
172
|
+
if (this.config.cache) {
|
|
173
|
+
const detCached = await isModelCached(this.config.detModel);
|
|
174
|
+
console.log(`[Pose3DDetector] Det model cache ${detCached ? 'hit' : 'miss'}`);
|
|
175
|
+
detBuffer = await getCachedModel(this.config.detModel);
|
|
176
|
+
} else {
|
|
177
|
+
const detResponse = await fetch(this.config.detModel);
|
|
178
|
+
if (!detResponse.ok) {
|
|
179
|
+
throw new Error(`Failed to fetch det model: HTTP ${detResponse.status}`);
|
|
180
|
+
}
|
|
181
|
+
detBuffer = await detResponse.arrayBuffer();
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
this.detSession = await ort.InferenceSession.create(detBuffer, {
|
|
185
|
+
executionProviders: [this.config.backend],
|
|
186
|
+
graphOptimizationLevel: 'all',
|
|
187
|
+
});
|
|
188
|
+
console.log(`[Pose3DDetector] Detection model loaded, size: ${(detBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
|
|
189
|
+
|
|
190
|
+
// Load 3D pose model
|
|
191
|
+
console.log(`[Pose3DDetector] Loading 3D pose model from: ${this.config.poseModel}`);
|
|
192
|
+
let poseBuffer: ArrayBuffer;
|
|
193
|
+
|
|
194
|
+
if (this.config.cache) {
|
|
195
|
+
const poseCached = await isModelCached(this.config.poseModel);
|
|
196
|
+
console.log(`[Pose3DDetector] 3D Pose model cache ${poseCached ? 'hit' : 'miss'}`);
|
|
197
|
+
poseBuffer = await getCachedModel(this.config.poseModel);
|
|
198
|
+
} else {
|
|
199
|
+
const poseResponse = await fetch(this.config.poseModel);
|
|
200
|
+
if (!poseResponse.ok) {
|
|
201
|
+
throw new Error(`Failed to fetch pose model: HTTP ${poseResponse.status}`);
|
|
202
|
+
}
|
|
203
|
+
poseBuffer = await poseResponse.arrayBuffer();
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
this.poseSession = await ort.InferenceSession.create(poseBuffer, {
|
|
207
|
+
executionProviders: [this.config.backend],
|
|
208
|
+
graphOptimizationLevel: 'all',
|
|
209
|
+
});
|
|
210
|
+
console.log(`[Pose3DDetector] 3D Pose model loaded, size: ${(poseBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
|
|
211
|
+
|
|
212
|
+
// Pre-allocate resources
|
|
213
|
+
const [detW, detH] = this.config.detInputSize;
|
|
214
|
+
this.detInputSize = [detW, detH];
|
|
215
|
+
|
|
216
|
+
const [poseW, poseH] = this.config.poseInputSize;
|
|
217
|
+
this.poseInputSize = [poseW, poseH];
|
|
218
|
+
|
|
219
|
+
// Main canvas for detection
|
|
220
|
+
this.canvas = document.createElement('canvas');
|
|
221
|
+
this.canvas.width = detW;
|
|
222
|
+
this.canvas.height = detH;
|
|
223
|
+
this.ctx = this.canvas.getContext('2d', {
|
|
224
|
+
willReadFrequently: true,
|
|
225
|
+
alpha: false
|
|
226
|
+
})!;
|
|
227
|
+
|
|
228
|
+
// Pose crop canvas
|
|
229
|
+
this.poseCanvas = document.createElement('canvas');
|
|
230
|
+
this.poseCanvas.width = poseW;
|
|
231
|
+
this.poseCanvas.height = poseH;
|
|
232
|
+
this.poseCtx = this.poseCanvas.getContext('2d', {
|
|
233
|
+
willReadFrequently: true,
|
|
234
|
+
alpha: false
|
|
235
|
+
})!;
|
|
236
|
+
|
|
237
|
+
// Pre-allocate pose tensor buffer
|
|
238
|
+
this.poseTensorBuffer = new Float32Array(3 * poseW * poseH);
|
|
239
|
+
|
|
240
|
+
// Source canvas will be created on first use (dynamic size)
|
|
241
|
+
this.srcPoseCanvas = null;
|
|
242
|
+
this.srcPoseCtx = null;
|
|
243
|
+
|
|
244
|
+
this.initialized = true;
|
|
245
|
+
console.log(`[Pose3DDetector] ✅ Initialized (det:${detW}x${detH}, pose:${poseW}x${poseH}, 3D)`);
|
|
246
|
+
} catch (error) {
|
|
247
|
+
console.error('[Pose3DDetector] ❌ Initialization failed:', error);
|
|
248
|
+
throw error;
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
|
|
253
|
+
* Detect 3D poses from HTMLCanvasElement
|
|
254
|
+
*/
|
|
255
|
+
async detectFromCanvas(canvas: HTMLCanvasElement): Promise<Wholebody3DResult> {
|
|
256
|
+
const ctx = canvas.getContext('2d');
|
|
257
|
+
if (!ctx) {
|
|
258
|
+
throw new Error('Could not get 2D context from canvas');
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
262
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
/**
|
|
266
|
+
* Detect 3D poses from HTMLVideoElement
|
|
267
|
+
*/
|
|
268
|
+
async detectFromVideo(
|
|
269
|
+
video: HTMLVideoElement,
|
|
270
|
+
targetCanvas?: HTMLCanvasElement
|
|
271
|
+
): Promise<Wholebody3DResult> {
|
|
272
|
+
if (video.readyState < 2) {
|
|
273
|
+
throw new Error('Video not ready. Ensure video is loaded and playing.');
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
const canvas = targetCanvas || document.createElement('canvas');
|
|
277
|
+
canvas.width = video.videoWidth;
|
|
278
|
+
canvas.height = video.videoHeight;
|
|
279
|
+
|
|
280
|
+
const ctx = canvas.getContext('2d');
|
|
281
|
+
if (!ctx) {
|
|
282
|
+
throw new Error('Could not get 2D context from canvas');
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
|
286
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
287
|
+
|
|
288
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
/**
|
|
292
|
+
* Detect 3D poses from HTMLImageElement
|
|
293
|
+
*/
|
|
294
|
+
async detectFromImage(
|
|
295
|
+
image: HTMLImageElement,
|
|
296
|
+
targetCanvas?: HTMLCanvasElement
|
|
297
|
+
): Promise<Wholebody3DResult> {
|
|
298
|
+
if (!image.complete || !image.naturalWidth) {
|
|
299
|
+
throw new Error('Image not loaded. Ensure image is fully loaded.');
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
const canvas = targetCanvas || document.createElement('canvas');
|
|
303
|
+
canvas.width = image.naturalWidth;
|
|
304
|
+
canvas.height = image.naturalHeight;
|
|
305
|
+
|
|
306
|
+
const ctx = canvas.getContext('2d');
|
|
307
|
+
if (!ctx) {
|
|
308
|
+
throw new Error('Could not get 2D context from canvas');
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
ctx.drawImage(image, 0, 0);
|
|
312
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
313
|
+
|
|
314
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
315
|
+
}
|
|
316
|
+
|
|
317
|
+
/**
|
|
318
|
+
* Detect 3D poses from ImageBitmap
|
|
319
|
+
*/
|
|
320
|
+
async detectFromBitmap(
|
|
321
|
+
bitmap: ImageBitmap,
|
|
322
|
+
targetCanvas?: HTMLCanvasElement
|
|
323
|
+
): Promise<Wholebody3DResult> {
|
|
324
|
+
const canvas = targetCanvas || document.createElement('canvas');
|
|
325
|
+
canvas.width = bitmap.width;
|
|
326
|
+
canvas.height = bitmap.height;
|
|
327
|
+
|
|
328
|
+
const ctx = canvas.getContext('2d');
|
|
329
|
+
if (!ctx) {
|
|
330
|
+
throw new Error('Could not get 2D context from canvas');
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
ctx.drawImage(bitmap, 0, 0);
|
|
334
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
335
|
+
|
|
336
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* Detect 3D poses from File
|
|
341
|
+
*/
|
|
342
|
+
async detectFromFile(
|
|
343
|
+
file: File,
|
|
344
|
+
targetCanvas?: HTMLCanvasElement
|
|
345
|
+
): Promise<Wholebody3DResult> {
|
|
346
|
+
return new Promise((resolve, reject) => {
|
|
347
|
+
const img = new Image();
|
|
348
|
+
img.onload = async () => {
|
|
349
|
+
try {
|
|
350
|
+
const results = await this.detectFromImage(img, targetCanvas);
|
|
351
|
+
resolve(results);
|
|
352
|
+
} catch (error) {
|
|
353
|
+
reject(error);
|
|
354
|
+
}
|
|
355
|
+
};
|
|
356
|
+
img.onerror = () => reject(new Error('Failed to load image from file'));
|
|
357
|
+
img.src = URL.createObjectURL(file);
|
|
358
|
+
});
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
/**
|
|
362
|
+
* Detect 3D poses from Blob
|
|
363
|
+
*/
|
|
364
|
+
async detectFromBlob(
|
|
365
|
+
blob: Blob,
|
|
366
|
+
targetCanvas?: HTMLCanvasElement
|
|
367
|
+
): Promise<Wholebody3DResult> {
|
|
368
|
+
const bitmap = await createImageBitmap(blob);
|
|
369
|
+
const results = await this.detectFromBitmap(bitmap, targetCanvas);
|
|
370
|
+
bitmap.close();
|
|
371
|
+
return results;
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
/**
|
|
375
|
+
* Detect 3D poses from raw image data
|
|
376
|
+
*/
|
|
377
|
+
async detect(
|
|
378
|
+
imageData: Uint8Array,
|
|
379
|
+
width: number,
|
|
380
|
+
height: number
|
|
381
|
+
): Promise<Wholebody3DResult> {
|
|
382
|
+
if (!this.initialized) {
|
|
383
|
+
await this.init();
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
const startTime = performance.now();
|
|
387
|
+
|
|
388
|
+
// Step 1: Detect people
|
|
389
|
+
const detStart = performance.now();
|
|
390
|
+
const bboxes = await this.detectPeople(imageData, width, height);
|
|
391
|
+
const detTime = performance.now() - detStart;
|
|
392
|
+
|
|
393
|
+
// Step 2: Estimate 3D poses for each person
|
|
394
|
+
const poseStart = performance.now();
|
|
395
|
+
const allKeypoints: number[][][] = [];
|
|
396
|
+
const allScores: number[][] = [];
|
|
397
|
+
const allKeypointsSimcc: number[][][] = [];
|
|
398
|
+
const allKeypoints2d: number[][][] = [];
|
|
399
|
+
|
|
400
|
+
// Reset source canvas for new image (will be recreated on first bbox)
|
|
401
|
+
this.srcPoseCanvas = null;
|
|
402
|
+
this.srcPoseCtx = null;
|
|
403
|
+
|
|
404
|
+
for (const bbox of bboxes) {
|
|
405
|
+
const poseResult = await this.estimatePose3D(imageData, width, height, bbox);
|
|
406
|
+
allKeypoints.push(poseResult.keypoints);
|
|
407
|
+
allScores.push(poseResult.scores);
|
|
408
|
+
allKeypointsSimcc.push(poseResult.keypointsSimcc);
|
|
409
|
+
allKeypoints2d.push(poseResult.keypoints2d);
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
const poseTime = performance.now() - poseStart;
|
|
413
|
+
const totalTime = performance.now() - startTime;
|
|
414
|
+
|
|
415
|
+
// Attach stats
|
|
416
|
+
const result: Wholebody3DResult = {
|
|
417
|
+
keypoints: allKeypoints,
|
|
418
|
+
scores: allScores,
|
|
419
|
+
keypointsSimcc: allKeypointsSimcc,
|
|
420
|
+
keypoints2d: allKeypoints2d,
|
|
421
|
+
};
|
|
422
|
+
|
|
423
|
+
(result as any).stats = {
|
|
424
|
+
personCount: allKeypoints.length,
|
|
425
|
+
detTime: Math.round(detTime),
|
|
426
|
+
poseTime: Math.round(poseTime),
|
|
427
|
+
totalTime: Math.round(totalTime),
|
|
428
|
+
} as Pose3DStats;
|
|
429
|
+
|
|
430
|
+
return result;
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
/**
|
|
434
|
+
* Detect people using YOLOX
|
|
435
|
+
*/
|
|
436
|
+
private async detectPeople(
|
|
437
|
+
imageData: Uint8Array,
|
|
438
|
+
width: number,
|
|
439
|
+
height: number
|
|
440
|
+
): Promise<Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }>> {
|
|
441
|
+
const [inputH, inputW] = this.config.detInputSize;
|
|
442
|
+
|
|
443
|
+
const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocessYOLO(
|
|
444
|
+
imageData,
|
|
445
|
+
width,
|
|
446
|
+
height,
|
|
447
|
+
[inputW, inputH]
|
|
448
|
+
);
|
|
449
|
+
|
|
450
|
+
const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
|
|
451
|
+
const inputName = this.detSession!.inputNames[0];
|
|
452
|
+
|
|
453
|
+
const feeds: Record<string, ort.Tensor> = {};
|
|
454
|
+
feeds[inputName] = inputTensor;
|
|
455
|
+
|
|
456
|
+
const results = await this.detSession!.run(feeds);
|
|
457
|
+
const output = results[this.detSession!.outputNames[0]];
|
|
458
|
+
|
|
459
|
+
return this.postprocessYOLO(
|
|
460
|
+
output.data as Float32Array,
|
|
461
|
+
output.dims[1],
|
|
462
|
+
width,
|
|
463
|
+
height,
|
|
464
|
+
paddingX,
|
|
465
|
+
paddingY,
|
|
466
|
+
scaleX,
|
|
467
|
+
scaleY
|
|
468
|
+
);
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
/**
|
|
472
|
+
* Estimate 3D pose for a single person
|
|
473
|
+
*/
|
|
474
|
+
private async estimatePose3D(
|
|
475
|
+
imageData: Uint8Array,
|
|
476
|
+
imgWidth: number,
|
|
477
|
+
imgHeight: number,
|
|
478
|
+
bbox: { x1: number; y1: number; x2: number; y2: number; confidence: number }
|
|
479
|
+
): Promise<{
|
|
480
|
+
keypoints: number[][];
|
|
481
|
+
scores: number[];
|
|
482
|
+
keypointsSimcc: number[][];
|
|
483
|
+
keypoints2d: number[][];
|
|
484
|
+
}> {
|
|
485
|
+
const [inputW, inputH] = this.config.poseInputSize;
|
|
486
|
+
|
|
487
|
+
const { tensor, center, scale } = this.preprocessPose(
|
|
488
|
+
imageData,
|
|
489
|
+
imgWidth,
|
|
490
|
+
imgHeight,
|
|
491
|
+
bbox,
|
|
492
|
+
[inputW, inputH]
|
|
493
|
+
);
|
|
494
|
+
|
|
495
|
+
const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
|
|
496
|
+
|
|
497
|
+
// Use dynamic input name
|
|
498
|
+
const inputName = this.poseSession!.inputNames[0];
|
|
499
|
+
const feeds: Record<string, ort.Tensor> = {};
|
|
500
|
+
feeds[inputName] = inputTensor;
|
|
501
|
+
|
|
502
|
+
const results = await this.poseSession!.run(feeds);
|
|
503
|
+
|
|
504
|
+
// Debug output names on first run only
|
|
505
|
+
if (!this.outputNamesLogged) {
|
|
506
|
+
console.log('[Pose3DDetector] Output names:', this.poseSession!.outputNames);
|
|
507
|
+
console.log('[Pose3DDetector] Output shapes:', this.poseSession!.outputNames.map(k => results[k].dims));
|
|
508
|
+
this.outputNamesLogged = true;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
// Get output tensors using session's outputNames
|
|
512
|
+
// Model input is [width=288, height=384], so:
|
|
513
|
+
// X output has dim 576 (288*2), Y output has dim 768 (384*2)
|
|
514
|
+
const outputNames = this.poseSession!.outputNames;
|
|
515
|
+
let simccX: ort.Tensor, simccY: ort.Tensor, simccZ: ort.Tensor;
|
|
516
|
+
|
|
517
|
+
// Find outputs by shape
|
|
518
|
+
const shape0 = results[outputNames[0]].dims[2];
|
|
519
|
+
const shape1 = results[outputNames[1]].dims[2];
|
|
520
|
+
const shape2 = results[outputNames[2]].dims[2];
|
|
521
|
+
|
|
522
|
+
// X has smaller shape (576), Y has larger (768)
|
|
523
|
+
if (shape0 === 576) simccX = results[outputNames[0]];
|
|
524
|
+
else if (shape1 === 576) simccX = results[outputNames[1]];
|
|
525
|
+
else simccX = results[outputNames[2]];
|
|
526
|
+
|
|
527
|
+
if (shape0 === 768) simccY = results[outputNames[0]];
|
|
528
|
+
else if (shape1 === 768) simccY = results[outputNames[1]];
|
|
529
|
+
else simccY = results[outputNames[2]];
|
|
530
|
+
|
|
531
|
+
// Z is the remaining one
|
|
532
|
+
const usedIndices = [
|
|
533
|
+
simccX === results[outputNames[0]] ? 0 : simccX === results[outputNames[1]] ? 1 : 2,
|
|
534
|
+
simccY === results[outputNames[0]] ? 0 : simccY === results[outputNames[1]] ? 1 : 2,
|
|
535
|
+
];
|
|
536
|
+
simccZ = results[outputNames[3 - usedIndices[0] - usedIndices[1]]];
|
|
537
|
+
|
|
538
|
+
return this.postprocessPose3D(
|
|
539
|
+
simccX.data as Float32Array,
|
|
540
|
+
simccY.data as Float32Array,
|
|
541
|
+
simccZ.data as Float32Array,
|
|
542
|
+
simccX.dims as number[],
|
|
543
|
+
simccY.dims as number[],
|
|
544
|
+
simccZ.dims as number[],
|
|
545
|
+
center,
|
|
546
|
+
scale,
|
|
547
|
+
imgWidth,
|
|
548
|
+
imgHeight
|
|
549
|
+
);
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
private preprocessYOLO(
|
|
553
|
+
imageData: Uint8Array,
|
|
554
|
+
imgWidth: number,
|
|
555
|
+
imgHeight: number,
|
|
556
|
+
inputSize: [number, number]
|
|
557
|
+
): {
|
|
558
|
+
tensor: Float32Array;
|
|
559
|
+
paddingX: number;
|
|
560
|
+
paddingY: number;
|
|
561
|
+
scaleX: number;
|
|
562
|
+
scaleY: number;
|
|
563
|
+
} {
|
|
564
|
+
const [inputW, inputH] = inputSize;
|
|
565
|
+
|
|
566
|
+
if (!this.canvas || !this.ctx) {
|
|
567
|
+
this.canvas = document.createElement('canvas');
|
|
568
|
+
this.canvas.width = inputW;
|
|
569
|
+
this.canvas.height = inputH;
|
|
570
|
+
this.ctx = this.canvas.getContext('2d', { willReadFrequently: true, alpha: false })!;
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
const ctx = this.ctx;
|
|
574
|
+
ctx.fillStyle = '#000000';
|
|
575
|
+
ctx.fillRect(0, 0, inputW, inputH);
|
|
576
|
+
|
|
577
|
+
const aspectRatio = imgWidth / imgHeight;
|
|
578
|
+
const targetAspectRatio = inputW / inputH;
|
|
579
|
+
|
|
580
|
+
let drawWidth: number, drawHeight: number, offsetX: number, offsetY: number;
|
|
581
|
+
|
|
582
|
+
if (aspectRatio > targetAspectRatio) {
|
|
583
|
+
drawWidth = inputW;
|
|
584
|
+
drawHeight = Math.floor(inputW / aspectRatio);
|
|
585
|
+
offsetX = 0;
|
|
586
|
+
offsetY = Math.floor((inputH - drawHeight) / 2);
|
|
587
|
+
} else {
|
|
588
|
+
drawHeight = inputH;
|
|
589
|
+
drawWidth = Math.floor(inputH * aspectRatio);
|
|
590
|
+
offsetX = Math.floor((inputW - drawWidth) / 2);
|
|
591
|
+
offsetY = 0;
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
const srcCanvas = document.createElement('canvas');
|
|
595
|
+
const srcCtx = srcCanvas.getContext('2d')!;
|
|
596
|
+
srcCanvas.width = imgWidth;
|
|
597
|
+
srcCanvas.height = imgHeight;
|
|
598
|
+
|
|
599
|
+
const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
|
|
600
|
+
srcImageData.data.set(imageData);
|
|
601
|
+
srcCtx.putImageData(srcImageData, 0, 0);
|
|
602
|
+
|
|
603
|
+
ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
|
|
604
|
+
|
|
605
|
+
const paddedData = ctx.getImageData(0, 0, inputW, inputH);
|
|
606
|
+
const tensor = new Float32Array(inputW * inputH * 3);
|
|
607
|
+
|
|
608
|
+
for (let i = 0; i < paddedData.data.length; i += 4) {
|
|
609
|
+
const pixelIdx = i / 4;
|
|
610
|
+
tensor[pixelIdx] = paddedData.data[i] / 255;
|
|
611
|
+
tensor[pixelIdx + inputW * inputH] = paddedData.data[i + 1] / 255;
|
|
612
|
+
tensor[pixelIdx + 2 * inputW * inputH] = paddedData.data[i + 2] / 255;
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
const scaleX = imgWidth / drawWidth;
|
|
616
|
+
const scaleY = imgHeight / drawHeight;
|
|
617
|
+
|
|
618
|
+
return { tensor, paddingX: offsetX, paddingY: offsetY, scaleX, scaleY };
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
private postprocessYOLO(
|
|
622
|
+
output: Float32Array,
|
|
623
|
+
numDetections: number,
|
|
624
|
+
imgWidth: number,
|
|
625
|
+
imgHeight: number,
|
|
626
|
+
paddingX: number,
|
|
627
|
+
paddingY: number,
|
|
628
|
+
scaleX: number,
|
|
629
|
+
scaleY: number
|
|
630
|
+
): Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }> {
|
|
631
|
+
const detections: Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }> = [];
|
|
632
|
+
|
|
633
|
+
for (let i = 0; i < numDetections; i++) {
|
|
634
|
+
const idx = i * 6;
|
|
635
|
+
const x1 = output[idx];
|
|
636
|
+
const y1 = output[idx + 1];
|
|
637
|
+
const x2 = output[idx + 2];
|
|
638
|
+
const y2 = output[idx + 3];
|
|
639
|
+
const confidence = output[idx + 4];
|
|
640
|
+
const classId = Math.round(output[idx + 5]);
|
|
641
|
+
|
|
642
|
+
if (confidence < this.config.detConfidence || classId !== 0) continue;
|
|
643
|
+
|
|
644
|
+
const tx1 = (x1 - paddingX) * scaleX;
|
|
645
|
+
const ty1 = (y1 - paddingY) * scaleY;
|
|
646
|
+
const tx2 = (x2 - paddingX) * scaleX;
|
|
647
|
+
const ty2 = (y2 - paddingY) * scaleY;
|
|
648
|
+
|
|
649
|
+
detections.push({
|
|
650
|
+
x1: Math.max(0, tx1),
|
|
651
|
+
y1: Math.max(0, ty1),
|
|
652
|
+
x2: Math.min(imgWidth, tx2),
|
|
653
|
+
y2: Math.min(imgHeight, ty2),
|
|
654
|
+
confidence,
|
|
655
|
+
});
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
return this.applyNMS(detections, this.config.nmsThreshold);
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
/**
 * Crops one detected person out of the frame and converts the crop into a
 * normalized CHW float tensor for the pose model.
 *
 * Mirrors the Python pipeline: bbox -> (center, scale) with 1.25 padding
 * (bbox_xyxy2cs), aspect-ratio fix-up (top_down_affine), then a canvas
 * drawImage crop/resize and per-channel mean/std normalization.
 *
 * Returns the filled tensor plus the (center, scale) pair needed to map
 * model-space keypoints back into original image coordinates.
 *
 * NOTE(review): `this.poseTensorBuffer!` must be pre-allocated elsewhere
 * with capacity >= 3 * inputW * inputH — confirm against the initializer.
 */
private preprocessPose(
  imageData: Uint8Array,
  imgWidth: number,
  imgHeight: number,
  bbox: { x1: number; y1: number; x2: number; y2: number; confidence: number },
  inputSize: [number, number]
): { tensor: Float32Array; center: [number, number]; scale: [number, number] } {
  const [inputW, inputH] = inputSize;
  const bboxWidth = bbox.x2 - bbox.x1;
  const bboxHeight = bbox.y2 - bbox.y1;

  // Center of bbox (same as Python)
  const center: [number, number] = [
    bbox.x1 + bboxWidth / 2,
    bbox.y1 + bboxHeight / 2,
  ];

  // Scale with padding (same as Python bbox_xyxy2cs with padding=1.25)
  let scaleW = bboxWidth * 1.25;
  let scaleH = bboxHeight * 1.25;

  // Adjust scale to match model aspect ratio (same as top_down_affine):
  // grow the shorter side so the crop has the model's W:H ratio without
  // distorting the person.
  const modelAspectRatio = inputW / inputH;
  const bboxAspectRatio = scaleW / scaleH;

  if (bboxAspectRatio > modelAspectRatio) {
    scaleH = scaleW / modelAspectRatio;
  } else {
    scaleW = scaleH * modelAspectRatio;
  }

  const scale: [number, number] = [scaleW, scaleH];

  // Reuse pose canvas (lazily created once; sized to the model input).
  if (!this.poseCanvas || !this.poseCtx) {
    this.poseCanvas = document.createElement('canvas');
    this.poseCanvas.width = inputW;
    this.poseCanvas.height = inputH;
    this.poseCtx = this.poseCanvas.getContext('2d', {
      willReadFrequently: true,
      alpha: false
    })!;
  }

  // Reuse source canvas for original image (avoid recreation per bbox)
  // NOTE(review): the source pixels are uploaded only when this canvas is
  // first created — if this method is called with a NEW frame's imageData
  // while srcPoseCanvas already exists, the stale first frame is cropped.
  // Presumably a caller resets srcPoseCanvas/srcPoseCtx per frame; verify.
  if (!this.srcPoseCanvas || !this.srcPoseCtx) {
    this.srcPoseCanvas = document.createElement('canvas');
    this.srcPoseCanvas.width = imgWidth;
    this.srcPoseCanvas.height = imgHeight;
    this.srcPoseCtx = this.srcPoseCanvas.getContext('2d', {
      willReadFrequently: true,
      alpha: false
    })!;
    // Copy image data once
    const srcImageData = this.srcPoseCtx.createImageData(imgWidth, imgHeight);
    srcImageData.data.set(imageData);
    this.srcPoseCtx.putImageData(srcImageData, 0, 0);
  }

  const ctx = this.poseCtx;
  ctx.clearRect(0, 0, inputW, inputH);

  // Crop and resize using drawImage (single GPU operation)
  const srcX = center[0] - scaleW / 2;
  const srcY = center[1] - scaleH / 2;
  ctx.drawImage(this.srcPoseCanvas, srcX, srcY, scaleW, scaleH, 0, 0, inputW, inputH);

  const croppedData = ctx.getImageData(0, 0, inputW, inputH);
  const tensor = this.poseTensorBuffer!;
  const len = croppedData.data.length;
  const planeSize = inputW * inputH;

  // Normalization constants (ImageNet-style per-channel mean / 1/std)
  const mean0 = 123.675, mean1 = 116.28, mean2 = 103.53;
  const stdInv0 = 1 / 58.395, stdInv1 = 1 / 57.12, stdInv2 = 1 / 57.375;

  // Optimized normalization loop - process 4 pixels at once (SIMD-like).
  // Writes planar CHW layout: R plane, then G at +planeSize, B at +2*planeSize.
  // NOTE(review): the 16-byte stride assumes inputW*inputH is divisible by 4
  // (true for typical pose inputs like 192x256) — confirm for all configs.
  for (let i = 0; i < len; i += 16) {
    const p1 = i / 4, p2 = p1 + 1, p3 = p1 + 2, p4 = p1 + 3;

    // R channel
    tensor[p1] = (croppedData.data[i] - mean0) * stdInv0;
    tensor[p2] = (croppedData.data[i + 4] - mean0) * stdInv0;
    tensor[p3] = (croppedData.data[i + 8] - mean0) * stdInv0;
    tensor[p4] = (croppedData.data[i + 12] - mean0) * stdInv0;

    // G channel
    tensor[p1 + planeSize] = (croppedData.data[i + 1] - mean1) * stdInv1;
    tensor[p2 + planeSize] = (croppedData.data[i + 5] - mean1) * stdInv1;
    tensor[p3 + planeSize] = (croppedData.data[i + 9] - mean1) * stdInv1;
    tensor[p4 + planeSize] = (croppedData.data[i + 13] - mean1) * stdInv1;

    // B channel
    tensor[p1 + planeSize * 2] = (croppedData.data[i + 2] - mean2) * stdInv2;
    tensor[p2 + planeSize * 2] = (croppedData.data[i + 6] - mean2) * stdInv2;
    tensor[p3 + planeSize * 2] = (croppedData.data[i + 10] - mean2) * stdInv2;
    tensor[p4 + planeSize * 2] = (croppedData.data[i + 14] - mean2) * stdInv2;
  }

  return { tensor, center, scale };
}
|
|
762
|
+
|
|
763
|
+
/**
 * Decodes RTMPose3D SimCC logits into keypoints.
 *
 * For each keypoint it takes the argmax of the X, Y, and Z logit rows,
 * normalizes the peak indices to [0, 1], and produces:
 * - `keypoints`: model-space 3D coords (x, y in [-1, 1]; z in metric range
 *   scaled by `config.zRange`),
 * - `keypointsSimcc`: the raw normalized (x, y, z) peaks,
 * - `keypoints2d`: 2D coords projected back into the original image via the
 *   crop's (center, scale), clamped to the image bounds,
 * - `scores`: per-keypoint confidence.
 */
private postprocessPose3D(
  simccX: Float32Array,
  simccY: Float32Array,
  simccZ: Float32Array,
  shapeX: number[],
  shapeY: number[],
  shapeZ: number[],
  center: [number, number],
  scale: [number, number],
  imgWidth: number,
  imgHeight: number
): {
  keypoints: number[][];
  scores: number[];
  keypointsSimcc: number[][];
  keypoints2d: number[][];
} {
  const numKeypoints = shapeX[1];
  const binsX = shapeX[2];
  const binsY = shapeY[2];
  const binsZ = shapeZ[2];

  // Peak (index, value) of one keypoint's logit row.
  const peakOf = (logits: Float32Array, offset: number, bins: number): [number, number] => {
    let bestIdx = 0;
    let bestVal = -Infinity;
    for (let b = 0; b < bins; b++) {
      const v = logits[offset + b];
      if (v > bestVal) {
        bestVal = v;
        bestIdx = b;
      }
    }
    return [bestIdx, bestVal];
  };

  const keypoints: number[][] = [];
  const scores: number[] = [];
  const keypointsSimcc: number[][] = [];
  const keypoints2d: number[][] = [];

  for (let k = 0; k < numKeypoints; k++) {
    const [ix, vx] = peakOf(simccX, k * binsX, binsX);
    const [iy, vy] = peakOf(simccY, k * binsY, binsY);
    const [iz] = peakOf(simccZ, k * binsZ, binsZ);

    // NOTE(review): the Python rtmlib reference scores a keypoint with the
    // SMALLER of the X/Y peak values; this port takes the larger — confirm
    // which convention is intended before relying on absolute scores.
    const score = Math.max(vx, vy);

    // Peak bin indices normalized to [0, 1] along each axis.
    const nx = ix / binsX;
    const ny = iy / binsY;
    const nz = iz / binsZ;

    // Model-space 3D coordinates: x/y mapped to [-1, 1], z to metric range.
    keypoints.push([(nx - 0.5) * 2.0, (ny - 0.5) * 2.0, (nz - 0.5) * this.config.zRange * 2]);
    keypointsSimcc.push([nx, ny, nz]);

    // Project normalized coords back through the crop into image space:
    // kpt = center - scale/2 + norm * scale (same as in rtmpose3d.ts).
    const px = nx * scale[0] + center[0] - 0.5 * scale[0];
    const py = ny * scale[1] + center[1] - 0.5 * scale[1];

    // Clamp to image bounds.
    keypoints2d.push([
      Math.max(0, Math.min(imgWidth, px)),
      Math.max(0, Math.min(imgHeight, py)),
    ]);

    scores.push(score);
  }

  return { keypoints, scores, keypointsSimcc, keypoints2d };
}
|
|
841
|
+
|
|
842
|
+
/**
 * Greedy non-maximum suppression.
 *
 * Boxes are visited in descending confidence order; each kept box suppresses
 * every remaining box whose IoU with it exceeds `iouThreshold`.
 *
 * Fix: the previous implementation sorted the caller's array in place,
 * silently reordering it as a side effect. We now sort a shallow copy, so
 * the input is left untouched; the returned selection is unchanged.
 *
 * @param detections candidate boxes with confidences (not mutated)
 * @param iouThreshold IoU above which a lower-confidence box is dropped
 * @returns the surviving boxes, highest confidence first
 */
private applyNMS(
  detections: Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }>,
  iouThreshold: number
): typeof detections {
  if (detections.length === 0) return [];

  // Sort a copy so the caller's array is not reordered as a side effect.
  const sorted = [...detections].sort((a, b) => b.confidence - a.confidence);

  const selected: typeof detections = [];
  const suppressed = new Set<number>();

  for (let i = 0; i < sorted.length; i++) {
    if (suppressed.has(i)) continue;

    selected.push(sorted[i]);

    // Suppress all remaining boxes that overlap the kept box too much.
    for (let j = i + 1; j < sorted.length; j++) {
      if (suppressed.has(j)) continue;

      if (this.calculateIoU(sorted[i], sorted[j]) > iouThreshold) {
        suppressed.add(j);
      }
    }
  }

  return selected;
}
|
|
871
|
+
|
|
872
|
+
/**
 * Intersection-over-Union of two axis-aligned boxes in (x1, y1, x2, y2)
 * corner form. Returns 0 when the boxes do not overlap.
 */
private calculateIoU(
  box1: { x1: number; y1: number; x2: number; y2: number },
  box2: { x1: number; y1: number; x2: number; y2: number }
): number {
  // Corners of the intersection rectangle.
  const interLeft = Math.max(box1.x1, box2.x1);
  const interTop = Math.max(box1.y1, box2.y1);
  const interRight = Math.min(box1.x2, box2.x2);
  const interBottom = Math.min(box1.y2, box2.y2);

  const interW = interRight - interLeft;
  const interH = interBottom - interTop;

  // Degenerate or empty intersection.
  if (interW <= 0 || interH <= 0) return 0;

  const interArea = interW * interH;
  const unionArea =
    (box1.x2 - box1.x1) * (box1.y2 - box1.y1) +
    (box2.x2 - box2.x1) * (box2.y2 - box2.y1) -
    interArea;

  return interArea / unionArea;
}
|
|
890
|
+
}
|