rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,892 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PoseDetector - Unified API for person detection and pose estimation
|
|
3
|
+
* Combines YOLO12 detector with RTMW pose model in a single optimized interface
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* ```typescript
|
|
7
|
+
* // Initialize with default models (from HuggingFace)
|
|
8
|
+
* const detector = new PoseDetector();
|
|
9
|
+
* await detector.init();
|
|
10
|
+
*
|
|
11
|
+
* // Or with custom models
|
|
12
|
+
* const detector = new PoseDetector({
|
|
13
|
+
* detModel: 'models/yolov12n.onnx',
|
|
14
|
+
* poseModel: 'models/rtmlib/end2end.onnx',
|
|
15
|
+
* });
|
|
16
|
+
* await detector.init();
|
|
17
|
+
*
|
|
18
|
+
* // From canvas
|
|
19
|
+
* const results = await detector.detectFromCanvas(canvas);
|
|
20
|
+
*
|
|
21
|
+
* // From video element
|
|
22
|
+
* const results = await detector.detectFromVideo(videoElement);
|
|
23
|
+
*
|
|
24
|
+
* // From raw image data
|
|
25
|
+
* const results = await detector.detect(imageData, width, height);
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
import * as ort from 'onnxruntime-web';
|
|
30
|
+
import { BBox, Detection } from '../types/index';
|
|
31
|
+
import { getCachedModel, isModelCached } from '../core/modelCache';
|
|
32
|
+
|
|
33
|
+
// Configure ONNX Runtime Web at module load time.
// NOTE: ort.env is process-wide shared state — importing this module mutates
// the ONNX Runtime configuration for every consumer on the page.
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/'; // CDN-hosted .wasm binaries
ort.env.wasm.simd = true;   // use SIMD builds where the browser supports them
ort.env.wasm.proxy = false; // run inference on the main thread (no worker proxy)
|
|
37
|
+
|
|
38
|
+
/**
 * Configuration options for PoseDetector.
 * Every field is optional; unspecified fields fall back to DEFAULT_CONFIG.
 */
export interface PoseDetectorConfig {
  /** Path to YOLO12 detection model (optional - uses default from HuggingFace if not specified) */
  detModel?: string;
  /** Path to RTMW pose estimation model (optional - uses default from HuggingFace if not specified) */
  poseModel?: string;
  /** Detection input size (default: [416, 416]) */
  detInputSize?: [number, number];
  /** Pose input size (default: [384, 288]) */
  poseInputSize?: [number, number];
  /** Detection confidence threshold (default: 0.5) */
  detConfidence?: number;
  /** NMS IoU threshold (default: 0.45) */
  nmsThreshold?: number;
  /** Pose keypoint confidence threshold (default: 0.3) */
  poseConfidence?: number;
  /** Execution backend (default: 'webgpu' — see DEFAULT_CONFIG) */
  backend?: 'wasm' | 'webgpu';
  /** Enable model caching (default: true) */
  cache?: boolean;
}
|
|
61
|
+
|
|
62
|
+
/**
 * Detected person with bounding box and keypoints.
 * Coordinates are in original-image pixel space.
 */
export interface Person {
  /** Bounding box coordinates (original-image pixels) */
  bbox: {
    x1: number;
    y1: number;
    x2: number;
    y2: number;
    confidence: number;
  };
  /** 17 COCO keypoints coordinates */
  keypoints: Keypoint[];
  /** Keypoint scores (0-1), parallel to `keypoints` (copied from each keypoint's `score`) */
  scores: number[];
}
|
|
79
|
+
|
|
80
|
+
/**
 * Single keypoint with coordinates and visibility.
 */
export interface Keypoint {
  /** X coordinate in original-image pixels */
  x: number;
  /** Y coordinate in original-image pixels */
  y: number;
  /** Confidence score (0-1) */
  score: number;
  /** Visibility flag — presumably `score >= poseConfidence`; confirm in postprocessPose */
  visible: boolean;
  /** COCO17 keypoint name, e.g. 'nose', 'left_wrist' (see KEYPOINT_NAMES) */
  name: string;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Detection statistics.
 * detect() attaches an object of this shape as a `stats` property on the
 * returned Person[] array (not available via getStats(), which returns null).
 */
export interface PoseStats {
  /** Number of detected people */
  personCount: number;
  /** Detection inference time (ms, rounded) */
  detTime: number;
  /** Pose estimation time for all people combined (ms, rounded) */
  poseTime: number;
  /** Total processing time (ms, rounded) */
  totalTime: number;
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* COCO17 keypoint names
|
|
107
|
+
*/
|
|
108
|
+
const KEYPOINT_NAMES = [
|
|
109
|
+
'nose',
|
|
110
|
+
'left_eye',
|
|
111
|
+
'right_eye',
|
|
112
|
+
'left_ear',
|
|
113
|
+
'right_ear',
|
|
114
|
+
'left_shoulder',
|
|
115
|
+
'right_shoulder',
|
|
116
|
+
'left_elbow',
|
|
117
|
+
'right_elbow',
|
|
118
|
+
'left_wrist',
|
|
119
|
+
'right_wrist',
|
|
120
|
+
'left_hip',
|
|
121
|
+
'right_hip',
|
|
122
|
+
'left_knee',
|
|
123
|
+
'right_knee',
|
|
124
|
+
'left_ankle',
|
|
125
|
+
'right_ankle',
|
|
126
|
+
];
|
|
127
|
+
|
|
128
|
+
/**
 * Default configuration.
 * Models are fetched from HuggingFace on first use (and cached when `cache`
 * is true). Note the default backend is 'webgpu', not 'wasm'.
 */
const DEFAULT_CONFIG: Required<PoseDetectorConfig> = {
  detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
  poseModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx',
  detInputSize: [416, 416], // Faster detection
  poseInputSize: [384, 288], // Required by model
  detConfidence: 0.5,
  nmsThreshold: 0.45,
  poseConfidence: 0.3,
  backend: 'webgpu', // Default to WebGPU for better performance
  cache: true,
};
|
|
142
|
+
|
|
143
|
+
export class PoseDetector {
|
|
144
|
+
  // Effective configuration (defaults merged with constructor overrides)
  private config: Required<PoseDetectorConfig>;
  private detSession: ort.InferenceSession | null = null;  // YOLO12 person detector session
  private poseSession: ort.InferenceSession | null = null; // RTMW pose estimator session
  private initialized = false; // set once init() completes successfully

  // Pre-allocated buffers for maximum performance
  private canvas: HTMLCanvasElement | null = null;      // letterboxed detection input canvas
  private ctx: CanvasRenderingContext2D | null = null;
  private poseCanvas: HTMLCanvasElement | null = null;  // per-person crop canvas (reused across people)
  private poseCtx: CanvasRenderingContext2D | null = null;
  private poseTensorBuffer: Float32Array | null = null; // reusable CHW float32 buffer for pose input
  private detInputSize: [number, number] = [416, 416];
  private poseInputSize: [number, number] = [384, 288];
|
|
157
|
+
|
|
158
|
+
constructor(config: PoseDetectorConfig) {
|
|
159
|
+
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Initialize both detection and pose models with pre-allocated resources
|
|
164
|
+
*/
|
|
165
|
+
async init(): Promise<void> {
|
|
166
|
+
if (this.initialized) return;
|
|
167
|
+
|
|
168
|
+
try {
|
|
169
|
+
// Load detection model
|
|
170
|
+
console.log(`[PoseDetector] Loading detection model from: ${this.config.detModel}`);
|
|
171
|
+
let detBuffer: ArrayBuffer;
|
|
172
|
+
|
|
173
|
+
if (this.config.cache) {
|
|
174
|
+
const detCached = await isModelCached(this.config.detModel);
|
|
175
|
+
console.log(`[PoseDetector] Det model cache ${detCached ? 'hit' : 'miss'}`);
|
|
176
|
+
detBuffer = await getCachedModel(this.config.detModel);
|
|
177
|
+
} else {
|
|
178
|
+
const detResponse = await fetch(this.config.detModel);
|
|
179
|
+
if (!detResponse.ok) {
|
|
180
|
+
throw new Error(`Failed to fetch det model: HTTP ${detResponse.status}`);
|
|
181
|
+
}
|
|
182
|
+
detBuffer = await detResponse.arrayBuffer();
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
this.detSession = await ort.InferenceSession.create(detBuffer, {
|
|
186
|
+
executionProviders: [this.config.backend],
|
|
187
|
+
graphOptimizationLevel: 'all',
|
|
188
|
+
});
|
|
189
|
+
console.log(`[PoseDetector] Detection model loaded, size: ${(detBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
|
|
190
|
+
|
|
191
|
+
// Load pose model
|
|
192
|
+
console.log(`[PoseDetector] Loading pose model from: ${this.config.poseModel}`);
|
|
193
|
+
let poseBuffer: ArrayBuffer;
|
|
194
|
+
|
|
195
|
+
if (this.config.cache) {
|
|
196
|
+
const poseCached = await isModelCached(this.config.poseModel);
|
|
197
|
+
console.log(`[PoseDetector] Pose model cache ${poseCached ? 'hit' : 'miss'}`);
|
|
198
|
+
poseBuffer = await getCachedModel(this.config.poseModel);
|
|
199
|
+
} else {
|
|
200
|
+
const poseResponse = await fetch(this.config.poseModel);
|
|
201
|
+
if (!poseResponse.ok) {
|
|
202
|
+
throw new Error(`Failed to fetch pose model: HTTP ${poseResponse.status}`);
|
|
203
|
+
}
|
|
204
|
+
poseBuffer = await poseResponse.arrayBuffer();
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
this.poseSession = await ort.InferenceSession.create(poseBuffer, {
|
|
208
|
+
executionProviders: [this.config.backend],
|
|
209
|
+
graphOptimizationLevel: 'all',
|
|
210
|
+
});
|
|
211
|
+
console.log(`[PoseDetector] Pose model loaded, size: ${(poseBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
|
|
212
|
+
|
|
213
|
+
// Pre-allocate all resources
|
|
214
|
+
const [detW, detH] = this.config.detInputSize;
|
|
215
|
+
this.detInputSize = [detW, detH];
|
|
216
|
+
|
|
217
|
+
const [poseW, poseH] = this.config.poseInputSize;
|
|
218
|
+
this.poseInputSize = [poseW, poseH];
|
|
219
|
+
|
|
220
|
+
// Main canvas for detection
|
|
221
|
+
this.canvas = document.createElement('canvas');
|
|
222
|
+
this.canvas.width = detW;
|
|
223
|
+
this.canvas.height = detH;
|
|
224
|
+
this.ctx = this.canvas.getContext('2d', {
|
|
225
|
+
willReadFrequently: true,
|
|
226
|
+
alpha: false
|
|
227
|
+
})!;
|
|
228
|
+
|
|
229
|
+
// Pose crop canvas (reused for each person)
|
|
230
|
+
this.poseCanvas = document.createElement('canvas');
|
|
231
|
+
this.poseCanvas.width = poseW;
|
|
232
|
+
this.poseCanvas.height = poseH;
|
|
233
|
+
this.poseCtx = this.poseCanvas.getContext('2d', {
|
|
234
|
+
willReadFrequently: true,
|
|
235
|
+
alpha: false
|
|
236
|
+
})!;
|
|
237
|
+
|
|
238
|
+
// Pre-allocate pose tensor buffer
|
|
239
|
+
this.poseTensorBuffer = new Float32Array(3 * poseW * poseH);
|
|
240
|
+
|
|
241
|
+
this.initialized = true;
|
|
242
|
+
console.log(`[PoseDetector] ✅ Initialized (det:${detW}x${detH}, pose:${poseW}x${poseH})`);
|
|
243
|
+
} catch (error) {
|
|
244
|
+
console.error('[PoseDetector] ❌ Initialization failed:', error);
|
|
245
|
+
throw error;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
|
|
249
|
+
/**
|
|
250
|
+
* Detect poses from HTMLCanvasElement
|
|
251
|
+
* @param canvas - Canvas element containing the image
|
|
252
|
+
* @returns Array of detected people with keypoints
|
|
253
|
+
*/
|
|
254
|
+
async detectFromCanvas(canvas: HTMLCanvasElement): Promise<Person[]> {
|
|
255
|
+
const ctx = canvas.getContext('2d');
|
|
256
|
+
if (!ctx) {
|
|
257
|
+
throw new Error('Could not get 2D context from canvas');
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
261
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
/**
|
|
265
|
+
* Detect poses from HTMLVideoElement
|
|
266
|
+
* @param video - Video element to capture frame from
|
|
267
|
+
* @param targetCanvas - Optional canvas for frame extraction (creates one if not provided)
|
|
268
|
+
* @returns Array of detected people with keypoints
|
|
269
|
+
*/
|
|
270
|
+
async detectFromVideo(
|
|
271
|
+
video: HTMLVideoElement,
|
|
272
|
+
targetCanvas?: HTMLCanvasElement
|
|
273
|
+
): Promise<Person[]> {
|
|
274
|
+
if (video.readyState < 2) {
|
|
275
|
+
throw new Error('Video not ready. Ensure video is loaded and playing.');
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
const canvas = targetCanvas || document.createElement('canvas');
|
|
279
|
+
canvas.width = video.videoWidth;
|
|
280
|
+
canvas.height = video.videoHeight;
|
|
281
|
+
|
|
282
|
+
const ctx = canvas.getContext('2d');
|
|
283
|
+
if (!ctx) {
|
|
284
|
+
throw new Error('Could not get 2D context from canvas');
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
|
|
288
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
289
|
+
|
|
290
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
/**
|
|
294
|
+
* Detect poses from HTMLImageElement
|
|
295
|
+
* @param image - Image element to process
|
|
296
|
+
* @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
|
|
297
|
+
* @returns Array of detected people with keypoints
|
|
298
|
+
*/
|
|
299
|
+
async detectFromImage(
|
|
300
|
+
image: HTMLImageElement,
|
|
301
|
+
targetCanvas?: HTMLCanvasElement
|
|
302
|
+
): Promise<Person[]> {
|
|
303
|
+
if (!image.complete || !image.naturalWidth) {
|
|
304
|
+
throw new Error('Image not loaded. Ensure image is fully loaded.');
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
const canvas = targetCanvas || document.createElement('canvas');
|
|
308
|
+
canvas.width = image.naturalWidth;
|
|
309
|
+
canvas.height = image.naturalHeight;
|
|
310
|
+
|
|
311
|
+
const ctx = canvas.getContext('2d');
|
|
312
|
+
if (!ctx) {
|
|
313
|
+
throw new Error('Could not get 2D context from canvas');
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
ctx.drawImage(image, 0, 0);
|
|
317
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
318
|
+
|
|
319
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
/**
|
|
323
|
+
* Detect poses from ImageBitmap (efficient for blob/file uploads)
|
|
324
|
+
* @param bitmap - ImageBitmap to process
|
|
325
|
+
* @param targetCanvas - Optional canvas for bitmap extraction (creates one if not provided)
|
|
326
|
+
* @returns Array of detected people with keypoints
|
|
327
|
+
*/
|
|
328
|
+
async detectFromBitmap(
|
|
329
|
+
bitmap: ImageBitmap,
|
|
330
|
+
targetCanvas?: HTMLCanvasElement
|
|
331
|
+
): Promise<Person[]> {
|
|
332
|
+
const canvas = targetCanvas || document.createElement('canvas');
|
|
333
|
+
canvas.width = bitmap.width;
|
|
334
|
+
canvas.height = bitmap.height;
|
|
335
|
+
|
|
336
|
+
const ctx = canvas.getContext('2d');
|
|
337
|
+
if (!ctx) {
|
|
338
|
+
throw new Error('Could not get 2D context from canvas');
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
ctx.drawImage(bitmap, 0, 0);
|
|
342
|
+
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
|
|
343
|
+
|
|
344
|
+
return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Detect poses from File (for file input uploads)
|
|
349
|
+
* @param file - File object from input element
|
|
350
|
+
* @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
|
|
351
|
+
* @returns Array of detected people with keypoints
|
|
352
|
+
*/
|
|
353
|
+
async detectFromFile(
|
|
354
|
+
file: File,
|
|
355
|
+
targetCanvas?: HTMLCanvasElement
|
|
356
|
+
): Promise<Person[]> {
|
|
357
|
+
return new Promise((resolve, reject) => {
|
|
358
|
+
const img = new Image();
|
|
359
|
+
img.onload = async () => {
|
|
360
|
+
try {
|
|
361
|
+
const results = await this.detectFromImage(img, targetCanvas);
|
|
362
|
+
resolve(results);
|
|
363
|
+
} catch (error) {
|
|
364
|
+
reject(error);
|
|
365
|
+
}
|
|
366
|
+
};
|
|
367
|
+
img.onerror = () => reject(new Error('Failed to load image from file'));
|
|
368
|
+
img.src = URL.createObjectURL(file);
|
|
369
|
+
});
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
/**
|
|
373
|
+
* Detect poses from Blob (for camera capture or downloads)
|
|
374
|
+
* @param blob - Blob object to process
|
|
375
|
+
* @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
|
|
376
|
+
* @returns Array of detected people with keypoints
|
|
377
|
+
*/
|
|
378
|
+
async detectFromBlob(
|
|
379
|
+
blob: Blob,
|
|
380
|
+
targetCanvas?: HTMLCanvasElement
|
|
381
|
+
): Promise<Person[]> {
|
|
382
|
+
const bitmap = await createImageBitmap(blob);
|
|
383
|
+
const results = await this.detectFromBitmap(bitmap, targetCanvas);
|
|
384
|
+
bitmap.close();
|
|
385
|
+
return results;
|
|
386
|
+
}
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* Detect people and estimate poses in a single call
|
|
390
|
+
* @param imageData - Image data (Uint8Array RGB/RGBA)
|
|
391
|
+
* @param width - Image width
|
|
392
|
+
* @param height - Image height
|
|
393
|
+
* @returns Array of detected people with keypoints
|
|
394
|
+
*/
|
|
395
|
+
async detect(
|
|
396
|
+
imageData: Uint8Array,
|
|
397
|
+
width: number,
|
|
398
|
+
height: number
|
|
399
|
+
): Promise<Person[]> {
|
|
400
|
+
if (!this.initialized) {
|
|
401
|
+
await this.init();
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
const startTime = performance.now();
|
|
405
|
+
|
|
406
|
+
// Step 1: Detect people
|
|
407
|
+
const detStart = performance.now();
|
|
408
|
+
const bboxes = await this.detectPeople(imageData, width, height);
|
|
409
|
+
const detTime = performance.now() - detStart;
|
|
410
|
+
|
|
411
|
+
// Step 2: Estimate poses for each person
|
|
412
|
+
const poseStart = performance.now();
|
|
413
|
+
const people: Person[] = [];
|
|
414
|
+
|
|
415
|
+
for (const bbox of bboxes) {
|
|
416
|
+
const keypoints = await this.estimatePose(imageData, width, height, bbox);
|
|
417
|
+
people.push({
|
|
418
|
+
bbox: {
|
|
419
|
+
x1: bbox.x1,
|
|
420
|
+
y1: bbox.y1,
|
|
421
|
+
x2: bbox.x2,
|
|
422
|
+
y2: bbox.y2,
|
|
423
|
+
confidence: bbox.confidence,
|
|
424
|
+
},
|
|
425
|
+
keypoints,
|
|
426
|
+
scores: keypoints.map(k => k.score),
|
|
427
|
+
});
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
const poseTime = performance.now() - poseStart;
|
|
431
|
+
const totalTime = performance.now() - startTime;
|
|
432
|
+
|
|
433
|
+
// Attach stats (for debugging)
|
|
434
|
+
(people as any).stats = {
|
|
435
|
+
personCount: people.length,
|
|
436
|
+
detTime: Math.round(detTime),
|
|
437
|
+
poseTime: Math.round(poseTime),
|
|
438
|
+
totalTime: Math.round(totalTime),
|
|
439
|
+
} as PoseStats;
|
|
440
|
+
|
|
441
|
+
return people;
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
  /**
   * Get detection and pose statistics from last call.
   * NOTE(review): this accessor always returns null — stats are attached to
   * the array returned by detect() as a `stats` property instead. Either wire
   * this up to a stored last-run value or deprecate it.
   */
  getStats(): PoseStats | null {
    return null; // Stats attached to results
  }
|
|
450
|
+
|
|
451
|
+
  /**
   * Detect people using YOLO12.
   * Letterboxes the image, runs the detector session, and maps the raw
   * output back to original-image bounding boxes (with NMS).
   * NOTE(review): the config tuple is read here as [inputH, inputW] while
   * init() reads the same tuple as [detW, detH]; the two readings only agree
   * because the default detInputSize is square — confirm the intended
   * [w, h] vs [h, w] convention before using non-square sizes.
   */
  private async detectPeople(
    imageData: Uint8Array,
    width: number,
    height: number
  ): Promise<Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }>> {
    const [inputH, inputW] = this.config.detInputSize;

    // Preprocess: letterbox into the model input, recording padding/scale
    const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocessYOLO(
      imageData,
      width,
      height,
      [inputW, inputH]
    );

    // Inference - use dynamic input name
    const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
    const inputName = this.detSession!.inputNames[0]; // Dynamic: 'images' or 'pixel_values'

    const feeds: Record<string, ort.Tensor> = {};
    feeds[inputName] = inputTensor;

    const results = await this.detSession!.run(feeds);
    const output = results[this.detSession!.outputNames[0]];

    // Postprocess: dims[1] is the detection count of the end2end output
    return this.postprocessYOLO(
      output.data as Float32Array,
      output.dims[1],
      width,
      height,
      paddingX,
      paddingY,
      scaleX,
      scaleY
    );
  }
|
|
491
|
+
|
|
492
|
+
  /**
   * Estimate pose for a single person.
   * Crops the bbox region (preprocessPose), runs the SimCC-style pose model,
   * and decodes simcc_x/simcc_y distributions back to image-space keypoints.
   * NOTE(review): poseInputSize is destructured here as [inputH, inputW]
   * (=> H=384, W=288 for the default [384, 288]), but init() sizes the crop
   * canvas using the same tuple as [poseW, poseH] (=> W=384, H=288). The
   * tuple is non-square, so one of the two readings must be wrong — verify
   * against the model's expected input layout.
   */
  private async estimatePose(
    imageData: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    bbox: { x1: number; y1: number; x2: number; y2: number; confidence: number }
  ): Promise<Keypoint[]> {
    const [inputH, inputW] = this.config.poseInputSize;

    // Preprocess: affine-crop the padded bbox into the model input size
    const { tensor, center, scale } = this.preprocessPose(
      imageData,
      imgWidth,
      imgHeight,
      bbox,
      [inputW, inputH]
    );

    // Inference — the model input name is hard-coded as 'input' here,
    // unlike detectPeople() which resolves it dynamically
    const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW] as number[]);
    const results = await this.poseSession!.run({ input: inputTensor });

    // Postprocess: decode SimCC x/y logits into keypoints using the crop
    // center/scale to map back to original-image coordinates
    return this.postprocessPose(
      results.simcc_x.data as Float32Array,
      results.simcc_y.data as Float32Array,
      results.simcc_x.dims as number[],
      results.simcc_y.dims as number[],
      center,
      scale
    );
  }
|
|
526
|
+
|
|
527
|
+
  /**
   * YOLO preprocessing with letterbox.
   * Scales the image to fit the model input while preserving aspect ratio,
   * pads the remainder with black, and converts RGBA pixels to a normalized
   * [0, 1] float32 tensor in CHW order.
   * Returns the padding offsets and scale factors needed by postprocessYOLO
   * to map detections back to original-image coordinates.
   * NOTE(review): a fresh srcCanvas is created on every call — at odds with
   * the class's "pre-allocated buffers" design; consider caching it.
   */
  private preprocessYOLO(
    imageData: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    inputSize: [number, number]
  ): {
    tensor: Float32Array;
    paddingX: number;
    paddingY: number;
    scaleX: number;
    scaleY: number;
  } {
    const [inputW, inputH] = inputSize;

    // Reuse canvas
    if (!this.canvas || !this.ctx) {
      this.canvas = document.createElement('canvas');
      this.ctx = this.canvas.getContext('2d', { willReadFrequently: true })!;
    }

    this.canvas.width = inputW;
    this.canvas.height = inputH;
    const ctx = this.ctx;

    // Black background (letterbox fill)
    ctx.fillStyle = '#000000';
    ctx.fillRect(0, 0, inputW, inputH);

    // Calculate letterbox: fit the larger relative dimension, center the other
    const aspectRatio = imgWidth / imgHeight;
    const targetAspectRatio = inputW / inputH;

    let drawWidth: number, drawHeight: number, offsetX: number, offsetY: number;

    if (aspectRatio > targetAspectRatio) {
      // Image is wider than the target: full width, vertical padding
      drawWidth = inputW;
      drawHeight = Math.floor(inputW / aspectRatio);
      offsetX = 0;
      offsetY = Math.floor((inputH - drawHeight) / 2);
    } else {
      // Image is taller than the target: full height, horizontal padding
      drawHeight = inputH;
      drawWidth = Math.floor(inputH * aspectRatio);
      offsetX = Math.floor((inputW - drawWidth) / 2);
      offsetY = 0;
    }

    // Create source canvas to turn the raw RGBA bytes into a drawable surface
    const srcCanvas = document.createElement('canvas');
    const srcCtx = srcCanvas.getContext('2d')!;
    srcCanvas.width = imgWidth;
    srcCanvas.height = imgHeight;

    const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
    srcImageData.data.set(imageData);
    srcCtx.putImageData(srcImageData, 0, 0);

    // Draw the scaled image into the letterbox region
    ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);

    const paddedData = ctx.getImageData(0, 0, inputW, inputH);

    // Normalize to [0, 1] and convert to CHW (channel planes: R, G, B)
    const tensor = new Float32Array(inputW * inputH * 3);
    for (let i = 0; i < paddedData.data.length; i += 4) {
      const pixelIdx = i / 4;
      tensor[pixelIdx] = paddedData.data[i] / 255;
      tensor[pixelIdx + inputW * inputH] = paddedData.data[i + 1] / 255;
      tensor[pixelIdx + 2 * inputW * inputH] = paddedData.data[i + 2] / 255;
    }

    // Factors that map letterboxed coordinates back to original pixels
    const scaleX = imgWidth / drawWidth;
    const scaleY = imgHeight / drawHeight;

    return {
      tensor,
      paddingX: offsetX,
      paddingY: offsetY,
      scaleX,
      scaleY,
    };
  }
|
|
611
|
+
|
|
612
|
+
/**
|
|
613
|
+
* YOLO postprocessing with NMS
|
|
614
|
+
*/
|
|
615
|
+
private postprocessYOLO(
|
|
616
|
+
output: Float32Array,
|
|
617
|
+
numDetections: number,
|
|
618
|
+
imgWidth: number,
|
|
619
|
+
imgHeight: number,
|
|
620
|
+
paddingX: number,
|
|
621
|
+
paddingY: number,
|
|
622
|
+
scaleX: number,
|
|
623
|
+
scaleY: number
|
|
624
|
+
): Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }> {
|
|
625
|
+
const detections: Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }> = [];
|
|
626
|
+
|
|
627
|
+
for (let i = 0; i < numDetections; i++) {
|
|
628
|
+
const idx = i * 6;
|
|
629
|
+
const x1 = output[idx];
|
|
630
|
+
const y1 = output[idx + 1];
|
|
631
|
+
const x2 = output[idx + 2];
|
|
632
|
+
const y2 = output[idx + 3];
|
|
633
|
+
const confidence = output[idx + 4];
|
|
634
|
+
const classId = Math.round(output[idx + 5]);
|
|
635
|
+
|
|
636
|
+
if (confidence < this.config.detConfidence || classId !== 0) continue;
|
|
637
|
+
|
|
638
|
+
// Transform coordinates
|
|
639
|
+
const tx1 = (x1 - paddingX) * scaleX;
|
|
640
|
+
const ty1 = (y1 - paddingY) * scaleY;
|
|
641
|
+
const tx2 = (x2 - paddingX) * scaleX;
|
|
642
|
+
const ty2 = (y2 - paddingY) * scaleY;
|
|
643
|
+
|
|
644
|
+
detections.push({
|
|
645
|
+
x1: Math.max(0, tx1),
|
|
646
|
+
y1: Math.max(0, ty1),
|
|
647
|
+
x2: Math.min(imgWidth, tx2),
|
|
648
|
+
y2: Math.min(imgHeight, ty2),
|
|
649
|
+
confidence,
|
|
650
|
+
});
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
// NMS
|
|
654
|
+
return this.applyNMS(detections, this.config.nmsThreshold);
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
/**
 * Crop the detected subject out of the full frame and convert the crop into
 * a normalized CHW float tensor for the pose model.
 *
 * The bounding box is expanded by 1.25x and padded along the shorter side to
 * match the model's aspect ratio so the resized crop is not distorted.
 * Pixels are normalized per channel as (value - mean) * (1 / std) using the
 * constants below.
 *
 * @param imageData  Raw RGBA pixel bytes of the full frame (4 bytes/pixel —
 *                   assumed from the createImageData/set usage; confirm at caller).
 * @param imgWidth   Frame width in pixels.
 * @param imgHeight  Frame height in pixels.
 * @param bbox       Detection box in image coordinates.
 * @param inputSize  Pose model input as [width, height].
 * @returns The CHW tensor plus the crop's center/scale, which the SimCC
 *          decoder uses to map keypoints back to image coordinates.
 */
private preprocessPose(
  imageData: Uint8Array,
  imgWidth: number,
  imgHeight: number,
  bbox: { x1: number; y1: number; x2: number; y2: number; confidence: number },
  inputSize: [number, number]
): { tensor: Float32Array; center: [number, number]; scale: [number, number] } {
  const [inputW, inputH] = inputSize;

  const bboxWidth = bbox.x2 - bbox.x1;
  const bboxHeight = bbox.y2 - bbox.y1;

  const center: [number, number] = [
    bbox.x1 + bboxWidth / 2,
    bbox.y1 + bboxHeight / 2,
  ];

  // Expand by 1.25x and pad the shorter dimension so the crop's aspect
  // ratio matches the model input (prevents distortion on resize).
  const bboxAspectRatio = bboxWidth / bboxHeight;
  const modelAspectRatio = inputW / inputH;

  let scaleW: number, scaleH: number;
  if (bboxAspectRatio > modelAspectRatio) {
    scaleW = bboxWidth * 1.25;
    scaleH = scaleW / modelAspectRatio;
  } else {
    scaleH = bboxHeight * 1.25;
    scaleW = scaleH * modelAspectRatio;
  }

  const scale: [number, number] = [scaleW, scaleH];

  // Reuse the pre-allocated pose canvas/tensor. FIX: also recreate them when
  // the requested input size changes — previously a canvas and buffer sized
  // for the FIRST call's inputSize were silently reused for every later
  // call, corrupting the output for any other input size.
  if (
    !this.poseCanvas ||
    !this.poseCtx ||
    this.poseCanvas.width !== inputW ||
    this.poseCanvas.height !== inputH
  ) {
    this.poseCanvas = document.createElement('canvas');
    this.poseCanvas.width = inputW;
    this.poseCanvas.height = inputH;
    this.poseCtx = this.poseCanvas.getContext('2d', {
      willReadFrequently: true,
      alpha: false
    })!;
    this.poseTensorBuffer = new Float32Array(3 * inputW * inputH);
  }

  const ctx = this.poseCtx;

  // Fast clear of any previous crop.
  ctx.clearRect(0, 0, inputW, inputH);

  // Stage the raw RGBA frame on a scratch canvas so drawImage can crop+scale.
  const srcCanvas = document.createElement('canvas');
  srcCanvas.width = imgWidth;
  srcCanvas.height = imgHeight;
  const srcCtx = srcCanvas.getContext('2d')!;

  const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
  srcImageData.data.set(imageData);
  srcCtx.putImageData(srcImageData, 0, 0);

  // Crop the expanded box region and scale it to the model input size.
  const srcX = center[0] - scaleW / 2;
  const srcY = center[1] - scaleH / 2;
  ctx.drawImage(srcCanvas, srcX, srcY, scaleW, scaleH, 0, 0, inputW, inputH);

  const croppedData = ctx.getImageData(0, 0, inputW, inputH);

  const tensor = this.poseTensorBuffer!;
  const data = croppedData.data;
  const planeSize = inputW * inputH;

  // Precomputed per-channel normalization constants (mean and 1/std).
  const mean0 = 123.675, mean1 = 116.28, mean2 = 103.53;
  const stdInv0 = 1 / 58.395, stdInv1 = 1 / 57.12, stdInv2 = 1 / 57.375;

  // Unrolled main loop (4 pixels per iteration). FIX: restrict it to the
  // whole multiples of 4 pixels — the original unconditionally read up to
  // data[i + 14] and ran past the end of the buffer (producing NaNs)
  // whenever planeSize % 4 !== 0.
  const unrolledEnd = (planeSize - (planeSize % 4)) * 4;
  for (let i = 0; i < unrolledEnd; i += 16) {
    const p1 = i / 4, p2 = p1 + 1, p3 = p1 + 2, p4 = p1 + 3;

    // R channel
    tensor[p1] = (data[i] - mean0) * stdInv0;
    tensor[p2] = (data[i + 4] - mean0) * stdInv0;
    tensor[p3] = (data[i + 8] - mean0) * stdInv0;
    tensor[p4] = (data[i + 12] - mean0) * stdInv0;

    // G channel
    tensor[p1 + planeSize] = (data[i + 1] - mean1) * stdInv1;
    tensor[p2 + planeSize] = (data[i + 5] - mean1) * stdInv1;
    tensor[p3 + planeSize] = (data[i + 9] - mean1) * stdInv1;
    tensor[p4 + planeSize] = (data[i + 13] - mean1) * stdInv1;

    // B channel
    tensor[p1 + planeSize * 2] = (data[i + 2] - mean2) * stdInv2;
    tensor[p2 + planeSize * 2] = (data[i + 6] - mean2) * stdInv2;
    tensor[p3 + planeSize * 2] = (data[i + 10] - mean2) * stdInv2;
    tensor[p4 + planeSize * 2] = (data[i + 14] - mean2) * stdInv2;
  }

  // Scalar tail for the remaining 0–3 pixels.
  for (let i = unrolledEnd; i < planeSize * 4; i += 4) {
    const p = i / 4;
    tensor[p] = (data[i] - mean0) * stdInv0;
    tensor[p + planeSize] = (data[i + 1] - mean1) * stdInv1;
    tensor[p + planeSize * 2] = (data[i + 2] - mean2) * stdInv2;
  }

  return { tensor, center, scale };
}
|
|
760
|
+
|
|
761
|
+
/**
 * Decode SimCC classification outputs into keypoints in image coordinates.
 *
 * For each keypoint, the argmax over the X and Y classification bins gives
 * the normalized position inside the crop; the per-axis peak values are
 * averaged into a score. Bin positions are mapped back to the original
 * image through the crop's center/scale (the inverse of preprocessPose).
 *
 * @param simccX  Flattened [1, K, Wx] X-axis bin scores.
 * @param simccY  Flattened [1, K, Wy] Y-axis bin scores.
 * @param shapeX  Shape of simccX; shapeX[1] = keypoint count, shapeX[2] = Wx.
 * @param shapeY  Shape of simccY; shapeY[2] = Wy.
 * @param center  Crop center in image coordinates.
 * @param scale   Crop extent in image coordinates.
 */
private postprocessPose(
  simccX: Float32Array,
  simccY: Float32Array,
  shapeX: number[],
  shapeY: number[],
  center: [number, number],
  scale: [number, number]
): Keypoint[] {
  const numKeypoints = shapeX[1];
  const wx = shapeX[2];
  const wy = shapeY[2];

  // Index and value of the largest bin within one keypoint's row.
  const argmax = (bins: Float32Array, offset: number, width: number): [number, number] => {
    let bestIdx = 0;
    let bestVal = -Infinity;
    for (let b = 0; b < width; b++) {
      const v = bins[offset + b];
      if (v > bestVal) {
        bestVal = v;
        bestIdx = b;
      }
    }
    return [bestIdx, bestVal];
  };

  const keypoints: Keypoint[] = [];

  for (let k = 0; k < numKeypoints; k++) {
    const [binX, maxX] = argmax(simccX, k * wx, wx);
    const [binY, maxY] = argmax(simccY, k * wy, wy);

    const score = 0.5 * (maxX + maxY);

    // Normalized bin position -> image coordinates via the crop transform.
    const x = (binX / wx - 0.5) * scale[0] + center[0];
    const y = (binY / wy - 0.5) * scale[1] + center[1];

    keypoints.push({
      x,
      y,
      score,
      visible: score > this.config.poseConfidence,
      name: KEYPOINT_NAMES[k] || `keypoint_${k}`,
    });
  }

  return keypoints;
}
|
|
822
|
+
|
|
823
|
+
/**
|
|
824
|
+
* Non-Maximum Suppression
|
|
825
|
+
*/
|
|
826
|
+
private applyNMS(
|
|
827
|
+
detections: Array<{ x1: number; y1: number; x2: number; y2: number; confidence: number }>,
|
|
828
|
+
iouThreshold: number
|
|
829
|
+
): typeof detections {
|
|
830
|
+
if (detections.length === 0) return [];
|
|
831
|
+
|
|
832
|
+
detections.sort((a, b) => b.confidence - a.confidence);
|
|
833
|
+
|
|
834
|
+
const selected: typeof detections = [];
|
|
835
|
+
const used = new Set<number>();
|
|
836
|
+
|
|
837
|
+
for (let i = 0; i < detections.length; i++) {
|
|
838
|
+
if (used.has(i)) continue;
|
|
839
|
+
|
|
840
|
+
selected.push(detections[i]);
|
|
841
|
+
used.add(i);
|
|
842
|
+
|
|
843
|
+
for (let j = i + 1; j < detections.length; j++) {
|
|
844
|
+
if (used.has(j)) continue;
|
|
845
|
+
|
|
846
|
+
const iou = this.calculateIoU(detections[i], detections[j]);
|
|
847
|
+
if (iou > iouThreshold) {
|
|
848
|
+
used.add(j);
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
|
|
853
|
+
return selected;
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
/**
|
|
857
|
+
* Calculate IoU between two boxes
|
|
858
|
+
*/
|
|
859
|
+
private calculateIoU(
|
|
860
|
+
box1: { x1: number; y1: number; x2: number; y2: number },
|
|
861
|
+
box2: { x1: number; y1: number; x2: number; y2: number }
|
|
862
|
+
): number {
|
|
863
|
+
const x1 = Math.max(box1.x1, box2.x1);
|
|
864
|
+
const y1 = Math.max(box1.y1, box2.y1);
|
|
865
|
+
const x2 = Math.min(box1.x2, box2.x2);
|
|
866
|
+
const y2 = Math.min(box1.y2, box2.y2);
|
|
867
|
+
|
|
868
|
+
if (x2 <= x1 || y2 <= y1) return 0;
|
|
869
|
+
|
|
870
|
+
const intersection = (x2 - x1) * (y2 - y1);
|
|
871
|
+
const area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
|
|
872
|
+
const area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
|
|
873
|
+
const union = area1 + area2 - intersection;
|
|
874
|
+
|
|
875
|
+
return intersection / union;
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
/**
|
|
879
|
+
* Dispose resources
|
|
880
|
+
*/
|
|
881
|
+
dispose(): void {
|
|
882
|
+
if (this.detSession) {
|
|
883
|
+
this.detSession.release();
|
|
884
|
+
this.detSession = null;
|
|
885
|
+
}
|
|
886
|
+
if (this.poseSession) {
|
|
887
|
+
this.poseSession.release();
|
|
888
|
+
this.poseSession = null;
|
|
889
|
+
}
|
|
890
|
+
this.initialized = false;
|
|
891
|
+
}
|
|
892
|
+
}
|