rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-processing utilities for pose estimation
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Converts a corner-format bounding box into center/scale form.
 *
 * @param bbox - Box as `[x1, y1, x2, y2]`.
 * @param padding - Multiplier applied to both the box width and height.
 * @returns `center` as the box midpoint and `scale` as
 *   `[width * padding, height * padding]`. The two scale components are
 *   independent; no aspect-ratio correction is applied here.
 */
export function bboxXyxy2cs(
  bbox: [number, number, number, number],
  padding: number = 1.25
): { center: [number, number]; scale: [number, number] } {
  const left = bbox[0];
  const top = bbox[1];
  const right = bbox[2];
  const bottom = bbox[3];

  const boxWidth = right - left;
  const boxHeight = bottom - top;

  return {
    center: [(left + right) / 2, (top + bottom) / 2],
    // Width and height are padded separately, so the result keeps the
    // aspect ratio of the input box.
    scale: [boxWidth * padding, boxHeight * padding],
  };
}
|
|
21
|
+
|
|
22
|
+
/**
 * Crops the region described by `center`/`scale` out of `img` and resamples
 * it to `imageSize` with bilinear interpolation.
 *
 * @param imageSize - Output size, destructured as `[width, height]`.
 * @param scale - Size of the source region as `[width, height]`, in source
 *   pixel units.
 * @param center - Center of the source region as `[x, y]`.
 * @param img - Source pixels. Samples are read through `getPixel`, which
 *   addresses the buffer as interleaved 3-channel rows.
 * @param imgWidth - Source image width in pixels.
 * @param imgHeight - Source image height in pixels.
 * @returns `resizedImg`, an interleaved 3-channel buffer of
 *   `width * height * 3` floats, and the `scale` argument passed through
 *   unchanged so callers can map results back to the source image.
 */
export function topDownAffine(
  imageSize: [number, number],
  scale: [number, number],
  center: [number, number],
  img: Uint8Array,
  imgWidth: number,
  imgHeight: number
): { resizedImg: Float32Array; scale: [number, number] } {
  const [w, h] = imageSize;
  const [srcW, srcH] = scale;

  // Top-left corner of the source region; constant for the whole image.
  const originX = center[0] - srcW / 2;
  const originY = center[1] - srcH / 2;

  const resizedImg = new Float32Array(w * h * 3);

  for (let y = 0; y < h; y++) {
    // Everything that depends only on the output row is computed once per row.
    const srcY = (y / h) * srcH + originY;
    const y0 = Math.floor(srcY);
    const y1 = y0 + 1;
    const dy = srcY - y0;

    for (let x = 0; x < w; x++) {
      // Map the output column back to a (fractional) source column.
      const srcX = (x / w) * srcW + originX;
      const x0 = Math.floor(srcX);
      const x1 = x0 + 1;
      const dx = srcX - x0;

      const outBase = y * w * 3 + x * 3;

      for (let c = 0; c < 3; c++) {
        // Neighbours outside the source image contribute 0 (see getPixel).
        const p00 = getPixel(img, imgWidth, imgHeight, x0, y0, c);
        const p10 = getPixel(img, imgWidth, imgHeight, x1, y0, c);
        const p01 = getPixel(img, imgWidth, imgHeight, x0, y1, c);
        const p11 = getPixel(img, imgWidth, imgHeight, x1, y1, c);

        // Bilinear blend of the four neighbours.
        resizedImg[outBase + c] =
          p00 * (1 - dx) * (1 - dy) +
          p10 * dx * (1 - dy) +
          p01 * (1 - dx) * dy +
          p11 * dx * dy;
      }
    }
  }

  // The source-region scale (not the model dimensions) is what
  // post-processing needs.
  return { resizedImg, scale };
}
|
|
80
|
+
|
|
81
|
+
/**
 * Reads one channel of one pixel from an interleaved 3-channel image buffer.
 *
 * @param img - Pixel buffer addressed as `y * width * 3 + x * 3 + channel`.
 * @param width - Image width in pixels.
 * @param height - Image height in pixels.
 * @param x - Column to read.
 * @param y - Row to read.
 * @param channel - Channel offset within the pixel.
 * @returns The stored value, or `0` when `(x, y)` lies outside the image.
 */
function getPixel(
  img: Uint8Array,
  width: number,
  height: number,
  x: number,
  y: number,
  channel: number
): number {
  const outsideImage = y < 0 || y >= height || x < 0 || x >= width;
  if (outsideImage) {
    // Out-of-range samples are treated as zero padding.
    return 0;
  }

  const rowStart = y * width * 3;
  return img[rowStart + x * 3 + channel];
}
|
|
94
|
+
|
|
95
|
+
/**
 * Applies per-channel `(value - mean) / std` normalization to an interleaved
 * 3-channel image.
 *
 * @param img - Interleaved pixel values; element `i` belongs to channel
 *   `i % 3`.
 * @param mean - Per-channel mean, indexed by channel.
 * @param std - Per-channel standard deviation, indexed by channel.
 * @returns A new array of the same length; `img` is not modified.
 */
export function normalizeImage(
  img: Float32Array,
  mean: number[],
  std: number[]
): Float32Array {
  // Typed-array map yields a fresh Float32Array of identical length.
  return img.map((value, index) => {
    const ch = index % 3;
    return (value - mean[ch]) / std[ch];
  });
}
|
|
109
|
+
|
|
110
|
+
/**
 * Rearranges an interleaved image (HWC) into planar layout (CHW).
 *
 * @param img - Interleaved 3-channel pixel data.
 * @param height - Image height in pixels.
 * @param width - Image width in pixels.
 * @returns A new array of the same length holding three consecutive
 *   `height * width` planes, one per channel.
 */
export function transposeImage(
  img: Float32Array,
  height: number,
  width: number
): Float32Array {
  const planar = new Float32Array(img.length);

  // Walk the source in its natural pixel order and scatter each channel
  // into its own plane.
  for (let row = 0; row < height; row++) {
    for (let col = 0; col < width; col++) {
      for (let ch = 0; ch < 3; ch++) {
        const dst = ch * height * width + row * width + col;
        const src = row * width * 3 + col * 3 + ch;
        planar[dst] = img[src];
      }
    }
  }

  return planar;
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* rtmlib-ts - Real-Time Multi-Person Pose Estimation Library
|
|
3
|
+
*
|
|
4
|
+
* TypeScript port of rtmlib Python library
|
|
5
|
+
* Based on RTMPose, DWPose, RTMO, RTMW models
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Models
|
|
9
|
+
export { YOLOX } from './models/yolox';
|
|
10
|
+
export { YOLO12 } from './models/yolo12';
|
|
11
|
+
export { RTMPose } from './models/rtmpose';
|
|
12
|
+
export { RTMPose3D } from './models/rtmpose3d';
|
|
13
|
+
|
|
14
|
+
// Solutions (High-level APIs)
|
|
15
|
+
export { ObjectDetector, COCO_CLASSES } from './solution/objectDetector';
|
|
16
|
+
export { PoseDetector } from './solution/poseDetector';
|
|
17
|
+
export { Pose3DDetector, type Person3D, type Pose3DStats } from './solution/pose3dDetector';
|
|
18
|
+
export { CustomDetector, type CustomDetectorConfig, type DetectionResult } from './solution/customDetector';
|
|
19
|
+
export { AnimalDetector, ANIMAL_CLASSES, VITPOSE_MODELS, type VitPoseModelType, type DetectedAnimal, type AnimalKeypoint } from './solution/animalDetector';
|
|
20
|
+
export { Wholebody } from './solution/wholebody';
|
|
21
|
+
export { Wholebody3D, type Wholebody3DResult } from './solution/wholebody3d';
|
|
22
|
+
export { Body } from './solution/body';
|
|
23
|
+
export { Hand } from './solution/hand';
|
|
24
|
+
export { BodyWithFeet } from './solution/bodyWithFeet';
|
|
25
|
+
export { PoseTracker } from './solution/poseTracker';
|
|
26
|
+
|
|
27
|
+
// Visualization
|
|
28
|
+
export { drawBbox, drawSkeleton, drawDetectionsOnCanvas, drawPoseOnCanvas, drawResultsOnCanvas } from './visualization/draw';
|
|
29
|
+
|
|
30
|
+
// Model caching utilities
|
|
31
|
+
export {
|
|
32
|
+
getCachedModel,
|
|
33
|
+
isModelCached,
|
|
34
|
+
preloadModels,
|
|
35
|
+
clearModelCache,
|
|
36
|
+
getCacheSize,
|
|
37
|
+
getCacheInfo,
|
|
38
|
+
} from './core/modelCache';
|
|
39
|
+
|
|
40
|
+
// Types
|
|
41
|
+
export type {
|
|
42
|
+
Keypoint,
|
|
43
|
+
BodyResult,
|
|
44
|
+
HandResult,
|
|
45
|
+
FaceResult,
|
|
46
|
+
PoseResult,
|
|
47
|
+
BBox,
|
|
48
|
+
Detection,
|
|
49
|
+
ModelConfig,
|
|
50
|
+
ModeType,
|
|
51
|
+
BackendType,
|
|
52
|
+
DeviceType,
|
|
53
|
+
ImageData,
|
|
54
|
+
RGBImage,
|
|
55
|
+
BGRImage,
|
|
56
|
+
} from './types/index';
|
|
57
|
+
|
|
58
|
+
// Skeleton configurations
|
|
59
|
+
export {
|
|
60
|
+
coco17,
|
|
61
|
+
coco133,
|
|
62
|
+
hand21,
|
|
63
|
+
halpe26,
|
|
64
|
+
openpose18,
|
|
65
|
+
openpose134,
|
|
66
|
+
} from './visualization/skeleton/index';
|
|
67
|
+
|
|
68
|
+
/**
 * Library version exposed at runtime.
 *
 * Bumped from '0.0.1' so that it matches the published package version
 * (0.0.2); update it together with every release.
 */
export const VERSION = '0.0.2';
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RTMPose model for pose estimation
|
|
3
|
+
* Supports RTMPose, DWPose, RTMW variants
|
|
4
|
+
* Uses onnxruntime-web for browser compatibility
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { BaseTool } from '../core/base';
|
|
8
|
+
import { BBox, BackendType } from '../types/index';
|
|
9
|
+
|
|
10
|
+
/**
 * Top-down pose estimator for RTMPose-family ONNX models with SimCC outputs.
 *
 * For every bounding box the matching image region is cropped to the model's
 * aspect ratio, normalized, run through the network, and the two SimCC
 * outputs are decoded into keypoints in original-image coordinates.
 *
 * Runs in a browser-like environment: pre-processing relies on
 * `document.createElement('canvas')`.
 */
export class RTMPose extends BaseTool {
  /** Number of body keypoints in the COCO-17 layout. */
  private static readonly COCO_BODY_KEYPOINTS = 17;

  /** Marker used in the index table for the synthesized neck joint. */
  private static readonly NECK = -1;

  /**
   * For each OpenPose-18 slot, the COCO-17 index it is copied from.
   * COCO order: 0 nose, 1 left_eye, 2 right_eye, 3 left_ear, 4 right_ear,
   * 5 left_shoulder, 6 right_shoulder, 7 left_elbow, 8 right_elbow,
   * 9 left_wrist, 10 right_wrist, 11 left_hip, 12 right_hip, 13 left_knee,
   * 14 right_knee, 15 left_ankle, 16 right_ankle.
   */
  private static readonly OPENPOSE18_FROM_COCO17: readonly number[] = [
    0, // nose
    -1, // neck (synthesized from both shoulders)
    6, // right_shoulder
    8, // right_elbow
    10, // right_wrist
    5, // left_shoulder
    7, // left_elbow
    9, // left_wrist
    12, // right_hip
    14, // right_knee
    16, // right_ankle
    11, // left_hip
    13, // left_knee
    15, // left_ankle
    2, // right_eye
    1, // left_eye
    4, // right_ear
    3, // left_ear
  ];

  private toOpenpose: boolean;
  // Not read anywhere in this class: postprocess derives the SimCC bin
  // resolution directly from the output tensor shapes.
  private simccSplitRatio: number = 2.0;
  private initialized: boolean = false;

  /** Per-channel mean subtracted from 0-255 RGB values before inference. */
  private readonly defaultMean: number[] = [123.675, 116.28, 103.53];
  /** Per-channel divisor applied after mean subtraction. */
  private readonly defaultStd: number[] = [58.395, 57.12, 57.375];

  /**
   * @param onnxModel - Model location handed to `BaseTool`.
   * @param modelInputSize - Network input size as `[height, width]`.
   * @param toOpenpose - When true, each person's keypoints are converted
   *   from COCO ordering to OpenPose ordering before being returned.
   * @param backend - Execution backend handed to `BaseTool`.
   */
  constructor(
    onnxModel: string,
    modelInputSize: [number, number] = [384, 288], // [height, width]
    toOpenpose: boolean = false,
    backend: BackendType = 'webgpu'
  ) {
    super(onnxModel, modelInputSize, null, null, backend);
    this.toOpenpose = toOpenpose;
  }

  /** Initializes the underlying tool and marks this instance as ready. */
  async init(): Promise<void> {
    // Web version - model path is direct URL
    await super.init();
    this.initialized = true;
  }

  /**
   * Estimates keypoints for every bounding box.
   *
   * @param image - Pixel data copied verbatim into a canvas `ImageData`
   *   buffer, so it must be RGBA with 4 bytes per pixel.
   * @param imgWidth - Image width in pixels.
   * @param imgHeight - Image height in pixels.
   * @param bboxes - Regions to analyse. When empty, the whole image is used
   *   as a single region. The array is not modified.
   * @returns Keypoints as `[x, y]` in original-image coordinates and their
   *   scores. Results for all boxes are concatenated in box order.
   */
  async call(
    image: Uint8Array,
    imgWidth: number,
    imgHeight: number,
    bboxes: BBox[] = []
  ): Promise<{ keypoints: number[][]; scores: number[] }> {
    if (!this.initialized) {
      await this.init();
    }

    const regions: BBox[] =
      bboxes.length > 0
        ? bboxes
        : [{ x1: 0, y1: 0, x2: imgWidth, y2: imgHeight }];

    // The full image is uploaded to a canvas once and shared by all boxes.
    const srcCanvas = this.createSourceCanvas(image, imgWidth, imgHeight);

    const keypoints: number[][] = [];
    const scores: number[] = [];

    for (const bbox of regions) {
      const { tensor, center, scale, inputSize } = this.preprocess(srcCanvas, bbox);

      const outputs = await this.inference(tensor, inputSize);
      const decoded = this.postprocess(
        outputs[0].data as Float32Array,
        outputs[1].data as Float32Array,
        outputs[0].dims,
        outputs[1].dims,
        center,
        scale
      );

      // Convert per person: the index remapping is only meaningful on one
      // skeleton at a time, not on the concatenated list.
      const person = this.toOpenpose
        ? this.convertCocoToOpenpose(decoded.keypoints, decoded.scores)
        : decoded;

      for (const point of person.keypoints) {
        keypoints.push(point);
      }
      for (const value of person.scores) {
        scores.push(value);
      }
    }

    return { keypoints, scores };
  }

  /** Returns the 2D context of `canvas`, failing loudly if it is unavailable. */
  private static context2d(canvas: HTMLCanvasElement): CanvasRenderingContext2D {
    const ctx = canvas.getContext('2d');
    if (ctx === null) {
      throw new Error('RTMPose: 2D canvas context is not available');
    }
    return ctx;
  }

  /** Builds a canvas of the given size holding the raw RGBA pixels of `img`. */
  private createSourceCanvas(
    img: Uint8Array,
    imgWidth: number,
    imgHeight: number
  ): HTMLCanvasElement {
    const srcCanvas = document.createElement('canvas');
    const srcCtx = RTMPose.context2d(srcCanvas);
    srcCanvas.width = imgWidth;
    srcCanvas.height = imgHeight;

    const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
    srcImageData.data.set(img);
    srcCtx.putImageData(srcImageData, 0, 0);

    return srcCanvas;
  }

  /**
   * Crops one box out of the source canvas and turns it into a normalized,
   * planar (CHW) input tensor.
   *
   * @returns The tensor data, the box center, the padded crop size
   *   (`scale`, as `[width, height]` in source pixels) and the model input
   *   size as `[height, width]`.
   */
  private preprocess(
    srcCanvas: HTMLCanvasElement,
    bbox: BBox
  ): { tensor: Float32Array; center: [number, number]; scale: [number, number]; inputSize: [number, number] } {
    const [inputH, inputW] = this.modelInputSize; // [height, width]

    const bboxWidth = bbox.x2 - bbox.x1;
    const bboxHeight = bbox.y2 - bbox.y1;

    const center: [number, number] = [
      bbox.x1 + bboxWidth / 2,
      bbox.y1 + bboxHeight / 2,
    ];

    // Pad the box by 25% along its dominant side, then derive the other side
    // so that the crop has exactly the model's aspect ratio.
    const bboxAspectRatio = bboxWidth / bboxHeight;
    const modelAspectRatio = inputW / inputH;

    let scaleW: number;
    let scaleH: number;
    if (bboxAspectRatio > modelAspectRatio) {
      scaleW = bboxWidth * 1.25;
      scaleH = scaleW / modelAspectRatio;
    } else {
      scaleH = bboxHeight * 1.25;
      scaleW = scaleH * modelAspectRatio;
    }
    const scale: [number, number] = [scaleW, scaleH];

    // Destination canvas at model resolution, pre-filled with white so that
    // crop areas outside the source image are white.
    const canvas = document.createElement('canvas');
    const ctx = RTMPose.context2d(canvas);
    canvas.width = inputW;
    canvas.height = inputH;
    ctx.fillStyle = '#FFFFFF';
    ctx.fillRect(0, 0, inputW, inputH);

    // Draw the crop, scaled to the model input size.
    ctx.drawImage(
      srcCanvas,
      center[0] - scaleW / 2,
      center[1] - scaleH / 2,
      scaleW,
      scaleH,
      0,
      0,
      inputW,
      inputH
    );

    const rgba = ctx.getImageData(0, 0, inputW, inputH).data;

    // RGBA interleaved -> normalized RGB planes; alpha is dropped.
    const planeSize = inputW * inputH;
    const data = new Float32Array(planeSize * 3);
    for (let i = 0; i < rgba.length; i += 4) {
      const pixelIndex = i / 4;
      for (let c = 0; c < 3; c++) {
        data[c * planeSize + pixelIndex] =
          (rgba[i + c] - this.defaultMean[c]) / this.defaultStd[c];
      }
    }

    return {
      tensor: data,
      center,
      scale,
      inputSize: [inputH, inputW],
    };
  }

  /**
   * Decodes SimCC outputs for a single crop.
   *
   * @param simccX - X-axis scores, read as `[keypoint][bin]` with no batch
   *   offset (one crop per inference).
   * @param simccY - Y-axis scores, same layout.
   * @param outputShapeX - Dims of `simccX`; index 1 is the keypoint count,
   *   index 2 the number of x bins.
   * @param outputShapeY - Dims of `simccY`; index 2 is the number of y bins.
   * @param center - Crop center in original-image coordinates.
   * @param scale - Crop size `[width, height]` in original-image pixels.
   * @returns One `[x, y]` and one score per keypoint; the score is the mean
   *   of the best x-bin and best y-bin values.
   */
  private postprocess(
    simccX: Float32Array,
    simccY: Float32Array,
    outputShapeX: number[],
    outputShapeY: number[],
    center: [number, number],
    scale: [number, number]
  ): { keypoints: number[][]; scores: number[] } {
    const numKeypoints = outputShapeX[1];
    const wx = outputShapeX[2];
    const wy = outputShapeY[2];

    const keypoints: number[][] = [];
    const scores: number[] = [];

    for (let k = 0; k < numKeypoints; k++) {
      // Best bin along x.
      let maxX = -Infinity;
      let argmaxX = 0;
      for (let i = 0; i < wx; i++) {
        const val = simccX[k * wx + i];
        if (val > maxX) {
          maxX = val;
          argmaxX = i;
        }
      }

      // Best bin along y.
      let maxY = -Infinity;
      let argmaxY = 0;
      for (let i = 0; i < wy; i++) {
        const val = simccY[k * wy + i];
        if (val > maxY) {
          maxY = val;
          argmaxY = i;
        }
      }

      // Bin index -> [0, 1) within the crop -> original-image coordinates.
      const normX = argmaxX / wx;
      const normY = argmaxY / wy;

      keypoints.push([
        (normX - 0.5) * scale[0] + center[0],
        (normY - 0.5) * scale[1] + center[1],
      ]);
      scores.push(0.5 * (maxX + maxY));
    }

    return { keypoints, scores };
  }

  /**
   * Converts one person's keypoints from COCO ordering to OpenPose ordering.
   *
   * The first 17 entries are treated as COCO-17 body joints and become the
   * 18 OpenPose body joints: a neck is inserted at index 1 as the midpoint of
   * the two shoulders (its score is the mean of the shoulder scores) and the
   * remaining joints are reordered. Any entries beyond the first 17 are
   * appended unchanged after the 18 body joints.
   *
   * Inputs with fewer than 17 keypoints cannot be COCO-17 and are returned
   * as-is.
   */
  private convertCocoToOpenpose(
    keypoints: number[][],
    scores: number[]
  ): { keypoints: number[][]; scores: number[] } {
    const bodyCount = RTMPose.COCO_BODY_KEYPOINTS;
    if (keypoints.length < bodyCount || scores.length < bodyCount) {
      return { keypoints, scores };
    }

    const leftShoulder = keypoints[5];
    const rightShoulder = keypoints[6];

    const openposeKeypoints: number[][] = [];
    const openposeScores: number[] = [];

    for (const cocoIdx of RTMPose.OPENPOSE18_FROM_COCO17) {
      if (cocoIdx === RTMPose.NECK) {
        openposeKeypoints.push([
          (leftShoulder[0] + rightShoulder[0]) / 2,
          (leftShoulder[1] + rightShoulder[1]) / 2,
        ]);
        openposeScores.push((scores[5] + scores[6]) / 2);
      } else {
        openposeKeypoints.push([...keypoints[cocoIdx]]);
        openposeScores.push(scores[cocoIdx]);
      }
    }

    // Non-body keypoints (if the model produces any) keep their order.
    for (let i = bodyCount; i < keypoints.length; i++) {
      openposeKeypoints.push([...keypoints[i]]);
      openposeScores.push(scores[i]);
    }

    return { keypoints: openposeKeypoints, scores: openposeScores };
  }
}
|