rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* YOLOX object detection model
|
|
3
|
+
* Based on https://github.com/IDEA-Research/DWPose/blob/opencv_onnx/ControlNet-v1-1-nightly/annotator/dwpose/cv_ox_det.py
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { BaseTool } from '../core/base';
|
|
7
|
+
import { multiclassNms } from '../core/postprocessing';
|
|
8
|
+
import { BBox, BackendType } from '../types/index';
|
|
9
|
+
|
|
10
|
+
/**
 * YOLOX person/object detector running on top of {@link BaseTool}.
 *
 * The visible code expects an "end2end" export whose first output tensor has
 * shape `[1, num_dets, 5]` with rows `[x1, y1, x2, y2, score]` (see `call`).
 * Boxes are returned in the coordinate system of the caller's original image.
 */
export class YOLOX extends BaseTool {
  /**
   * NMS IoU threshold. Stored for API compatibility; nothing in this class
   * reads it (the end2end output handled by `call` is used as-is).
   */
  private nmsThr: number;
  /** Minimum confidence a detection must exceed to be returned. */
  public scoreThr: number;
  /** Set once `init()` has completed so `call()` can lazily initialise. */
  private initialized: boolean = false;

  /**
   * @param onnxModel      Model location, forwarded to `BaseTool`.
   * @param modelInputSize Network input size as `[height, width]`.
   * @param nmsThr         NMS threshold (kept for compatibility, see field).
   * @param scoreThr       Confidence threshold applied to detections.
   * @param backend        Execution backend, forwarded to `BaseTool`.
   */
  constructor(
    onnxModel: string,
    modelInputSize: [number, number] = [640, 640],
    nmsThr: number = 0.45,
    scoreThr: number = 0.3, // Lower default threshold
    backend: BackendType = 'webgpu'
  ) {
    // NOTE(review): the two `null` arguments are BaseTool parameters not
    // visible from this file (presumably mean/std) - confirm against base.ts.
    super(onnxModel, modelInputSize, null, null, backend);
    this.nmsThr = nmsThr;
    this.scoreThr = scoreThr;
  }

  /** Initialises the underlying session and marks the detector as ready. */
  async init(): Promise<void> {
    // Web version - model path is direct URL
    await super.init();
    this.initialized = true;
  }

  /**
   * Runs detection on one image.
   *
   * @param image     Interleaved 3-channel pixel data (`height * width * 3`).
   * @param imgWidth  Width of `image` in pixels.
   * @param imgHeight Height of `image` in pixels.
   * @returns Boxes (original-image coordinates) whose score exceeds
   *          `scoreThr`; an empty array when the model output does not have
   *          the expected `[1, num_dets, 5]` float32 layout.
   */
  async call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<BBox[]> {
    if (!this.initialized) {
      await this.init();
    }

    const { paddedImg, ratio } = this.preprocess(image, imgWidth, imgHeight);
    const outputs = await this.inference(paddedImg);

    console.log(`YOLOX: Got ${outputs.length} outputs`);
    for (let i = 0; i < outputs.length; i++) {
      console.log(` Output[${i}]: dims=[${outputs[i].dims}], type=${outputs[i].type}`);
    }

    // For end2end YOLOX with built-in NMS:
    //   Output 0: [1, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
    //   Output 1: [1, num_dets] or [1, 1] with count
    const detOutput = outputs[0];
    if (!detOutput) {
      // Guard: dereferencing a missing tensor would otherwise throw a TypeError.
      console.warn('YOLOX: inference returned no outputs');
      return [];
    }

    const detShape = detOutput.dims; // [1, num_dets, 5]
    console.log(`YOLOX: detShape=[${detShape}], ratio=${ratio}`);

    const isEnd2End =
      detShape.length === 3 && detShape[2] === 5 && detOutput.type === 'float32';
    if (!isEnd2End) {
      // Make the "unsupported model export" case visible instead of silently
      // reporting zero detections.
      console.warn(
        `YOLOX: unsupported output layout dims=[${detShape}], type=${detOutput.type}; returning no boxes`
      );
      return [];
    }

    const detArray = detOutput.data as Float32Array;
    const numDets = detShape[1];
    const boxes: BBox[] = [];

    console.log(`YOLOX: Raw detections (first 5):`);
    for (let i = 0; i < Math.min(5, numDets); i++) {
      const baseIdx = i * 5;
      const x1 = detArray[baseIdx];
      const y1 = detArray[baseIdx + 1];
      const x2 = detArray[baseIdx + 2];
      const y2 = detArray[baseIdx + 3];
      const score = detArray[baseIdx + 4];
      console.log(` [${i}] raw=[${x1.toFixed(2)}, ${y1.toFixed(2)}, ${x2.toFixed(2)}, ${y2.toFixed(2)}] score=${score.toFixed(4)}`);
    }

    for (let i = 0; i < numDets; i++) {
      const baseIdx = i * 5;
      // Undo the letterbox scale so boxes refer to the original image.
      const x1 = detArray[baseIdx] / ratio;
      const y1 = detArray[baseIdx + 1] / ratio;
      const x2 = detArray[baseIdx + 2] / ratio;
      const y2 = detArray[baseIdx + 3] / ratio;
      const score = detArray[baseIdx + 4];

      // Honour the configured threshold (its default, 0.3, matches the value
      // that used to be hard-coded here) and drop degenerate boxes.
      if (score > this.scoreThr && x2 > x1 && y2 > y1) {
        boxes.push({ x1, y1, x2, y2 });
        console.log(` [${i}] ACCEPTED: [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}] score=${score.toFixed(3)}`);
      } else if (score > 0.1) {
        console.log(` [${i}] rejected (score=${score.toFixed(3)}): [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}]`);
      }
    }

    console.log(`YOLOX: Found ${boxes.length} boxes`);
    return boxes;
  }

  /**
   * Letterboxes the image into the model input size and converts it to a
   * planar (CHW) float tensor scaled to `[0, 1]` with the first and third
   * channels swapped.
   *
   * @param img       Interleaved 3-channel pixels; presumably RGB, since the
   *                  swap is meant to yield BGR - confirm against callers.
   * @param imgWidth  Width of `img` in pixels.
   * @param imgHeight Height of `img` in pixels.
   * @returns `paddedImg` - CHW float data of size `3 * inputH * inputW`;
   *          `ratio` - scale factor applied to the original image.
   */
  private preprocess(
    img: Uint8Array,
    imgWidth: number,
    imgHeight: number
  ): { paddedImg: Float32Array; ratio: number } {
    const [inputH, inputW] = this.modelInputSize;

    let canvas: Uint8Array;
    let ratio: number;

    if (imgHeight === inputH && imgWidth === inputW) {
      // Already at network resolution: use the pixels as they are.
      canvas = img;
      ratio = 1.0;
    } else {
      // Grey (114) canvas; the resized image is written into the top-left corner.
      canvas = new Uint8Array(inputH * inputW * 3).fill(114);

      ratio = Math.min(inputH / imgHeight, inputW / imgWidth);
      const resizedW = Math.floor(imgWidth * ratio);
      const resizedH = Math.floor(imgHeight * ratio);

      // Nearest-neighbour resize. Source indices are clamped so floating
      // point error in `x / ratio` can never read past the last row/column.
      for (let y = 0; y < resizedH; y++) {
        const srcY = Math.min(imgHeight - 1, Math.floor(y / ratio));
        for (let x = 0; x < resizedW; x++) {
          const srcX = Math.min(imgWidth - 1, Math.floor(x / ratio));
          const dst = (y * inputW + x) * 3;
          const src = (srcY * imgWidth + srcX) * 3;
          canvas[dst] = img[src];
          canvas[dst + 1] = img[src + 1];
          canvas[dst + 2] = img[src + 2];
        }
      }
    }

    // Single pass: normalise to [0, 1], swap channel 0 <-> 2 and transpose
    // HWC -> CHW. Equivalent to normalising into a temporary buffer and then
    // transposing, without the extra allocation and second sweep.
    const planeSize = inputH * inputW;
    const chw = new Float32Array(planeSize * 3);
    for (let px = 0; px < planeSize; px++) {
      const src = px * 3;
      chw[px] = canvas[src + 2] / 255.0; // plane 0 (B)
      chw[planeSize + px] = canvas[src + 1] / 255.0; // plane 1 (G)
      chw[2 * planeSize + px] = canvas[src] / 255.0; // plane 2 (R)
    }

    console.log(`YOLOX preprocess: input ${imgWidth}x${imgHeight} -> ${inputW}x${inputH}, ratio=${ratio} (BGR)`);

    return { paddedImg: chw, ratio };
  }

  /**
   * Generic decoder for `[1, num_boxes, >=5]` outputs with rows
   * `[x1, y1, x2, y2, score, (classId)]`. Not called by `call()` in this
   * file; kept as an alternative decoding path.
   *
   * Keeps boxes scoring at least `scoreThr`, keeps only class 0 when a class
   * column is present, drops degenerate / out-of-frame boxes and clamps the
   * rest to the letterboxed frame expressed in original-image coordinates.
   *
   * @param outputs Tensor-like object exposing `data` and `dims`.
   * @param ratio   Scale factor returned by `preprocess`.
   */
  private postprocess(
    outputs: { data: ArrayLike<number>; dims: readonly number[] },
    ratio: number
  ): BBox[] {
    // Reuse the buffer when it is already float32 instead of copying it.
    const outputArray =
      outputs.data instanceof Float32Array ? outputs.data : new Float32Array(outputs.data);
    const outputShape = outputs.dims;

    console.log(`YOLOX output shape: [${outputShape}], ratio: ${ratio}`);
    console.log(`First 20 values: ${Array.from(outputArray.slice(0, 20)).map(v => v.toFixed(4)).join(', ')}`);

    // outputShape: [1, num_boxes, 5] or [1, num_boxes, 6]
    // For YOLOX with NMS: [batch, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
    if (outputShape.length !== 3 || outputShape[2] < 5) {
      return [];
    }

    const numBoxes = outputShape[1];
    const stride = outputShape[2];
    const hasClassInfo = stride >= 6;
    const boxes: BBox[] = [];

    // Extent of the model input frame mapped back to original-image scale.
    // The clamp below previously used `outputShape[1] * ratio` and
    // `outputShape[0] * ratio` (detection count / batch size), which are not
    // image bounds and collapsed every box.
    const maxX = this.modelInputSize[1] / ratio;
    const maxY = this.modelInputSize[0] / ratio;

    console.log(`Processing ${numBoxes} boxes, hasClassInfo: ${hasClassInfo}`);

    for (let i = 0; i < numBoxes; i++) {
      const baseIdx = i * stride;
      const score = outputArray[baseIdx + 4];

      // Filter by score threshold
      if (score < this.scoreThr) continue;

      // Check class if available
      if (hasClassInfo) {
        const classId = outputArray[baseIdx + 5];
        if (classId !== 0) continue; // Only person class
      }

      const x1 = outputArray[baseIdx] / ratio;
      const y1 = outputArray[baseIdx + 1] / ratio;
      const x2 = outputArray[baseIdx + 2] / ratio;
      const y2 = outputArray[baseIdx + 3] / ratio;

      // Validate box coordinates
      if (x1 >= x2 || y1 >= y2) continue;
      if (x2 < 0 || y2 < 0 || x1 > maxX || y1 > maxY) continue;

      console.log(`Found box: [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}] score: ${score.toFixed(3)}`);

      boxes.push({
        x1: Math.max(0, x1),
        y1: Math.max(0, y1),
        x2: Math.min(maxX, x2),
        y2: Math.min(maxY, y2),
      });
    }

    console.log(`Total boxes found: ${boxes.length}`);
    return boxes;
  }
}
|