rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,184 @@
1
+ /**
2
+ * RTMPose3D model for 3D pose estimation
3
+ * Extends RTMPose with Z-axis prediction
4
+ * Based on rtmlib RTMPose3d class
5
+ */
6
+ import { BaseTool } from '../core/base';
7
+ export class RTMPose3D extends BaseTool {
8
+ constructor(onnxModel, modelInputSize = [288, 384], // [width=288, height=384] - creates tensor [1,3,384,288]
9
+ toOpenpose = false, backend = 'webgpu', zRange) {
10
+ super(onnxModel, modelInputSize, null, null, backend);
11
+ this.simccSplitRatio = 2.0;
12
+ this.zRange = 2.1744869;
13
+ this.initialized = false;
14
+ this.defaultMean = [123.675, 116.28, 103.53];
15
+ this.defaultStd = [58.395, 57.12, 57.375];
16
+ this.toOpenpose = toOpenpose;
17
+ if (zRange !== undefined) {
18
+ this.zRange = zRange;
19
+ }
20
+ }
21
+ async init() {
22
+ await super.init();
23
+ this.initialized = true;
24
+ }
25
+ async call(image, imgWidth, imgHeight, bboxes = []) {
26
+ if (!this.initialized) {
27
+ await this.init();
28
+ }
29
+ if (bboxes.length === 0) {
30
+ bboxes = [{ x1: 0, y1: 0, x2: imgWidth, y2: imgHeight }];
31
+ }
32
+ const allKeypoints = [];
33
+ const allScores = [];
34
+ const allKeypointsSimcc = [];
35
+ const allKeypoints2d = [];
36
+ for (const bbox of bboxes) {
37
+ const { tensor, center, scale, inputSize } = this.preprocess(image, imgWidth, imgHeight, bbox);
38
+ const outputs = await this.inference(tensor, inputSize);
39
+ const { keypoints, scores, keypointsSimcc, keypoints2d } = this.postprocess(outputs[0].data, outputs[1].data, outputs[2].data, outputs[0].dims, outputs[1].dims, outputs[2].dims, center, scale);
40
+ allKeypoints.push(keypoints);
41
+ allScores.push(scores);
42
+ allKeypointsSimcc.push(keypointsSimcc);
43
+ allKeypoints2d.push(keypoints2d);
44
+ }
45
+ return {
46
+ keypoints: allKeypoints,
47
+ scores: allScores,
48
+ keypointsSimcc: allKeypointsSimcc,
49
+ keypoints2d: allKeypoints2d,
50
+ };
51
+ }
52
+ preprocess(img, imgWidth, imgHeight, bbox) {
53
+ const [inputH, inputW] = this.modelInputSize;
54
+ // Center and scale from bbox with padding (1.25 as in Python)
55
+ const center = [
56
+ bbox.x1 + (bbox.x2 - bbox.x1) / 2,
57
+ bbox.y1 + (bbox.y2 - bbox.y1) / 2,
58
+ ];
59
+ const bboxWidth = bbox.x2 - bbox.x1;
60
+ const bboxHeight = bbox.y2 - bbox.y1;
61
+ const padding = 1.25;
62
+ // Adjust scale to maintain aspect ratio
63
+ const aspectRatio = inputW / inputH;
64
+ const bboxAspectRatio = bboxWidth / bboxHeight;
65
+ let scaleW, scaleH;
66
+ if (bboxAspectRatio > aspectRatio) {
67
+ scaleW = bboxWidth * padding;
68
+ scaleH = scaleW / aspectRatio;
69
+ }
70
+ else {
71
+ scaleH = bboxHeight * padding;
72
+ scaleW = scaleH * aspectRatio;
73
+ }
74
+ const scale = [scaleW, scaleH];
75
+ // Create canvas for cropping
76
+ const canvas = document.createElement('canvas');
77
+ const ctx = canvas.getContext('2d');
78
+ canvas.width = inputW;
79
+ canvas.height = inputH;
80
+ ctx.fillStyle = '#FFFFFF';
81
+ ctx.fillRect(0, 0, inputW, inputH);
82
+ // Create source canvas from image data
83
+ const srcCanvas = document.createElement('canvas');
84
+ const srcCtx = srcCanvas.getContext('2d');
85
+ srcCanvas.width = imgWidth;
86
+ srcCanvas.height = imgHeight;
87
+ const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
88
+ srcImageData.data.set(img);
89
+ srcCtx.putImageData(srcImageData, 0, 0);
90
+ // Calculate source region
91
+ const srcX = center[0] - scaleW / 2;
92
+ const srcY = center[1] - scaleH / 2;
93
+ // Draw cropped and scaled region using warpAffine-like transformation
94
+ this.warpAffine(ctx, srcCanvas, center, scale, inputW, inputH, srcX, srcY);
95
+ const imageData = ctx.getImageData(0, 0, inputW, inputH);
96
+ // Normalize with mean/std
97
+ const data = new Float32Array(inputW * inputH * 3);
98
+ for (let i = 0; i < imageData.data.length; i += 4) {
99
+ const pixelIndex = i / 4;
100
+ for (let c = 0; c < 3; c++) {
101
+ const value = imageData.data[i + c];
102
+ data[c * inputW * inputH + pixelIndex] =
103
+ (value - this.defaultMean[c]) / this.defaultStd[c];
104
+ }
105
+ }
106
+ return {
107
+ tensor: data,
108
+ center,
109
+ scale,
110
+ inputSize: [inputH, inputW],
111
+ };
112
+ }
113
+ warpAffine(ctx, srcCanvas, center, scale, dstWidth, dstHeight, srcX, srcY) {
114
+ // Simple affine transform using canvas drawImage
115
+ // For more accurate transformation, OpenCV bindings would be needed
116
+ ctx.drawImage(srcCanvas, srcX, srcY, scale[0], scale[1], 0, 0, dstWidth, dstHeight);
117
+ }
118
+ postprocess(simccX, simccY, simccZ, outputShapeX, outputShapeY, outputShapeZ, center, scale) {
119
+ const numKeypoints = outputShapeX[1];
120
+ const wx = outputShapeX[2];
121
+ const wy = outputShapeY[2];
122
+ const wz = outputShapeZ[2];
123
+ const keypoints = [];
124
+ const scores = [];
125
+ const keypointsSimcc = [];
126
+ const keypoints2d = [];
127
+ for (let k = 0; k < numKeypoints; k++) {
128
+ // Find argmax for x
129
+ let maxX = -Infinity;
130
+ let argmaxX = 0;
131
+ for (let i = 0; i < wx; i++) {
132
+ const val = simccX[k * wx + i];
133
+ if (val > maxX) {
134
+ maxX = val;
135
+ argmaxX = i;
136
+ }
137
+ }
138
+ // Find argmax for y
139
+ let maxY = -Infinity;
140
+ let argmaxY = 0;
141
+ for (let i = 0; i < wy; i++) {
142
+ const val = simccY[k * wy + i];
143
+ if (val > maxY) {
144
+ maxY = val;
145
+ argmaxY = i;
146
+ }
147
+ }
148
+ // Find argmax for z
149
+ let maxZ = -Infinity;
150
+ let argmaxZ = 0;
151
+ for (let i = 0; i < wz; i++) {
152
+ const val = simccZ[k * wz + i];
153
+ if (val > maxZ) {
154
+ maxZ = val;
155
+ argmaxZ = i;
156
+ }
157
+ }
158
+ // Score is max of x and y (as in Python)
159
+ const score = maxX > maxY ? maxX : maxY;
160
+ // Normalize to [0, 1] and transform to original image coordinates
161
+ const normX = argmaxX / wx;
162
+ const normY = argmaxY / wy;
163
+ const normZ = argmaxZ / wz;
164
+ // Apply split ratio
165
+ const kptX = (normX - 0.5) * this.simccSplitRatio;
166
+ const kptY = (normY - 0.5) * this.simccSplitRatio;
167
+ const kptZ = (normZ - 0.5) * this.simccSplitRatio;
168
+ // Convert Z to metric scale
169
+ // Python uses model_input_size[-1] which is width (384) in (H, W) format
170
+ // TypeScript uses modelInputSize[0] which is width (288) in [W, H] format
171
+ const kptZMetric = (normZ / (this.modelInputSize[0] / 2) - 1) * this.zRange;
172
+ // 3D keypoint
173
+ keypoints.push([kptX, kptY, kptZMetric]);
174
+ // SimCC coordinates (normalized)
175
+ keypointsSimcc.push([normX, normY, normZ]);
176
+ // 2D keypoint in original image coordinates
177
+ const kpt2dX = normX * scale[0] + center[0] - 0.5 * scale[0];
178
+ const kpt2dY = normY * scale[1] + center[1] - 0.5 * scale[1];
179
+ keypoints2d.push([kpt2dX, kpt2dY]);
180
+ scores.push(score);
181
+ }
182
+ return { keypoints, scores, keypointsSimcc, keypoints2d };
183
+ }
184
+ }
@@ -0,0 +1,23 @@
1
+ /**
2
+ * YOLO12 object detection model
3
+ * Based on YOLO12 architecture for person detection
4
+ * Compatible with Ultralytics YOLOv12 ONNX export
5
+ * Uses onnxruntime-web for inference
6
+ */
7
+ import { BaseTool } from '../core/base';
8
+ import { Detection, BackendType } from '../types/index';
9
+ export declare class YOLO12 extends BaseTool {
10
+ private nmsThr;
11
+ scoreThr: number;
12
+ private initialized;
13
+ private paddingX;
14
+ private paddingY;
15
+ private scaleX;
16
+ private scaleY;
17
+ constructor(modelPath: string, modelInputSize?: [number, number], nmsThr?: number, scoreThr?: number, backend?: BackendType);
18
+ init(): Promise<void>;
19
+ call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<Detection[]>;
20
+ private preprocess;
21
+ private applyNms;
22
+ }
23
+ //# sourceMappingURL=yolo12.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yolo12.d.ts","sourceRoot":"","sources":["../../src/models/yolo12.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EAAQ,SAAS,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE9D,qBAAa,MAAO,SAAQ,QAAQ;IAClC,OAAO,CAAC,MAAM,CAAS;IAChB,QAAQ,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,WAAW,CAAkB;IACrC,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,QAAQ,CAAa;IAC7B,OAAO,CAAC,MAAM,CAAa;IAC3B,OAAO,CAAC,MAAM,CAAa;gBAGzB,SAAS,EAAE,MAAM,EACjB,cAAc,GAAE,CAAC,MAAM,EAAE,MAAM,CAAc,EAC7C,MAAM,GAAE,MAAa,EACrB,QAAQ,GAAE,MAAY,EACtB,OAAO,GAAE,WAAsB;IAO3B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAKrB,IAAI,CACR,KAAK,EAAE,UAAU,EACjB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,SAAS,EAAE,CAAC;IA8DvB,OAAO,CAAC,UAAU;IAoElB,OAAO,CAAC,QAAQ;CAiDjB"}
@@ -0,0 +1,165 @@
1
+ /**
2
+ * YOLO12 object detection model
3
+ * Based on YOLO12 architecture for person detection
4
+ * Compatible with Ultralytics YOLOv12 ONNX export
5
+ * Uses onnxruntime-web for inference
6
+ */
7
+ import { BaseTool } from '../core/base';
8
export class YOLO12 extends BaseTool {
    /**
     * YOLO12 person detector for Ultralytics YOLOv12 ONNX exports whose
     * output rows are [x1, y1, x2, y2, score, class_id].
     *
     * @param {string} modelPath - ONNX model path/URL forwarded to BaseTool.
     * @param {[number, number]} [modelInputSize=[640, 640]] - Model input size.
     * @param {number} [nmsThr=0.45] - IoU threshold for the NMS pass.
     * @param {number} [scoreThr=0.5] - Minimum confidence to keep a detection.
     * @param {string} [backend='webgpu'] - Inference backend for BaseTool.
     */
    constructor(modelPath, modelInputSize = [640, 640], nmsThr = 0.45, scoreThr = 0.5, backend = 'webgpu') {
        super(modelPath, modelInputSize, null, null, backend);
        this.initialized = false;
        // Letterbox state written by preprocess() and read back in call()
        // to map padded-space boxes to source-image coordinates.
        this.paddingX = 0;
        this.paddingY = 0;
        this.scaleX = 1;
        this.scaleY = 1;
        this.nmsThr = nmsThr;
        this.scoreThr = scoreThr;
    }
    /** Creates the ONNX session via BaseTool.init() and marks the tool ready. */
    async init() {
        await super.init();
        this.initialized = true;
    }
    /**
     * Detects persons (COCO class 0) in an RGB pixel buffer.
     *
     * @param {Uint8Array} image - RGB buffer, length imgWidth * imgHeight * 3.
     * @param {number} imgWidth - Source image width in pixels.
     * @param {number} imgHeight - Source image height in pixels.
     * @returns {Promise<Array<{bbox: {x1,y1,x2,y2}, score: number, classId: number}>>}
     *     NMS-filtered detections in source-image coordinates.
     */
    async call(image, imgWidth, imgHeight) {
        if (!this.initialized) {
            await this.init();
        }
        const { paddedImg } = this.preprocess(image, imgWidth, imgHeight);
        const outputs = await this.inference(paddedImg);
        // YOLO12 output format: [1, num_boxes, 6] where 6 = [x1, y1, x2, y2, score, class_id]
        const detOutput = outputs[0];
        const detShape = detOutput.dims;
        if (detShape.length !== 3 || detShape[2] !== 6 || detOutput.type !== 'float32') {
            console.error(`YOLO12: Unexpected output shape [${detShape}] or type ${detOutput.type}`);
            return [];
        }
        const detArray = detOutput.data;
        const numBoxes = detShape[1];
        const detections = [];
        for (let i = 0; i < numBoxes; i++) {
            const baseIdx = i * 6;
            const x1 = detArray[baseIdx];
            const y1 = detArray[baseIdx + 1];
            const x2 = detArray[baseIdx + 2];
            const y2 = detArray[baseIdx + 3];
            const score = detArray[baseIdx + 4];
            const classId = detArray[baseIdx + 5];
            // Filter by score threshold and class (0 = person in COCO)
            if (score < this.scoreThr || classId !== 0) {
                continue;
            }
            // Undo the letterbox: remove padding, then scale back to source pixels.
            const transformedX1 = (x1 - this.paddingX) * this.scaleX;
            const transformedY1 = (y1 - this.paddingY) * this.scaleY;
            const transformedX2 = (x2 - this.paddingX) * this.scaleX;
            const transformedY2 = (y2 - this.paddingY) * this.scaleY;
            // Discard degenerate (zero/negative area) boxes.
            if (transformedX1 >= transformedX2 || transformedY1 >= transformedY2) {
                continue;
            }
            detections.push({
                bbox: {
                    x1: Math.max(0, transformedX1),
                    y1: Math.max(0, transformedY1),
                    x2: Math.min(imgWidth, transformedX2),
                    y2: Math.min(imgHeight, transformedY2),
                },
                score,
                classId: Math.round(classId),
            });
        }
        // Apply NMS
        return this.applyNms(detections, this.nmsThr);
    }
    /**
     * Letterboxes the RGB buffer into the model input (black padding,
     * aspect ratio preserved, nearest-neighbor resize), normalizes to
     * [0, 1], and transposes HWC -> CHW.
     *
     * Side effects: records paddingX/paddingY/scaleX/scaleY on `this` for
     * the inverse mapping performed in call().
     *
     * @returns {{paddedImg: Float32Array, ratio: number}}
     */
    preprocess(img, imgWidth, imgHeight) {
        const [inputH, inputW] = this.modelInputSize;
        // Model-input-sized buffer, black (0) padding.
        const paddedImg = new Uint8Array(inputH * inputW * 3).fill(0);
        // Fit the image inside the input while preserving aspect ratio.
        const aspectRatio = imgWidth / imgHeight;
        const targetAspectRatio = inputW / inputH;
        let drawWidth, drawHeight;
        if (aspectRatio > targetAspectRatio) {
            // Image is wider - fit to width, add padding top/bottom
            drawWidth = inputW;
            drawHeight = Math.floor(inputW / aspectRatio);
            this.paddingX = 0;
            this.paddingY = (inputH - drawHeight) / 2;
        }
        else {
            // Image is taller - fit to height, add padding left/right
            drawHeight = inputH;
            drawWidth = Math.floor(inputH * aspectRatio);
            this.paddingX = (inputW - drawWidth) / 2;
            this.paddingY = 0;
        }
        // Scale factors from resized back to source pixels.
        this.scaleX = imgWidth / drawWidth;
        this.scaleY = imgHeight / drawHeight;
        // Nearest-neighbor resize into the padded buffer.
        for (let y = 0; y < drawHeight; y++) {
            for (let x = 0; x < drawWidth; x++) {
                const srcX = Math.floor(x * this.scaleX);
                const srcY = Math.floor(y * this.scaleY);
                const dstX = Math.floor(x + this.paddingX);
                const dstY = Math.floor(y + this.paddingY);
                for (let c = 0; c < 3; c++) {
                    paddedImg[(dstY * inputW + dstX) * 3 + c] = img[(srcY * imgWidth + srcX) * 3 + c];
                }
            }
        }
        // Normalize to [0, 1] and convert to float32
        const floatImg = new Float32Array(paddedImg.length);
        for (let i = 0; i < paddedImg.length; i++) {
            floatImg[i] = paddedImg[i] / 255.0;
        }
        // Transpose HWC to CHW
        const transposed = new Float32Array(3 * inputH * inputW);
        for (let c = 0; c < 3; c++) {
            for (let h = 0; h < inputH; h++) {
                for (let w = 0; w < inputW; w++) {
                    transposed[c * inputH * inputW + h * inputW + w] =
                        floatImg[h * inputW * 3 + w * 3 + c];
                }
            }
        }
        return { paddedImg: transposed, ratio: 1 };
    }
    /**
     * Greedy non-maximum suppression over score-sorted detections.
     *
     * Bug fix: the previous implementation suppressed neighbors with
     * `iou <= iouThreshold`, i.e. it kept overlapping duplicates and
     * discarded non-overlapping boxes. Standard NMS suppresses neighbors
     * whose IoU EXCEEDS the threshold.
     *
     * Note: mutates `detections` order (sorts in place by score descending).
     *
     * @param {Array} detections - Detections with {bbox, score}.
     * @param {number} iouThreshold - Overlap above which a box is suppressed.
     * @returns {Array} Surviving detections, highest score first.
     */
    applyNms(detections, iouThreshold) {
        if (detections.length === 0) {
            return [];
        }
        // Sort by score descending
        detections.sort((a, b) => b.score - a.score);
        const selected = [];
        const used = new Array(detections.length).fill(false);
        for (let i = 0; i < detections.length; i++) {
            if (used[i]) {
                continue;
            }
            selected.push(detections[i]);
            used[i] = true;
            const boxA = detections[i].bbox;
            for (let j = i + 1; j < detections.length; j++) {
                if (used[j]) {
                    continue;
                }
                const boxB = detections[j].bbox;
                // Intersection-over-union of boxA and boxB.
                const x1 = Math.max(boxA.x1, boxB.x1);
                const y1 = Math.max(boxA.y1, boxB.y1);
                const x2 = Math.min(boxA.x2, boxB.x2);
                const y2 = Math.min(boxA.y2, boxB.y2);
                const intersection = Math.max(0, x2 - x1) * Math.max(0, y2 - y1);
                const areaA = (boxA.x2 - boxA.x1) * (boxA.y2 - boxA.y1);
                const areaB = (boxB.x2 - boxB.x1) * (boxB.y2 - boxB.y1);
                const union = areaA + areaB - intersection;
                const iou = union > 0 ? intersection / union : 0;
                // Suppress j only when it overlaps the kept box too much.
                if (iou > iouThreshold) {
                    used[j] = true;
                }
            }
        }
        return selected;
    }
}
@@ -0,0 +1,18 @@
1
+ /**
2
+ * YOLOX object detection model
3
+ * Based on https://github.com/IDEA-Research/DWPose/blob/opencv_onnx/ControlNet-v1-1-nightly/annotator/dwpose/cv_ox_det.py
4
+ */
5
+ import { BaseTool } from '../core/base';
6
+ import { BBox, BackendType } from '../types/index';
7
+ export declare class YOLOX extends BaseTool {
8
+ private nmsThr;
9
+ scoreThr: number;
10
+ private initialized;
11
+ constructor(onnxModel: string, modelInputSize?: [number, number], nmsThr?: number, scoreThr?: number, // Lower default threshold
12
+ backend?: BackendType);
13
+ init(): Promise<void>;
14
+ call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<BBox[]>;
15
+ private preprocess;
16
+ private postprocess;
17
+ }
18
+ //# sourceMappingURL=yolox.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"yolox.d.ts","sourceRoot":"","sources":["../../src/models/yolox.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AAExC,OAAO,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAEnD,qBAAa,KAAM,SAAQ,QAAQ;IACjC,OAAO,CAAC,MAAM,CAAS;IAChB,QAAQ,EAAE,MAAM,CAAC;IACxB,OAAO,CAAC,WAAW,CAAkB;gBAGnC,SAAS,EAAE,MAAM,EACjB,cAAc,GAAE,CAAC,MAAM,EAAE,MAAM,CAAc,EAC7C,MAAM,GAAE,MAAa,EACrB,QAAQ,GAAE,MAAY,EAAG,0BAA0B;IACnD,OAAO,GAAE,WAAsB;IAO3B,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAMrB,IAAI,CAAC,KAAK,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAoEnF,OAAO,CAAC,UAAU;IA0DlB,OAAO,CAAC,WAAW;CAuDpB"}
@@ -0,0 +1,167 @@
1
+ /**
2
+ * YOLOX object detection model
3
+ * Based on https://github.com/IDEA-Research/DWPose/blob/opencv_onnx/ControlNet-v1-1-nightly/annotator/dwpose/cv_ox_det.py
4
+ */
5
+ import { BaseTool } from '../core/base';
6
export class YOLOX extends BaseTool {
    /**
     * YOLOX person detector for end2end ONNX exports with in-graph NMS
     * (primary output: [1, num_dets, 5] rows of [x1, y1, x2, y2, score]).
     *
     * @param {string} onnxModel - ONNX model path/URL forwarded to BaseTool.
     * @param {[number, number]} [modelInputSize=[640, 640]] - Model input size.
     * @param {number} [nmsThr=0.45] - Stored IoU threshold; the end2end export
     *     already applies NMS in-graph, so it is not used in this path.
     * @param {number} [scoreThr=0.3] - Minimum confidence to keep a detection.
     * @param {string} [backend='webgpu'] - Inference backend for BaseTool.
     */
    constructor(onnxModel, modelInputSize = [640, 640], nmsThr = 0.45, scoreThr = 0.3, // Lower default threshold
    backend = 'webgpu') {
        super(onnxModel, modelInputSize, null, null, backend);
        this.initialized = false;
        this.nmsThr = nmsThr;
        this.scoreThr = scoreThr;
    }
    /** Creates the ONNX session via BaseTool.init() and marks the tool ready. */
    async init() {
        // Web version - model path is direct URL
        await super.init();
        this.initialized = true;
    }
    /**
     * Detects persons in an RGB pixel buffer.
     *
     * Fixes over the previous revision:
     * - honors the configured `this.scoreThr` instead of a hard-coded 0.3
     *   (the `scoreThr` constructor parameter was silently ignored);
     * - removes leftover per-detection console.log debug spam.
     * Default behavior is unchanged (default scoreThr is still 0.3).
     *
     * @param {Uint8Array} image - RGB buffer, length imgWidth * imgHeight * 3.
     * @param {number} imgWidth - Source image width in pixels.
     * @param {number} imgHeight - Source image height in pixels.
     * @returns {Promise<Array<{x1,y1,x2,y2}>>} Boxes in source-image pixels;
     *     empty array when the output shape is not the expected end2end form.
     */
    async call(image, imgWidth, imgHeight) {
        if (!this.initialized) {
            await this.init();
        }
        const { paddedImg, ratio } = this.preprocess(image, imgWidth, imgHeight);
        const outputs = await this.inference(paddedImg);
        // For end2end YOLOX with built-in NMS:
        // Output 0: [1, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
        // Output 1: [1, num_dets] or [1, 1] with count
        const detOutput = outputs[0];
        const detShape = detOutput.dims; // [1, num_dets, 5]
        if (detShape.length === 3 && detShape[2] === 5 && detOutput.type === 'float32') {
            const detArray = detOutput.data;
            const numDets = detShape[1];
            const boxes = [];
            for (let i = 0; i < numDets; i++) {
                const baseIdx = i * 5;
                const score = detArray[baseIdx + 4];
                // Honor the configured threshold (was hard-coded to 0.3).
                if (score <= this.scoreThr) {
                    continue;
                }
                // Undo the letterbox scaling applied in preprocess().
                const x1 = detArray[baseIdx] / ratio;
                const y1 = detArray[baseIdx + 1] / ratio;
                const x2 = detArray[baseIdx + 2] / ratio;
                const y2 = detArray[baseIdx + 3] / ratio;
                // Discard degenerate (zero/negative area) boxes.
                if (x2 > x1 && y2 > y1) {
                    boxes.push({ x1, y1, x2, y2 });
                }
            }
            return boxes;
        }
        // Unexpected output layout — signal "no detections" rather than throwing.
        return [];
    }
    /**
     * Letterboxes the RGB buffer into the model input (gray 114 padding, as
     * in the official YOLOX preprocessing), swaps RGB -> BGR, normalizes to
     * [0, 1], and transposes HWC -> CHW.
     *
     * @returns {{paddedImg: Float32Array, ratio: number}} ratio maps source
     *     pixels to model-input pixels (boxes are divided by it afterwards).
     */
    preprocess(img, imgWidth, imgHeight) {
        const [inputH, inputW] = this.modelInputSize;
        let paddedImg;
        let ratio;
        if (imgHeight === inputH && imgWidth === inputW) {
            // Already at model size — skip the resize entirely.
            paddedImg = img;
            ratio = 1.0;
        }
        else {
            paddedImg = new Uint8Array(inputH * inputW * 3).fill(114);
            ratio = Math.min(inputH / imgHeight, inputW / imgWidth);
            const resizedW = Math.floor(imgWidth * ratio);
            const resizedH = Math.floor(imgHeight * ratio);
            // Resize image (simple nearest neighbor for now)
            for (let y = 0; y < resizedH; y++) {
                for (let x = 0; x < resizedW; x++) {
                    const srcX = Math.floor(x / ratio);
                    const srcY = Math.floor(y / ratio);
                    for (let c = 0; c < 3; c++) {
                        paddedImg[(y * inputW + x) * 3 + c] = img[(srcY * imgWidth + srcX) * 3 + c];
                    }
                }
            }
        }
        // YOLOX uses simple [0, 1] normalization; channels are swapped to BGR
        // to match the OpenCV-based reference pipeline.
        const floatImg = new Float32Array(paddedImg.length);
        for (let i = 0; i < paddedImg.length; i += 3) {
            floatImg[i] = paddedImg[i + 2] / 255.0; // B
            floatImg[i + 1] = paddedImg[i + 1] / 255.0; // G
            floatImg[i + 2] = paddedImg[i] / 255.0; // R
        }
        // Transpose HWC to CHW
        const transposed = new Float32Array(inputH * inputW * 3);
        for (let c = 0; c < 3; c++) {
            for (let h = 0; h < inputH; h++) {
                for (let w = 0; w < inputW; w++) {
                    transposed[c * inputH * inputW + h * inputW + w] = floatImg[h * inputW * 3 + w * 3 + c];
                }
            }
        }
        return { paddedImg: transposed, ratio };
    }
    /**
     * Decodes a generic [1, num_boxes, 5|6] detection output. Not used by
     * call() (which handles the end2end 5-column case inline) but kept for
     * exports with a class column.
     *
     * Fixes over the previous revision:
     * - boxes are clamped to the un-letterboxed image extent
     *   (modelInputSize / ratio); previously the clamp used
     *   outputShape[1] * ratio and outputShape[0] * ratio — i.e. the
     *   detection COUNT and the BATCH size — which are meaningless bounds;
     * - removes leftover console.log debug spam.
     *
     * @param {{data: ArrayLike<number>, dims: number[]}} outputs - Raw tensor.
     * @param {number} ratio - Letterbox ratio from preprocess().
     * @returns {Array<{x1,y1,x2,y2}>} Person boxes in source-image pixels.
     */
    postprocess(outputs, ratio) {
        const outputArray = new Float32Array(outputs.data);
        const outputShape = outputs.dims;
        // outputShape: [1, num_boxes, 5] or [1, num_boxes, 6]
        // For YOLOX with NMS: [batch, num_dets, 5] where 5 = [x1, y1, x2, y2, score]
        if (outputShape.length === 3 && outputShape[2] >= 5) {
            const numBoxes = outputShape[1];
            const boxes = [];
            const hasClassInfo = outputShape[2] >= 6;
            // Valid coordinate extent in source-image pixels: the model input
            // un-scaled by the letterbox ratio (matches the bounds check below).
            const maxX = this.modelInputSize[1] / ratio;
            const maxY = this.modelInputSize[0] / ratio;
            for (let i = 0; i < numBoxes; i++) {
                const baseIdx = i * outputShape[2];
                const score = outputArray[baseIdx + 4];
                // Filter by score threshold
                if (score < this.scoreThr)
                    continue;
                // Check class if available
                if (hasClassInfo) {
                    const classId = outputArray[baseIdx + 5];
                    if (classId !== 0)
                        continue; // Only person class
                }
                const x1 = outputArray[baseIdx] / ratio;
                const y1 = outputArray[baseIdx + 1] / ratio;
                const x2 = outputArray[baseIdx + 2] / ratio;
                const y2 = outputArray[baseIdx + 3] / ratio;
                // Validate box coordinates
                if (x1 >= x2 || y1 >= y2)
                    continue;
                if (x2 < 0 || y2 < 0 || x1 > maxX || y1 > maxY)
                    continue;
                boxes.push({
                    x1: Math.max(0, x1),
                    y1: Math.max(0, y1),
                    x2: Math.min(maxX, x2),
                    y2: Math.min(maxY, y2),
                });
            }
            return boxes;
        }
        return [];
    }
}