rtmlib-ts 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.gitattributes +1 -0
  2. package/README.md +202 -0
  3. package/dist/core/base.d.ts +20 -0
  4. package/dist/core/base.d.ts.map +1 -0
  5. package/dist/core/base.js +40 -0
  6. package/dist/core/file.d.ts +11 -0
  7. package/dist/core/file.d.ts.map +1 -0
  8. package/dist/core/file.js +111 -0
  9. package/dist/core/modelCache.d.ts +35 -0
  10. package/dist/core/modelCache.d.ts.map +1 -0
  11. package/dist/core/modelCache.js +161 -0
  12. package/dist/core/posePostprocessing.d.ts +12 -0
  13. package/dist/core/posePostprocessing.d.ts.map +1 -0
  14. package/dist/core/posePostprocessing.js +76 -0
  15. package/dist/core/postprocessing.d.ts +10 -0
  16. package/dist/core/postprocessing.d.ts.map +1 -0
  17. package/dist/core/postprocessing.js +70 -0
  18. package/dist/core/preprocessing.d.ts +14 -0
  19. package/dist/core/preprocessing.d.ts.map +1 -0
  20. package/dist/core/preprocessing.js +79 -0
  21. package/dist/index.d.ts +27 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +31 -0
  24. package/dist/models/rtmpose.d.ts +25 -0
  25. package/dist/models/rtmpose.d.ts.map +1 -0
  26. package/dist/models/rtmpose.js +185 -0
  27. package/dist/models/rtmpose3d.d.ts +28 -0
  28. package/dist/models/rtmpose3d.d.ts.map +1 -0
  29. package/dist/models/rtmpose3d.js +184 -0
  30. package/dist/models/yolo12.d.ts +23 -0
  31. package/dist/models/yolo12.d.ts.map +1 -0
  32. package/dist/models/yolo12.js +165 -0
  33. package/dist/models/yolox.d.ts +18 -0
  34. package/dist/models/yolox.d.ts.map +1 -0
  35. package/dist/models/yolox.js +167 -0
  36. package/dist/solution/animalDetector.d.ts +229 -0
  37. package/dist/solution/animalDetector.d.ts.map +1 -0
  38. package/dist/solution/animalDetector.js +663 -0
  39. package/dist/solution/body.d.ts +16 -0
  40. package/dist/solution/body.d.ts.map +1 -0
  41. package/dist/solution/body.js +52 -0
  42. package/dist/solution/bodyWithFeet.d.ts +16 -0
  43. package/dist/solution/bodyWithFeet.d.ts.map +1 -0
  44. package/dist/solution/bodyWithFeet.js +52 -0
  45. package/dist/solution/customDetector.d.ts +137 -0
  46. package/dist/solution/customDetector.d.ts.map +1 -0
  47. package/dist/solution/customDetector.js +342 -0
  48. package/dist/solution/hand.d.ts +14 -0
  49. package/dist/solution/hand.d.ts.map +1 -0
  50. package/dist/solution/hand.js +20 -0
  51. package/dist/solution/index.d.ts +10 -0
  52. package/dist/solution/index.d.ts.map +1 -0
  53. package/dist/solution/index.js +9 -0
  54. package/dist/solution/objectDetector.d.ts +172 -0
  55. package/dist/solution/objectDetector.d.ts.map +1 -0
  56. package/dist/solution/objectDetector.js +606 -0
  57. package/dist/solution/pose3dDetector.d.ts +145 -0
  58. package/dist/solution/pose3dDetector.d.ts.map +1 -0
  59. package/dist/solution/pose3dDetector.js +611 -0
  60. package/dist/solution/poseDetector.d.ts +198 -0
  61. package/dist/solution/poseDetector.d.ts.map +1 -0
  62. package/dist/solution/poseDetector.js +622 -0
  63. package/dist/solution/poseTracker.d.ts +22 -0
  64. package/dist/solution/poseTracker.d.ts.map +1 -0
  65. package/dist/solution/poseTracker.js +106 -0
  66. package/dist/solution/wholebody.d.ts +19 -0
  67. package/dist/solution/wholebody.d.ts.map +1 -0
  68. package/dist/solution/wholebody.js +82 -0
  69. package/dist/solution/wholebody3d.d.ts +22 -0
  70. package/dist/solution/wholebody3d.d.ts.map +1 -0
  71. package/dist/solution/wholebody3d.js +75 -0
  72. package/dist/types/index.d.ts +52 -0
  73. package/dist/types/index.d.ts.map +1 -0
  74. package/dist/types/index.js +5 -0
  75. package/dist/visualization/draw.d.ts +57 -0
  76. package/dist/visualization/draw.d.ts.map +1 -0
  77. package/dist/visualization/draw.js +400 -0
  78. package/dist/visualization/skeleton/coco133.d.ts +350 -0
  79. package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
  80. package/dist/visualization/skeleton/coco133.js +120 -0
  81. package/dist/visualization/skeleton/coco17.d.ts +180 -0
  82. package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
  83. package/dist/visualization/skeleton/coco17.js +48 -0
  84. package/dist/visualization/skeleton/halpe26.d.ts +278 -0
  85. package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
  86. package/dist/visualization/skeleton/halpe26.js +70 -0
  87. package/dist/visualization/skeleton/hand21.d.ts +196 -0
  88. package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
  89. package/dist/visualization/skeleton/hand21.js +51 -0
  90. package/dist/visualization/skeleton/index.d.ts +10 -0
  91. package/dist/visualization/skeleton/index.d.ts.map +1 -0
  92. package/dist/visualization/skeleton/index.js +9 -0
  93. package/dist/visualization/skeleton/openpose134.d.ts +357 -0
  94. package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
  95. package/dist/visualization/skeleton/openpose134.js +116 -0
  96. package/dist/visualization/skeleton/openpose18.d.ts +177 -0
  97. package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
  98. package/dist/visualization/skeleton/openpose18.js +47 -0
  99. package/docs/ANIMAL_DETECTOR.md +450 -0
  100. package/docs/CUSTOM_DETECTOR.md +568 -0
  101. package/docs/OBJECT_DETECTOR.md +373 -0
  102. package/docs/POSE3D_DETECTOR.md +458 -0
  103. package/docs/POSE_DETECTOR.md +442 -0
  104. package/examples/README.md +119 -0
  105. package/examples/index.html +746 -0
  106. package/package.json +51 -0
  107. package/playground/README.md +114 -0
  108. package/playground/app/favicon.ico +0 -0
  109. package/playground/app/globals.css +17 -0
  110. package/playground/app/layout.tsx +19 -0
  111. package/playground/app/page.tsx +1338 -0
  112. package/playground/eslint.config.mjs +18 -0
  113. package/playground/next.config.ts +34 -0
  114. package/playground/package-lock.json +6723 -0
  115. package/playground/package.json +27 -0
  116. package/playground/postcss.config.mjs +7 -0
  117. package/playground/tsconfig.json +34 -0
  118. package/src/core/base.ts +66 -0
  119. package/src/core/file.ts +141 -0
  120. package/src/core/modelCache.ts +189 -0
  121. package/src/core/posePostprocessing.ts +91 -0
  122. package/src/core/postprocessing.ts +93 -0
  123. package/src/core/preprocessing.ts +127 -0
  124. package/src/index.ts +69 -0
  125. package/src/models/rtmpose.ts +265 -0
  126. package/src/models/rtmpose3d.ts +289 -0
  127. package/src/models/yolo12.ts +220 -0
  128. package/src/models/yolox.ts +214 -0
  129. package/src/solution/animalDetector.ts +955 -0
  130. package/src/solution/body.ts +89 -0
  131. package/src/solution/bodyWithFeet.ts +89 -0
  132. package/src/solution/customDetector.ts +474 -0
  133. package/src/solution/hand.ts +52 -0
  134. package/src/solution/index.ts +10 -0
  135. package/src/solution/objectDetector.ts +816 -0
  136. package/src/solution/pose3dDetector.ts +890 -0
  137. package/src/solution/poseDetector.ts +892 -0
  138. package/src/solution/poseTracker.ts +172 -0
  139. package/src/solution/wholebody.ts +130 -0
  140. package/src/solution/wholebody3d.ts +125 -0
  141. package/src/types/index.ts +62 -0
  142. package/src/visualization/draw.ts +543 -0
  143. package/src/visualization/skeleton/coco133.ts +131 -0
  144. package/src/visualization/skeleton/coco17.ts +49 -0
  145. package/src/visualization/skeleton/halpe26.ts +71 -0
  146. package/src/visualization/skeleton/hand21.ts +52 -0
  147. package/src/visualization/skeleton/index.ts +10 -0
  148. package/src/visualization/skeleton/openpose134.ts +125 -0
  149. package/src/visualization/skeleton/openpose18.ts +48 -0
  150. package/tsconfig.json +32 -0
@@ -0,0 +1,622 @@
1
+ /**
2
+ * PoseDetector - Unified API for person detection and pose estimation
3
+ * Combines YOLO12 detector with RTMW pose model in a single optimized interface
4
+ *
5
+ * @example
6
+ * ```typescript
7
+ * // Initialize with default models (from HuggingFace)
8
+ * const detector = new PoseDetector();
9
+ * await detector.init();
10
+ *
11
+ * // Or with custom models
12
+ * const detector = new PoseDetector({
13
+ * detModel: 'models/yolov12n.onnx',
14
+ * poseModel: 'models/rtmlib/end2end.onnx',
15
+ * });
16
+ * await detector.init();
17
+ *
18
+ * // From canvas
19
+ * const results = await detector.detectFromCanvas(canvas);
20
+ *
21
+ * // From video element
22
+ * const results = await detector.detectFromVideo(videoElement);
23
+ *
24
+ * // From raw image data
25
+ * const results = await detector.detect(imageData, width, height);
26
+ * ```
27
+ */
28
import * as ort from 'onnxruntime-web';
import { getCachedModel, isModelCached } from '../core/modelCache';
// Configure ONNX Runtime Web
// WASM binaries come from the jsdelivr CDN, pinned to 1.23.0 so the runtime
// JS and the fetched .wasm artifacts cannot drift apart across releases.
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
// Enable SIMD kernels where the browser supports them.
ort.env.wasm.simd = true;
// Run inference on the calling thread instead of a proxy worker.
ort.env.wasm.proxy = false;
34
/**
 * COCO17 keypoint names
 * Index order matches the pose model's keypoint output: postprocessPose looks
 * up keypoint index k here, falling back to `keypoint_${k}` when the model
 * emits more than 17 keypoints.
 */
const KEYPOINT_NAMES = [
    'nose',
    'left_eye',
    'right_eye',
    'left_ear',
    'right_ear',
    'left_shoulder',
    'right_shoulder',
    'left_elbow',
    'right_elbow',
    'left_wrist',
    'right_wrist',
    'left_hip',
    'right_hip',
    'left_knee',
    'right_knee',
    'left_ankle',
    'right_ankle',
];
56
/**
 * Default configuration
 * Any field may be overridden via the PoseDetector constructor config.
 */
const DEFAULT_CONFIG = {
    // Default model weights hosted on HuggingFace; can be local paths/URLs.
    detModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
    poseModel: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/rtmpose/end2end.onnx',
    detInputSize: [416, 416], // Faster detection
    poseInputSize: [384, 288], // Required by model
    detConfidence: 0.5, // Minimum box confidence kept by postprocessYOLO
    nmsThreshold: 0.45, // IoU threshold used by applyNMS
    poseConfidence: 0.3, // Keypoints below this score are marked not visible
    backend: 'webgpu', // Default to WebGPU for better performance
    cache: true, // Route model bytes through the modelCache layer
};
70
export class PoseDetector {
    /**
     * @param {object} [config] - Partial configuration merged over
     *   DEFAULT_CONFIG (model URLs, input sizes, thresholds, backend, cache).
     */
    constructor(config) {
        this.detSession = null;
        this.poseSession = null;
        this.initialized = false;
        // Pre-allocated buffers for maximum performance
        this.canvas = null;
        this.ctx = null;
        this.poseCanvas = null;
        this.poseCtx = null;
        this.poseTensorBuffer = null;
        this.detInputSize = [416, 416];
        this.poseInputSize = [384, 288];
        // Timing/stat info from the most recent detect() call; null until then.
        this.lastStats = null;
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    /**
     * Fetch a model (optionally through the model cache) and build an ONNX
     * Runtime session for it. Shared by init() for both models.
     * @param {string} url - Model URL or path.
     * @param {{loading: string, cache: string, fetch: string, loaded: string}} labels
     *   - Exact label spellings used in the log / error messages (kept
     *     byte-identical to the historical per-model messages).
     * @returns {Promise<ort.InferenceSession>}
     */
    async _loadModel(url, labels) {
        console.log(`[PoseDetector] Loading ${labels.loading} model from: ${url}`);
        let buffer;
        if (this.config.cache) {
            const cached = await isModelCached(url);
            console.log(`[PoseDetector] ${labels.cache} model cache ${cached ? 'hit' : 'miss'}`);
            buffer = await getCachedModel(url);
        }
        else {
            const response = await fetch(url);
            if (!response.ok) {
                throw new Error(`Failed to fetch ${labels.fetch} model: HTTP ${response.status}`);
            }
            buffer = await response.arrayBuffer();
        }
        const session = await ort.InferenceSession.create(buffer, {
            executionProviders: [this.config.backend],
            graphOptimizationLevel: 'all',
        });
        console.log(`[PoseDetector] ${labels.loaded} model loaded, size: ${(buffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
        return session;
    }
    /**
     * Initialize both detection and pose models with pre-allocated resources.
     * Idempotent: after a successful init, later calls return immediately.
     * @throws {Error} When a model cannot be fetched or a session fails to build.
     */
    async init() {
        if (this.initialized)
            return;
        try {
            this.detSession = await this._loadModel(this.config.detModel, {
                loading: 'detection', cache: 'Det', fetch: 'det', loaded: 'Detection',
            });
            this.poseSession = await this._loadModel(this.config.poseModel, {
                loading: 'pose', cache: 'Pose', fetch: 'pose', loaded: 'Pose',
            });
            // Pre-allocate all resources
            const [detW, detH] = this.config.detInputSize;
            this.detInputSize = [detW, detH];
            const [poseW, poseH] = this.config.poseInputSize;
            this.poseInputSize = [poseW, poseH];
            // Main canvas for detection
            this.canvas = document.createElement('canvas');
            this.canvas.width = detW;
            this.canvas.height = detH;
            this.ctx = this.canvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false,
            });
            // Pose crop canvas (reused for each person). preprocessPose re-checks
            // these dimensions before every crop, because estimatePose reads the
            // configured size as [H, W] while this code reads it as [W, H]; for
            // non-square inputs the two disagree.
            this.poseCanvas = document.createElement('canvas');
            this.poseCanvas.width = poseW;
            this.poseCanvas.height = poseH;
            this.poseCtx = this.poseCanvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false,
            });
            // Pre-allocate pose tensor buffer (CHW float32)
            this.poseTensorBuffer = new Float32Array(3 * poseW * poseH);
            this.initialized = true;
            console.log(`[PoseDetector] ✅ Initialized (det:${detW}x${detH}, pose:${poseW}x${poseH})`);
        }
        catch (error) {
            console.error('[PoseDetector] ❌ Initialization failed:', error);
            throw error;
        }
    }
    /**
     * Draw a CanvasImageSource onto a canvas of the given size and run
     * detect() on the resulting pixels. Shared by the detectFrom* helpers.
     * @param {CanvasImageSource} source - Video/image/bitmap to rasterize.
     * @param {number} width - Target raster width in pixels.
     * @param {number} height - Target raster height in pixels.
     * @param {HTMLCanvasElement} [targetCanvas] - Optional scratch canvas.
     */
    _detectFromSource(source, width, height, targetCanvas) {
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(source, 0, 0, width, height);
        const imageData = ctx.getImageData(0, 0, width, height);
        return this.detect(new Uint8Array(imageData.data.buffer), width, height);
    }
    /**
     * Detect poses from HTMLCanvasElement
     * @param canvas - Canvas element containing the image
     * @returns Array of detected people with keypoints
     */
    async detectFromCanvas(canvas) {
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect poses from HTMLVideoElement
     * @param video - Video element to capture frame from
     * @param targetCanvas - Optional canvas for frame extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     * @throws {Error} When the video has no decoded frame yet (readyState < 2).
     */
    async detectFromVideo(video, targetCanvas) {
        if (video.readyState < 2) {
            throw new Error('Video not ready. Ensure video is loaded and playing.');
        }
        return this._detectFromSource(video, video.videoWidth, video.videoHeight, targetCanvas);
    }
    /**
     * Detect poses from HTMLImageElement
     * @param image - Image element to process
     * @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     * @throws {Error} When the image has not finished loading.
     */
    async detectFromImage(image, targetCanvas) {
        if (!image.complete || !image.naturalWidth) {
            throw new Error('Image not loaded. Ensure image is fully loaded.');
        }
        return this._detectFromSource(image, image.naturalWidth, image.naturalHeight, targetCanvas);
    }
    /**
     * Detect poses from ImageBitmap (efficient for blob/file uploads)
     * @param bitmap - ImageBitmap to process
     * @param targetCanvas - Optional canvas for bitmap extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     */
    async detectFromBitmap(bitmap, targetCanvas) {
        return this._detectFromSource(bitmap, bitmap.width, bitmap.height, targetCanvas);
    }
    /**
     * Detect poses from File (for file input uploads)
     * @param file - File object from input element
     * @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     */
    async detectFromFile(file, targetCanvas) {
        return new Promise((resolve, reject) => {
            const img = new Image();
            const url = URL.createObjectURL(file);
            img.onload = async () => {
                // Fix: release the blob URL once decoded (it was previously leaked).
                URL.revokeObjectURL(url);
                try {
                    resolve(await this.detectFromImage(img, targetCanvas));
                }
                catch (error) {
                    reject(error);
                }
            };
            img.onerror = () => {
                URL.revokeObjectURL(url);
                reject(new Error('Failed to load image from file'));
            };
            img.src = url;
        });
    }
    /**
     * Detect poses from Blob (for camera capture or downloads)
     * @param blob - Blob object to process
     * @param targetCanvas - Optional canvas for image extraction (creates one if not provided)
     * @returns Array of detected people with keypoints
     */
    async detectFromBlob(blob, targetCanvas) {
        const bitmap = await createImageBitmap(blob);
        try {
            return await this.detectFromBitmap(bitmap, targetCanvas);
        }
        finally {
            // Fix: release decoder memory even when detection throws.
            bitmap.close();
        }
    }
    /**
     * Detect people and estimate poses in a single call
     * @param imageData - Image data (Uint8Array RGB/RGBA)
     * @param width - Image width
     * @param height - Image height
     * @returns Array of detected people with keypoints (with a `stats` property attached)
     */
    async detect(imageData, width, height) {
        if (!this.initialized) {
            await this.init();
        }
        const startTime = performance.now();
        // Step 1: Detect people
        const detStart = performance.now();
        const bboxes = await this.detectPeople(imageData, width, height);
        const detTime = performance.now() - detStart;
        // Step 2: Estimate poses for each person.
        // Sequential on purpose: estimatePose reuses this.poseCanvas and
        // this.poseTensorBuffer, so concurrent calls would corrupt each other.
        const poseStart = performance.now();
        const people = [];
        for (const bbox of bboxes) {
            const keypoints = await this.estimatePose(imageData, width, height, bbox);
            people.push({
                bbox: {
                    x1: bbox.x1,
                    y1: bbox.y1,
                    x2: bbox.x2,
                    y2: bbox.y2,
                    confidence: bbox.confidence,
                },
                keypoints,
                scores: keypoints.map(k => k.score),
            });
        }
        const poseTime = performance.now() - poseStart;
        const totalTime = performance.now() - startTime;
        const stats = {
            personCount: people.length,
            detTime: Math.round(detTime),
            poseTime: Math.round(poseTime),
            totalTime: Math.round(totalTime),
        };
        // Remember for getStats(); also attach to the result array for
        // backward compatibility with callers that read `results.stats`.
        this.lastStats = stats;
        people.stats = stats;
        return people;
    }
    /**
     * Get detection and pose statistics from last call
     * @returns The stats object from the most recent detect(), or null if
     *   detect() has not run yet. (Previously always returned null.)
     */
    getStats() {
        return this.lastStats;
    }
    /**
     * Detect people using YOLO12
     * @returns Array of person bounding boxes in original-image coordinates.
     */
    async detectPeople(imageData, width, height) {
        const [inputH, inputW] = this.config.detInputSize;
        // Preprocess (letterbox + normalize)
        const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocessYOLO(imageData, width, height, [inputW, inputH]);
        // Inference - resolve the input name dynamically ('images', 'pixel_values', ...)
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const feeds = { [this.detSession.inputNames[0]]: inputTensor };
        const results = await this.detSession.run(feeds);
        const output = results[this.detSession.outputNames[0]];
        // Postprocess (confidence/class filter, coordinate mapping, NMS)
        return this.postprocessYOLO(output.data, output.dims[1], width, height, paddingX, paddingY, scaleX, scaleY);
    }
    /**
     * Estimate pose for a single person
     * @returns Keypoint array in original-image coordinates.
     */
    async estimatePose(imageData, imgWidth, imgHeight, bbox) {
        const [inputH, inputW] = this.config.poseInputSize;
        // Preprocess (bbox crop, resize, ImageNet normalization)
        const { tensor, center, scale } = this.preprocessPose(imageData, imgWidth, imgHeight, bbox, [inputW, inputH]);
        // Inference - consistency fix: resolve the input name dynamically like
        // detectPeople does, instead of hard-coding 'input'.
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const feeds = { [this.poseSession.inputNames[0]]: inputTensor };
        const results = await this.poseSession.run(feeds);
        // Postprocess (SimCC argmax decoding)
        return this.postprocessPose(results.simcc_x.data, results.simcc_y.data, results.simcc_x.dims, results.simcc_y.dims, center, scale);
    }
    /**
     * Build a throwaway canvas holding the raw RGBA pixels so drawImage can
     * rescale them. Shared by preprocessYOLO and preprocessPose.
     * @param {Uint8Array} imageData - RGBA bytes (length 4 * w * h).
     */
    _pixelsToCanvas(imageData, imgWidth, imgHeight) {
        const srcCanvas = document.createElement('canvas');
        srcCanvas.width = imgWidth;
        srcCanvas.height = imgHeight;
        const srcCtx = srcCanvas.getContext('2d');
        const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
        srcImageData.data.set(imageData);
        srcCtx.putImageData(srcImageData, 0, 0);
        return srcCanvas;
    }
    /**
     * YOLO preprocessing with letterbox
     * Scales the image into inputSize preserving aspect ratio, pads with
     * black, and emits a CHW float tensor normalized to [0, 1].
     * @returns tensor plus the padding offsets and scale factors needed to
     *   map detections back to original-image coordinates.
     */
    preprocessYOLO(imageData, imgWidth, imgHeight, inputSize) {
        const [inputW, inputH] = inputSize;
        // Reuse (or lazily rebuild after dispose) the detection canvas
        if (!this.canvas || !this.ctx) {
            this.canvas = document.createElement('canvas');
            this.ctx = this.canvas.getContext('2d', { willReadFrequently: true });
        }
        this.canvas.width = inputW;
        this.canvas.height = inputH;
        const ctx = this.ctx;
        // Black letterbox background
        ctx.fillStyle = '#000000';
        ctx.fillRect(0, 0, inputW, inputH);
        // Fit the image inside the input rectangle, centered on the short axis
        const aspectRatio = imgWidth / imgHeight;
        const targetAspectRatio = inputW / inputH;
        let drawWidth, drawHeight, offsetX, offsetY;
        if (aspectRatio > targetAspectRatio) {
            drawWidth = inputW;
            drawHeight = Math.floor(inputW / aspectRatio);
            offsetX = 0;
            offsetY = Math.floor((inputH - drawHeight) / 2);
        }
        else {
            drawHeight = inputH;
            drawWidth = Math.floor(inputH * aspectRatio);
            offsetX = Math.floor((inputW - drawWidth) / 2);
            offsetY = 0;
        }
        const srcCanvas = this._pixelsToCanvas(imageData, imgWidth, imgHeight);
        ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
        const paddedData = ctx.getImageData(0, 0, inputW, inputH);
        // Normalize to [0, 1] and convert RGBA-interleaved to planar CHW
        const tensor = new Float32Array(inputW * inputH * 3);
        for (let i = 0; i < paddedData.data.length; i += 4) {
            const pixelIdx = i / 4;
            tensor[pixelIdx] = paddedData.data[i] / 255;
            tensor[pixelIdx + inputW * inputH] = paddedData.data[i + 1] / 255;
            tensor[pixelIdx + 2 * inputW * inputH] = paddedData.data[i + 2] / 255;
        }
        const scaleX = imgWidth / drawWidth;
        const scaleY = imgHeight / drawHeight;
        return {
            tensor,
            paddingX: offsetX,
            paddingY: offsetY,
            scaleX,
            scaleY,
        };
    }
    /**
     * YOLO postprocessing with NMS
     * Expects 6 values per detection [x1, y1, x2, y2, confidence, classId];
     * keeps person-class (0) boxes above detConfidence, maps them back to
     * original-image coordinates, then applies NMS.
     */
    postprocessYOLO(output, numDetections, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY) {
        const detections = [];
        for (let i = 0; i < numDetections; i++) {
            const idx = i * 6;
            const x1 = output[idx];
            const y1 = output[idx + 1];
            const x2 = output[idx + 2];
            const y2 = output[idx + 3];
            const confidence = output[idx + 4];
            const classId = Math.round(output[idx + 5]);
            if (confidence < this.config.detConfidence || classId !== 0)
                continue;
            // Undo the letterbox: remove padding, then rescale
            const tx1 = (x1 - paddingX) * scaleX;
            const ty1 = (y1 - paddingY) * scaleY;
            const tx2 = (x2 - paddingX) * scaleX;
            const ty2 = (y2 - paddingY) * scaleY;
            detections.push({
                x1: Math.max(0, tx1),
                y1: Math.max(0, ty1),
                x2: Math.min(imgWidth, tx2),
                y2: Math.min(imgHeight, ty2),
                confidence,
            });
        }
        return this.applyNMS(detections, this.config.nmsThreshold);
    }
    /**
     * Pose preprocessing with affine crop
     * Crops an expanded (1.25x) box around the person, resizes it to the
     * model input, and applies ImageNet mean/std normalization into the
     * pre-allocated CHW buffer.
     */
    preprocessPose(imageData, imgWidth, imgHeight, bbox, inputSize) {
        const [inputW, inputH] = inputSize;
        const bboxWidth = bbox.x2 - bbox.x1;
        const bboxHeight = bbox.y2 - bbox.y1;
        const center = [
            bbox.x1 + bboxWidth / 2,
            bbox.y1 + bboxHeight / 2,
        ];
        // Expand the box to the model aspect ratio with 1.25x padding
        const bboxAspectRatio = bboxWidth / bboxHeight;
        const modelAspectRatio = inputW / inputH;
        let scaleW, scaleH;
        if (bboxAspectRatio > modelAspectRatio) {
            scaleW = bboxWidth * 1.25;
            scaleH = scaleW / modelAspectRatio;
        }
        else {
            scaleH = bboxHeight * 1.25;
            scaleW = scaleH * modelAspectRatio;
        }
        const scale = [scaleW, scaleH];
        // Reuse the pre-allocated pose canvas (rebuild lazily after dispose)
        if (!this.poseCanvas || !this.poseCtx) {
            this.poseCanvas = document.createElement('canvas');
            this.poseCanvas.width = inputW;
            this.poseCanvas.height = inputH;
            this.poseCtx = this.poseCanvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false,
            });
            this.poseTensorBuffer = new Float32Array(3 * inputW * inputH);
        }
        // Fix: init() sizes this canvas as [W, H] from config.poseInputSize
        // while estimatePose destructures the same array as [H, W]. For
        // non-square inputs the dimensions were swapped and getImageData read
        // out-of-bounds (transparent) rows. Resize to the requested crop size.
        if (this.poseCanvas.width !== inputW || this.poseCanvas.height !== inputH) {
            this.poseCanvas.width = inputW;
            this.poseCanvas.height = inputH;
        }
        if (!this.poseTensorBuffer || this.poseTensorBuffer.length !== 3 * inputW * inputH) {
            this.poseTensorBuffer = new Float32Array(3 * inputW * inputH);
        }
        const ctx = this.poseCtx;
        ctx.clearRect(0, 0, inputW, inputH);
        const srcCanvas = this._pixelsToCanvas(imageData, imgWidth, imgHeight);
        // Crop the expanded box and scale it to the model input size
        const srcX = center[0] - scaleW / 2;
        const srcY = center[1] - scaleH / 2;
        ctx.drawImage(srcCanvas, srcX, srcY, scaleW, scaleH, 0, 0, inputW, inputH);
        const croppedData = ctx.getImageData(0, 0, inputW, inputH);
        // ImageNet normalization with precomputed constants
        const tensor = this.poseTensorBuffer;
        const len = croppedData.data.length;
        const planeSize = inputW * inputH;
        const mean0 = 123.675, mean1 = 116.28, mean2 = 103.53;
        const stdInv0 = 1 / 58.395, stdInv1 = 1 / 57.12, stdInv2 = 1 / 57.375;
        // Loop unrolled 4 pixels at a time (pixel count is a multiple of 4 for
        // the supported input sizes)
        for (let i = 0; i < len; i += 16) {
            const p1 = i / 4, p2 = p1 + 1, p3 = p1 + 2, p4 = p1 + 3;
            // R channel
            tensor[p1] = (croppedData.data[i] - mean0) * stdInv0;
            tensor[p2] = (croppedData.data[i + 4] - mean0) * stdInv0;
            tensor[p3] = (croppedData.data[i + 8] - mean0) * stdInv0;
            tensor[p4] = (croppedData.data[i + 12] - mean0) * stdInv0;
            // G channel
            tensor[p1 + planeSize] = (croppedData.data[i + 1] - mean1) * stdInv1;
            tensor[p2 + planeSize] = (croppedData.data[i + 5] - mean1) * stdInv1;
            tensor[p3 + planeSize] = (croppedData.data[i + 9] - mean1) * stdInv1;
            tensor[p4 + planeSize] = (croppedData.data[i + 13] - mean1) * stdInv1;
            // B channel
            tensor[p1 + planeSize * 2] = (croppedData.data[i + 2] - mean2) * stdInv2;
            tensor[p2 + planeSize * 2] = (croppedData.data[i + 6] - mean2) * stdInv2;
            tensor[p3 + planeSize * 2] = (croppedData.data[i + 10] - mean2) * stdInv2;
            tensor[p4 + planeSize * 2] = (croppedData.data[i + 14] - mean2) * stdInv2;
        }
        return { tensor, center, scale };
    }
    /**
     * Pose postprocessing with SimCC decoding
     * Takes per-keypoint 1D classification logits for x and y, argmaxes each
     * axis, and maps the normalized position back into the crop box.
     */
    postprocessPose(simccX, simccY, shapeX, shapeY, center, scale) {
        const numKeypoints = shapeX[1];
        const wx = shapeX[2];
        const wy = shapeY[2];
        const keypoints = [];
        for (let k = 0; k < numKeypoints; k++) {
            // Argmax over the x-axis logits
            let maxX = -Infinity;
            let argmaxX = 0;
            for (let i = 0; i < wx; i++) {
                const val = simccX[k * wx + i];
                if (val > maxX) {
                    maxX = val;
                    argmaxX = i;
                }
            }
            // Argmax over the y-axis logits
            let maxY = -Infinity;
            let argmaxY = 0;
            for (let i = 0; i < wy; i++) {
                const val = simccY[k * wy + i];
                if (val > maxY) {
                    maxY = val;
                    argmaxY = i;
                }
            }
            // Keypoint score = mean of the two axis maxima
            const score = 0.5 * (maxX + maxY);
            const visible = score > this.config.poseConfidence;
            // Map normalized [0,1) crop coordinates back to the original image
            const normX = argmaxX / wx;
            const normY = argmaxY / wy;
            const x = (normX - 0.5) * scale[0] + center[0];
            const y = (normY - 0.5) * scale[1] + center[1];
            keypoints.push({
                x,
                y,
                score,
                visible,
                name: KEYPOINT_NAMES[k] || `keypoint_${k}`,
            });
        }
        return keypoints;
    }
    /**
     * Non-Maximum Suppression
     * Greedy: keep the highest-confidence box, drop any remaining box whose
     * IoU with it exceeds iouThreshold, repeat. Sorts the input in place.
     */
    applyNMS(detections, iouThreshold) {
        if (detections.length === 0)
            return [];
        detections.sort((a, b) => b.confidence - a.confidence);
        const selected = [];
        const used = new Set();
        for (let i = 0; i < detections.length; i++) {
            if (used.has(i))
                continue;
            selected.push(detections[i]);
            used.add(i);
            for (let j = i + 1; j < detections.length; j++) {
                if (used.has(j))
                    continue;
                const iou = this.calculateIoU(detections[i], detections[j]);
                if (iou > iouThreshold) {
                    used.add(j);
                }
            }
        }
        return selected;
    }
    /**
     * Calculate IoU between two boxes
     * @returns Intersection-over-union in [0, 1]; 0 when boxes don't overlap.
     */
    calculateIoU(box1, box2) {
        const x1 = Math.max(box1.x1, box2.x1);
        const y1 = Math.max(box1.y1, box2.y1);
        const x2 = Math.min(box1.x2, box2.x2);
        const y2 = Math.min(box1.y2, box2.y2);
        if (x2 <= x1 || y2 <= y1)
            return 0;
        const intersection = (x2 - x1) * (y2 - y1);
        const area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
        const area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
        const union = area1 + area2 - intersection;
        return intersection / union;
    }
    /**
     * Dispose resources
     * Releases both ONNX sessions and drops the pre-allocated canvases and
     * buffers so they can be garbage collected. The detector can be
     * re-initialized afterwards via init()/detect().
     */
    dispose() {
        if (this.detSession) {
            this.detSession.release();
            this.detSession = null;
        }
        if (this.poseSession) {
            this.poseSession.release();
            this.poseSession = null;
        }
        this.canvas = null;
        this.ctx = null;
        this.poseCanvas = null;
        this.poseCtx = null;
        this.poseTensorBuffer = null;
        this.initialized = false;
    }
}
@@ -0,0 +1,22 @@
1
+ /**
2
+ * PoseTracker - tracks poses across frames with cached detections
3
+ * Reduces detection frequency for better performance
4
+ */
5
+ import { Wholebody } from './wholebody';
6
+ export declare class PoseTracker {
7
+ private wholebody;
8
+ private detFrequency;
9
+ private cachedBoxes;
10
+ private frameCount;
11
+ private nextId;
12
+ constructor(WholebodyClass: typeof Wholebody, detFrequency?: number, toOpenpose?: boolean, mode?: 'performance' | 'lightweight' | 'balanced', backend?: 'onnxruntime', device?: string);
13
+ init(): Promise<void>;
14
+ call(image: Uint8Array, imgWidth: number, imgHeight: number): Promise<{
15
+ keypoints: number[][];
16
+ scores: number[];
17
+ }>;
18
+ private updateCachedBoxes;
19
+ private calculateIoU;
20
+ private cleanupCachedBoxes;
21
+ }
22
+ //# sourceMappingURL=poseTracker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"poseTracker.d.ts","sourceRoot":"","sources":["../../src/solution/poseTracker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,qBAAa,WAAW;IACtB,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,WAAW,CAAoB;IACvC,OAAO,CAAC,UAAU,CAAa;IAC/B,OAAO,CAAC,MAAM,CAAa;gBAGzB,cAAc,EAAE,OAAO,SAAS,EAChC,YAAY,GAAE,MAAU,EACxB,UAAU,GAAE,OAAe,EAC3B,IAAI,GAAE,aAAa,GAAG,aAAa,GAAG,UAAuB,EAC7D,OAAO,GAAE,aAA6B,EACtC,MAAM,GAAE,MAAc;IAelB,IAAI,IAAI,OAAO,CAAC,IAAI,CAAC;IAIrB,IAAI,CACR,KAAK,EAAE,UAAU,EACjB,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC;QAAE,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;QAAC,MAAM,EAAE,MAAM,EAAE,CAAA;KAAE,CAAC;IAmBvD,OAAO,CAAC,iBAAiB;IA6EzB,OAAO,CAAC,YAAY;IAiBpB,OAAO,CAAC,kBAAkB;CAO3B"}