rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,606 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ObjectDetector - Universal object detection API
|
|
3
|
+
* Supports YOLO12 and other YOLO models for multi-class detection
|
|
4
|
+
*
|
|
5
|
+
* @example
|
|
6
|
+
* ```typescript
|
|
7
|
+
* // Initialize with default model (YOLOv12n from HuggingFace)
|
|
8
|
+
* const detector = new ObjectDetector({
|
|
9
|
+
* classes: ['person', 'car', 'dog'], // Filter specific classes
|
|
10
|
+
* });
|
|
11
|
+
* await detector.init();
|
|
12
|
+
*
|
|
13
|
+
* // Or with custom model
|
|
14
|
+
* const detector = new ObjectDetector({
|
|
15
|
+
* model: 'models/yolov12n.onnx',
|
|
16
|
+
* classes: ['person'],
|
|
17
|
+
* });
|
|
18
|
+
* await detector.init();
|
|
19
|
+
*
|
|
20
|
+
* // Detect from canvas
|
|
21
|
+
* const objects = await detector.detectFromCanvas(canvas);
|
|
22
|
+
*
|
|
23
|
+
* // Detect all classes
|
|
24
|
+
* const allObjects = await detector.detectFromCanvas(canvas, { classes: null });
|
|
25
|
+
* ```
|
|
26
|
+
*/
|
|
27
|
+
import * as ort from 'onnxruntime-web';
|
|
28
|
+
import { getCachedModel, isModelCached } from '../core/modelCache';
|
|
29
|
+
// Configure ONNX Runtime Web:
// - serve the .wasm binaries from the CDN build pinned to the same
//   onnxruntime-web version (1.23.0) so the JS and WASM stay in sync;
// - enable SIMD for faster CPU kernels;
// - proxy=false keeps inference on the main thread (no proxy web worker).
ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/';
ort.env.wasm.simd = true;
ort.env.wasm.proxy = false;
|
|
33
|
+
/**
 * The 80 COCO object-class names, indexed by YOLO class id
 * (index 0 = 'person' … index 79 = 'toothbrush'). Order matters:
 * it must match the model's training label order.
 */
export const COCO_CLASSES = [
    /*  0 */ 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    /*  5 */ 'bus', 'train', 'truck', 'boat', 'traffic light',
    /* 10 */ 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    /* 15 */ 'cat', 'dog', 'horse', 'sheep', 'cow',
    /* 20 */ 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    /* 25 */ 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    /* 30 */ 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    /* 35 */ 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    /* 40 */ 'wine glass', 'cup', 'fork', 'knife', 'spoon',
    /* 45 */ 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    /* 50 */ 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    /* 55 */ 'cake', 'chair', 'couch', 'potted plant', 'bed',
    /* 60 */ 'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    /* 65 */ 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    /* 70 */ 'toaster', 'sink', 'refrigerator', 'book', 'clock',
    /* 75 */ 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
];
|
|
48
|
+
/**
 * Baseline detector configuration. User-supplied options are spread on top
 * of this object in the ObjectDetector constructor, so every key here has a
 * working default.
 */
const DEFAULT_CONFIG = {
    // YOLOv12n weights hosted on HuggingFace.
    model: 'https://huggingface.co/demon2233/rtmlib-ts/resolve/main/yolo/yolov12n.onnx',
    inputSize: [416, 416], // [width, height]; smaller than 640 for speed
    confidence: 0.5,
    nmsThreshold: 0.45,
    classes: ['person'], // detect only people unless told otherwise
    backend: 'webgpu', // WebGPU gives the best throughput when available
    mode: 'balanced',
    device: 'cpu',
    cache: true,
};
|
|
62
|
+
/**
 * Speed/accuracy presets selectable via the `mode` config option.
 * A preset only supplies values the caller did not set explicitly.
 */
const MODE_PRESETS = {
    // Largest input, lowest threshold: best accuracy, slowest.
    performance: { inputSize: [640, 640], confidence: 0.3 },
    // Middle ground between speed and accuracy.
    balanced: { inputSize: [416, 416], confidence: 0.5 },
    // Smallest input, highest threshold: fastest.
    lightweight: { inputSize: [320, 320], confidence: 0.6 },
};
|
|
68
|
+
export class ObjectDetector {
    /**
     * @param {Object} [config={}] - Options merged over DEFAULT_CONFIG.
     *   Keys: model (URL or path), inputSize ([width, height]), confidence,
     *   nmsThreshold, classes (array of COCO names, or null for all classes),
     *   backend, mode ('performance' | 'balanced' | 'lightweight'), cache,
     *   debug (enable verbose per-frame logging; off by default).
     */
    constructor(config = {}) {
        this.session = null;
        this.initialized = false;
        this.classFilter = null;
        this.lastStats = null; // stats from the most recent detect() call
        // Pre-allocated reusable resources for performance
        this.canvas = null;
        this.ctx = null;
        this.tensorBuffer = null;
        this.inputSize = [416, 416];
        const finalConfig = { ...DEFAULT_CONFIG, ...config };
        // Apply mode preset if specified; explicitly-set options win over it.
        if (config.mode && MODE_PRESETS[config.mode]) {
            const preset = MODE_PRESETS[config.mode];
            if (!config.inputSize)
                finalConfig.inputSize = preset.inputSize;
            if (!config.confidence)
                finalConfig.confidence = preset.confidence;
        }
        this.config = finalConfig;
        this.updateClassFilter();
        console.log(`[ObjectDetector] Initialized with mode: ${config.mode || 'balanced'}, input: ${this.config.inputSize[0]}x${this.config.inputSize[1]}`);
    }
    /**
     * Rebuild the numeric class-id filter from `config.classes`.
     * A null `classes` disables filtering (detect everything).
     */
    updateClassFilter() {
        if (!this.config.classes) {
            this.classFilter = null;
            return;
        }
        this.classFilter = new Set();
        this.config.classes.forEach((className) => {
            const classId = COCO_CLASSES.indexOf(className.toLowerCase());
            if (classId !== -1) {
                this.classFilter.add(classId);
            }
            else {
                console.warn(`[ObjectDetector] Unknown class: ${className}`);
            }
        });
    }
    /**
     * Set which classes to detect.
     * @param classes - Array of class names or null for all classes.
     */
    setClasses(classes) {
        this.config.classes = classes;
        this.updateClassFilter();
    }
    /** Get a copy of the list of available COCO class names. */
    getAvailableClasses() {
        return [...COCO_CLASSES];
    }
    /** Get the currently configured class filter (null = all classes). */
    getFilteredClasses() {
        return this.config.classes;
    }
    /**
     * Load the ONNX model, create the inference session, and pre-allocate
     * the letterbox canvas and input tensor buffer. Idempotent.
     * @throws if the model cannot be fetched or the session fails to create.
     */
    async init() {
        if (this.initialized)
            return;
        try {
            console.log(`[ObjectDetector] Loading model from: ${this.config.model}`);
            let modelBuffer;
            // Use cached model if caching is enabled
            if (this.config.cache) {
                const isCached = await isModelCached(this.config.model);
                console.log(`[ObjectDetector] Cache ${isCached ? 'hit' : 'miss'} for model`);
                modelBuffer = await getCachedModel(this.config.model);
            }
            else {
                console.log(`[ObjectDetector] Caching disabled, fetching from network`);
                const response = await fetch(this.config.model);
                if (!response.ok) {
                    throw new Error(`Failed to fetch model: HTTP ${response.status} ${response.statusText}`);
                }
                modelBuffer = await response.arrayBuffer();
            }
            console.log(`[ObjectDetector] Model loaded, size: ${(modelBuffer.byteLength / 1024 / 1024).toFixed(2)} MB`);
            this.session = await ort.InferenceSession.create(modelBuffer, {
                executionProviders: [this.config.backend],
                graphOptimizationLevel: 'all',
            });
            // Pre-allocate canvas and tensor buffer for performance.
            // config.inputSize is [width, height].
            const [w, h] = this.config.inputSize;
            this.inputSize = [w, h];
            this.canvas = document.createElement('canvas');
            this.canvas.width = w;
            this.canvas.height = h;
            this.ctx = this.canvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false // Faster, no transparency
            });
            // 3 channels * width * height
            this.tensorBuffer = new Float32Array(3 * w * h);
            this.initialized = true;
            console.log(`[ObjectDetector] ✅ Initialized (${w}x${h}, ${this.config.backend})`);
        }
        catch (error) {
            console.error('[ObjectDetector] ❌ Initialization failed:', error);
            throw error;
        }
    }
    /**
     * Detect objects from an HTMLCanvasElement.
     * @returns detections array (with a `stats` property attached).
     */
    async detectFromCanvas(canvas) {
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect objects from an HTMLVideoElement (current frame).
     * @param targetCanvas - optional scratch canvas to reuse between calls.
     */
    async detectFromVideo(video, targetCanvas) {
        if (video.readyState < 2) {
            throw new Error('Video not ready. Ensure video is loaded and playing.');
        }
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(video, 0, 0, canvas.width, canvas.height);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect objects from an HTMLImageElement (must be fully loaded).
     * @param targetCanvas - optional scratch canvas to reuse between calls.
     */
    async detectFromImage(image, targetCanvas) {
        if (!image.complete || !image.naturalWidth) {
            throw new Error('Image not loaded. Ensure image is fully loaded.');
        }
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = image.naturalWidth;
        canvas.height = image.naturalHeight;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(image, 0, 0);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect objects from an ImageBitmap.
     * @param targetCanvas - optional scratch canvas to reuse between calls.
     */
    async detectFromBitmap(bitmap, targetCanvas) {
        const canvas = targetCanvas || document.createElement('canvas');
        canvas.width = bitmap.width;
        canvas.height = bitmap.height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
            throw new Error('Could not get 2D context from canvas');
        }
        ctx.drawImage(bitmap, 0, 0);
        const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
        return this.detect(new Uint8Array(imageData.data.buffer), canvas.width, canvas.height);
    }
    /**
     * Detect objects from a File (image file picked by the user).
     * The temporary object URL is revoked once the image loads or errors,
     * so repeated calls do not leak blob URLs.
     */
    async detectFromFile(file, targetCanvas) {
        return new Promise((resolve, reject) => {
            const img = new Image();
            const url = URL.createObjectURL(file);
            img.onload = async () => {
                URL.revokeObjectURL(url);
                try {
                    const results = await this.detectFromImage(img, targetCanvas);
                    resolve(results);
                }
                catch (error) {
                    reject(error);
                }
            };
            img.onerror = () => {
                URL.revokeObjectURL(url);
                reject(new Error('Failed to load image from file'));
            };
            img.src = url;
        });
    }
    /** Detect objects from a Blob (decoded via createImageBitmap). */
    async detectFromBlob(blob, targetCanvas) {
        const bitmap = await createImageBitmap(blob);
        const results = await this.detectFromBitmap(bitmap, targetCanvas);
        bitmap.close();
        return results;
    }
    /**
     * Detect objects from raw RGBA image data.
     * @param imageData - Uint8Array of RGBA pixels (width * height * 4).
     * @returns detections array with a `stats` summary attached.
     */
    async detect(imageData, width, height) {
        if (!this.initialized) {
            await this.init();
        }
        const startTime = performance.now();
        // config.inputSize is [width, height] — same convention as init().
        const [inputW, inputH] = this.config.inputSize;
        // Preprocess (letterbox + normalize into the reusable tensor buffer)
        const { tensor, paddingX, paddingY, scaleX, scaleY } = this.preprocess(imageData, width, height, [inputW, inputH]);
        // Inference - use dynamic input name ('images', 'pixel_values', ...).
        // NCHW layout: [batch, channels, height, width].
        const inputTensor = new ort.Tensor('float32', tensor, [1, 3, inputH, inputW]);
        const inputName = this.session.inputNames[0];
        if (this.config.debug) {
            console.log(`[ObjectDetector] Using input name: ${inputName}`);
            console.log(`[ObjectDetector] Input shape: [1, 3, ${inputH}, ${inputW}]`);
        }
        const feeds = {};
        feeds[inputName] = inputTensor;
        const results = await this.session.run(feeds);
        const output = results[this.session.outputNames[0]];
        if (this.config.debug) {
            console.log(`[ObjectDetector] Output shape: [${output.dims}]`);
            console.log(`[ObjectDetector] Output type: ${output.type}`);
        }
        // Postprocess back into original-image coordinates
        const detections = this.postprocess(output.data, output.dims[1], output.dims, width, height, paddingX, paddingY, scaleX, scaleY);
        const inferenceTime = performance.now() - startTime;
        // Attach stats and remember them for getStats()
        detections.stats = this.calculateStats(detections, inferenceTime);
        this.lastStats = detections.stats;
        return detections;
    }
    /**
     * Letterbox the image into the pre-allocated model-input canvas and
     * normalize RGBA bytes into the reusable CHW float tensor buffer.
     * @param inputSize - [width, height] of the model input.
     * @returns tensor plus the padding offsets and scale factors needed to
     *   map model-space boxes back to original-image coordinates.
     */
    preprocess(imageData, imgWidth, imgHeight, inputSize) {
        const [inputW, inputH] = inputSize;
        // Lazily (re)create the reusable canvas if init() has not run.
        if (!this.canvas || !this.ctx) {
            this.canvas = document.createElement('canvas');
            this.canvas.width = inputW;
            this.canvas.height = inputH;
            this.ctx = this.canvas.getContext('2d', {
                willReadFrequently: true,
                alpha: false
            });
            this.tensorBuffer = new Float32Array(3 * inputW * inputH);
        }
        const ctx = this.ctx;
        // Fast clear
        ctx.clearRect(0, 0, inputW, inputH);
        // Calculate letterbox placement preserving aspect ratio
        const aspectRatio = imgWidth / imgHeight;
        const targetAspectRatio = inputW / inputH;
        let drawWidth, drawHeight, offsetX, offsetY;
        if (aspectRatio > targetAspectRatio) {
            drawWidth = inputW;
            drawHeight = (inputW / aspectRatio) | 0; // |0 truncates like Math.floor for positives
            offsetX = 0;
            offsetY = ((inputH - drawHeight) / 2) | 0;
        }
        else {
            drawHeight = inputH;
            drawWidth = (inputH * aspectRatio) | 0;
            offsetX = ((inputW - drawWidth) / 2) | 0;
            offsetY = 0;
        }
        // Stage the raw RGBA bytes on a source canvas so drawImage can rescale
        const srcCanvas = document.createElement('canvas');
        srcCanvas.width = imgWidth;
        srcCanvas.height = imgHeight;
        const srcCtx = srcCanvas.getContext('2d');
        const srcImageData = srcCtx.createImageData(imgWidth, imgHeight);
        srcImageData.data.set(imageData);
        srcCtx.putImageData(srcImageData, 0, 0);
        // Draw with letterbox
        ctx.drawImage(srcCanvas, 0, 0, imgWidth, imgHeight, offsetX, offsetY, drawWidth, drawHeight);
        const paddedData = ctx.getImageData(0, 0, inputW, inputH);
        // Normalization loop, unrolled 4 pixels at a time (reuses buffer)
        const tensor = this.tensorBuffer;
        const len = paddedData.data.length;
        const planeSize = inputW * inputH;
        for (let i = 0; i < len; i += 16) {
            const i1 = i, i2 = i + 4, i3 = i + 8, i4 = i + 12;
            const p1 = i1 / 4, p2 = i2 / 4, p3 = i3 / 4, p4 = i4 / 4;
            // R channel (0.003921569 === 1/255)
            tensor[p1] = paddedData.data[i1] * 0.003921569;
            tensor[p2] = paddedData.data[i2] * 0.003921569;
            tensor[p3] = paddedData.data[i3] * 0.003921569;
            tensor[p4] = paddedData.data[i4] * 0.003921569;
            // G channel
            tensor[p1 + planeSize] = paddedData.data[i1 + 1] * 0.003921569;
            tensor[p2 + planeSize] = paddedData.data[i2 + 1] * 0.003921569;
            tensor[p3 + planeSize] = paddedData.data[i3 + 1] * 0.003921569;
            tensor[p4 + planeSize] = paddedData.data[i4 + 1] * 0.003921569;
            // B channel
            tensor[p1 + planeSize * 2] = paddedData.data[i1 + 2] * 0.003921569;
            tensor[p2 + planeSize * 2] = paddedData.data[i2 + 2] * 0.003921569;
            tensor[p3 + planeSize * 2] = paddedData.data[i3 + 2] * 0.003921569;
            tensor[p4 + planeSize * 2] = paddedData.data[i4 + 2] * 0.003921569;
        }
        const scaleX = imgWidth / drawWidth;
        const scaleY = imgHeight / drawHeight;
        return {
            tensor,
            paddingX: offsetX,
            paddingY: offsetY,
            scaleX,
            scaleY,
        };
    }
    /**
     * Decode raw YOLO output into detections in original-image coordinates.
     * Supports two layouts:
     *  - [batch, boxes, 6]  : x1, y1, x2, y2, confidence, classId
     *  - [batch, boxes, 80+]: per-class logits followed by 4 bbox values
     * @returns detections after per-class NMS.
     */
    postprocess(output, numDetections, outputShape, imgWidth, imgHeight, paddingX, paddingY, scaleX, scaleY) {
        const detections = [];
        const debug = this.config.debug;
        // Format 1: [batch, boxes, 6] - [x1, y1, x2, y2, conf, class]
        if (outputShape.length === 3 && outputShape[2] === 6) {
            for (let i = 0; i < numDetections; i++) {
                const idx = i * 6;
                const x1 = output[idx];
                const y1 = output[idx + 1];
                const x2 = output[idx + 2];
                const y2 = output[idx + 3];
                const confidence = output[idx + 4];
                const classId = Math.round(output[idx + 5]);
                if (confidence < this.config.confidence)
                    continue;
                if (this.classFilter && !this.classFilter.has(classId))
                    continue;
                if (x2 <= x1 || y2 <= y1)
                    continue;
                // Undo letterbox padding/scale to map back to image space
                const tx1 = (x1 - paddingX) * scaleX;
                const ty1 = (y1 - paddingY) * scaleY;
                const tx2 = (x2 - paddingX) * scaleX;
                const ty2 = (y2 - paddingY) * scaleY;
                detections.push({
                    bbox: {
                        x1: Math.max(0, tx1),
                        y1: Math.max(0, ty1),
                        x2: Math.min(imgWidth, tx2),
                        y2: Math.min(imgHeight, ty2),
                        confidence,
                    },
                    classId,
                    className: COCO_CLASSES[classId] || `class_${classId}`,
                    confidence,
                });
            }
        }
        // Format 2: [batch, boxes, 80+] - per-class logits then 4 bbox values
        else if (outputShape.length === 3 && outputShape[2] >= 80) {
            const numClasses = outputShape[2] - 4;
            // config.inputSize is [width, height]
            const [inputW, inputH] = this.config.inputSize;
            if (debug)
                console.log(`[ObjectDetector] Trying YOLOv26 format (center format) with ${numClasses} classes`);
            for (let i = 0; i < numDetections; i++) {
                const baseIdx = i * outputShape[2];
                // Raw bbox values - try direct interpretation first; the model
                // may output already decoded coordinates.
                let x1 = output[baseIdx + numClasses];
                let y1 = output[baseIdx + numClasses + 1];
                let x2 = output[baseIdx + numClasses + 2];
                let y2 = output[baseIdx + numClasses + 3];
                // Heuristic: if values are very small (< 1), they might be
                // logits - apply sigmoid and scale to input size.
                if (Math.abs(x1) < 1 && Math.abs(y1) < 1) {
                    x1 = (1 / (1 + Math.exp(-x1))) * inputW;
                    y1 = (1 / (1 + Math.exp(-y1))) * inputH;
                    x2 = (1 / (1 + Math.exp(-x2))) * inputW;
                    y2 = (1 / (1 + Math.exp(-y2))) * inputH;
                }
                // If values are negative but large, apply sigmoid as well
                else if (x1 < 0 || y1 < 0) {
                    x1 = (1 / (1 + Math.exp(-x1))) * inputW;
                    y1 = (1 / (1 + Math.exp(-y1))) * inputH;
                    x2 = (1 / (1 + Math.exp(-x2))) * inputW;
                    y2 = (1 / (1 + Math.exp(-y2))) * inputH;
                }
                // Otherwise use as-is (already decoded)
                if (debug && i === 0) {
                    console.log(`[ObjectDetector] Raw bbox: [${output[baseIdx + numClasses]}, ${output[baseIdx + numClasses + 1]}, ${output[baseIdx + numClasses + 2]}, ${output[baseIdx + numClasses + 3]}]`);
                    console.log(`[ObjectDetector] Decoded bbox: [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}]`);
                }
                // Find best class and confidence (argmax over class logits)
                let bestClass = 0;
                let bestScore = -Infinity;
                for (let c = 0; c < numClasses; c++) {
                    const score = output[baseIdx + c];
                    if (score > bestScore) {
                        bestScore = score;
                        bestClass = c;
                    }
                }
                // Apply sigmoid to squash the class logit into [0, 1]
                const confidence = 1 / (1 + Math.exp(-bestScore));
                if (debug && i < 5 && confidence > 0.05) {
                    console.log(`[ObjectDetector] Box ${i}: [${x1.toFixed(1)}, ${y1.toFixed(1)}, ${x2.toFixed(1)}, ${y2.toFixed(1)}]`);
                    console.log(`[ObjectDetector] -> class=${bestClass} (${COCO_CLASSES[bestClass] || 'unknown'}), confidence=${(confidence * 100).toFixed(1)}%`);
                }
                if (confidence < this.config.confidence)
                    continue;
                if (this.classFilter && !this.classFilter.has(bestClass))
                    continue;
                if (x2 <= x1 || y2 <= y1)
                    continue;
                if (x1 < 0 && x2 < 0)
                    continue;
                if (y1 < 0 && y2 < 0)
                    continue;
                // Transform to original image space
                const tx1 = (x1 - paddingX) * scaleX;
                const ty1 = (y1 - paddingY) * scaleY;
                const tx2 = (x2 - paddingX) * scaleX;
                const ty2 = (y2 - paddingY) * scaleY;
                detections.push({
                    bbox: {
                        x1: Math.max(0, tx1),
                        y1: Math.max(0, ty1),
                        x2: Math.min(imgWidth, tx2),
                        y2: Math.min(imgHeight, ty2),
                        confidence,
                    },
                    classId: bestClass,
                    className: COCO_CLASSES[bestClass] || `class_${bestClass}`,
                    confidence,
                });
            }
        }
        // Per-frame debug logging (opt-in via config.debug)
        if (debug) {
            if (detections.length > 0) {
                console.log(`[ObjectDetector] ✅ Found ${detections.length} detections`);
                console.log(`[ObjectDetector] First:`, detections[0]);
            }
            else {
                console.log(`[ObjectDetector] ❌ No detections above threshold ${this.config.confidence}`);
                // Log top 3 scores for debugging
                const topScores = [];
                const numClasses = outputShape.length === 3 ? outputShape[2] - 4 : 80;
                for (let i = 0; i < Math.min(3, numDetections); i++) {
                    const baseIdx = i * outputShape[2];
                    let bestScore = -Infinity;
                    for (let c = 0; c < numClasses; c++) {
                        const score = output[baseIdx + c];
                        if (score > bestScore)
                            bestScore = score;
                    }
                    const confidence = bestScore > 0 && bestScore <= 1 ? bestScore : 1 / (1 + Math.exp(-bestScore));
                    topScores.push(confidence);
                }
                console.log(`[ObjectDetector] Top 3 confidences: ${topScores.map(s => (s * 100).toFixed(1) + '%').join(', ')}`);
            }
        }
        // NMS
        return this.applyMultiClassNMS(detections, this.config.nmsThreshold);
    }
    /**
     * Greedy per-class Non-Maximum Suppression.
     * @param detections - candidate detections (any order).
     * @param iouThreshold - same-class boxes with IoU above this are dropped.
     */
    applyMultiClassNMS(detections, iouThreshold) {
        if (detections.length === 0)
            return [];
        // Group by class
        const byClass = new Map();
        detections.forEach((det) => {
            const classDets = byClass.get(det.classId) || [];
            classDets.push(det);
            byClass.set(det.classId, classDets);
        });
        // Apply NMS per class, keeping highest-confidence boxes first
        const selected = [];
        byClass.forEach((classDets) => {
            classDets.sort((a, b) => b.confidence - a.confidence);
            const used = new Set();
            for (let i = 0; i < classDets.length; i++) {
                if (used.has(i))
                    continue;
                selected.push(classDets[i]);
                used.add(i);
                for (let j = i + 1; j < classDets.length; j++) {
                    if (used.has(j))
                        continue;
                    const iou = this.calculateIoU(classDets[i].bbox, classDets[j].bbox);
                    if (iou > iouThreshold) {
                        used.add(j);
                    }
                }
            }
        });
        return selected;
    }
    /** Intersection-over-Union of two {x1, y1, x2, y2} boxes (0 when disjoint). */
    calculateIoU(box1, box2) {
        const x1 = Math.max(box1.x1, box2.x1);
        const y1 = Math.max(box1.y1, box2.y1);
        const x2 = Math.min(box1.x2, box2.x2);
        const y2 = Math.min(box1.y2, box2.y2);
        if (x2 <= x1 || y2 <= y1)
            return 0;
        const intersection = (x2 - x1) * (y2 - y1);
        const area1 = (box1.x2 - box1.x1) * (box1.y2 - box1.y1);
        const area2 = (box2.x2 - box2.x1) * (box2.y2 - box2.y1);
        const union = area1 + area2 - intersection;
        return intersection / union;
    }
    /**
     * Build summary statistics for a result set.
     * @returns { totalCount, classCounts, inferenceTime } (ms, rounded).
     */
    calculateStats(detections, inferenceTime) {
        const classCounts = {};
        detections.forEach((det) => {
            classCounts[det.className] = (classCounts[det.className] || 0) + 1;
        });
        return {
            totalCount: detections.length,
            classCounts,
            inferenceTime: Math.round(inferenceTime),
        };
    }
    /** Statistics from the last detect() call, or null before the first one. */
    getStats() {
        return this.lastStats;
    }
    /** Release the ONNX session and drop pre-allocated buffers. */
    dispose() {
        if (this.session) {
            this.session.release();
            this.session = null;
        }
        this.canvas = null;
        this.ctx = null;
        this.tensorBuffer = null;
        this.initialized = false;
    }
}
|