rtmlib-ts 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +202 -0
- package/dist/core/base.d.ts +20 -0
- package/dist/core/base.d.ts.map +1 -0
- package/dist/core/base.js +40 -0
- package/dist/core/file.d.ts +11 -0
- package/dist/core/file.d.ts.map +1 -0
- package/dist/core/file.js +111 -0
- package/dist/core/modelCache.d.ts +35 -0
- package/dist/core/modelCache.d.ts.map +1 -0
- package/dist/core/modelCache.js +161 -0
- package/dist/core/posePostprocessing.d.ts +12 -0
- package/dist/core/posePostprocessing.d.ts.map +1 -0
- package/dist/core/posePostprocessing.js +76 -0
- package/dist/core/postprocessing.d.ts +10 -0
- package/dist/core/postprocessing.d.ts.map +1 -0
- package/dist/core/postprocessing.js +70 -0
- package/dist/core/preprocessing.d.ts +14 -0
- package/dist/core/preprocessing.d.ts.map +1 -0
- package/dist/core/preprocessing.js +79 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +31 -0
- package/dist/models/rtmpose.d.ts +25 -0
- package/dist/models/rtmpose.d.ts.map +1 -0
- package/dist/models/rtmpose.js +185 -0
- package/dist/models/rtmpose3d.d.ts +28 -0
- package/dist/models/rtmpose3d.d.ts.map +1 -0
- package/dist/models/rtmpose3d.js +184 -0
- package/dist/models/yolo12.d.ts +23 -0
- package/dist/models/yolo12.d.ts.map +1 -0
- package/dist/models/yolo12.js +165 -0
- package/dist/models/yolox.d.ts +18 -0
- package/dist/models/yolox.d.ts.map +1 -0
- package/dist/models/yolox.js +167 -0
- package/dist/solution/animalDetector.d.ts +229 -0
- package/dist/solution/animalDetector.d.ts.map +1 -0
- package/dist/solution/animalDetector.js +663 -0
- package/dist/solution/body.d.ts +16 -0
- package/dist/solution/body.d.ts.map +1 -0
- package/dist/solution/body.js +52 -0
- package/dist/solution/bodyWithFeet.d.ts +16 -0
- package/dist/solution/bodyWithFeet.d.ts.map +1 -0
- package/dist/solution/bodyWithFeet.js +52 -0
- package/dist/solution/customDetector.d.ts +137 -0
- package/dist/solution/customDetector.d.ts.map +1 -0
- package/dist/solution/customDetector.js +342 -0
- package/dist/solution/hand.d.ts +14 -0
- package/dist/solution/hand.d.ts.map +1 -0
- package/dist/solution/hand.js +20 -0
- package/dist/solution/index.d.ts +10 -0
- package/dist/solution/index.d.ts.map +1 -0
- package/dist/solution/index.js +9 -0
- package/dist/solution/objectDetector.d.ts +172 -0
- package/dist/solution/objectDetector.d.ts.map +1 -0
- package/dist/solution/objectDetector.js +606 -0
- package/dist/solution/pose3dDetector.d.ts +145 -0
- package/dist/solution/pose3dDetector.d.ts.map +1 -0
- package/dist/solution/pose3dDetector.js +611 -0
- package/dist/solution/poseDetector.d.ts +198 -0
- package/dist/solution/poseDetector.d.ts.map +1 -0
- package/dist/solution/poseDetector.js +622 -0
- package/dist/solution/poseTracker.d.ts +22 -0
- package/dist/solution/poseTracker.d.ts.map +1 -0
- package/dist/solution/poseTracker.js +106 -0
- package/dist/solution/wholebody.d.ts +19 -0
- package/dist/solution/wholebody.d.ts.map +1 -0
- package/dist/solution/wholebody.js +82 -0
- package/dist/solution/wholebody3d.d.ts +22 -0
- package/dist/solution/wholebody3d.d.ts.map +1 -0
- package/dist/solution/wholebody3d.js +75 -0
- package/dist/types/index.d.ts +52 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +5 -0
- package/dist/visualization/draw.d.ts +57 -0
- package/dist/visualization/draw.d.ts.map +1 -0
- package/dist/visualization/draw.js +400 -0
- package/dist/visualization/skeleton/coco133.d.ts +350 -0
- package/dist/visualization/skeleton/coco133.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco133.js +120 -0
- package/dist/visualization/skeleton/coco17.d.ts +180 -0
- package/dist/visualization/skeleton/coco17.d.ts.map +1 -0
- package/dist/visualization/skeleton/coco17.js +48 -0
- package/dist/visualization/skeleton/halpe26.d.ts +278 -0
- package/dist/visualization/skeleton/halpe26.d.ts.map +1 -0
- package/dist/visualization/skeleton/halpe26.js +70 -0
- package/dist/visualization/skeleton/hand21.d.ts +196 -0
- package/dist/visualization/skeleton/hand21.d.ts.map +1 -0
- package/dist/visualization/skeleton/hand21.js +51 -0
- package/dist/visualization/skeleton/index.d.ts +10 -0
- package/dist/visualization/skeleton/index.d.ts.map +1 -0
- package/dist/visualization/skeleton/index.js +9 -0
- package/dist/visualization/skeleton/openpose134.d.ts +357 -0
- package/dist/visualization/skeleton/openpose134.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose134.js +116 -0
- package/dist/visualization/skeleton/openpose18.d.ts +177 -0
- package/dist/visualization/skeleton/openpose18.d.ts.map +1 -0
- package/dist/visualization/skeleton/openpose18.js +47 -0
- package/docs/ANIMAL_DETECTOR.md +450 -0
- package/docs/CUSTOM_DETECTOR.md +568 -0
- package/docs/OBJECT_DETECTOR.md +373 -0
- package/docs/POSE3D_DETECTOR.md +458 -0
- package/docs/POSE_DETECTOR.md +442 -0
- package/examples/README.md +119 -0
- package/examples/index.html +746 -0
- package/package.json +51 -0
- package/playground/README.md +114 -0
- package/playground/app/favicon.ico +0 -0
- package/playground/app/globals.css +17 -0
- package/playground/app/layout.tsx +19 -0
- package/playground/app/page.tsx +1338 -0
- package/playground/eslint.config.mjs +18 -0
- package/playground/next.config.ts +34 -0
- package/playground/package-lock.json +6723 -0
- package/playground/package.json +27 -0
- package/playground/postcss.config.mjs +7 -0
- package/playground/tsconfig.json +34 -0
- package/src/core/base.ts +66 -0
- package/src/core/file.ts +141 -0
- package/src/core/modelCache.ts +189 -0
- package/src/core/posePostprocessing.ts +91 -0
- package/src/core/postprocessing.ts +93 -0
- package/src/core/preprocessing.ts +127 -0
- package/src/index.ts +69 -0
- package/src/models/rtmpose.ts +265 -0
- package/src/models/rtmpose3d.ts +289 -0
- package/src/models/yolo12.ts +220 -0
- package/src/models/yolox.ts +214 -0
- package/src/solution/animalDetector.ts +955 -0
- package/src/solution/body.ts +89 -0
- package/src/solution/bodyWithFeet.ts +89 -0
- package/src/solution/customDetector.ts +474 -0
- package/src/solution/hand.ts +52 -0
- package/src/solution/index.ts +10 -0
- package/src/solution/objectDetector.ts +816 -0
- package/src/solution/pose3dDetector.ts +890 -0
- package/src/solution/poseDetector.ts +892 -0
- package/src/solution/poseTracker.ts +172 -0
- package/src/solution/wholebody.ts +130 -0
- package/src/solution/wholebody3d.ts +125 -0
- package/src/types/index.ts +62 -0
- package/src/visualization/draw.ts +543 -0
- package/src/visualization/skeleton/coco133.ts +131 -0
- package/src/visualization/skeleton/coco17.ts +49 -0
- package/src/visualization/skeleton/halpe26.ts +71 -0
- package/src/visualization/skeleton/hand21.ts +52 -0
- package/src/visualization/skeleton/index.ts +10 -0
- package/src/visualization/skeleton/openpose134.ts +125 -0
- package/src/visualization/skeleton/openpose18.ts +48 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,568 @@
|
|
|
1
|
+
# CustomDetector API
|
|
2
|
+
|
|
3
|
+
A maximally flexible detector for running any ONNX model, with customizable preprocessing and postprocessing.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
`CustomDetector` provides a low-level, flexible API for running inference with any ONNX model. It supports automatic preprocessing, custom preprocessing/postprocessing functions, and works with various input sources (canvas, video, image, file, blob).
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
npm install rtmlib-ts
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Quick Start
|
|
16
|
+
|
|
17
|
+
### Basic Usage
|
|
18
|
+
|
|
19
|
+
```typescript
|
|
20
|
+
import { CustomDetector } from 'rtmlib-ts';
|
|
21
|
+
|
|
22
|
+
// Initialize with model path
|
|
23
|
+
const detector = new CustomDetector({
|
|
24
|
+
model: 'path/to/model.onnx',
|
|
25
|
+
});
|
|
26
|
+
|
|
27
|
+
await detector.init();
|
|
28
|
+
|
|
29
|
+
// Run inference
|
|
30
|
+
const result = await detector.runFromCanvas(canvas);
|
|
31
|
+
console.log(result.outputs); // Raw ONNX outputs
|
|
32
|
+
console.log(result.data); // Processed data
|
|
33
|
+
console.log(result.inferenceTime); // Inference time in ms
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Simple Classification
|
|
37
|
+
|
|
38
|
+
```typescript
|
|
39
|
+
const detector = new CustomDetector({
|
|
40
|
+
model: 'https://example.com/mobilenet.onnx',
|
|
41
|
+
inputSize: [224, 224],
|
|
42
|
+
normalization: {
|
|
43
|
+
mean: [123.675, 116.28, 103.53],
|
|
44
|
+
std: [58.395, 57.12, 57.375],
|
|
45
|
+
},
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
await detector.init();
|
|
49
|
+
|
|
50
|
+
const result = await detector.runFromCanvas(canvas);
|
|
51
|
+
const output = detector.getOutputTensor(result.outputs);
|
|
52
|
+
const scores = output.data as Float32Array;
const predictedClass = scores.indexOf(Math.max(...scores));
|
|
53
|
+
console.log(`Predicted class: ${predictedClass}`);
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Custom Preprocessing and Postprocessing
|
|
57
|
+
|
|
58
|
+
```typescript
|
|
59
|
+
const detector = new CustomDetector({
|
|
60
|
+
model: 'path/to/model.onnx',
|
|
61
|
+
inputSize: [512, 512],
|
|
62
|
+
preprocessing: (imageData, config) => {
|
|
63
|
+
// Custom preprocessing logic
|
|
64
|
+
const tensor = new Float32Array(3 * 512 * 512);
|
|
65
|
+
// ... your preprocessing
|
|
66
|
+
return tensor;
|
|
67
|
+
},
|
|
68
|
+
postprocessing: (outputs, metadata) => {
|
|
69
|
+
// Custom postprocessing logic
|
|
70
|
+
const output = outputs['output'];
|
|
71
|
+
return {
|
|
72
|
+
boxes: decodeBoxes(output),
|
|
73
|
+
scores: decodeScores(output),
|
|
74
|
+
};
|
|
75
|
+
},
|
|
76
|
+
});
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## API Reference
|
|
80
|
+
|
|
81
|
+
### Constructor
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
new CustomDetector(config: CustomDetectorConfig)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
**Configuration Options:**
|
|
88
|
+
|
|
89
|
+
| Option | Type | Default | Description |
|
|
90
|
+
|--------|------|---------|-------------|
|
|
91
|
+
| `model` | `string` | required | Path to ONNX model |
|
|
92
|
+
| `inputName` | `string` | auto | Input tensor name |
|
|
93
|
+
| `outputNames` | `string[]` | auto | Output tensor names |
|
|
94
|
+
| `inputShape` | `[number, number, number, number]` | `[1, 3, 224, 224]` | Expected input shape |
|
|
95
|
+
| `inputSize` | `[number, number]` | optional | Input size for automatic preprocessing |
|
|
96
|
+
| `preprocessing` | `function` | auto | Custom preprocessing function |
|
|
97
|
+
| `postprocessing` | `function` | auto | Custom postprocessing function |
|
|
98
|
+
| `normalization` | `object` | `{ mean: [0,0,0], std: [1,1,1] }` | Normalization parameters |
|
|
99
|
+
| `keepAspectRatio` | `boolean` | `true` | Keep aspect ratio during preprocessing |
|
|
100
|
+
| `backgroundColor` | `string` | `'#000000'` | Background color for letterbox |
|
|
101
|
+
| `backend` | `'wasm' \| 'webgpu'` | `'wasm'` | Execution backend |
|
|
102
|
+
| `cache` | `boolean` | `true` | Enable model caching |
|
|
103
|
+
| `metadata` | `any` | optional | Custom metadata for postprocessing |
|
|
104
|
+
|
|
105
|
+
### Methods
|
|
106
|
+
|
|
107
|
+
#### `init()`
|
|
108
|
+
|
|
109
|
+
Initialize the model.
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
await detector.init();
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
#### `runFromCanvas()`
|
|
116
|
+
|
|
117
|
+
Run inference on HTMLCanvasElement.
|
|
118
|
+
|
|
119
|
+
```typescript
|
|
120
|
+
async runFromCanvas<T = any>(
|
|
121
|
+
canvas: HTMLCanvasElement
|
|
122
|
+
): Promise<DetectionResult<T>>
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
#### `runFromVideo()`
|
|
126
|
+
|
|
127
|
+
Run inference on HTMLVideoElement.
|
|
128
|
+
|
|
129
|
+
```typescript
|
|
130
|
+
async runFromVideo<T = any>(
|
|
131
|
+
video: HTMLVideoElement,
|
|
132
|
+
targetCanvas?: HTMLCanvasElement
|
|
133
|
+
): Promise<DetectionResult<T>>
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
#### `runFromImage()`
|
|
137
|
+
|
|
138
|
+
Run inference on HTMLImageElement.
|
|
139
|
+
|
|
140
|
+
```typescript
|
|
141
|
+
async runFromImage<T = any>(
|
|
142
|
+
image: HTMLImageElement,
|
|
143
|
+
targetCanvas?: HTMLCanvasElement
|
|
144
|
+
): Promise<DetectionResult<T>>
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
#### `runFromBitmap()`
|
|
148
|
+
|
|
149
|
+
Run inference on ImageBitmap.
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
async runFromBitmap<T = any>(
|
|
153
|
+
bitmap: ImageBitmap,
|
|
154
|
+
targetCanvas?: HTMLCanvasElement
|
|
155
|
+
): Promise<DetectionResult<T>>
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
#### `runFromFile()`
|
|
159
|
+
|
|
160
|
+
Run inference on File object.
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
async runFromFile<T = any>(
|
|
164
|
+
file: File,
|
|
165
|
+
targetCanvas?: HTMLCanvasElement
|
|
166
|
+
): Promise<DetectionResult<T>>
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
#### `runFromBlob()`
|
|
170
|
+
|
|
171
|
+
Run inference on Blob.
|
|
172
|
+
|
|
173
|
+
```typescript
|
|
174
|
+
async runFromBlob<T = any>(
|
|
175
|
+
blob: Blob,
|
|
176
|
+
targetCanvas?: HTMLCanvasElement
|
|
177
|
+
): Promise<DetectionResult<T>>
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
#### `run()`
|
|
181
|
+
|
|
182
|
+
Low-level method that runs inference directly on raw `ImageData`; the optional `metadata` argument is forwarded to the postprocessing function.
|
|
183
|
+
|
|
184
|
+
```typescript
|
|
185
|
+
async run<T = any>(
|
|
186
|
+
imageData: ImageData,
|
|
187
|
+
width: number,
|
|
188
|
+
height: number,
|
|
189
|
+
metadata?: any
|
|
190
|
+
): Promise<DetectionResult<T>>
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
#### `getModelInfo()`
|
|
194
|
+
|
|
195
|
+
Get model input/output information.
|
|
196
|
+
|
|
197
|
+
```typescript
|
|
198
|
+
const info = detector.getModelInfo();
|
|
199
|
+
console.log(`Inputs: ${info.inputNames}`);
|
|
200
|
+
console.log(`Outputs: ${info.outputNames}`);
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
#### `getOutputTensor()`
|
|
204
|
+
|
|
205
|
+
Get tensor by name from outputs.
|
|
206
|
+
|
|
207
|
+
```typescript
|
|
208
|
+
const tensor = detector.getOutputTensor<ort.Tensor>(
|
|
209
|
+
result.outputs,
|
|
210
|
+
'output_name' // optional, uses first output if not specified
|
|
211
|
+
);
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
#### `dispose()`
|
|
215
|
+
|
|
216
|
+
Release resources.
|
|
217
|
+
|
|
218
|
+
```typescript
|
|
219
|
+
detector.dispose();
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Types
|
|
223
|
+
|
|
224
|
+
#### `DetectionResult<T>`
|
|
225
|
+
|
|
226
|
+
```typescript
|
|
227
|
+
interface DetectionResult<T = any> {
|
|
228
|
+
outputs: Record<string, ort.Tensor>; // Raw model outputs
|
|
229
|
+
data: T; // Processed results
|
|
230
|
+
inferenceTime: number; // Inference time in ms
|
|
231
|
+
inputShape: number[]; // Input shape used
|
|
232
|
+
}
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
#### `CustomDetectorConfig`
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
interface CustomDetectorConfig {
|
|
239
|
+
model: string;
|
|
240
|
+
inputName?: string;
|
|
241
|
+
outputNames?: string[];
|
|
242
|
+
inputShape?: [number, number, number, number];
|
|
243
|
+
preprocessing?: (data: ImageData, config: CustomDetectorConfig) => Float32Array | ort.Tensor;
|
|
244
|
+
postprocessing?: (outputs: Record<string, ort.Tensor>, metadata: any) => any;
|
|
245
|
+
backend?: 'wasm' | 'webgpu';
|
|
246
|
+
cache?: boolean;
|
|
247
|
+
metadata?: any;
|
|
248
|
+
normalization?: {
|
|
249
|
+
mean: number[];
|
|
250
|
+
std: number[];
|
|
251
|
+
};
|
|
252
|
+
inputSize?: [number, number];
|
|
253
|
+
keepAspectRatio?: boolean;
|
|
254
|
+
backgroundColor?: string;
|
|
255
|
+
}
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Examples
|
|
259
|
+
|
|
260
|
+
### Image Classification
|
|
261
|
+
|
|
262
|
+
```typescript
|
|
263
|
+
import { CustomDetector } from 'rtmlib-ts';
|
|
264
|
+
|
|
265
|
+
const detector = new CustomDetector({
|
|
266
|
+
model: 'https://example.com/resnet50.onnx',
|
|
267
|
+
inputSize: [224, 224],
|
|
268
|
+
normalization: {
|
|
269
|
+
mean: [123.675, 116.28, 103.53],
|
|
270
|
+
std: [58.395, 57.12, 57.375],
|
|
271
|
+
},
|
|
272
|
+
postprocessing: (outputs) => {
|
|
273
|
+
const output = outputs['output'];
|
|
274
|
+
const scores = Array.from(output.data as Float32Array);
|
|
275
|
+
const predictedClass = scores.indexOf(Math.max(...scores));
|
|
276
|
+
const confidence = scores[predictedClass];
|
|
277
|
+
return { predictedClass, confidence, scores };
|
|
278
|
+
},
|
|
279
|
+
});
|
|
280
|
+
|
|
281
|
+
await detector.init();
|
|
282
|
+
|
|
283
|
+
const result = await detector.runFromCanvas(canvas);
|
|
284
|
+
console.log(`Predicted: ${result.data.predictedClass} (${(result.data.confidence * 100).toFixed(1)}%)`);
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Object Detection (YOLO-style)
|
|
288
|
+
|
|
289
|
+
```typescript
|
|
290
|
+
const detector = new CustomDetector({
|
|
291
|
+
model: 'path/to/yolo.onnx',
|
|
292
|
+
inputSize: [640, 640],
|
|
293
|
+
normalization: {
|
|
294
|
+
mean: [0, 0, 0],
|
|
295
|
+
std: [1, 1, 1],
|
|
296
|
+
},
|
|
297
|
+
postprocessing: (outputs, metadata) => {
|
|
298
|
+
const output = outputs['output'];
|
|
299
|
+
const data = output.data as Float32Array;
|
|
300
|
+
const numDetections = output.dims[1];
|
|
301
|
+
|
|
302
|
+
const detections = [];
|
|
303
|
+
for (let i = 0; i < numDetections; i++) {
|
|
304
|
+
const idx = i * 6;
|
|
305
|
+
const [x1, y1, x2, y2, conf, classId] = [
|
|
306
|
+
data[idx],
|
|
307
|
+
data[idx + 1],
|
|
308
|
+
data[idx + 2],
|
|
309
|
+
data[idx + 3],
|
|
310
|
+
data[idx + 4],
|
|
311
|
+
Math.round(data[idx + 5]),
|
|
312
|
+
];
|
|
313
|
+
|
|
314
|
+
if (conf > 0.5) {
|
|
315
|
+
detections.push({
|
|
316
|
+
bbox: { x1, y1, x2, y2 },
|
|
317
|
+
classId,
|
|
318
|
+
confidence: conf,
|
|
319
|
+
});
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
return detections;
|
|
324
|
+
},
|
|
325
|
+
});
|
|
326
|
+
|
|
327
|
+
await detector.init();
|
|
328
|
+
const result = await detector.runFromCanvas(canvas);
|
|
329
|
+
console.log(`Detected ${result.data.length} objects`);
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
### Semantic Segmentation
|
|
333
|
+
|
|
334
|
+
```typescript
|
|
335
|
+
const detector = new CustomDetector({
|
|
336
|
+
model: 'path/to/deeplab.onnx',
|
|
337
|
+
inputSize: [512, 512],
|
|
338
|
+
normalization: {
|
|
339
|
+
mean: [123.675, 116.28, 103.53],
|
|
340
|
+
std: [58.395, 57.12, 57.375],
|
|
341
|
+
},
|
|
342
|
+
postprocessing: (outputs) => {
|
|
343
|
+
const mask = outputs['mask'];
|
|
344
|
+
const data = mask.data as Float32Array;
|
|
345
|
+
const [batch, numClasses, height, width] = mask.dims;
|
|
346
|
+
|
|
347
|
+
// Get class for each pixel
|
|
348
|
+
const segmentation = new Uint8Array(height * width);
|
|
349
|
+
for (let y = 0; y < height; y++) {
|
|
350
|
+
for (let x = 0; x < width; x++) {
|
|
351
|
+
const pixelIdx = y * width + x;
|
|
352
|
+
let maxClass = 0;
|
|
353
|
+
let maxScore = -Infinity;
|
|
354
|
+
|
|
355
|
+
for (let c = 0; c < numClasses; c++) {
|
|
356
|
+
const score = data[c * height * width + pixelIdx];
|
|
357
|
+
if (score > maxScore) {
|
|
358
|
+
maxScore = score;
|
|
359
|
+
maxClass = c;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
segmentation[pixelIdx] = maxClass;
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
return { segmentation, height, width };
|
|
368
|
+
},
|
|
369
|
+
});
|
|
370
|
+
|
|
371
|
+
await detector.init();
|
|
372
|
+
const result = await detector.runFromCanvas(canvas);
|
|
373
|
+
console.log(`Segmentation map: ${result.data.height}x${result.data.width}`);
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
### Face Landmarks
|
|
377
|
+
|
|
378
|
+
```typescript
|
|
379
|
+
const detector = new CustomDetector({
|
|
380
|
+
model: 'path/to/face_landmarks.onnx',
|
|
381
|
+
inputSize: [192, 192],
|
|
382
|
+
preprocessing: (imageData, config) => {
|
|
383
|
+
// Custom face preprocessing
|
|
384
|
+
const tensor = new Float32Array(3 * 192 * 192);
|
|
385
|
+
const { data, width, height } = imageData;
|
|
386
|
+
|
|
387
|
+
for (let i = 0; i < data.length; i += 4) {
|
|
388
|
+
const pixelIdx = i / 4;
|
|
389
|
+
tensor[pixelIdx] = (data[i] - 127.5) / 127.5;
|
|
390
|
+
tensor[pixelIdx + width * height] = (data[i + 1] - 127.5) / 127.5;
|
|
391
|
+
tensor[pixelIdx + 2 * width * height] = (data[i + 2] - 127.5) / 127.5;
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
return tensor;
|
|
395
|
+
},
|
|
396
|
+
postprocessing: (outputs) => {
|
|
397
|
+
const landmarks = outputs['landmarks'];
|
|
398
|
+
const data = landmarks.data as Float32Array;
|
|
399
|
+
const numLandmarks = landmarks.dims[1];
|
|
400
|
+
|
|
401
|
+
const points = [];
|
|
402
|
+
for (let i = 0; i < numLandmarks; i++) {
|
|
403
|
+
points.push({
|
|
404
|
+
x: data[i * 2],
|
|
405
|
+
y: data[i * 2 + 1],
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
return points;
|
|
410
|
+
},
|
|
411
|
+
});
|
|
412
|
+
|
|
413
|
+
await detector.init();
|
|
414
|
+
const result = await detector.runFromCanvas(canvas);
|
|
415
|
+
console.log(`Detected ${result.data.length} facial landmarks`);
|
|
416
|
+
```
|
|
417
|
+
|
|
418
|
+
## Preprocessing Options
|
|
419
|
+
|
|
420
|
+
### Automatic Preprocessing
|
|
421
|
+
|
|
422
|
+
If you provide `inputSize`, automatic preprocessing with letterbox is applied:
|
|
423
|
+
|
|
424
|
+
```typescript
|
|
425
|
+
const detector = new CustomDetector({
|
|
426
|
+
model: 'path/to/model.onnx',
|
|
427
|
+
inputSize: [224, 224],
|
|
428
|
+
keepAspectRatio: true, // Letterbox padding
|
|
429
|
+
backgroundColor: '#000000',
|
|
430
|
+
normalization: {
|
|
431
|
+
mean: [123.675, 116.28, 103.53],
|
|
432
|
+
std: [58.395, 57.12, 57.375],
|
|
433
|
+
},
|
|
434
|
+
});
|
|
435
|
+
```
|
|
436
|
+
|
|
437
|
+
### Custom Preprocessing Function
|
|
438
|
+
|
|
439
|
+
```typescript
|
|
440
|
+
const detector = new CustomDetector({
|
|
441
|
+
model: 'path/to/model.onnx',
|
|
442
|
+
preprocessing: (imageData, config) => {
|
|
443
|
+
const { data, width, height } = imageData;
|
|
444
|
+
const tensor = new Float32Array(3 * width * height);
|
|
445
|
+
|
|
446
|
+
// Custom preprocessing logic
|
|
447
|
+
for (let i = 0; i < data.length; i += 4) {
|
|
448
|
+
const pixelIdx = i / 4;
|
|
449
|
+
tensor[pixelIdx] = data[i] / 255; // R
|
|
450
|
+
tensor[pixelIdx + width * height] = data[i + 1] / 255; // G
|
|
451
|
+
tensor[pixelIdx + 2 * width * height] = data[i + 2] / 255; // B
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
return tensor;
|
|
455
|
+
},
|
|
456
|
+
});
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
## Postprocessing Options
|
|
460
|
+
|
|
461
|
+
### Automatic Postprocessing
|
|
462
|
+
|
|
463
|
+
If no `postprocessing` function is provided, raw ONNX outputs are returned:
|
|
464
|
+
|
|
465
|
+
```typescript
|
|
466
|
+
const result = await detector.runFromCanvas(canvas);
|
|
467
|
+
console.log(result.outputs); // Record<string, ort.Tensor>
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
### Custom Postprocessing Function
|
|
471
|
+
|
|
472
|
+
```typescript
|
|
473
|
+
const detector = new CustomDetector({
|
|
474
|
+
model: 'path/to/model.onnx',
|
|
475
|
+
postprocessing: (outputs, metadata) => {
|
|
476
|
+
// Process outputs
|
|
477
|
+
const output1 = outputs['output1'];
|
|
478
|
+
const output2 = outputs['output2'];
|
|
479
|
+
|
|
480
|
+
// Your custom logic
|
|
481
|
+
return {
|
|
482
|
+
processed1: processOutput1(output1),
|
|
483
|
+
processed2: processOutput2(output2),
|
|
484
|
+
};
|
|
485
|
+
},
|
|
486
|
+
});
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
## Performance Optimization
|
|
490
|
+
|
|
491
|
+
### 1. Use WebGPU Backend
|
|
492
|
+
|
|
493
|
+
```typescript
|
|
494
|
+
const detector = new CustomDetector({
|
|
495
|
+
model: 'path/to/model.onnx',
|
|
496
|
+
backend: 'webgpu', // Faster than WASM
|
|
497
|
+
});
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
### 2. Enable Model Caching
|
|
501
|
+
|
|
502
|
+
```typescript
|
|
503
|
+
const detector = new CustomDetector({
|
|
504
|
+
model: 'path/to/model.onnx',
|
|
505
|
+
cache: true, // Cache model for faster subsequent loads
|
|
506
|
+
});
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
### 3. Reuse Detector Instance
|
|
510
|
+
|
|
511
|
+
```typescript
|
|
512
|
+
const detector = new CustomDetector({ model: 'path/to/model.onnx' });
|
|
513
|
+
await detector.init();
|
|
514
|
+
|
|
515
|
+
// Reuse for multiple inferences
|
|
516
|
+
for (const frame of frames) {
|
|
517
|
+
const result = await detector.run(frame.data, frame.width, frame.height);
|
|
518
|
+
}
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
### 4. Pre-allocate Resources
|
|
522
|
+
|
|
523
|
+
```typescript
|
|
524
|
+
// Create canvas once
|
|
525
|
+
const canvas = document.createElement('canvas');
|
|
526
|
+
canvas.width = 640;
|
|
527
|
+
canvas.height = 480;
|
|
528
|
+
|
|
529
|
+
// Reuse for all inferences
|
|
530
|
+
const result = await detector.runFromCanvas(canvas);
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
## Browser Support
|
|
534
|
+
|
|
535
|
+
| Browser | Version | Backend |
|
|
536
|
+
|---------|---------|---------|
|
|
537
|
+
| Chrome | 94+ | WASM, WebGPU |
|
|
538
|
+
| Edge | 94+ | WASM, WebGPU |
|
|
539
|
+
| Firefox | 95+ | WASM |
|
|
540
|
+
| Safari | 16.4+ | WASM |
|
|
541
|
+
|
|
542
|
+
## Troubleshooting
|
|
543
|
+
|
|
544
|
+
### "Model loading failed"
|
|
545
|
+
|
|
546
|
+
- Ensure model is accessible via HTTP (not `file://` protocol)
|
|
547
|
+
- Use a local server: `python -m http.server 8080`
|
|
548
|
+
- Check CORS headers
|
|
549
|
+
|
|
550
|
+
### "Input name not found"
|
|
551
|
+
|
|
552
|
+
- Use `getModelInfo()` to check available input names
|
|
553
|
+
- Specify `inputName` in config if auto-detection fails
|
|
554
|
+
|
|
555
|
+
### "Output shape mismatch"
|
|
556
|
+
|
|
557
|
+
- Check model's expected input shape with `getModelInfo()`
|
|
558
|
+
- Adjust `inputSize` or `inputShape` in config
|
|
559
|
+
|
|
560
|
+
### "Slow inference"
|
|
561
|
+
|
|
562
|
+
- Switch to WebGPU backend if available
|
|
563
|
+
- Reduce `inputSize`
|
|
564
|
+
- Use model quantization (INT8 models)
|
|
565
|
+
|
|
566
|
+
## License
|
|
567
|
+
|
|
568
|
+
Apache 2.0
|