@camstack/vision 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/addons/animal-classifier/index.d.mts +25 -0
- package/dist/addons/animal-classifier/index.d.ts +25 -0
- package/dist/addons/animal-classifier/index.js +469 -0
- package/dist/addons/animal-classifier/index.js.map +1 -0
- package/dist/addons/animal-classifier/index.mjs +9 -0
- package/dist/addons/animal-classifier/index.mjs.map +1 -0
- package/dist/addons/audio-classification/index.d.mts +31 -0
- package/dist/addons/audio-classification/index.d.ts +31 -0
- package/dist/addons/audio-classification/index.js +411 -0
- package/dist/addons/audio-classification/index.js.map +1 -0
- package/dist/addons/audio-classification/index.mjs +8 -0
- package/dist/addons/audio-classification/index.mjs.map +1 -0
- package/dist/addons/bird-global-classifier/index.d.mts +26 -0
- package/dist/addons/bird-global-classifier/index.d.ts +26 -0
- package/dist/addons/bird-global-classifier/index.js +475 -0
- package/dist/addons/bird-global-classifier/index.js.map +1 -0
- package/dist/addons/bird-global-classifier/index.mjs +9 -0
- package/dist/addons/bird-global-classifier/index.mjs.map +1 -0
- package/dist/addons/bird-nabirds-classifier/index.d.mts +28 -0
- package/dist/addons/bird-nabirds-classifier/index.d.ts +28 -0
- package/dist/addons/bird-nabirds-classifier/index.js +517 -0
- package/dist/addons/bird-nabirds-classifier/index.js.map +1 -0
- package/dist/addons/bird-nabirds-classifier/index.mjs +9 -0
- package/dist/addons/bird-nabirds-classifier/index.mjs.map +1 -0
- package/dist/addons/camera-native-detection/index.d.mts +32 -0
- package/dist/addons/camera-native-detection/index.d.ts +32 -0
- package/dist/addons/camera-native-detection/index.js +99 -0
- package/dist/addons/camera-native-detection/index.js.map +1 -0
- package/dist/addons/camera-native-detection/index.mjs +7 -0
- package/dist/addons/camera-native-detection/index.mjs.map +1 -0
- package/dist/addons/face-detection/index.d.mts +24 -0
- package/dist/addons/face-detection/index.d.ts +24 -0
- package/dist/addons/face-detection/index.js +513 -0
- package/dist/addons/face-detection/index.js.map +1 -0
- package/dist/addons/face-detection/index.mjs +10 -0
- package/dist/addons/face-detection/index.mjs.map +1 -0
- package/dist/addons/face-recognition/index.d.mts +24 -0
- package/dist/addons/face-recognition/index.d.ts +24 -0
- package/dist/addons/face-recognition/index.js +437 -0
- package/dist/addons/face-recognition/index.js.map +1 -0
- package/dist/addons/face-recognition/index.mjs +9 -0
- package/dist/addons/face-recognition/index.mjs.map +1 -0
- package/dist/addons/motion-detection/index.d.mts +26 -0
- package/dist/addons/motion-detection/index.d.ts +26 -0
- package/dist/addons/motion-detection/index.js +273 -0
- package/dist/addons/motion-detection/index.js.map +1 -0
- package/dist/addons/motion-detection/index.mjs +8 -0
- package/dist/addons/motion-detection/index.mjs.map +1 -0
- package/dist/addons/object-detection/index.d.mts +25 -0
- package/dist/addons/object-detection/index.d.ts +25 -0
- package/dist/addons/object-detection/index.js +673 -0
- package/dist/addons/object-detection/index.js.map +1 -0
- package/dist/addons/object-detection/index.mjs +10 -0
- package/dist/addons/object-detection/index.mjs.map +1 -0
- package/dist/addons/plate-detection/index.d.mts +25 -0
- package/dist/addons/plate-detection/index.d.ts +25 -0
- package/dist/addons/plate-detection/index.js +477 -0
- package/dist/addons/plate-detection/index.js.map +1 -0
- package/dist/addons/plate-detection/index.mjs +10 -0
- package/dist/addons/plate-detection/index.mjs.map +1 -0
- package/dist/addons/plate-recognition/index.d.mts +25 -0
- package/dist/addons/plate-recognition/index.d.ts +25 -0
- package/dist/addons/plate-recognition/index.js +470 -0
- package/dist/addons/plate-recognition/index.js.map +1 -0
- package/dist/addons/plate-recognition/index.mjs +9 -0
- package/dist/addons/plate-recognition/index.mjs.map +1 -0
- package/dist/chunk-3BKYLBBH.mjs +229 -0
- package/dist/chunk-3BKYLBBH.mjs.map +1 -0
- package/dist/chunk-4PC262GU.mjs +203 -0
- package/dist/chunk-4PC262GU.mjs.map +1 -0
- package/dist/chunk-6OR5TE7A.mjs +101 -0
- package/dist/chunk-6OR5TE7A.mjs.map +1 -0
- package/dist/chunk-7SZAISGP.mjs +210 -0
- package/dist/chunk-7SZAISGP.mjs.map +1 -0
- package/dist/chunk-AD2TFYZA.mjs +235 -0
- package/dist/chunk-AD2TFYZA.mjs.map +1 -0
- package/dist/chunk-CGYSSHHM.mjs +363 -0
- package/dist/chunk-CGYSSHHM.mjs.map +1 -0
- package/dist/chunk-IYHMGYGP.mjs +79 -0
- package/dist/chunk-IYHMGYGP.mjs.map +1 -0
- package/dist/chunk-J3IUBPRE.mjs +187 -0
- package/dist/chunk-J3IUBPRE.mjs.map +1 -0
- package/dist/chunk-KFZDJPYL.mjs +190 -0
- package/dist/chunk-KFZDJPYL.mjs.map +1 -0
- package/dist/chunk-KUO2BVFY.mjs +90 -0
- package/dist/chunk-KUO2BVFY.mjs.map +1 -0
- package/dist/chunk-PXBY3QOA.mjs +152 -0
- package/dist/chunk-PXBY3QOA.mjs.map +1 -0
- package/dist/chunk-XUKDL23Y.mjs +216 -0
- package/dist/chunk-XUKDL23Y.mjs.map +1 -0
- package/dist/chunk-Z26BVC7S.mjs +214 -0
- package/dist/chunk-Z26BVC7S.mjs.map +1 -0
- package/dist/chunk-Z5AHZQEZ.mjs +258 -0
- package/dist/chunk-Z5AHZQEZ.mjs.map +1 -0
- package/dist/index.d.mts +152 -0
- package/dist/index.d.ts +152 -0
- package/dist/index.js +2775 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +205 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +43 -0
- package/python/coreml_inference.py +67 -0
- package/python/openvino_inference.py +76 -0
- package/python/pytorch_inference.py +74 -0
|
@@ -0,0 +1,673 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Cached references to the Object built-ins used by the module-interop helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

/**
 * Defines every entry of `all` on `target` as an enumerable getter property,
 * using the entry's value (a function) as the getter.
 */
var __export = (target, all) => {
  for (var name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};

/**
 * Mirrors the own properties of `from` onto `to` as getters that read through
 * to `from`. Keys that `to` already owns, and the key equal to `except`, are
 * skipped. Does nothing unless `from` is a non-null object or a function.
 * `desc` is only used as scratch storage. Returns `to`.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = (from && typeof from === "object") || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) {
      continue;
    }
    // Keep the source property's enumerability; treat a missing descriptor as enumerable.
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};

/**
 * Wraps a required module in a namespace-like object whose prototype is the
 * module's prototype. `target` is only used as scratch storage.
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};

/** Copies `mod` onto a fresh object that carries a non-enumerable `__esModule: true` marker. */
var __toCommonJS = (mod) => {
  const marked = __defProp({}, "__esModule", { value: true });
  return __copyProps(marked, mod);
};
|
|
29
|
+
|
|
30
|
+
// src/addons/object-detection/index.ts
|
|
31
|
+
var object_detection_exports = {};
|
|
32
|
+
__export(object_detection_exports, {
|
|
33
|
+
default: () => ObjectDetectionAddon
|
|
34
|
+
});
|
|
35
|
+
module.exports = __toCommonJS(object_detection_exports);
|
|
36
|
+
var import_types = require("@camstack/types");
|
|
37
|
+
|
|
38
|
+
// src/shared/image-utils.ts
|
|
39
|
+
var import_sharp = __toESM(require("sharp"));
|
|
40
|
+
/**
 * Letterboxes an encoded image into a square model input.
 *
 * The image is resized to fit inside a `targetSize` x `targetSize` canvas
 * while keeping its aspect ratio, the remaining border is filled with gray
 * (114, 114, 114), and the pixels are returned in planar order (all values of
 * channel 0, then channel 1, then channel 2), each divided by 255.
 *
 * @param {Buffer} jpeg - Encoded image bytes passed straight to sharp.
 * @param {number} targetSize - Side length of the square output, in pixels.
 * @returns {Promise<{data: Float32Array, scale: number, padX: number, padY: number, originalWidth: number, originalHeight: number}>}
 *   The tensor data plus the scale factor and left/top padding needed to map
 *   model-space coordinates back to the source image.
 * @throws {Error} If sharp cannot report a positive width and height.
 */
async function letterbox(jpeg, targetSize) {
  const meta = await (0, import_sharp.default)(jpeg).metadata();
  const originalWidth = meta.width ?? 0;
  const originalHeight = meta.height ?? 0;
  // Without real dimensions the scale below would be Infinity/NaN and every
  // derived size would be garbage, so fail with a clear message instead.
  if (originalWidth <= 0 || originalHeight <= 0) {
    throw new Error(
      `letterbox: could not determine image dimensions (width=${meta.width}, height=${meta.height})`
    );
  }
  const scale = Math.min(targetSize / originalWidth, targetSize / originalHeight);
  // Extreme aspect ratios can round the short side down to 0; keep at least one pixel.
  const scaledWidth = Math.max(1, Math.round(originalWidth * scale));
  const scaledHeight = Math.max(1, Math.round(originalHeight * scale));
  const padX = Math.floor((targetSize - scaledWidth) / 2);
  const padY = Math.floor((targetSize - scaledHeight) / 2);
  const { data, info } = await (0, import_sharp.default)(jpeg).resize(scaledWidth, scaledHeight).extend({
    top: padY,
    bottom: targetSize - scaledHeight - padY,
    left: padX,
    right: targetSize - scaledWidth - padX,
    background: { r: 114, g: 114, b: 114 }
  }).removeAlpha().raw().toBuffer({ resolveWithObject: true });
  // Stride of the interleaved source buffer. When fewer than three channels
  // come back (e.g. a greyscale source), the first channel is replicated
  // instead of reading past each pixel.
  const channels = info?.channels ?? 3;
  const numPixels = targetSize * targetSize;
  const float32 = new Float32Array(3 * numPixels);
  for (let i = 0; i < numPixels; i++) {
    const srcBase = i * channels;
    for (let c = 0; c < 3; c++) {
      const srcChannel = channels >= 3 ? c : 0;
      float32[c * numPixels + i] = data[srcBase + srcChannel] / 255;
    }
  }
  return { data: float32, scale, padX, padY, originalWidth, originalHeight };
}
|
|
66
|
+
|
|
67
|
+
// src/shared/postprocess/yolo.ts
|
|
68
|
+
/**
 * Intersection-over-union of two axis-aligned boxes.
 *
 * @param {{x: number, y: number, w: number, h: number}} a - Box given by its top-left corner and size.
 * @param {{x: number, y: number, w: number, h: number}} b - Box in the same form.
 * @returns {number} Overlap area divided by union area; 0 when the boxes do
 *   not overlap or the union area is 0.
 */
function iou(a, b) {
  const overlapW = Math.max(0, Math.min(a.x + a.w, b.x + b.w) - Math.max(a.x, b.x));
  const overlapH = Math.max(0, Math.min(a.y + a.h, b.y + b.h) - Math.max(a.y, b.y));
  const intersection = overlapW * overlapH;
  if (intersection === 0) {
    return 0;
  }
  const union = a.w * a.h + b.w * b.h - intersection;
  if (union === 0) {
    return 0;
  }
  return intersection / union;
}
|
|
90
|
+
/**
 * Greedy non-maximum suppression.
 *
 * Boxes are visited from highest to lowest score. Each box that has not been
 * suppressed is kept, and every lower-ranked box whose IoU with it exceeds
 * `iouThreshold` is suppressed. Classes are not compared, so boxes of
 * different classes can suppress each other.
 *
 * @param {Array<{bbox: {x: number, y: number, w: number, h: number}, score: number}>} boxes
 * @param {number} iouThreshold - Boxes overlapping a kept box by more than this are dropped.
 * @returns {number[]} Indices into `boxes` of the kept entries, in descending score order.
 */
function nms(boxes, iouThreshold) {
  const order = boxes.map((_, i) => i).sort((a, b) => boxes[b].score - boxes[a].score);
  const kept = [];
  const suppressed = /* @__PURE__ */ new Set();
  for (let pos = 0; pos < order.length; pos++) {
    const idx = order[pos];
    if (suppressed.has(idx)) continue;
    kept.push(idx);
    // Only lower-ranked boxes need checking: every higher-ranked box is either
    // already suppressed or was kept, and a kept box that overlapped this one
    // would have suppressed it already (IoU is symmetric).
    for (let next = pos + 1; next < order.length; next++) {
      const other = order[next];
      if (suppressed.has(other)) continue;
      if (iou(boxes[idx].bbox, boxes[other].bbox) > iouThreshold) {
        suppressed.add(other);
      }
    }
  }
  return kept;
}
|
|
106
|
+
/**
 * Turns a flat YOLO detection output into final detections.
 *
 * `output` is read attribute-major: the value of attribute `a` for box `i` is
 * at `output[a * numBoxes + i]`, where attributes 0-3 are centre x, centre y,
 * width, height and attributes `4 .. 4 + numClasses - 1` are class scores.
 * Boxes whose best class score is below `options.confidence` are dropped, the
 * rest go through `nms`, and survivors are mapped back from letterboxed
 * coordinates to the original image and clamped to its bounds.
 *
 * @param {ArrayLike<number>} output - Flat model output.
 * @param {number} numClasses - Number of class score rows.
 * @param {number} numBoxes - Number of candidate boxes.
 * @param {object} options - `confidence`, `iouThreshold`, `labels`, and the
 *   letterbox parameters `scale`, `padX`, `padY`, `originalWidth`, `originalHeight`.
 * @returns {Array<{class: string, originalClass: string, score: number, bbox: {x: number, y: number, w: number, h: number}}>}
 */
function yoloPostprocess(output, numClasses, numBoxes, options) {
  const { confidence, iouThreshold, labels, scale, padX, padY, originalWidth, originalHeight } = options;

  const candidates = [];
  for (let box = 0; box < numBoxes; box += 1) {
    // Pick the highest-scoring class for this box.
    let bestScore = -Infinity;
    let bestClass = 0;
    for (let cls = 0; cls < numClasses; cls += 1) {
      const score = output[(4 + cls) * numBoxes + box];
      if (score > bestScore) {
        bestScore = score;
        bestClass = cls;
      }
    }
    if (bestScore < confidence) {
      continue;
    }
    const cx = output[box];
    const cy = output[numBoxes + box];
    const w = output[2 * numBoxes + box];
    const h = output[3 * numBoxes + box];
    // Convert centre/size to top-left/size.
    candidates.push({
      bbox: { x: cx - w / 2, y: cy - h / 2, w, h },
      score: bestScore,
      classIdx: bestClass
    });
  }
  if (candidates.length === 0) {
    return [];
  }

  // Undo the letterbox (remove padding, divide by scale) and clamp to the image.
  const toImageX = (value) => Math.max(0, Math.min(originalWidth, (value - padX) / scale));
  const toImageY = (value) => Math.max(0, Math.min(originalHeight, (value - padY) / scale));

  const detections = [];
  for (const idx of nms(candidates, iouThreshold)) {
    const { bbox, score, classIdx } = candidates[idx];
    const label = labels[classIdx] ?? String(classIdx);
    const x = toImageX(bbox.x);
    const y = toImageY(bbox.y);
    const x2 = toImageX(bbox.x + bbox.w);
    const y2 = toImageY(bbox.y + bbox.h);
    detections.push({
      class: label,
      originalClass: label,
      score,
      bbox: { x, y, w: x2 - x, h: y2 - y }
    });
  }
  return detections;
}
|
|
150
|
+
|
|
151
|
+
// src/shared/postprocess/yolo-seg.ts
|
|
152
|
+
/** Logistic function: maps any real `x` into the range (0, 1). */
function sigmoid(x) {
  const decay = Math.exp(-x);
  return 1 / (1 + decay);
}
|
|
155
|
+
/**
 * Builds one instance mask from prototype masks and per-detection coefficients.
 *
 * For each of the `maskH * maskW` pixels, the prototype values (prototype `k`
 * occupies `protos[k * maskH * maskW ...]`) are weighted by `coeffs[k]`,
 * summed, and passed through `sigmoid`. Missing coefficient or prototype
 * entries count as 0.
 *
 * @param {ArrayLike<number>} coeffs - Mask coefficients for one detection.
 * @param {ArrayLike<number>} protos - Flat prototype masks, prototype-major.
 * @param {number} numMaskCoeffs - Number of prototypes/coefficients to combine.
 * @param {number} maskH - Mask height in pixels.
 * @param {number} maskW - Mask width in pixels.
 * @returns {Float32Array} Row-major mask of length `maskH * maskW`.
 */
function computeRawMask(coeffs, protos, numMaskCoeffs, maskH, maskW) {
  const pixelCount = maskH * maskW;
  const mask = new Float32Array(pixelCount);
  for (let pixel = 0; pixel < pixelCount; pixel += 1) {
    let logit = 0;
    let k = 0;
    while (k < numMaskCoeffs) {
      const weight = coeffs[k] ?? 0;
      const proto = protos[k * pixelCount + pixel] ?? 0;
      logit += weight * proto;
      k += 1;
    }
    mask[pixel] = sigmoid(logit);
  }
  return mask;
}
|
|
167
|
+
/**
 * Crops a raw mask to a bounding box and binarises it.
 *
 * The box is scaled by `maskScale` into mask coordinates, expanded outward to
 * whole pixels, and clipped to the mask. Inside that region every value
 * greater than `maskThreshold` becomes 255 and everything else 0.
 *
 * The result is always at least 1x1. When the clipped region is empty (a
 * zero-area box, or a box entirely outside the mask) the result is all zeros
 * rather than a sample taken from outside the box.
 *
 * @param {ArrayLike<number>} rawMask - Row-major mask of size `maskH * maskW`.
 * @param {number} maskH - Mask height in pixels.
 * @param {number} maskW - Mask width in pixels.
 * @param {{x: number, y: number, w: number, h: number}} bbox - Box in the
 *   coordinate space that `maskScale` converts to mask pixels.
 * @param {number} maskThreshold - Values strictly above this are foreground.
 * @param {number} maskScale - Multiplier from box coordinates to mask pixels.
 * @returns {{data: Uint8Array, width: number, height: number}} Row-major 0/255 mask.
 */
function cropAndThresholdMask(rawMask, maskH, maskW, bbox, maskThreshold, maskScale) {
  const cropX1 = Math.max(0, Math.floor(bbox.x * maskScale));
  const cropY1 = Math.max(0, Math.floor(bbox.y * maskScale));
  const cropX2 = Math.min(maskW, Math.ceil((bbox.x + bbox.w) * maskScale));
  const cropY2 = Math.min(maskH, Math.ceil((bbox.y + bbox.h) * maskScale));
  const cropW = Math.max(1, cropX2 - cropX1);
  const cropH = Math.max(1, cropY2 - cropY1);
  const data = new Uint8Array(cropW * cropH);
  // An empty region is padded to 1px by the Math.max above. Sampling it would
  // read a pixel outside the box, and for boxes at or past the right edge the
  // flat index would wrap into the next mask row. Leave the padding as 0.
  if (cropX2 <= cropX1 || cropY2 <= cropY1) {
    return { data, width: cropW, height: cropH };
  }
  for (let row = 0; row < cropH; row++) {
    const srcRow = cropY1 + row;
    for (let col = 0; col < cropW; col++) {
      const srcCol = cropX1 + col;
      const srcIdx = srcRow * maskW + srcCol;
      data[row * cropW + col] = (rawMask[srcIdx] ?? 0) > maskThreshold ? 255 : 0;
    }
  }
  return { data, width: cropW, height: cropH };
}
|
|
185
|
+
/**
 * Turns YOLO segmentation outputs into detections with per-instance masks.
 *
 * `segOutput.detectionOutput` is read attribute-major (`attr * numBoxes + box`):
 * attributes 0-3 are centre x, centre y, width, height; the next `numClasses`
 * are class scores; the next `numMaskCoeffs` are mask coefficients. Boxes
 * whose best score is below `confidence` are dropped and the rest go through
 * `nms`. For each survivor the box is mapped back to original-image
 * coordinates, and a binary mask is built from `protoOutput` and cropped to
 * the box in letterboxed (model input) space.
 *
 * @param {object} segOutput - `detectionOutput`, `protoOutput`, `numClasses`,
 *   `numBoxes`, `numMaskCoeffs`, `maskHeight`, `maskWidth`.
 * @param {object} options - `confidence`, `iouThreshold`, `labels`, the
 *   letterbox parameters `scale`, `padX`, `padY`, `originalWidth`,
 *   `originalHeight`, and optionally:
 *   - `maskThreshold` (default 0.5): mask values above this are foreground;
 *   - `inputSize` (default 640): side length of the square model input, used
 *     to convert box coordinates to mask pixels.
 * @returns {Array<object>} Detections with `class`, `originalClass`, `score`,
 *   `bbox`, `mask` (Uint8Array of 0/255), `maskWidth`, `maskHeight`.
 */
function yoloSegPostprocess(segOutput, options) {
  const {
    detectionOutput,
    protoOutput,
    numClasses,
    numBoxes,
    numMaskCoeffs,
    maskHeight,
    maskWidth
  } = segOutput;
  const {
    confidence,
    iouThreshold,
    labels,
    scale,
    padX,
    padY,
    originalWidth,
    originalHeight,
    maskThreshold = 0.5,
    inputSize = 640
  } = options;
  // Ratio between mask pixels and model-input pixels. Derived from the mask
  // height only, so a non-square mask/input combination is not supported.
  const maskScale = maskHeight / inputSize;
  const candidates = [];
  for (let i = 0; i < numBoxes; i++) {
    const cx = detectionOutput[0 * numBoxes + i] ?? 0;
    const cy = detectionOutput[1 * numBoxes + i] ?? 0;
    const w = detectionOutput[2 * numBoxes + i] ?? 0;
    const h = detectionOutput[3 * numBoxes + i] ?? 0;
    let bestScore = -Infinity;
    let bestClass = 0;
    for (let j = 0; j < numClasses; j++) {
      const score = detectionOutput[(4 + j) * numBoxes + i] ?? 0;
      if (score > bestScore) {
        bestScore = score;
        bestClass = j;
      }
    }
    if (bestScore < confidence) continue;
    // Convert centre/size to top-left/size (still in letterboxed space).
    const bbox = {
      x: cx - w / 2,
      y: cy - h / 2,
      w,
      h
    };
    // Mask coefficients follow the class scores in the attribute layout.
    const coeffs = new Float32Array(numMaskCoeffs);
    for (let k = 0; k < numMaskCoeffs; k++) {
      coeffs[k] = detectionOutput[(4 + numClasses + k) * numBoxes + i] ?? 0;
    }
    candidates.push({ bbox, score: bestScore, classIdx: bestClass, coeffs });
  }
  if (candidates.length === 0) return [];
  const keptIndices = nms(candidates, iouThreshold);
  return keptIndices.map((idx) => {
    const { bbox, score, classIdx, coeffs } = candidates[idx];
    const label = labels[classIdx] ?? String(classIdx);
    // Undo the letterbox (remove padding, divide by scale) and clamp to the image.
    const x = Math.max(0, Math.min(originalWidth, (bbox.x - padX) / scale));
    const y = Math.max(0, Math.min(originalHeight, (bbox.y - padY) / scale));
    const x2 = Math.max(0, Math.min(originalWidth, (bbox.x + bbox.w - padX) / scale));
    const y2 = Math.max(0, Math.min(originalHeight, (bbox.y + bbox.h - padY) / scale));
    const finalBbox = { x, y, w: x2 - x, h: y2 - y };
    const rawMask = computeRawMask(coeffs, protoOutput, numMaskCoeffs, maskHeight, maskWidth);
    // The mask lives in letterboxed space, so it is cropped with the
    // un-mapped `bbox`, not `finalBbox`.
    const { data: maskData, width: mW, height: mH } = cropAndThresholdMask(
      rawMask,
      maskHeight,
      maskWidth,
      bbox,
      maskThreshold,
      maskScale
    );
    return {
      class: label,
      originalClass: label,
      score,
      bbox: finalBbox,
      mask: maskData,
      maskWidth: mW,
      maskHeight: mH
    };
  });
}
|
|
266
|
+
|
|
267
|
+
// src/shared/engine-resolver.ts
|
|
268
|
+
var fs = __toESM(require("fs"));
|
|
269
|
+
var path2 = __toESM(require("path"));
|
|
270
|
+
|
|
271
|
+
// src/shared/node-engine.ts
|
|
272
|
+
var path = __toESM(require("path"));
|
|
273
|
+
// Backend name -> execution provider name passed to onnxruntime-node.
var BACKEND_TO_PROVIDER = {
  cpu: "cpu",
  coreml: "coreml",
  cuda: "cuda",
  tensorrt: "tensorrt",
  dml: "dml"
};
// Backend name -> device label exposed on the engine. Backends without an
// entry here (e.g. "dml") are reported as "cpu".
var BACKEND_TO_DEVICE = {
  cpu: "cpu",
  coreml: "gpu-mps",
  cuda: "gpu-cuda",
  tensorrt: "tensorrt"
};
/**
 * Inference engine backed by an onnxruntime-node session.
 *
 * Usage: construct, `await initialize()`, call `run()` / `runMultiOutput()`
 * any number of times, then `await dispose()`.
 */
var NodeInferenceEngine = class {
  /**
   * @param {string} modelPath - Model file; relative paths are resolved against `process.cwd()` in `initialize()`.
   * @param {string} backend - Key of `BACKEND_TO_PROVIDER`; unknown values fall back to "cpu".
   */
  constructor(modelPath, backend) {
    this.modelPath = modelPath;
    this.backend = backend;
    this.device = BACKEND_TO_DEVICE[backend] ?? "cpu";
  }
  runtime = "onnx";
  device;
  session = null;
  /** Loads onnxruntime-node and creates the inference session for the model. */
  async initialize() {
    const ort = await import("onnxruntime-node");
    const provider = BACKEND_TO_PROVIDER[this.backend] ?? "cpu";
    const absModelPath = path.isAbsolute(this.modelPath) ? this.modelPath : path.resolve(process.cwd(), this.modelPath);
    const sessionOptions = {
      executionProviders: [provider]
    };
    this.session = await ort.InferenceSession.create(absModelPath, sessionOptions);
  }
  /**
   * Feeds `input` to the model's first input and runs the session.
   * Shared by `run` and `runMultiOutput`.
   *
   * @returns {Promise<{sess: object, results: object}>} The session and its raw result map.
   * @throws {Error} If `initialize()` has not completed.
   */
  async #execute(input, inputShape) {
    if (!this.session) {
      throw new Error("NodeInferenceEngine: not initialized \u2014 call initialize() first");
    }
    const ort = await import("onnxruntime-node");
    const sess = this.session;
    const inputName = sess.inputNames[0];
    const tensor = new ort.Tensor("float32", input, [...inputShape]);
    const feeds = { [inputName]: tensor };
    const results = await sess.run(feeds);
    return { sess, results };
  }
  /**
   * Runs inference and returns the data of the model's first output.
   *
   * @param {Float32Array} input - Flat float32 tensor data.
   * @param {number[]} inputShape - Tensor dimensions.
   * @throws {Error} If `initialize()` has not completed.
   */
  async run(input, inputShape) {
    const { sess, results } = await this.#execute(input, inputShape);
    const outputName = sess.outputNames[0];
    const outputTensor = results[outputName];
    return outputTensor.data;
  }
  /**
   * Runs inference and returns the data of every output, keyed by output
   * name in the session's `outputNames` order.
   *
   * @param {Float32Array} input - Flat float32 tensor data.
   * @param {number[]} inputShape - Tensor dimensions.
   * @throws {Error} If `initialize()` has not completed.
   */
  async runMultiOutput(input, inputShape) {
    const { sess, results } = await this.#execute(input, inputShape);
    const out = {};
    for (const name of sess.outputNames) {
      out[name] = results[name].data;
    }
    return out;
  }
  /**
   * Releases the session's resources and marks the engine as uninitialized.
   * Safe to call more than once.
   */
  async dispose() {
    const session = this.session;
    // Clear the reference first so a failing release cannot leave a
    // half-disposed session reachable.
    this.session = null;
    if (session && typeof session.release === "function") {
      await session.release();
    }
  }
};
|
|
338
|
+
|
|
339
|
+
// src/shared/engine-resolver.ts
|
|
340
|
+
// Order in which backends are tried when the runtime is "auto"; the first one
// that is available and has a matching model format is used.
var AUTO_BACKEND_PRIORITY = [
  "coreml",
  "cuda",
  "tensorrt",
  "cpu"
];

// Model file format that each backend loads.
var BACKEND_TO_FORMAT = {
  "cpu": "onnx",
  "coreml": "coreml",
  "cuda": "onnx",
  "tensorrt": "onnx"
};

// Model file format that each explicitly selected runtime loads.
var RUNTIME_TO_FORMAT = {
  "onnx": "onnx",
  "coreml": "coreml",
  "openvino": "openvino",
  "tflite": "tflite",
  "pytorch": "pt"
};
|
|
354
|
+
/**
 * Computes where a model file for the given format is expected on disk.
 *
 * The file name is the last `/`-separated segment of the format's download
 * URL. When that segment is empty (the URL ends with `/`), the name falls
 * back to `<model id>.<format>` so the result never collapses to the models
 * directory itself.
 *
 * @param {string} modelsDir - Directory that holds model files.
 * @param {{id: string, formats: Object<string, {url: string}>}} modelEntry - Catalog entry.
 * @param {string} format - Key into `modelEntry.formats`.
 * @returns {string} `modelsDir` joined with the file name.
 * @throws {Error} If the model has no entry for `format`.
 */
function modelFilePath(modelsDir, modelEntry, format) {
  const formatEntry = modelEntry.formats[format];
  if (!formatEntry) {
    throw new Error(`Model ${modelEntry.id} has no ${format} format`);
  }
  const urlParts = formatEntry.url.split("/");
  // split() always yields at least one element, so the last part is never
  // undefined, but it is "" for URLs with a trailing slash.
  const lastPart = urlParts[urlParts.length - 1];
  const filename = lastPart ? lastPart : `${modelEntry.id}.${format}`;
  return path2.join(modelsDir, filename);
}
|
|
363
|
+
/**
 * Reports whether something exists at `filePath`.
 * Any error raised by the check is treated as "does not exist".
 */
function modelExists(filePath) {
  let found = false;
  try {
    found = fs.existsSync(filePath);
  } catch {
    found = false;
  }
  return found;
}
|
|
370
|
+
/**
 * Picks a model format and backend, makes sure the model file is present,
 * and returns an initialized inference engine.
 *
 * Selection:
 * - `runtime === "auto"`: the first backend in `AUTO_BACKEND_PRIORITY` that
 *   `probeOnnxBackends()` reports and whose format the model provides.
 * - otherwise: the format mapped from `runtime`; for "onnx" the backend is
 *   `options.backend` (default "cpu"), for other runtimes the runtime name.
 *
 * Only the "onnx" and "coreml" formats are handed to `NodeInferenceEngine`.
 * For any other selected format the ONNX variant of the model is used on the
 * CPU backend instead, if the model provides one. The unsupported file is
 * never downloaded, and the ONNX file is downloaded when it is missing.
 *
 * @param {object} options
 * @param {string} options.runtime - "auto" or a key of `RUNTIME_TO_FORMAT`.
 * @param {string} [options.backend] - Backend for the "onnx" runtime.
 * @param {object} options.modelEntry - Catalog entry with `id` and `formats`.
 * @param {string} options.modelsDir - Directory that holds model files.
 * @param {(url: string, dir: string) => Promise<string>} [options.downloadModel]
 *   Fetches a missing model and resolves to its local path.
 * @returns {Promise<{engine: object, format: string, modelPath: string}>}
 * @throws {Error} If no backend/format matches, the runtime is unknown, the
 *   format cannot be run and there is no ONNX variant, or the model file is
 *   missing and no `downloadModel` was given.
 */
async function resolveEngine(options) {
  const { runtime, backend, modelEntry, modelsDir, downloadModel } = options;
  let selectedFormat;
  let selectedBackend;
  if (runtime === "auto") {
    const available = await probeOnnxBackends();
    const chosenBackend = AUTO_BACKEND_PRIORITY.find((b) => {
      const fmt = BACKEND_TO_FORMAT[b];
      return available.includes(b) && Boolean(fmt) && Boolean(modelEntry.formats[fmt]);
    });
    if (chosenBackend === void 0) {
      throw new Error(
        `resolveEngine: no compatible backend found for model ${modelEntry.id}. Available backends: ${available.join(", ")}`
      );
    }
    selectedFormat = BACKEND_TO_FORMAT[chosenBackend];
    selectedBackend = chosenBackend;
  } else {
    const fmt = RUNTIME_TO_FORMAT[runtime];
    if (!fmt) {
      throw new Error(`resolveEngine: unsupported runtime "${runtime}"`);
    }
    if (!modelEntry.formats[fmt]) {
      throw new Error(
        `resolveEngine: model ${modelEntry.id} has no ${fmt} format for runtime ${runtime}`
      );
    }
    selectedFormat = fmt;
    selectedBackend = runtime === "onnx" ? backend || "cpu" : runtime;
  }
  // Decide on the ONNX fallback before touching the disk or the network, so
  // a file that cannot be loaded is never downloaded.
  const isRunnable = selectedFormat === "onnx" || selectedFormat === "coreml";
  if (!isRunnable) {
    if (!modelEntry.formats["onnx"]) {
      throw new Error(
        `resolveEngine: format ${selectedFormat} is not yet supported by NodeInferenceEngine and no ONNX fallback is available`
      );
    }
    selectedFormat = "onnx";
    selectedBackend = "cpu";
  }
  let modelPath = modelFilePath(modelsDir, modelEntry, selectedFormat);
  if (!modelExists(modelPath)) {
    if (!downloadModel) {
      throw new Error(
        `resolveEngine: model file not found at ${modelPath} and no downloadModel function provided`
      );
    }
    const formatEntry = modelEntry.formats[selectedFormat];
    modelPath = await downloadModel(formatEntry.url, modelsDir);
  }
  const engine = new NodeInferenceEngine(modelPath, selectedBackend);
  await engine.initialize();
  return { engine, format: selectedFormat, modelPath };
}
|
|
431
|
+
/**
 * Lists the backends considered usable on this machine.
 *
 * "cpu" is always first. Provider names reported by onnxruntime-node are
 * lower-cased, stripped of "executionprovider", and added when they are one
 * of "coreml", "cuda" or "tensorrt". On macOS "coreml" is always included.
 * Any failure while loading or querying onnxruntime-node is ignored
 * (best-effort probe).
 *
 * @returns {Promise<string[]>} Unique backend names in discovery order.
 */
async function probeOnnxBackends() {
  const recognised = ["coreml", "cuda", "tensorrt"];
  const found = /* @__PURE__ */ new Set(["cpu"]);
  try {
    const ort = await import("onnxruntime-node");
    let providers = [];
    // Providers are only queried when `ort.env.webgl.disabled` is defined.
    if (ort.env?.webgl?.disabled !== void 0) {
      providers = ort.InferenceSession?.getAvailableProviders?.() ?? [];
    }
    for (const provider of providers) {
      const name = provider.toLowerCase().replace("executionprovider", "");
      if (recognised.includes(name)) {
        found.add(name);
      }
    }
  } catch {
    // Deliberately ignored: fall through with whatever was found so far.
  }
  if (process.platform === "darwin") {
    found.add("coreml");
  }
  return [...found];
}
|
|
449
|
+
|
|
450
|
+
// src/addons/object-detection/index.ts
|
|
451
|
+
/** Returns true when the model id contains "-seg", which marks a segmentation model. */
function isSegModel(modelId) {
  const segMarker = "-seg";
  return modelId.includes(segMarker);
}
|
|
454
|
+
// Combined catalog used by this addon: object-detection models first,
// followed by segmentation models.
var ALL_DETECTION_MODELS = [...import_types.OBJECT_DETECTION_MODELS, ...import_types.SEGMENTATION_MODELS];
|
|
458
|
+
/**
 * Translates detection classes through `classMap.mapping`.
 *
 * Detections whose class has no mapping are dropped. The rest are copied with
 * `originalClass` set to the incoming class and `class` set to the mapped
 * value; the input objects are not modified.
 *
 * @param {Array<{class: string}>} detections
 * @param {{mapping: Object<string, string>}} classMap
 * @returns {Array<object>} New detection objects in the original order.
 */
function applyClassMap(detections, classMap) {
  const remapped = [];
  for (const detection of detections) {
    const mappedClass = classMap.mapping[detection.class];
    if (mappedClass === void 0) {
      continue;
    }
    remapped.push({
      ...detection,
      originalClass: detection.class,
      class: mappedClass
    });
  }
  return remapped;
}
|
|
465
|
+
/**
 * Detector addon that runs a YOLO detection or segmentation model and reports
 * detections mapped to the macro classes "person", "vehicle" and "animal".
 *
 * Lifecycle: `initialize(ctx)` resolves the model and engine, `detect(frame)`
 * runs one frame, `shutdown()` disposes the engine.
 */
var ObjectDetectionAddon = class {
  id = "object-detection";
  slot = "detector";
  inputClasses = null;
  outputClasses = ["person", "vehicle", "animal"];
  slotPriority = 0;
  manifest = {
    id: "object-detection",
    name: "Object Detection",
    version: "0.1.0",
    description: "YOLO-based object detection \u2014 detects persons, vehicles, and animals",
    packageName: "@camstack/vision",
    slot: "detector",
    inputClasses: void 0,
    outputClasses: ["person", "vehicle", "animal"],
    supportsCustomModels: true,
    mayRequirePython: false,
    defaultConfig: {
      modelId: "yolov8n",
      runtime: "auto",
      backend: "cpu",
      confidence: 0.5,
      iouThreshold: 0.45
    }
  };
  engine;
  modelEntry;
  confidence = 0.5;
  iouThreshold = 0.45;
  /**
   * Reads the addon configuration, looks up the model in the catalog and
   * resolves an inference engine for it.
   *
   * @param {object} ctx - Provides `addonConfig` (optional `modelId`,
   *   `runtime`, `backend`, `confidence`, `iouThreshold`) and
   *   `locationPaths.models`.
   * @throws {Error} If `modelId` is not in the catalog, or engine resolution fails.
   */
  async initialize(ctx) {
    const cfg = ctx.addonConfig;
    const modelId = cfg["modelId"] ?? "yolov8n";
    const runtime = cfg["runtime"] ?? "auto";
    const backend = cfg["backend"] ?? "cpu";
    this.confidence = cfg["confidence"] ?? 0.5;
    this.iouThreshold = cfg["iouThreshold"] ?? 0.45;
    const entry = ALL_DETECTION_MODELS.find((m) => m.id === modelId);
    if (!entry) {
      throw new Error(`ObjectDetectionAddon: unknown modelId "${modelId}"`);
    }
    this.modelEntry = entry;
    const resolved = await resolveEngine({
      runtime,
      backend,
      modelEntry: entry,
      modelsDir: ctx.locationPaths.models
    });
    this.engine = resolved.engine;
  }
  /**
   * Runs detection on one frame.
   *
   * The frame is letterboxed to a square whose side is the larger of the
   * model's input width and height, run through the engine, post-processed,
   * and mapped to macro classes (detections without a mapping are dropped).
   *
   * @param {{data: Buffer}} frame - Encoded image.
   * @returns {Promise<{detections: Array<object>, inferenceMs: number, modelId: string}>}
   *   `inferenceMs` covers preprocessing, inference and post-processing.
   * @throws {Error} If called before `initialize()`, or if the model output
   *   size does not match the model's label count.
   */
  async detect(frame) {
    if (!this.engine || !this.modelEntry) {
      throw new Error("ObjectDetectionAddon: not initialized \u2014 call initialize() first");
    }
    const start = Date.now();
    const { width: inputW, height: inputH } = this.modelEntry.inputSize;
    const targetSize = Math.max(inputW, inputH);
    const lb = await letterbox(frame.data, targetSize);
    const numClasses = this.modelEntry.labels.length;
    const labels = this.modelEntry.labels.map((l) => l.id);
    const postprocessOpts = {
      confidence: this.confidence,
      iouThreshold: this.iouThreshold,
      labels,
      scale: lb.scale,
      padX: lb.padX,
      padY: lb.padY,
      originalWidth: lb.originalWidth,
      originalHeight: lb.originalHeight
    };
    let rawDetections;
    if (isSegModel(this.modelEntry.id)) {
      const outputs = await this.engine.runMultiOutput(lb.data, [1, 3, targetSize, targetSize]);
      const outputNames = Object.keys(outputs);
      if (outputNames.length < 2) {
        throw new Error(
          `ObjectDetectionAddon: seg model "${this.modelEntry.id}" returned ${outputNames.length} output(s); expected 2`
        );
      }
      // NOTE(review): assumes the first output is the detection tensor and the
      // second the mask prototypes, and that the model uses 32 mask
      // coefficients; confirm against the model catalog.
      const detectionOutput = outputs[outputNames[0]];
      const protoOutput = outputs[outputNames[1]];
      const numMaskCoeffs = 32;
      const numBoxes = this.#boxCount(detectionOutput.length, 4 + numClasses + numMaskCoeffs);
      // The model input is square, so the prototype masks are taken to be
      // square too and their side is derived from the tensor size rather than
      // hard-coded (160 for a 640 input).
      const maskSide = Math.sqrt(protoOutput.length / numMaskCoeffs);
      if (!Number.isInteger(maskSide) || maskSide === 0) {
        throw new Error(
          `ObjectDetectionAddon: seg model "${this.modelEntry.id}" prototype output length ${protoOutput.length} is not ${numMaskCoeffs} square masks`
        );
      }
      const maskHeight = maskSide;
      const maskWidth = maskSide;
      rawDetections = yoloSegPostprocess(
        {
          detectionOutput,
          protoOutput,
          numClasses,
          numBoxes,
          numMaskCoeffs,
          maskHeight,
          maskWidth
        },
        // Tell the post-processor the real input size so box -> mask scaling
        // is right for models that are not 640x640.
        { ...postprocessOpts, inputSize: targetSize }
      );
    } else {
      const output = await this.engine.run(lb.data, [1, 3, targetSize, targetSize]);
      const numBoxes = this.#boxCount(output.length, 4 + numClasses);
      rawDetections = yoloPostprocess(output, numClasses, numBoxes, postprocessOpts);
    }
    const detections = applyClassMap(rawDetections, import_types.COCO_TO_MACRO);
    return {
      detections,
      inferenceMs: Date.now() - start,
      modelId: this.modelEntry.id
    };
  }
  /**
   * Derives the number of boxes from a flat output length, rejecting outputs
   * that are not a whole number of `attributesPerBox`-sized columns (which
   * would make every strided read in post-processing land on the wrong value).
   */
  #boxCount(outputLength, attributesPerBox) {
    const numBoxes = outputLength / attributesPerBox;
    if (!Number.isInteger(numBoxes)) {
      throw new Error(
        `ObjectDetectionAddon: model "${this.modelEntry.id}" output length ${outputLength} is not a multiple of ${attributesPerBox} attributes per box`
      );
    }
    return numBoxes;
  }
  /** Disposes the inference engine, if one was created. */
  async shutdown() {
    await this.engine?.dispose();
  }
  /** Returns the configuration UI schema: model, runtime/backend and threshold sections. */
  getConfigSchema() {
    return {
      sections: [
        {
          id: "model",
          title: "Model",
          columns: 2,
          fields: [
            {
              key: "modelId",
              label: "Model",
              type: "model-selector",
              catalog: [...ALL_DETECTION_MODELS],
              allowCustom: true,
              allowConversion: true,
              acceptFormats: ["onnx", "coreml", "openvino", "tflite"],
              requiredMetadata: ["inputSize", "labels", "outputFormat"],
              outputFormatHint: "yolo"
            }
          ]
        },
        {
          id: "runtime",
          title: "Runtime",
          columns: 2,
          fields: [
            {
              key: "runtime",
              label: "Runtime",
              type: "select",
              options: [
                { value: "auto", label: "Auto (recommended)" },
                { value: "onnx", label: "ONNX Runtime" },
                { value: "coreml", label: "CoreML (Apple)" },
                { value: "openvino", label: "OpenVINO (Intel)" }
              ]
            },
            {
              key: "backend",
              label: "Backend",
              type: "select",
              dependsOn: { runtime: "onnx" },
              options: [
                { value: "cpu", label: "CPU" },
                { value: "coreml", label: "CoreML" },
                { value: "cuda", label: "CUDA (NVIDIA)" },
                { value: "tensorrt", label: "TensorRT (NVIDIA)" }
              ]
            }
          ]
        },
        {
          id: "thresholds",
          title: "Detection Thresholds",
          columns: 2,
          fields: [
            {
              key: "confidence",
              label: "Confidence Threshold",
              type: "slider",
              min: 0.1,
              max: 1,
              step: 0.05,
              default: 0.5
            },
            {
              key: "iouThreshold",
              label: "IoU Threshold (NMS)",
              type: "slider",
              min: 0.1,
              max: 1,
              step: 0.05,
              default: 0.45
            }
          ]
        }
      ]
    };
  }
  /** Returns the class map used to translate model labels to macro classes. */
  getClassMap() {
    return import_types.COCO_TO_MACRO;
  }
  /** Returns a copy of the built-in model catalog. */
  getModelCatalog() {
    return [...ALL_DETECTION_MODELS];
  }
  /** Returns an empty list; this addon does not report locally available models. */
  getAvailableModels() {
    return [];
  }
  /** Returns the macro labels this addon can emit. */
  getActiveLabels() {
    return import_types.MACRO_LABELS;
  }
  /** Reports the engine's runtime and device, defaulting to "onnx" / "cpu" before initialization. */
  async probe() {
    return {
      available: true,
      runtime: this.engine?.runtime ?? "onnx",
      device: this.engine?.device ?? "cpu",
      capabilities: ["fp32"]
    };
  }
};
|
|
673
|
+
//# sourceMappingURL=index.js.map
|