@camstack/addon-vision 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/addons/animal-classifier/index.d.mts +6 -1
- package/dist/addons/animal-classifier/index.d.ts +6 -1
- package/dist/addons/animal-classifier/index.js +513 -49
- package/dist/addons/animal-classifier/index.js.map +1 -1
- package/dist/addons/animal-classifier/index.mjs +6 -4
- package/dist/addons/audio-classification/index.d.mts +6 -1
- package/dist/addons/audio-classification/index.d.ts +6 -1
- package/dist/addons/audio-classification/index.js +86 -26
- package/dist/addons/audio-classification/index.js.map +1 -1
- package/dist/addons/audio-classification/index.mjs +3 -2
- package/dist/addons/bird-global-classifier/index.d.mts +6 -1
- package/dist/addons/bird-global-classifier/index.d.ts +6 -1
- package/dist/addons/bird-global-classifier/index.js +514 -50
- package/dist/addons/bird-global-classifier/index.js.map +1 -1
- package/dist/addons/bird-global-classifier/index.mjs +6 -4
- package/dist/addons/bird-nabirds-classifier/index.d.mts +6 -1
- package/dist/addons/bird-nabirds-classifier/index.d.ts +6 -1
- package/dist/addons/bird-nabirds-classifier/index.js +523 -60
- package/dist/addons/bird-nabirds-classifier/index.js.map +1 -1
- package/dist/addons/bird-nabirds-classifier/index.mjs +6 -4
- package/dist/addons/face-detection/index.d.mts +6 -1
- package/dist/addons/face-detection/index.d.ts +6 -1
- package/dist/addons/face-detection/index.js +538 -39
- package/dist/addons/face-detection/index.js.map +1 -1
- package/dist/addons/face-detection/index.mjs +5 -3
- package/dist/addons/face-recognition/index.d.mts +6 -1
- package/dist/addons/face-recognition/index.d.ts +6 -1
- package/dist/addons/face-recognition/index.js +487 -33
- package/dist/addons/face-recognition/index.js.map +1 -1
- package/dist/addons/face-recognition/index.mjs +5 -3
- package/dist/addons/motion-detection/index.d.mts +3 -1
- package/dist/addons/motion-detection/index.d.ts +3 -1
- package/dist/addons/motion-detection/index.js +11 -3
- package/dist/addons/motion-detection/index.js.map +1 -1
- package/dist/addons/motion-detection/index.mjs +140 -3
- package/dist/addons/motion-detection/index.mjs.map +1 -1
- package/dist/addons/object-detection/index.d.mts +6 -1
- package/dist/addons/object-detection/index.d.ts +6 -1
- package/dist/addons/object-detection/index.js +369 -72
- package/dist/addons/object-detection/index.js.map +1 -1
- package/dist/addons/object-detection/index.mjs +5 -3
- package/dist/addons/plate-detection/index.d.mts +6 -1
- package/dist/addons/plate-detection/index.d.ts +6 -1
- package/dist/addons/plate-detection/index.js +531 -31
- package/dist/addons/plate-detection/index.js.map +1 -1
- package/dist/addons/plate-detection/index.mjs +5 -3
- package/dist/addons/plate-recognition/index.d.mts +7 -1
- package/dist/addons/plate-recognition/index.d.ts +7 -1
- package/dist/addons/plate-recognition/index.js +176 -44
- package/dist/addons/plate-recognition/index.js.map +1 -1
- package/dist/addons/plate-recognition/index.mjs +4 -3
- package/dist/addons/segmentation-refiner/index.d.mts +30 -0
- package/dist/addons/segmentation-refiner/index.d.ts +30 -0
- package/dist/addons/segmentation-refiner/index.js +1048 -0
- package/dist/addons/segmentation-refiner/index.js.map +1 -0
- package/dist/addons/segmentation-refiner/index.mjs +209 -0
- package/dist/addons/segmentation-refiner/index.mjs.map +1 -0
- package/dist/addons/vehicle-classifier/index.d.mts +31 -0
- package/dist/addons/vehicle-classifier/index.d.ts +31 -0
- package/dist/addons/vehicle-classifier/index.js +688 -0
- package/dist/addons/vehicle-classifier/index.js.map +1 -0
- package/dist/addons/vehicle-classifier/index.mjs +250 -0
- package/dist/addons/vehicle-classifier/index.mjs.map +1 -0
- package/dist/{chunk-6OR5TE7A.mjs → chunk-22BHCDT5.mjs} +2 -2
- package/dist/chunk-22BHCDT5.mjs.map +1 -0
- package/dist/{chunk-LPI42WL6.mjs → chunk-2IOKI4ES.mjs} +23 -12
- package/dist/chunk-2IOKI4ES.mjs.map +1 -0
- package/dist/chunk-7DYHXUPZ.mjs +36 -0
- package/dist/chunk-7DYHXUPZ.mjs.map +1 -0
- package/dist/chunk-BJTO5JO5.mjs +11 -0
- package/dist/chunk-BP7H4NFS.mjs +412 -0
- package/dist/chunk-BP7H4NFS.mjs.map +1 -0
- package/dist/chunk-BR2FPGOX.mjs +98 -0
- package/dist/chunk-BR2FPGOX.mjs.map +1 -0
- package/dist/{chunk-5AIQSN32.mjs → chunk-D6WEHN33.mjs} +66 -17
- package/dist/chunk-D6WEHN33.mjs.map +1 -0
- package/dist/{chunk-3MQFUDRU.mjs → chunk-DRYFGARD.mjs} +76 -47
- package/dist/chunk-DRYFGARD.mjs.map +1 -0
- package/dist/{chunk-ISOIDU4U.mjs → chunk-DUN6XU3N.mjs} +23 -5
- package/dist/chunk-DUN6XU3N.mjs.map +1 -0
- package/dist/{chunk-MEVASN3P.mjs → chunk-ESLHNWWE.mjs} +104 -22
- package/dist/chunk-ESLHNWWE.mjs.map +1 -0
- package/dist/{chunk-B3R66MPF.mjs → chunk-JUQEW6ON.mjs} +58 -21
- package/dist/chunk-JUQEW6ON.mjs.map +1 -0
- package/dist/{chunk-AYBFB7ID.mjs → chunk-R5J3WAUI.mjs} +200 -318
- package/dist/chunk-R5J3WAUI.mjs.map +1 -0
- package/dist/chunk-XZ6ZMXXU.mjs +39 -0
- package/dist/chunk-XZ6ZMXXU.mjs.map +1 -0
- package/dist/{chunk-5JJZGKL7.mjs → chunk-YPU4WTXZ.mjs} +102 -19
- package/dist/chunk-YPU4WTXZ.mjs.map +1 -0
- package/dist/{chunk-J4WRYHHY.mjs → chunk-YUCD2TFH.mjs} +66 -36
- package/dist/chunk-YUCD2TFH.mjs.map +1 -0
- package/dist/{chunk-PDSHDDPV.mjs → chunk-ZTJENCFC.mjs} +159 -35
- package/dist/chunk-ZTJENCFC.mjs.map +1 -0
- package/dist/{chunk-Q3SQOYG6.mjs → chunk-ZWYXXCXP.mjs} +67 -37
- package/dist/chunk-ZWYXXCXP.mjs.map +1 -0
- package/dist/index.d.mts +17 -5
- package/dist/index.d.ts +17 -5
- package/dist/index.js +1343 -550
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +191 -20
- package/dist/index.mjs.map +1 -1
- package/package.json +94 -18
- package/python/coreml_inference.py +61 -18
- package/python/openvino_inference.py +12 -4
- package/python/pytorch_inference.py +12 -4
- package/dist/addons/camera-native-detection/index.d.mts +0 -32
- package/dist/addons/camera-native-detection/index.d.ts +0 -32
- package/dist/addons/camera-native-detection/index.js +0 -99
- package/dist/addons/camera-native-detection/index.js.map +0 -1
- package/dist/addons/camera-native-detection/index.mjs +0 -7
- package/dist/chunk-3MQFUDRU.mjs.map +0 -1
- package/dist/chunk-5AIQSN32.mjs.map +0 -1
- package/dist/chunk-5JJZGKL7.mjs.map +0 -1
- package/dist/chunk-6OR5TE7A.mjs.map +0 -1
- package/dist/chunk-AYBFB7ID.mjs.map +0 -1
- package/dist/chunk-B3R66MPF.mjs.map +0 -1
- package/dist/chunk-DTOAB2CE.mjs +0 -79
- package/dist/chunk-DTOAB2CE.mjs.map +0 -1
- package/dist/chunk-ISOIDU4U.mjs.map +0 -1
- package/dist/chunk-J4WRYHHY.mjs.map +0 -1
- package/dist/chunk-LPI42WL6.mjs.map +0 -1
- package/dist/chunk-MEVASN3P.mjs.map +0 -1
- package/dist/chunk-PDSHDDPV.mjs.map +0 -1
- package/dist/chunk-Q3SQOYG6.mjs.map +0 -1
- package/dist/chunk-QIMDG34B.mjs +0 -229
- package/dist/chunk-QIMDG34B.mjs.map +0 -1
- package/python/__pycache__/coreml_inference.cpython-313.pyc +0 -0
- package/python/__pycache__/openvino_inference.cpython-313.pyc +0 -0
- package/python/__pycache__/pytorch_inference.cpython-313.pyc +0 -0
- /package/dist/{addons/camera-native-detection/index.mjs.map → chunk-BJTO5JO5.mjs.map} +0 -0
package/dist/index.mjs
CHANGED
@@ -1,72 +1,241 @@
-import {
-  CameraNativeDetectionAddon
-} from "./chunk-DTOAB2CE.mjs";
 import {
   BirdGlobalClassifierAddon
-} from "./chunk-
+} from "./chunk-ZWYXXCXP.mjs";
 import {
   BirdNABirdsClassifierAddon
-} from "./chunk-
+} from "./chunk-DRYFGARD.mjs";
 import {
   AnimalClassifierAddon
-} from "./chunk-
+} from "./chunk-YUCD2TFH.mjs";
 import {
   ANIMAL_TYPE_MODELS,
   BIRD_NABIRDS_MODELS,
   BIRD_SPECIES_MODELS
-} from "./chunk-
+} from "./chunk-DUN6XU3N.mjs";
+import {
+  VEHICLE_TYPE_MODELS
+} from "./chunk-XZ6ZMXXU.mjs";
+import {
+  SEGMENTATION_REFINER_MODELS
+} from "./chunk-7DYHXUPZ.mjs";
 import {
-  MotionDetectionAddon,
   detectMotion
-} from "./chunk-
+} from "./chunk-BR2FPGOX.mjs";
 import {
-  OBJECT_DETECTION_MODELS,
   ObjectDetectionAddon,
   SEGMENTATION_MODELS
-} from "./chunk-
+} from "./chunk-R5J3WAUI.mjs";
 import {
   FACE_DETECTION_MODELS,
   FaceDetectionAddon,
   scrfdPostprocess
-} from "./chunk-
+} from "./chunk-ESLHNWWE.mjs";
 import {
   FACE_RECOGNITION_MODELS,
   FaceRecognitionAddon,
   cosineSimilarity,
   l2Normalize
-} from "./chunk-
+} from "./chunk-JUQEW6ON.mjs";
 import {
   PLATE_DETECTION_MODELS,
   PlateDetectionAddon
-} from "./chunk-
+} from "./chunk-YPU4WTXZ.mjs";
 import {
   iou,
   nms,
   yoloPostprocess
 } from "./chunk-KUO2BVFY.mjs";
+import {
+  MLPACKAGE_FILES,
+  OBJECT_DETECTION_MODELS
+} from "./chunk-BP7H4NFS.mjs";
 import {
   PLATE_RECOGNITION_MODELS,
   PlateRecognitionAddon,
   ctcDecode
-} from "./chunk-
+} from "./chunk-ZTJENCFC.mjs";
 import {
   cropRegion,
   jpegToRgb,
   letterbox,
   resizeAndNormalize,
   rgbToGrayscale
-} from "./chunk-
+} from "./chunk-22BHCDT5.mjs";
 import {
   AUDIO_CLASSIFICATION_MODELS,
   AudioClassificationAddon,
   yamnetPostprocess
-} from "./chunk-
+} from "./chunk-D6WEHN33.mjs";
 import {
   NodeInferenceEngine,
   PythonInferenceEngine,
   probeOnnxBackends,
   resolveEngine
-} from "./chunk-
+} from "./chunk-2IOKI4ES.mjs";
+import "./chunk-BJTO5JO5.mjs";
+
+// src/catalogs/general-ocr-models.ts
+import { hfModelUrl } from "@camstack/types";
+var HF_REPO = "camstack/camstack-models";
+var OCR_TEXT_LABELS = [
+  { id: "text", name: "Scene Text" }
+];
+var GENERAL_OCR_MODELS = [
+  // ── OnnxTR / docTR — lightweight general scene text recognition ──
+  {
+    id: "doctr-det-db-mobilenet",
+    name: "docTR Detection MobileNet",
+    description: "docTR DBNet MobileNet V3 \u2014 lightweight text region detection for scene text",
+    inputSize: { width: 1024, height: 1024 },
+    labels: OCR_TEXT_LABELS,
+    formats: {
+      onnx: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/onnx/camstack-doctr-det-db-mobilenet-v3.onnx"),
+        sizeMB: 15
+      },
+      coreml: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/coreml/camstack-doctr-det-db-mobilenet-v3.mlpackage"),
+        sizeMB: 8,
+        isDirectory: true,
+        files: [
+          "Manifest.json",
+          "Data/com.apple.CoreML/model.mlmodel",
+          "Data/com.apple.CoreML/weights/weight.bin"
+        ],
+        runtimes: ["python"]
+      },
+      openvino: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/openvino/camstack-doctr-det-db-mobilenet-v3.xml"),
+        sizeMB: 8,
+        runtimes: ["python"]
+      }
+    }
+  },
+  {
+    id: "doctr-rec-crnn-mobilenet",
+    name: "docTR Recognition CRNN MobileNet",
+    description: "docTR CRNN MobileNet V3 \u2014 lightweight text recognition for detected regions",
+    inputSize: { width: 128, height: 32 },
+    labels: OCR_TEXT_LABELS,
+    formats: {
+      onnx: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/onnx/camstack-doctr-rec-crnn-mobilenet-v3.onnx"),
+        sizeMB: 5
+      },
+      coreml: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/coreml/camstack-doctr-rec-crnn-mobilenet-v3.mlpackage"),
+        sizeMB: 3,
+        isDirectory: true,
+        files: [
+          "Manifest.json",
+          "Data/com.apple.CoreML/model.mlmodel",
+          "Data/com.apple.CoreML/weights/weight.bin"
+        ],
+        runtimes: ["python"]
+      },
+      openvino: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/openvino/camstack-doctr-rec-crnn-mobilenet-v3.xml"),
+        sizeMB: 3,
+        runtimes: ["python"]
+      }
+    }
+  },
+  {
+    id: "doctr-rec-parseq",
+    name: "docTR Recognition PARSeq",
+    description: "docTR PARSeq \u2014 high-accuracy scene text recognition (top ICDAR scores)",
+    inputSize: { width: 128, height: 32 },
+    labels: OCR_TEXT_LABELS,
+    formats: {
+      onnx: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/onnx/camstack-doctr-rec-parseq.onnx"),
+        sizeMB: 25
+      },
+      coreml: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/coreml/camstack-doctr-rec-parseq.mlpackage"),
+        sizeMB: 13,
+        isDirectory: true,
+        files: [
+          "Manifest.json",
+          "Data/com.apple.CoreML/model.mlmodel",
+          "Data/com.apple.CoreML/weights/weight.bin"
+        ],
+        runtimes: ["python"]
+      },
+      openvino: {
+        url: hfModelUrl(HF_REPO, "generalOcr/doctr/openvino/camstack-doctr-rec-parseq.xml"),
+        sizeMB: 13,
+        runtimes: ["python"]
+      }
+    }
+  },
+  // ── PaddleOCR PP-OCRv5 Mobile — general-purpose text detection + recognition ──
+  {
+    id: "ppocr-v5-det-mobile",
+    name: "PP-OCRv5 Detection Mobile",
+    description: "PP-OCRv5 mobile text detection \u2014 optimized for edge, 100+ languages",
+    inputSize: { width: 640, height: 640 },
+    labels: OCR_TEXT_LABELS,
+    formats: {
+      onnx: {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-det-mobile.onnx"),
+        sizeMB: 6
+      },
+      coreml: {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-det-mobile.mlpackage"),
+        sizeMB: 3,
+        isDirectory: true,
+        files: [
+          "Manifest.json",
+          "Data/com.apple.CoreML/model.mlmodel",
+          "Data/com.apple.CoreML/weights/weight.bin"
+        ],
+        runtimes: ["python"]
+      },
+      openvino: {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-det-mobile.xml"),
+        sizeMB: 3,
+        runtimes: ["python"]
+      }
+    }
+  },
+  {
+    id: "ppocr-v5-rec-mobile",
+    name: "PP-OCRv5 Recognition Mobile",
+    description: "PP-OCRv5 mobile text recognition \u2014 100+ languages, CTC decoding",
+    inputSize: { width: 320, height: 48 },
+    labels: OCR_TEXT_LABELS,
+    formats: {
+      onnx: {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-rec-mobile.onnx"),
+        sizeMB: 8
+      },
+      coreml: {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-rec-mobile.mlpackage"),
+        sizeMB: 4,
+        isDirectory: true,
+        files: [
+          "Manifest.json",
+          "Data/com.apple.CoreML/model.mlmodel",
+          "Data/com.apple.CoreML/weights/weight.bin"
+        ],
+        runtimes: ["python"]
+      },
+      openvino: {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-rec-mobile.xml"),
+        sizeMB: 4,
+        runtimes: ["python"]
+      }
+    },
+    extraFiles: [
+      {
+        url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/camstack-ppocr-v5-keys.txt"),
+        filename: "camstack-ppocr-v5-keys.txt",
+        sizeMB: 0.1
+      }
+    ]
+  }
+];
 export {
   ANIMAL_TYPE_MODELS,
   AUDIO_CLASSIFICATION_MODELS,
@@ -76,12 +245,12 @@ export {
   BIRD_SPECIES_MODELS,
   BirdGlobalClassifierAddon,
   BirdNABirdsClassifierAddon,
-  CameraNativeDetectionAddon,
   FACE_DETECTION_MODELS,
   FACE_RECOGNITION_MODELS,
   FaceDetectionAddon,
   FaceRecognitionAddon,
-
+  GENERAL_OCR_MODELS,
+  MLPACKAGE_FILES,
   NodeInferenceEngine,
   OBJECT_DETECTION_MODELS,
   ObjectDetectionAddon,
@@ -91,6 +260,8 @@ export {
   PlateRecognitionAddon,
   PythonInferenceEngine,
   SEGMENTATION_MODELS,
+  SEGMENTATION_REFINER_MODELS,
+  VEHICLE_TYPE_MODELS,
   cosineSimilarity,
   cropRegion,
   ctcDecode,
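Note on the new catalog: GENERAL_OCR_MODELS follows the same entry shape as the existing model catalogs, with per-format artifacts (onnx / coreml / openvino) carrying a download URL, a size, and an optional runtimes list that marks formats needing the Python sidecar scripts under package/python. A minimal consumer sketch, assuming only the entry shape visible in the diff above; the pickArtifact helper and its preference order are illustrative, not part of the package API:

// Illustrative sketch: mirrors the catalog entry shape shown in the diff above.
// pickArtifact is hypothetical, not an export of @camstack/addon-vision.
type ArtifactFormat = {
  url: string;
  sizeMB: number;
  isDirectory?: boolean;
  files?: readonly string[];
  runtimes?: readonly string[];
};

type CatalogEntry = {
  id: string;
  name: string;
  description: string;
  inputSize: { width: number; height: number };
  formats: Partial<Record<"onnx" | "coreml" | "openvino", ArtifactFormat>>;
  extraFiles?: readonly { url: string; filename: string; sizeMB: number }[];
};

// Prefer ONNX (runnable in-process via onnxruntime-node, added as a dependency
// below); otherwise fall back to a format whose runtimes list the host can serve.
function pickArtifact(entry: CatalogEntry, havePython: boolean): ArtifactFormat | undefined {
  for (const fmt of ["onnx", "coreml", "openvino"] as const) {
    const artifact = entry.formats[fmt];
    if (!artifact) continue;
    const needsPython = artifact.runtimes?.includes("python") ?? false;
    if (!needsPython || havePython) return artifact;
  }
  return undefined;
}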
package/dist/index.mjs.map
CHANGED
@@ -1 +1 @@
-{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
+{"version":3,"sources":["../src/catalogs/general-ocr-models.ts"],"sourcesContent":["import type { ModelCatalogEntry, LabelDefinition } from '@camstack/types'\nimport { hfModelUrl } from '@camstack/types'\n\nconst HF_REPO = 'camstack/camstack-models'\n\nconst OCR_TEXT_LABELS: readonly LabelDefinition[] = [\n  { id: 'text', name: 'Scene Text' },\n] as const\n\n/**\n * General-purpose OCR models for scene text recognition in camera feeds.\n * These complement the plate-specific PaddleOCR models for broader text detection\n * (signs, labels, addresses, etc.).\n */\nexport const GENERAL_OCR_MODELS: readonly ModelCatalogEntry[] = [\n  // ── OnnxTR / docTR — lightweight general scene text recognition ──\n  {\n    id: 'doctr-det-db-mobilenet',\n    name: 'docTR Detection MobileNet',\n    description: 'docTR DBNet MobileNet V3 — lightweight text region detection for scene text',\n    inputSize: { width: 1024, height: 1024 },\n    labels: OCR_TEXT_LABELS,\n    formats: {\n      onnx: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/onnx/camstack-doctr-det-db-mobilenet-v3.onnx'),\n        sizeMB: 15,\n      },\n      coreml: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/coreml/camstack-doctr-det-db-mobilenet-v3.mlpackage'),\n        sizeMB: 8,\n        isDirectory: true,\n        files: [\n          'Manifest.json',\n          'Data/com.apple.CoreML/model.mlmodel',\n          'Data/com.apple.CoreML/weights/weight.bin',\n        ],\n        runtimes: ['python'],\n      },\n      openvino: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/openvino/camstack-doctr-det-db-mobilenet-v3.xml'),\n        sizeMB: 8,\n        runtimes: ['python'],\n      },\n    },\n  },\n  {\n    id: 'doctr-rec-crnn-mobilenet',\n    name: 'docTR Recognition CRNN MobileNet',\n    description: 'docTR CRNN MobileNet V3 — lightweight text recognition for detected regions',\n    inputSize: { width: 128, height: 32 },\n    labels: OCR_TEXT_LABELS,\n    formats: {\n      onnx: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/onnx/camstack-doctr-rec-crnn-mobilenet-v3.onnx'),\n        sizeMB: 5,\n      },\n      coreml: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/coreml/camstack-doctr-rec-crnn-mobilenet-v3.mlpackage'),\n        sizeMB: 3,\n        isDirectory: true,\n        files: [\n          'Manifest.json',\n          'Data/com.apple.CoreML/model.mlmodel',\n          'Data/com.apple.CoreML/weights/weight.bin',\n        ],\n        runtimes: ['python'],\n      },\n      openvino: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/openvino/camstack-doctr-rec-crnn-mobilenet-v3.xml'),\n        sizeMB: 3,\n        runtimes: ['python'],\n      },\n    },\n  },\n  {\n    id: 'doctr-rec-parseq',\n    name: 'docTR Recognition PARSeq',\n    description: 'docTR PARSeq — high-accuracy scene text recognition (top ICDAR scores)',\n    inputSize: { width: 128, height: 32 },\n    labels: OCR_TEXT_LABELS,\n    formats: {\n      onnx: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/onnx/camstack-doctr-rec-parseq.onnx'),\n        sizeMB: 25,\n      },\n      coreml: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/coreml/camstack-doctr-rec-parseq.mlpackage'),\n        sizeMB: 13,\n        isDirectory: true,\n        files: [\n          'Manifest.json',\n          'Data/com.apple.CoreML/model.mlmodel',\n          'Data/com.apple.CoreML/weights/weight.bin',\n        ],\n        runtimes: ['python'],\n      },\n      openvino: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/doctr/openvino/camstack-doctr-rec-parseq.xml'),\n        sizeMB: 13,\n        runtimes: ['python'],\n      },\n    },\n  },\n\n  // ── PaddleOCR PP-OCRv5 Mobile — general-purpose text detection + recognition ──\n  {\n    id: 'ppocr-v5-det-mobile',\n    name: 'PP-OCRv5 Detection Mobile',\n    description: 'PP-OCRv5 mobile text detection — optimized for edge, 100+ languages',\n    inputSize: { width: 640, height: 640 },\n    labels: OCR_TEXT_LABELS,\n    formats: {\n      onnx: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-det-mobile.onnx'),\n        sizeMB: 6,\n      },\n      coreml: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-det-mobile.mlpackage'),\n        sizeMB: 3,\n        isDirectory: true,\n        files: [\n          'Manifest.json',\n          'Data/com.apple.CoreML/model.mlmodel',\n          'Data/com.apple.CoreML/weights/weight.bin',\n        ],\n        runtimes: ['python'],\n      },\n      openvino: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-det-mobile.xml'),\n        sizeMB: 3,\n        runtimes: ['python'],\n      },\n    },\n  },\n  {\n    id: 'ppocr-v5-rec-mobile',\n    name: 'PP-OCRv5 Recognition Mobile',\n    description: 'PP-OCRv5 mobile text recognition — 100+ languages, CTC decoding',\n    inputSize: { width: 320, height: 48 },\n    labels: OCR_TEXT_LABELS,\n    formats: {\n      onnx: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-rec-mobile.onnx'),\n        sizeMB: 8,\n      },\n      coreml: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-rec-mobile.mlpackage'),\n        sizeMB: 4,\n        isDirectory: true,\n        files: [\n          'Manifest.json',\n          'Data/com.apple.CoreML/model.mlmodel',\n          'Data/com.apple.CoreML/weights/weight.bin',\n        ],\n        runtimes: ['python'],\n      },\n      openvino: {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-rec-mobile.xml'),\n        sizeMB: 4,\n        runtimes: ['python'],\n      },\n    },\n    extraFiles: [\n      {\n        url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/camstack-ppocr-v5-keys.txt'),\n        filename: 'camstack-ppocr-v5-keys.txt',\n        sizeMB: 0.1,\n      },\n    ],\n  },\n] as const\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,SAAS,kBAAkB;AAE3B,IAAM,UAAU;AAEhB,IAAM,kBAA8C;AAAA,EAClD,EAAE,IAAI,QAAQ,MAAM,aAAa;AACnC;AAOO,IAAM,qBAAmD;AAAA;AAAA,EAE9D;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,MAAM,QAAQ,KAAK;AAAA,IACvC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,+DAA+D;AAAA,QACxF,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,sEAAsE;AAAA,QAC/F,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,kEAAkE;AAAA,QAC3F,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,GAAG;AAAA,IACpC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,iEAAiE;AAAA,QAC1F,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,wEAAwE;AAAA,QACjG,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,oEAAoE;AAAA,QAC7F,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,GAAG;AAAA,IACpC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,sDAAsD;AAAA,QAC/E,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,6DAA6D;AAAA,QACtF,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,yDAAyD;AAAA,QAClF,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,IAAI;AAAA,IACrC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,4DAA4D;AAAA,QACrF,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,mEAAmE;AAAA,QAC5F,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,+DAA+D;AAAA,QACxF,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,GAAG;AAAA,IACpC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,4DAA4D;AAAA,QACrF,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,mEAAmE;AAAA,QAC5F,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,+DAA+D;AAAA,QACxF,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,IACA,YAAY;AAAA,MACV;AAAA,QACE,KAAK,WAAW,SAAS,gDAAgD;AAAA,QACzE,UAAU;AAAA,QACV,QAAQ;AAAA,MACV;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
package/package.json
CHANGED
@@ -1,8 +1,17 @@
 {
   "name": "@camstack/addon-vision",
-  "version": "0.1.
+  "version": "0.1.2",
   "description": "Builtin detection addons for CamStack — object detection, face, plate, audio, motion",
-  "keywords": [
+  "keywords": [
+    "camstack",
+    "addon",
+    "camstack-addon",
+    "vision",
+    "object-detection",
+    "face-detection",
+    "motion-detection",
+    "camera"
+  ],
   "license": "MIT",
   "repository": {
     "type": "git",
@@ -12,25 +21,87 @@
   "module": "./dist/index.mjs",
   "types": "./dist/index.d.ts",
   "exports": {
-    ".": {
-
+    ".": {
+      "import": "./dist/index.mjs",
+      "require": "./dist/index.js",
+      "types": "./dist/index.d.ts"
+    },
+    "./addons/*": {
+      "import": "./dist/addons/*/index.mjs",
+      "require": "./dist/addons/*/index.js",
+      "types": "./dist/addons/*/index.d.ts"
+    },
+    "./package.json": "./package.json"
   },
   "camstack": {
+    "displayName": "CamStack Vision",
     "addons": [
-      {
-
-
-
-
-      {
-
-
-
-
-      {
+      {
+        "id": "motion-detection",
+        "entry": "./dist/addons/motion-detection/index.js",
+        "slot": "detector"
+      },
+      {
+        "id": "object-detection",
+        "entry": "./dist/addons/object-detection/index.js",
+        "slot": "detector"
+      },
+      {
+        "id": "face-detection",
+        "entry": "./dist/addons/face-detection/index.js",
+        "slot": "cropper"
+      },
+      {
+        "id": "face-recognition",
+        "entry": "./dist/addons/face-recognition/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "plate-detection",
+        "entry": "./dist/addons/plate-detection/index.js",
+        "slot": "cropper"
+      },
+      {
+        "id": "plate-recognition",
+        "entry": "./dist/addons/plate-recognition/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "audio-classification",
+        "entry": "./dist/addons/audio-classification/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "bird-global-classifier",
+        "entry": "./dist/addons/bird-global-classifier/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "bird-nabirds-classifier",
+        "entry": "./dist/addons/bird-nabirds-classifier/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "animal-classifier",
+        "entry": "./dist/addons/animal-classifier/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "vehicle-classifier",
+        "entry": "./dist/addons/vehicle-classifier/index.js",
+        "slot": "classifier"
+      },
+      {
+        "id": "segmentation-refiner",
+        "entry": "./dist/addons/segmentation-refiner/index.js",
+        "slot": "refiner"
+      }
     ]
   },
-  "files": [
+  "files": [
+    "dist",
+    "python"
+  ],
   "scripts": {
     "build": "tsup",
     "dev": "tsup --watch",
@@ -38,8 +109,13 @@
     "test": "vitest run",
     "test:watch": "vitest"
   },
-  "peerDependencies": {
-
+  "peerDependencies": {
+    "@camstack/types": "^0.1.0"
+  },
+  "dependencies": {
+    "onnxruntime-node": "^1.24.3",
+    "sharp": "^0.34.0"
+  },
   "devDependencies": {
     "@camstack/types": "*",
     "tsup": "^8.0.0",
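The reworked exports map adds per-addon subpath entries next to the root entry. A usage sketch, assuming the addon entries default-export their class the way the removed camera-native-detection declaration at the end of this diff does:

// Root entry: catalogs and engines re-exported from dist/index.mjs.
import { GENERAL_OCR_MODELS, resolveEngine } from "@camstack/addon-vision";

// Per-addon deep import, resolved through the "./addons/*" pattern above.
import FaceDetectionAddon from "@camstack/addon-vision/addons/face-detection";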
package/python/coreml_inference.py
CHANGED
@@ -110,15 +110,23 @@ def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
 def parse_yolo_output(output: np.ndarray, conf_threshold: float,
                       img_w: int, img_h: int, input_size: int,
                       scale: float, pad: tuple[int, int]) -> list[dict]:
-    """Parse YOLO output tensor [1,
-
+    """Parse YOLO output tensor [1, C, 8400] into normalised detections.
+
+    Supports both detection (C=84) and segmentation (C=116) outputs.
+    For seg models the extra 32 mask coefficients are ignored (detections only).
+    """
+    # Squeeze batch dim
     if output.ndim == 3 and output.shape[0] == 1:
         output = output[0]
-
-
+
+    # Transpose if channels-first: [C, N] -> [N, C]
+    num_channels = output.shape[0]
+    if num_channels in (84, 116, 144):  # det=84, seg=116, etc.
+        output = output.T  # [8400, C]
 
     cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
-
+    # Take only the 80 class scores (skip mask coefficients if seg model)
+    class_scores = output[:, 4:84]  # [num_boxes, 80] — works for both det and seg
 
     class_ids = np.argmax(class_scores, axis=1)
     scores = class_scores[np.arange(len(class_ids)), class_ids]
@@ -179,8 +187,12 @@ def parse_yolo_output(output: np.ndarray, conf_threshold: float,
 # ---------------------------------------------------------------------------
 
 def _parse_nms_output(coords: np.ndarray, confs: np.ndarray,
-                      conf_threshold: float, img_w: int, img_h: int
-
+                      conf_threshold: float, img_w: int, img_h: int,
+                      input_size: int, scale: float, pad: tuple[int, int]) -> list[dict]:
+    """Parse CoreML NMS output: (N, 4) coords [cx, cy, w, h] normalized 0-1 relative to model input.
+
+    Coordinates must be un-letterboxed to get correct positions in the original image.
+    """
     if coords.shape[0] == 0:
         return []
 
@@ -195,12 +207,31 @@ def _parse_nms_output(coords: np.ndarray, confs: np.ndarray,
     if len(scores) == 0:
         return []
 
-    # CoreML coords are [cx, cy, w, h]
-
-
-
-
-
+    # CoreML NMS coords are [cx, cy, w, h] normalized 0-1 relative to model input (640x640)
+    # Convert to pixel coords in the letterboxed frame first
+    cx = coords[:, 0] * input_size
+    cy = coords[:, 1] * input_size
+    bw = coords[:, 2] * input_size
+    bh = coords[:, 3] * input_size
+
+    # Corner format
+    x1 = cx - bw / 2
+    y1 = cy - bh / 2
+    x2 = cx + bw / 2
+    y2 = cy + bh / 2
+
+    # Undo letterbox padding and scale → original image pixel coords
+    pad_x, pad_y = pad
+    x1 = (x1 - pad_x) / scale
+    y1 = (y1 - pad_y) / scale
+    x2 = (x2 - pad_x) / scale
+    y2 = (y2 - pad_y) / scale
+
+    # Normalize to 0-1 relative to original image
+    x1 = np.clip(x1 / img_w, 0, 1)
+    y1 = np.clip(y1 / img_h, 0, 1)
+    x2 = np.clip(x2 / img_w, 0, 1)
+    y2 = np.clip(y2 / img_h, 0, 1)
 
     detections = []
     for i in range(len(scores)):
@@ -293,13 +324,25 @@ def main() -> None:
 
     if has_builtin_nms:
         # Model has built-in NMS: outputs are (N, 4) boxes + (N, 80) scores
-        coords = np.array(predictions['coordinates'])  # (N, 4) — cx, cy, w, h normalized
+        coords = np.array(predictions['coordinates'])  # (N, 4) — cx, cy, w, h normalized to model input
         confs = np.array(predictions['confidence'])  # (N, 80)
-        detections = _parse_nms_output(coords, confs, args.confidence, orig_w, orig_h
+        detections = _parse_nms_output(coords, confs, args.confidence, orig_w, orig_h,
+                                       args.input_size, scale, pad)
     else:
-        # Raw YOLO output: [1,
-
-
+        # Raw YOLO output: [1, C, N] where C=84 (det) or C=116 (seg)
+        # For seg models with multiple outputs, pick the detection tensor
+        # (the one with shape [1, 84|116, 8400], not the mask protos [1, 32, 160, 160])
+        output_keys = list(predictions.keys())
+        output = None
+        for key in output_keys:
+            arr = np.array(predictions[key])
+            # Detection tensor has a dimension of 84 or 116 (not 32, not 160)
+            if arr.ndim >= 2 and any(d in (84, 116, 144) for d in arr.shape):
+                output = arr
+                break
+        if output is None:
+            output = np.array(predictions[output_keys[0]])
+
         detections = parse_yolo_output(
             output, args.confidence, orig_w, orig_h,
             args.input_size, scale, pad,
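The un-letterboxing added to _parse_nms_output inverts the letterbox preprocessing: the frame is scaled by scale into the square model input and the leftover border is padded by pad = (pad_x, pad_y). Worked numbers, assuming the usual convention scale = input_size / max(img_w, img_h): a 1920x1080 frame into a 640x640 input gives scale = 1/3 and pad = (0, 140), so a normalized CoreML center cy = 0.5 maps to (0.5 * 640 - 140) / (1/3) = 540 px, the vertical middle of the original frame. The same mapping as a TypeScript sketch (function name and shapes are illustrative):

// Inverse letterbox mapping, mirroring the Python added above.
function unletterbox(
  box: { cx: number; cy: number; w: number; h: number }, // normalized 0-1 in model-input space
  inputSize: number,
  scale: number,
  pad: readonly [number, number],
  imgW: number,
  imgH: number
): { x1: number; y1: number; x2: number; y2: number } {
  const clamp01 = (v: number) => Math.min(1, Math.max(0, v));
  // Normalized -> pixels in the letterboxed square
  const cx = box.cx * inputSize;
  const cy = box.cy * inputSize;
  const bw = box.w * inputSize;
  const bh = box.h * inputSize;
  // Center/size -> corners, then undo padding and scale
  const [padX, padY] = pad;
  const x1 = (cx - bw / 2 - padX) / scale;
  const y1 = (cy - bh / 2 - padY) / scale;
  const x2 = (cx + bw / 2 - padX) / scale;
  const y2 = (cy + bh / 2 - padY) / scale;
  // Re-normalize to 0-1 relative to the original image
  return {
    x1: clamp01(x1 / imgW),
    y1: clamp01(y1 / imgH),
    x2: clamp01(x2 / imgW),
    y2: clamp01(y2 / imgH),
  };
}

// unletterbox({ cx: 0.5, cy: 0.5, w: 0.2, h: 0.2 }, 640, 640 / 1920, [0, 140], 1920, 1080)
// -> { x1: 0.4, y1: ~0.32, x2: 0.6, y2: ~0.68 }: a box centered in the original frame.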
package/python/openvino_inference.py
CHANGED
@@ -110,14 +110,22 @@ def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
 def parse_yolo_output(output: np.ndarray, conf_threshold: float,
                       img_w: int, img_h: int, input_size: int,
                       scale: float, pad: tuple[int, int]) -> list[dict]:
-    """Parse YOLO output tensor [1,
+    """Parse YOLO output tensor [1, C, 8400] into normalised detections.
+
+    Supports both detection (C=84) and segmentation (C=116) outputs.
+    For seg models the extra 32 mask coefficients are ignored (detections only).
+    """
     if output.ndim == 3 and output.shape[0] == 1:
         output = output[0]
-
-
+
+    # Transpose if channels-first: [C, N] -> [N, C]
+    num_channels = output.shape[0]
+    if num_channels in (84, 116, 144):  # det=84, seg=116, etc.
+        output = output.T  # [8400, C]
 
     cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
-
+    # Take only the 80 class scores (skip mask coefficients if seg model)
+    class_scores = output[:, 4:84]  # [num_boxes, 80] — works for both det and seg
 
     class_ids = np.argmax(class_scores, axis=1)
     scores = class_scores[np.arange(len(class_ids)), class_ids]
package/python/pytorch_inference.py
CHANGED
@@ -114,14 +114,22 @@ def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
 def parse_yolo_output(output: np.ndarray, conf_threshold: float,
                       img_w: int, img_h: int, input_size: int,
                       scale: float, pad: tuple[int, int]) -> list[dict]:
-    """Parse YOLO output tensor [1,
+    """Parse YOLO output tensor [1, C, 8400] into normalised detections.
+
+    Supports both detection (C=84) and segmentation (C=116) outputs.
+    For seg models the extra 32 mask coefficients are ignored (detections only).
+    """
     if output.ndim == 3 and output.shape[0] == 1:
         output = output[0]
-
-
+
+    # Transpose if channels-first: [C, N] -> [N, C]
+    num_channels = output.shape[0]
+    if num_channels in (84, 116, 144):  # det=84, seg=116, etc.
+        output = output.T  # [8400, C]
 
     cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
-
+    # Take only the 80 class scores (skip mask coefficients if seg model)
+    class_scores = output[:, 4:84]  # [num_boxes, 80] — works for both det and seg
 
     class_ids = np.argmax(class_scores, axis=1)
     scores = class_scores[np.arange(len(class_ids)), class_ids]
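The parse_yolo_output change is applied identically in all three inference scripts and keys the transpose off the channel count: a YOLOv8 detection head emits [1, 84, 8400] (4 box values + 80 class scores per anchor) and a segmentation head [1, 116, 8400] (the same plus 32 mask coefficients), so a leading dimension of 84, 116, or 144 signals a channels-first tensor. A TypeScript mirror of that layout guard, illustrative rather than package code:

// Expose a [numBoxes, C] accessor over a flat row-major tensor that may be
// either [C, N] (channels-first) or already [N, C].
const YOLO_CHANNEL_COUNTS = new Set([84, 116, 144]); // det=84, seg=116, etc.

function boxMajorView(rows: number, cols: number, data: Float32Array) {
  if (YOLO_CHANNEL_COUNTS.has(rows)) {
    // Channels-first [C, N]: read transposed instead of copying
    return { numBoxes: cols, get: (box: number, ch: number) => data[ch * cols + box] };
  }
  return { numBoxes: rows, get: (box: number, ch: number) => data[box * cols + ch] };
}

// Channels 0-3 are cx, cy, w, h; channels 4-83 are the 80 class scores.
// Mask coefficients (channel 84 and up) from seg models are simply never read.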
package/dist/addons/camera-native-detection/index.d.mts
REMOVED
@@ -1,32 +0,0 @@
-import { IDetectorProvider, IDetectionAddon, AddonManifest, AddonContext, FrameInput, DetectorOutput, ConfigUISchema, ClassMapDefinition, ModelCatalogEntry, DetectionModel, LabelDefinition, ProbeResult } from '@camstack/types';
-
-/**
- * CameraNativeDetectionAddon
- *
- * A stub detector that wraps native camera event subscriptions (e.g. Frigate / Scrypted
- * webhooks). The detect() method always returns empty — real detections are delivered
- * asynchronously via external event subscriptions which should populate the pipeline
- * from outside this addon's detect() call.
- *
- * This addon exists so the pipeline can declare a 'detector' slot backed by camera events
- * without requiring any inference model.
- */
-declare class CameraNativeDetectionAddon implements IDetectorProvider, IDetectionAddon {
-    readonly id = "camera-native-detection";
-    readonly slot: "detector";
-    readonly inputClasses: readonly string[] | null;
-    readonly outputClasses: readonly ["person", "vehicle", "motion", "face"];
-    readonly slotPriority = 5;
-    readonly manifest: AddonManifest;
-    initialize(_ctx: AddonContext): Promise<void>;
-    detect(_frame: FrameInput): Promise<DetectorOutput>;
-    shutdown(): Promise<void>;
-    getConfigSchema(): ConfigUISchema;
-    getClassMap(): ClassMapDefinition;
-    getModelCatalog(): ModelCatalogEntry[];
-    getAvailableModels(): DetectionModel[];
-    getActiveLabels(): readonly LabelDefinition[];
-    probe(): Promise<ProbeResult>;
-}
-
-export { CameraNativeDetectionAddon as default };