@camstack/addon-vision 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/dist/addons/animal-classifier/index.d.mts +6 -1
  2. package/dist/addons/animal-classifier/index.d.ts +6 -1
  3. package/dist/addons/animal-classifier/index.js +513 -49
  4. package/dist/addons/animal-classifier/index.js.map +1 -1
  5. package/dist/addons/animal-classifier/index.mjs +6 -4
  6. package/dist/addons/audio-classification/index.d.mts +6 -1
  7. package/dist/addons/audio-classification/index.d.ts +6 -1
  8. package/dist/addons/audio-classification/index.js +86 -26
  9. package/dist/addons/audio-classification/index.js.map +1 -1
  10. package/dist/addons/audio-classification/index.mjs +3 -2
  11. package/dist/addons/bird-global-classifier/index.d.mts +6 -1
  12. package/dist/addons/bird-global-classifier/index.d.ts +6 -1
  13. package/dist/addons/bird-global-classifier/index.js +514 -50
  14. package/dist/addons/bird-global-classifier/index.js.map +1 -1
  15. package/dist/addons/bird-global-classifier/index.mjs +6 -4
  16. package/dist/addons/bird-nabirds-classifier/index.d.mts +6 -1
  17. package/dist/addons/bird-nabirds-classifier/index.d.ts +6 -1
  18. package/dist/addons/bird-nabirds-classifier/index.js +523 -60
  19. package/dist/addons/bird-nabirds-classifier/index.js.map +1 -1
  20. package/dist/addons/bird-nabirds-classifier/index.mjs +6 -4
  21. package/dist/addons/face-detection/index.d.mts +6 -1
  22. package/dist/addons/face-detection/index.d.ts +6 -1
  23. package/dist/addons/face-detection/index.js +538 -39
  24. package/dist/addons/face-detection/index.js.map +1 -1
  25. package/dist/addons/face-detection/index.mjs +5 -3
  26. package/dist/addons/face-recognition/index.d.mts +6 -1
  27. package/dist/addons/face-recognition/index.d.ts +6 -1
  28. package/dist/addons/face-recognition/index.js +487 -33
  29. package/dist/addons/face-recognition/index.js.map +1 -1
  30. package/dist/addons/face-recognition/index.mjs +5 -3
  31. package/dist/addons/motion-detection/index.d.mts +3 -1
  32. package/dist/addons/motion-detection/index.d.ts +3 -1
  33. package/dist/addons/motion-detection/index.js +11 -3
  34. package/dist/addons/motion-detection/index.js.map +1 -1
  35. package/dist/addons/motion-detection/index.mjs +140 -3
  36. package/dist/addons/motion-detection/index.mjs.map +1 -1
  37. package/dist/addons/object-detection/index.d.mts +6 -1
  38. package/dist/addons/object-detection/index.d.ts +6 -1
  39. package/dist/addons/object-detection/index.js +369 -72
  40. package/dist/addons/object-detection/index.js.map +1 -1
  41. package/dist/addons/object-detection/index.mjs +5 -3
  42. package/dist/addons/plate-detection/index.d.mts +6 -1
  43. package/dist/addons/plate-detection/index.d.ts +6 -1
  44. package/dist/addons/plate-detection/index.js +531 -31
  45. package/dist/addons/plate-detection/index.js.map +1 -1
  46. package/dist/addons/plate-detection/index.mjs +5 -3
  47. package/dist/addons/plate-recognition/index.d.mts +7 -1
  48. package/dist/addons/plate-recognition/index.d.ts +7 -1
  49. package/dist/addons/plate-recognition/index.js +176 -44
  50. package/dist/addons/plate-recognition/index.js.map +1 -1
  51. package/dist/addons/plate-recognition/index.mjs +4 -3
  52. package/dist/addons/segmentation-refiner/index.d.mts +30 -0
  53. package/dist/addons/segmentation-refiner/index.d.ts +30 -0
  54. package/dist/addons/segmentation-refiner/index.js +1048 -0
  55. package/dist/addons/segmentation-refiner/index.js.map +1 -0
  56. package/dist/addons/segmentation-refiner/index.mjs +209 -0
  57. package/dist/addons/segmentation-refiner/index.mjs.map +1 -0
  58. package/dist/addons/vehicle-classifier/index.d.mts +31 -0
  59. package/dist/addons/vehicle-classifier/index.d.ts +31 -0
  60. package/dist/addons/vehicle-classifier/index.js +688 -0
  61. package/dist/addons/vehicle-classifier/index.js.map +1 -0
  62. package/dist/addons/vehicle-classifier/index.mjs +250 -0
  63. package/dist/addons/vehicle-classifier/index.mjs.map +1 -0
  64. package/dist/{chunk-6OR5TE7A.mjs → chunk-22BHCDT5.mjs} +2 -2
  65. package/dist/chunk-22BHCDT5.mjs.map +1 -0
  66. package/dist/{chunk-LPI42WL6.mjs → chunk-2IOKI4ES.mjs} +23 -12
  67. package/dist/chunk-2IOKI4ES.mjs.map +1 -0
  68. package/dist/chunk-7DYHXUPZ.mjs +36 -0
  69. package/dist/chunk-7DYHXUPZ.mjs.map +1 -0
  70. package/dist/chunk-BJTO5JO5.mjs +11 -0
  71. package/dist/chunk-BP7H4NFS.mjs +412 -0
  72. package/dist/chunk-BP7H4NFS.mjs.map +1 -0
  73. package/dist/chunk-BR2FPGOX.mjs +98 -0
  74. package/dist/chunk-BR2FPGOX.mjs.map +1 -0
  75. package/dist/{chunk-5AIQSN32.mjs → chunk-D6WEHN33.mjs} +66 -17
  76. package/dist/chunk-D6WEHN33.mjs.map +1 -0
  77. package/dist/{chunk-3MQFUDRU.mjs → chunk-DRYFGARD.mjs} +76 -47
  78. package/dist/chunk-DRYFGARD.mjs.map +1 -0
  79. package/dist/{chunk-ISOIDU4U.mjs → chunk-DUN6XU3N.mjs} +23 -5
  80. package/dist/chunk-DUN6XU3N.mjs.map +1 -0
  81. package/dist/{chunk-MEVASN3P.mjs → chunk-ESLHNWWE.mjs} +104 -22
  82. package/dist/chunk-ESLHNWWE.mjs.map +1 -0
  83. package/dist/{chunk-B3R66MPF.mjs → chunk-JUQEW6ON.mjs} +58 -21
  84. package/dist/chunk-JUQEW6ON.mjs.map +1 -0
  85. package/dist/{chunk-AYBFB7ID.mjs → chunk-R5J3WAUI.mjs} +200 -318
  86. package/dist/chunk-R5J3WAUI.mjs.map +1 -0
  87. package/dist/chunk-XZ6ZMXXU.mjs +39 -0
  88. package/dist/chunk-XZ6ZMXXU.mjs.map +1 -0
  89. package/dist/{chunk-5JJZGKL7.mjs → chunk-YPU4WTXZ.mjs} +102 -19
  90. package/dist/chunk-YPU4WTXZ.mjs.map +1 -0
  91. package/dist/{chunk-J4WRYHHY.mjs → chunk-YUCD2TFH.mjs} +66 -36
  92. package/dist/chunk-YUCD2TFH.mjs.map +1 -0
  93. package/dist/{chunk-PDSHDDPV.mjs → chunk-ZTJENCFC.mjs} +159 -35
  94. package/dist/chunk-ZTJENCFC.mjs.map +1 -0
  95. package/dist/{chunk-Q3SQOYG6.mjs → chunk-ZWYXXCXP.mjs} +67 -37
  96. package/dist/chunk-ZWYXXCXP.mjs.map +1 -0
  97. package/dist/index.d.mts +17 -5
  98. package/dist/index.d.ts +17 -5
  99. package/dist/index.js +1343 -550
  100. package/dist/index.js.map +1 -1
  101. package/dist/index.mjs +191 -20
  102. package/dist/index.mjs.map +1 -1
  103. package/package.json +94 -18
  104. package/python/coreml_inference.py +61 -18
  105. package/python/openvino_inference.py +12 -4
  106. package/python/pytorch_inference.py +12 -4
  107. package/dist/addons/camera-native-detection/index.d.mts +0 -32
  108. package/dist/addons/camera-native-detection/index.d.ts +0 -32
  109. package/dist/addons/camera-native-detection/index.js +0 -99
  110. package/dist/addons/camera-native-detection/index.js.map +0 -1
  111. package/dist/addons/camera-native-detection/index.mjs +0 -7
  112. package/dist/chunk-3MQFUDRU.mjs.map +0 -1
  113. package/dist/chunk-5AIQSN32.mjs.map +0 -1
  114. package/dist/chunk-5JJZGKL7.mjs.map +0 -1
  115. package/dist/chunk-6OR5TE7A.mjs.map +0 -1
  116. package/dist/chunk-AYBFB7ID.mjs.map +0 -1
  117. package/dist/chunk-B3R66MPF.mjs.map +0 -1
  118. package/dist/chunk-DTOAB2CE.mjs +0 -79
  119. package/dist/chunk-DTOAB2CE.mjs.map +0 -1
  120. package/dist/chunk-ISOIDU4U.mjs.map +0 -1
  121. package/dist/chunk-J4WRYHHY.mjs.map +0 -1
  122. package/dist/chunk-LPI42WL6.mjs.map +0 -1
  123. package/dist/chunk-MEVASN3P.mjs.map +0 -1
  124. package/dist/chunk-PDSHDDPV.mjs.map +0 -1
  125. package/dist/chunk-Q3SQOYG6.mjs.map +0 -1
  126. package/dist/chunk-QIMDG34B.mjs +0 -229
  127. package/dist/chunk-QIMDG34B.mjs.map +0 -1
  128. package/python/__pycache__/coreml_inference.cpython-313.pyc +0 -0
  129. package/python/__pycache__/openvino_inference.cpython-313.pyc +0 -0
  130. package/python/__pycache__/pytorch_inference.cpython-313.pyc +0 -0
  131. /package/dist/{addons/camera-native-detection/index.mjs.map → chunk-BJTO5JO5.mjs.map} +0 -0
package/dist/index.mjs CHANGED
@@ -1,72 +1,241 @@
1
- import {
2
- CameraNativeDetectionAddon
3
- } from "./chunk-DTOAB2CE.mjs";
4
1
  import {
5
2
  BirdGlobalClassifierAddon
6
- } from "./chunk-Q3SQOYG6.mjs";
3
+ } from "./chunk-ZWYXXCXP.mjs";
7
4
  import {
8
5
  BirdNABirdsClassifierAddon
9
- } from "./chunk-3MQFUDRU.mjs";
6
+ } from "./chunk-DRYFGARD.mjs";
10
7
  import {
11
8
  AnimalClassifierAddon
12
- } from "./chunk-J4WRYHHY.mjs";
9
+ } from "./chunk-YUCD2TFH.mjs";
13
10
  import {
14
11
  ANIMAL_TYPE_MODELS,
15
12
  BIRD_NABIRDS_MODELS,
16
13
  BIRD_SPECIES_MODELS
17
- } from "./chunk-ISOIDU4U.mjs";
14
+ } from "./chunk-DUN6XU3N.mjs";
15
+ import {
16
+ VEHICLE_TYPE_MODELS
17
+ } from "./chunk-XZ6ZMXXU.mjs";
18
+ import {
19
+ SEGMENTATION_REFINER_MODELS
20
+ } from "./chunk-7DYHXUPZ.mjs";
18
21
  import {
19
- MotionDetectionAddon,
20
22
  detectMotion
21
- } from "./chunk-QIMDG34B.mjs";
23
+ } from "./chunk-BR2FPGOX.mjs";
22
24
  import {
23
- OBJECT_DETECTION_MODELS,
24
25
  ObjectDetectionAddon,
25
26
  SEGMENTATION_MODELS
26
- } from "./chunk-AYBFB7ID.mjs";
27
+ } from "./chunk-R5J3WAUI.mjs";
27
28
  import {
28
29
  FACE_DETECTION_MODELS,
29
30
  FaceDetectionAddon,
30
31
  scrfdPostprocess
31
- } from "./chunk-MEVASN3P.mjs";
32
+ } from "./chunk-ESLHNWWE.mjs";
32
33
  import {
33
34
  FACE_RECOGNITION_MODELS,
34
35
  FaceRecognitionAddon,
35
36
  cosineSimilarity,
36
37
  l2Normalize
37
- } from "./chunk-B3R66MPF.mjs";
38
+ } from "./chunk-JUQEW6ON.mjs";
38
39
  import {
39
40
  PLATE_DETECTION_MODELS,
40
41
  PlateDetectionAddon
41
- } from "./chunk-5JJZGKL7.mjs";
42
+ } from "./chunk-YPU4WTXZ.mjs";
42
43
  import {
43
44
  iou,
44
45
  nms,
45
46
  yoloPostprocess
46
47
  } from "./chunk-KUO2BVFY.mjs";
48
+ import {
49
+ MLPACKAGE_FILES,
50
+ OBJECT_DETECTION_MODELS
51
+ } from "./chunk-BP7H4NFS.mjs";
47
52
  import {
48
53
  PLATE_RECOGNITION_MODELS,
49
54
  PlateRecognitionAddon,
50
55
  ctcDecode
51
- } from "./chunk-PDSHDDPV.mjs";
56
+ } from "./chunk-ZTJENCFC.mjs";
52
57
  import {
53
58
  cropRegion,
54
59
  jpegToRgb,
55
60
  letterbox,
56
61
  resizeAndNormalize,
57
62
  rgbToGrayscale
58
- } from "./chunk-6OR5TE7A.mjs";
63
+ } from "./chunk-22BHCDT5.mjs";
59
64
  import {
60
65
  AUDIO_CLASSIFICATION_MODELS,
61
66
  AudioClassificationAddon,
62
67
  yamnetPostprocess
63
- } from "./chunk-5AIQSN32.mjs";
68
+ } from "./chunk-D6WEHN33.mjs";
64
69
  import {
65
70
  NodeInferenceEngine,
66
71
  PythonInferenceEngine,
67
72
  probeOnnxBackends,
68
73
  resolveEngine
69
- } from "./chunk-LPI42WL6.mjs";
74
+ } from "./chunk-2IOKI4ES.mjs";
75
+ import "./chunk-BJTO5JO5.mjs";
76
+
77
+ // src/catalogs/general-ocr-models.ts
78
+ import { hfModelUrl } from "@camstack/types";
79
+ var HF_REPO = "camstack/camstack-models";
80
+ var OCR_TEXT_LABELS = [
81
+ { id: "text", name: "Scene Text" }
82
+ ];
83
+ var GENERAL_OCR_MODELS = [
84
+ // ── OnnxTR / docTR — lightweight general scene text recognition ──
85
+ {
86
+ id: "doctr-det-db-mobilenet",
87
+ name: "docTR Detection MobileNet",
88
+ description: "docTR DBNet MobileNet V3 \u2014 lightweight text region detection for scene text",
89
+ inputSize: { width: 1024, height: 1024 },
90
+ labels: OCR_TEXT_LABELS,
91
+ formats: {
92
+ onnx: {
93
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/onnx/camstack-doctr-det-db-mobilenet-v3.onnx"),
94
+ sizeMB: 15
95
+ },
96
+ coreml: {
97
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/coreml/camstack-doctr-det-db-mobilenet-v3.mlpackage"),
98
+ sizeMB: 8,
99
+ isDirectory: true,
100
+ files: [
101
+ "Manifest.json",
102
+ "Data/com.apple.CoreML/model.mlmodel",
103
+ "Data/com.apple.CoreML/weights/weight.bin"
104
+ ],
105
+ runtimes: ["python"]
106
+ },
107
+ openvino: {
108
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/openvino/camstack-doctr-det-db-mobilenet-v3.xml"),
109
+ sizeMB: 8,
110
+ runtimes: ["python"]
111
+ }
112
+ }
113
+ },
114
+ {
115
+ id: "doctr-rec-crnn-mobilenet",
116
+ name: "docTR Recognition CRNN MobileNet",
117
+ description: "docTR CRNN MobileNet V3 \u2014 lightweight text recognition for detected regions",
118
+ inputSize: { width: 128, height: 32 },
119
+ labels: OCR_TEXT_LABELS,
120
+ formats: {
121
+ onnx: {
122
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/onnx/camstack-doctr-rec-crnn-mobilenet-v3.onnx"),
123
+ sizeMB: 5
124
+ },
125
+ coreml: {
126
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/coreml/camstack-doctr-rec-crnn-mobilenet-v3.mlpackage"),
127
+ sizeMB: 3,
128
+ isDirectory: true,
129
+ files: [
130
+ "Manifest.json",
131
+ "Data/com.apple.CoreML/model.mlmodel",
132
+ "Data/com.apple.CoreML/weights/weight.bin"
133
+ ],
134
+ runtimes: ["python"]
135
+ },
136
+ openvino: {
137
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/openvino/camstack-doctr-rec-crnn-mobilenet-v3.xml"),
138
+ sizeMB: 3,
139
+ runtimes: ["python"]
140
+ }
141
+ }
142
+ },
143
+ {
144
+ id: "doctr-rec-parseq",
145
+ name: "docTR Recognition PARSeq",
146
+ description: "docTR PARSeq \u2014 high-accuracy scene text recognition (top ICDAR scores)",
147
+ inputSize: { width: 128, height: 32 },
148
+ labels: OCR_TEXT_LABELS,
149
+ formats: {
150
+ onnx: {
151
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/onnx/camstack-doctr-rec-parseq.onnx"),
152
+ sizeMB: 25
153
+ },
154
+ coreml: {
155
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/coreml/camstack-doctr-rec-parseq.mlpackage"),
156
+ sizeMB: 13,
157
+ isDirectory: true,
158
+ files: [
159
+ "Manifest.json",
160
+ "Data/com.apple.CoreML/model.mlmodel",
161
+ "Data/com.apple.CoreML/weights/weight.bin"
162
+ ],
163
+ runtimes: ["python"]
164
+ },
165
+ openvino: {
166
+ url: hfModelUrl(HF_REPO, "generalOcr/doctr/openvino/camstack-doctr-rec-parseq.xml"),
167
+ sizeMB: 13,
168
+ runtimes: ["python"]
169
+ }
170
+ }
171
+ },
172
+ // ── PaddleOCR PP-OCRv5 Mobile — general-purpose text detection + recognition ──
173
+ {
174
+ id: "ppocr-v5-det-mobile",
175
+ name: "PP-OCRv5 Detection Mobile",
176
+ description: "PP-OCRv5 mobile text detection \u2014 optimized for edge, 100+ languages",
177
+ inputSize: { width: 640, height: 640 },
178
+ labels: OCR_TEXT_LABELS,
179
+ formats: {
180
+ onnx: {
181
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-det-mobile.onnx"),
182
+ sizeMB: 6
183
+ },
184
+ coreml: {
185
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-det-mobile.mlpackage"),
186
+ sizeMB: 3,
187
+ isDirectory: true,
188
+ files: [
189
+ "Manifest.json",
190
+ "Data/com.apple.CoreML/model.mlmodel",
191
+ "Data/com.apple.CoreML/weights/weight.bin"
192
+ ],
193
+ runtimes: ["python"]
194
+ },
195
+ openvino: {
196
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-det-mobile.xml"),
197
+ sizeMB: 3,
198
+ runtimes: ["python"]
199
+ }
200
+ }
201
+ },
202
+ {
203
+ id: "ppocr-v5-rec-mobile",
204
+ name: "PP-OCRv5 Recognition Mobile",
205
+ description: "PP-OCRv5 mobile text recognition \u2014 100+ languages, CTC decoding",
206
+ inputSize: { width: 320, height: 48 },
207
+ labels: OCR_TEXT_LABELS,
208
+ formats: {
209
+ onnx: {
210
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-rec-mobile.onnx"),
211
+ sizeMB: 8
212
+ },
213
+ coreml: {
214
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-rec-mobile.mlpackage"),
215
+ sizeMB: 4,
216
+ isDirectory: true,
217
+ files: [
218
+ "Manifest.json",
219
+ "Data/com.apple.CoreML/model.mlmodel",
220
+ "Data/com.apple.CoreML/weights/weight.bin"
221
+ ],
222
+ runtimes: ["python"]
223
+ },
224
+ openvino: {
225
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-rec-mobile.xml"),
226
+ sizeMB: 4,
227
+ runtimes: ["python"]
228
+ }
229
+ },
230
+ extraFiles: [
231
+ {
232
+ url: hfModelUrl(HF_REPO, "generalOcr/ppocr-v5/camstack-ppocr-v5-keys.txt"),
233
+ filename: "camstack-ppocr-v5-keys.txt",
234
+ sizeMB: 0.1
235
+ }
236
+ ]
237
+ }
238
+ ];
70
239
  export {
71
240
  ANIMAL_TYPE_MODELS,
72
241
  AUDIO_CLASSIFICATION_MODELS,
@@ -76,12 +245,12 @@ export {
76
245
  BIRD_SPECIES_MODELS,
77
246
  BirdGlobalClassifierAddon,
78
247
  BirdNABirdsClassifierAddon,
79
- CameraNativeDetectionAddon,
80
248
  FACE_DETECTION_MODELS,
81
249
  FACE_RECOGNITION_MODELS,
82
250
  FaceDetectionAddon,
83
251
  FaceRecognitionAddon,
84
- MotionDetectionAddon,
252
+ GENERAL_OCR_MODELS,
253
+ MLPACKAGE_FILES,
85
254
  NodeInferenceEngine,
86
255
  OBJECT_DETECTION_MODELS,
87
256
  ObjectDetectionAddon,
@@ -91,6 +260,8 @@ export {
91
260
  PlateRecognitionAddon,
92
261
  PythonInferenceEngine,
93
262
  SEGMENTATION_MODELS,
263
+ SEGMENTATION_REFINER_MODELS,
264
+ VEHICLE_TYPE_MODELS,
94
265
  cosineSimilarity,
95
266
  cropRegion,
96
267
  ctcDecode,
@@ -1 +1 @@
1
- {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
1
+ {"version":3,"sources":["../src/catalogs/general-ocr-models.ts"],"sourcesContent":["import type { ModelCatalogEntry, LabelDefinition } from '@camstack/types'\nimport { hfModelUrl } from '@camstack/types'\n\nconst HF_REPO = 'camstack/camstack-models'\n\nconst OCR_TEXT_LABELS: readonly LabelDefinition[] = [\n { id: 'text', name: 'Scene Text' },\n] as const\n\n/**\n * General-purpose OCR models for scene text recognition in camera feeds.\n * These complement the plate-specific PaddleOCR models for broader text detection\n * (signs, labels, addresses, etc.).\n */\nexport const GENERAL_OCR_MODELS: readonly ModelCatalogEntry[] = [\n // ── OnnxTR / docTR — lightweight general scene text recognition ──\n {\n id: 'doctr-det-db-mobilenet',\n name: 'docTR Detection MobileNet',\n description: 'docTR DBNet MobileNet V3 — lightweight text region detection for scene text',\n inputSize: { width: 1024, height: 1024 },\n labels: OCR_TEXT_LABELS,\n formats: {\n onnx: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/onnx/camstack-doctr-det-db-mobilenet-v3.onnx'),\n sizeMB: 15,\n },\n coreml: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/coreml/camstack-doctr-det-db-mobilenet-v3.mlpackage'),\n sizeMB: 8,\n isDirectory: true,\n files: [\n 'Manifest.json',\n 'Data/com.apple.CoreML/model.mlmodel',\n 'Data/com.apple.CoreML/weights/weight.bin',\n ],\n runtimes: ['python'],\n },\n openvino: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/openvino/camstack-doctr-det-db-mobilenet-v3.xml'),\n sizeMB: 8,\n runtimes: ['python'],\n },\n },\n },\n {\n id: 'doctr-rec-crnn-mobilenet',\n name: 'docTR Recognition CRNN MobileNet',\n description: 'docTR CRNN MobileNet V3 — lightweight text recognition for detected regions',\n inputSize: { width: 128, height: 32 },\n labels: OCR_TEXT_LABELS,\n formats: {\n onnx: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/onnx/camstack-doctr-rec-crnn-mobilenet-v3.onnx'),\n sizeMB: 5,\n },\n coreml: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/coreml/camstack-doctr-rec-crnn-mobilenet-v3.mlpackage'),\n sizeMB: 3,\n isDirectory: true,\n files: [\n 'Manifest.json',\n 'Data/com.apple.CoreML/model.mlmodel',\n 'Data/com.apple.CoreML/weights/weight.bin',\n ],\n runtimes: ['python'],\n },\n openvino: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/openvino/camstack-doctr-rec-crnn-mobilenet-v3.xml'),\n sizeMB: 3,\n runtimes: ['python'],\n },\n },\n },\n {\n id: 'doctr-rec-parseq',\n name: 'docTR Recognition PARSeq',\n description: 'docTR PARSeq — high-accuracy scene text recognition (top ICDAR scores)',\n inputSize: { width: 128, height: 32 },\n labels: OCR_TEXT_LABELS,\n formats: {\n onnx: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/onnx/camstack-doctr-rec-parseq.onnx'),\n sizeMB: 25,\n },\n coreml: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/coreml/camstack-doctr-rec-parseq.mlpackage'),\n sizeMB: 13,\n isDirectory: true,\n files: [\n 'Manifest.json',\n 'Data/com.apple.CoreML/model.mlmodel',\n 'Data/com.apple.CoreML/weights/weight.bin',\n ],\n runtimes: ['python'],\n },\n openvino: {\n url: hfModelUrl(HF_REPO, 'generalOcr/doctr/openvino/camstack-doctr-rec-parseq.xml'),\n sizeMB: 13,\n runtimes: ['python'],\n },\n },\n },\n\n // ── PaddleOCR PP-OCRv5 Mobile — general-purpose text detection + recognition ──\n {\n id: 'ppocr-v5-det-mobile',\n name: 'PP-OCRv5 Detection Mobile',\n description: 'PP-OCRv5 mobile text detection — optimized for edge, 100+ languages',\n inputSize: { width: 640, height: 640 },\n labels: OCR_TEXT_LABELS,\n formats: {\n onnx: {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-det-mobile.onnx'),\n sizeMB: 6,\n },\n coreml: {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-det-mobile.mlpackage'),\n sizeMB: 3,\n isDirectory: true,\n files: [\n 'Manifest.json',\n 'Data/com.apple.CoreML/model.mlmodel',\n 'Data/com.apple.CoreML/weights/weight.bin',\n ],\n runtimes: ['python'],\n },\n openvino: {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-det-mobile.xml'),\n sizeMB: 3,\n runtimes: ['python'],\n },\n },\n },\n {\n id: 'ppocr-v5-rec-mobile',\n name: 'PP-OCRv5 Recognition Mobile',\n description: 'PP-OCRv5 mobile text recognition — 100+ languages, CTC decoding',\n inputSize: { width: 320, height: 48 },\n labels: OCR_TEXT_LABELS,\n formats: {\n onnx: {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/onnx/camstack-ppocr-v5-rec-mobile.onnx'),\n sizeMB: 8,\n },\n coreml: {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/coreml/camstack-ppocr-v5-rec-mobile.mlpackage'),\n sizeMB: 4,\n isDirectory: true,\n files: [\n 'Manifest.json',\n 'Data/com.apple.CoreML/model.mlmodel',\n 'Data/com.apple.CoreML/weights/weight.bin',\n ],\n runtimes: ['python'],\n },\n openvino: {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/openvino/camstack-ppocr-v5-rec-mobile.xml'),\n sizeMB: 4,\n runtimes: ['python'],\n },\n },\n extraFiles: [\n {\n url: hfModelUrl(HF_REPO, 'generalOcr/ppocr-v5/camstack-ppocr-v5-keys.txt'),\n filename: 'camstack-ppocr-v5-keys.txt',\n sizeMB: 0.1,\n },\n ],\n },\n] as const\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AACA,SAAS,kBAAkB;AAE3B,IAAM,UAAU;AAEhB,IAAM,kBAA8C;AAAA,EAClD,EAAE,IAAI,QAAQ,MAAM,aAAa;AACnC;AAOO,IAAM,qBAAmD;AAAA;AAAA,EAE9D;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,MAAM,QAAQ,KAAK;AAAA,IACvC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,+DAA+D;AAAA,QACxF,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,sEAAsE;AAAA,QAC/F,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,kEAAkE;AAAA,QAC3F,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,GAAG;AAAA,IACpC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,iEAAiE;AAAA,QAC1F,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,wEAAwE;AAAA,QACjG,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,oEAAoE;AAAA,QAC7F,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,GAAG;AAAA,IACpC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,sDAAsD;AAAA,QAC/E,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,6DAA6D;AAAA,QACtF,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,yDAAyD;AAAA,QAClF,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA;AAAA,EAGA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,IAAI;AAAA,IACrC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,4DAA4D;AAAA,QACrF,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,mEAAmE;AAAA,QAC5F,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,+DAA+D;AAAA,QACxF,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,EACF;AAAA,EACA;AAAA,IACE,IAAI;AAAA,IACJ,MAAM;AAAA,IACN,aAAa;AAAA,IACb,WAAW,EAAE,OAAO,KAAK,QAAQ,GAAG;AAAA,IACpC,QAAQ;AAAA,IACR,SAAS;AAAA,MACP,MAAM;AAAA,QACJ,KAAK,WAAW,SAAS,4DAA4D;AAAA,QACrF,QAAQ;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,QACN,KAAK,WAAW,SAAS,mEAAmE;AAAA,QAC5F,QAAQ;AAAA,QACR,aAAa;AAAA,QACb,OAAO;AAAA,UACL;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,QACA,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,MACA,UAAU;AAAA,QACR,KAAK,WAAW,SAAS,+DAA+D;AAAA,QACxF,QAAQ;AAAA,QACR,UAAU,CAAC,QAAQ;AAAA,MACrB;AAAA,IACF;AAAA,IACA,YAAY;AAAA,MACV;AAAA,QACE,KAAK,WAAW,SAAS,gDAAgD;AAAA,QACzE,UAAU;AAAA,QACV,QAAQ;AAAA,MACV;AAAA,IACF;AAAA,EACF;AACF;","names":[]}
package/package.json CHANGED
@@ -1,8 +1,17 @@
1
1
  {
2
2
  "name": "@camstack/addon-vision",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "description": "Builtin detection addons for CamStack — object detection, face, plate, audio, motion",
5
- "keywords": ["camstack", "addon", "camstack-addon", "vision", "object-detection", "face-detection", "motion-detection", "camera"],
5
+ "keywords": [
6
+ "camstack",
7
+ "addon",
8
+ "camstack-addon",
9
+ "vision",
10
+ "object-detection",
11
+ "face-detection",
12
+ "motion-detection",
13
+ "camera"
14
+ ],
6
15
  "license": "MIT",
7
16
  "repository": {
8
17
  "type": "git",
@@ -12,25 +21,87 @@
12
21
  "module": "./dist/index.mjs",
13
22
  "types": "./dist/index.d.ts",
14
23
  "exports": {
15
- ".": { "import": "./dist/index.mjs", "require": "./dist/index.js", "types": "./dist/index.d.ts" },
16
- "./addons/*": { "import": "./dist/addons/*/index.mjs", "require": "./dist/addons/*/index.js", "types": "./dist/addons/*/index.d.ts" }
24
+ ".": {
25
+ "import": "./dist/index.mjs",
26
+ "require": "./dist/index.js",
27
+ "types": "./dist/index.d.ts"
28
+ },
29
+ "./addons/*": {
30
+ "import": "./dist/addons/*/index.mjs",
31
+ "require": "./dist/addons/*/index.js",
32
+ "types": "./dist/addons/*/index.d.ts"
33
+ },
34
+ "./package.json": "./package.json"
17
35
  },
18
36
  "camstack": {
37
+ "displayName": "CamStack Vision",
19
38
  "addons": [
20
- { "id": "motion-detection", "entry": "./dist/addons/motion-detection/index.js", "slot": "detector" },
21
- { "id": "object-detection", "entry": "./dist/addons/object-detection/index.js", "slot": "detector" },
22
- { "id": "face-detection", "entry": "./dist/addons/face-detection/index.js", "slot": "cropper" },
23
- { "id": "face-recognition", "entry": "./dist/addons/face-recognition/index.js", "slot": "classifier" },
24
- { "id": "plate-detection", "entry": "./dist/addons/plate-detection/index.js", "slot": "cropper" },
25
- { "id": "plate-recognition", "entry": "./dist/addons/plate-recognition/index.js", "slot": "classifier" },
26
- { "id": "audio-classification", "entry": "./dist/addons/audio-classification/index.js", "slot": "classifier" },
27
- { "id": "camera-native-detection", "entry": "./dist/addons/camera-native-detection/index.js", "slot": "detector" },
28
- { "id": "bird-global-classifier", "entry": "./dist/addons/bird-global-classifier/index.js", "slot": "classifier" },
29
- { "id": "bird-nabirds-classifier", "entry": "./dist/addons/bird-nabirds-classifier/index.js", "slot": "classifier" },
30
- { "id": "animal-classifier", "entry": "./dist/addons/animal-classifier/index.js", "slot": "classifier" }
39
+ {
40
+ "id": "motion-detection",
41
+ "entry": "./dist/addons/motion-detection/index.js",
42
+ "slot": "detector"
43
+ },
44
+ {
45
+ "id": "object-detection",
46
+ "entry": "./dist/addons/object-detection/index.js",
47
+ "slot": "detector"
48
+ },
49
+ {
50
+ "id": "face-detection",
51
+ "entry": "./dist/addons/face-detection/index.js",
52
+ "slot": "cropper"
53
+ },
54
+ {
55
+ "id": "face-recognition",
56
+ "entry": "./dist/addons/face-recognition/index.js",
57
+ "slot": "classifier"
58
+ },
59
+ {
60
+ "id": "plate-detection",
61
+ "entry": "./dist/addons/plate-detection/index.js",
62
+ "slot": "cropper"
63
+ },
64
+ {
65
+ "id": "plate-recognition",
66
+ "entry": "./dist/addons/plate-recognition/index.js",
67
+ "slot": "classifier"
68
+ },
69
+ {
70
+ "id": "audio-classification",
71
+ "entry": "./dist/addons/audio-classification/index.js",
72
+ "slot": "classifier"
73
+ },
74
+ {
75
+ "id": "bird-global-classifier",
76
+ "entry": "./dist/addons/bird-global-classifier/index.js",
77
+ "slot": "classifier"
78
+ },
79
+ {
80
+ "id": "bird-nabirds-classifier",
81
+ "entry": "./dist/addons/bird-nabirds-classifier/index.js",
82
+ "slot": "classifier"
83
+ },
84
+ {
85
+ "id": "animal-classifier",
86
+ "entry": "./dist/addons/animal-classifier/index.js",
87
+ "slot": "classifier"
88
+ },
89
+ {
90
+ "id": "vehicle-classifier",
91
+ "entry": "./dist/addons/vehicle-classifier/index.js",
92
+ "slot": "classifier"
93
+ },
94
+ {
95
+ "id": "segmentation-refiner",
96
+ "entry": "./dist/addons/segmentation-refiner/index.js",
97
+ "slot": "refiner"
98
+ }
31
99
  ]
32
100
  },
33
- "files": ["dist", "python"],
101
+ "files": [
102
+ "dist",
103
+ "python"
104
+ ],
34
105
  "scripts": {
35
106
  "build": "tsup",
36
107
  "dev": "tsup --watch",
@@ -38,8 +109,13 @@
38
109
  "test": "vitest run",
39
110
  "test:watch": "vitest"
40
111
  },
41
- "peerDependencies": { "@camstack/types": "^0.1.0" },
42
- "dependencies": { "onnxruntime-node": "^1.24.3", "sharp": "^0.34.0" },
112
+ "peerDependencies": {
113
+ "@camstack/types": "^0.1.0"
114
+ },
115
+ "dependencies": {
116
+ "onnxruntime-node": "^1.24.3",
117
+ "sharp": "^0.34.0"
118
+ },
43
119
  "devDependencies": {
44
120
  "@camstack/types": "*",
45
121
  "tsup": "^8.0.0",
@@ -110,15 +110,23 @@ def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
110
110
  def parse_yolo_output(output: np.ndarray, conf_threshold: float,
111
111
  img_w: int, img_h: int, input_size: int,
112
112
  scale: float, pad: tuple[int, int]) -> list[dict]:
113
- """Parse YOLO output tensor [1, 84, 8400] into normalised detections."""
114
- # Squeeze batch dim and transpose to [num_boxes, 84]
113
+ """Parse YOLO output tensor [1, C, 8400] into normalised detections.
114
+
115
+ Supports both detection (C=84) and segmentation (C=116) outputs.
116
+ For seg models the extra 32 mask coefficients are ignored (detections only).
117
+ """
118
+ # Squeeze batch dim
115
119
  if output.ndim == 3 and output.shape[0] == 1:
116
120
  output = output[0]
117
- if output.shape[0] == 84:
118
- output = output.T # [8400, 84]
121
+
122
+ # Transpose if channels-first: [C, N] -> [N, C]
123
+ num_channels = output.shape[0]
124
+ if num_channels in (84, 116, 144): # det=84, seg=116, etc.
125
+ output = output.T # [8400, C]
119
126
 
120
127
  cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
121
- class_scores = output[:, 4:] # [num_boxes, 80]
128
+ # Take only the 80 class scores (skip mask coefficients if seg model)
129
+ class_scores = output[:, 4:84] # [num_boxes, 80] — works for both det and seg
122
130
 
123
131
  class_ids = np.argmax(class_scores, axis=1)
124
132
  scores = class_scores[np.arange(len(class_ids)), class_ids]
@@ -179,8 +187,12 @@ def parse_yolo_output(output: np.ndarray, conf_threshold: float,
179
187
  # ---------------------------------------------------------------------------
180
188
 
181
189
  def _parse_nms_output(coords: np.ndarray, confs: np.ndarray,
182
- conf_threshold: float, img_w: int, img_h: int) -> list[dict]:
183
- """Parse CoreML NMS output: (N, 4) coords [cx, cy, w, h] normalized + (N, 80) scores."""
190
+ conf_threshold: float, img_w: int, img_h: int,
191
+ input_size: int, scale: float, pad: tuple[int, int]) -> list[dict]:
192
+ """Parse CoreML NMS output: (N, 4) coords [cx, cy, w, h] normalized 0-1 relative to model input.
193
+
194
+ Coordinates must be un-letterboxed to get correct positions in the original image.
195
+ """
184
196
  if coords.shape[0] == 0:
185
197
  return []
186
198
 
@@ -195,12 +207,31 @@ def _parse_nms_output(coords: np.ndarray, confs: np.ndarray,
195
207
  if len(scores) == 0:
196
208
  return []
197
209
 
198
- # CoreML coords are [cx, cy, w, h] already normalized 0-1
199
- cx, cy, w, h = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
200
- x1 = np.clip(cx - w / 2, 0, 1)
201
- y1 = np.clip(cy - h / 2, 0, 1)
202
- x2 = np.clip(cx + w / 2, 0, 1)
203
- y2 = np.clip(cy + h / 2, 0, 1)
210
+ # CoreML NMS coords are [cx, cy, w, h] normalized 0-1 relative to model input (640x640)
211
+ # Convert to pixel coords in the letterboxed frame first
212
+ cx = coords[:, 0] * input_size
213
+ cy = coords[:, 1] * input_size
214
+ bw = coords[:, 2] * input_size
215
+ bh = coords[:, 3] * input_size
216
+
217
+ # Corner format
218
+ x1 = cx - bw / 2
219
+ y1 = cy - bh / 2
220
+ x2 = cx + bw / 2
221
+ y2 = cy + bh / 2
222
+
223
+ # Undo letterbox padding and scale → original image pixel coords
224
+ pad_x, pad_y = pad
225
+ x1 = (x1 - pad_x) / scale
226
+ y1 = (y1 - pad_y) / scale
227
+ x2 = (x2 - pad_x) / scale
228
+ y2 = (y2 - pad_y) / scale
229
+
230
+ # Normalize to 0-1 relative to original image
231
+ x1 = np.clip(x1 / img_w, 0, 1)
232
+ y1 = np.clip(y1 / img_h, 0, 1)
233
+ x2 = np.clip(x2 / img_w, 0, 1)
234
+ y2 = np.clip(y2 / img_h, 0, 1)
204
235
 
205
236
  detections = []
206
237
  for i in range(len(scores)):
@@ -293,13 +324,25 @@ def main() -> None:
293
324
 
294
325
  if has_builtin_nms:
295
326
  # Model has built-in NMS: outputs are (N, 4) boxes + (N, 80) scores
296
- coords = np.array(predictions['coordinates']) # (N, 4) — cx, cy, w, h normalized
327
+ coords = np.array(predictions['coordinates']) # (N, 4) — cx, cy, w, h normalized to model input
297
328
  confs = np.array(predictions['confidence']) # (N, 80)
298
- detections = _parse_nms_output(coords, confs, args.confidence, orig_w, orig_h)
329
+ detections = _parse_nms_output(coords, confs, args.confidence, orig_w, orig_h,
330
+ args.input_size, scale, pad)
299
331
  else:
300
- # Raw YOLO output: [1, 84, 8400]
301
- output_key = list(predictions.keys())[0]
302
- output = np.array(predictions[output_key])
332
+ # Raw YOLO output: [1, C, N] where C=84 (det) or C=116 (seg)
333
+ # For seg models with multiple outputs, pick the detection tensor
334
+ # (the one with shape [1, 84|116, 8400], not the mask protos [1, 32, 160, 160])
335
+ output_keys = list(predictions.keys())
336
+ output = None
337
+ for key in output_keys:
338
+ arr = np.array(predictions[key])
339
+ # Detection tensor has a dimension of 84 or 116 (not 32, not 160)
340
+ if arr.ndim >= 2 and any(d in (84, 116, 144) for d in arr.shape):
341
+ output = arr
342
+ break
343
+ if output is None:
344
+ output = np.array(predictions[output_keys[0]])
345
+
303
346
  detections = parse_yolo_output(
304
347
  output, args.confidence, orig_w, orig_h,
305
348
  args.input_size, scale, pad,
@@ -110,14 +110,22 @@ def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
110
110
  def parse_yolo_output(output: np.ndarray, conf_threshold: float,
111
111
  img_w: int, img_h: int, input_size: int,
112
112
  scale: float, pad: tuple[int, int]) -> list[dict]:
113
- """Parse YOLO output tensor [1, 84, 8400] into normalised detections."""
113
+ """Parse YOLO output tensor [1, C, 8400] into normalised detections.
114
+
115
+ Supports both detection (C=84) and segmentation (C=116) outputs.
116
+ For seg models the extra 32 mask coefficients are ignored (detections only).
117
+ """
114
118
  if output.ndim == 3 and output.shape[0] == 1:
115
119
  output = output[0]
116
- if output.shape[0] == 84:
117
- output = output.T # [8400, 84]
120
+
121
+ # Transpose if channels-first: [C, N] -> [N, C]
122
+ num_channels = output.shape[0]
123
+ if num_channels in (84, 116, 144): # det=84, seg=116, etc.
124
+ output = output.T # [8400, C]
118
125
 
119
126
  cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
120
- class_scores = output[:, 4:]
127
+ # Take only the 80 class scores (skip mask coefficients if seg model)
128
+ class_scores = output[:, 4:84] # [num_boxes, 80] — works for both det and seg
121
129
 
122
130
  class_ids = np.argmax(class_scores, axis=1)
123
131
  scores = class_scores[np.arange(len(class_ids)), class_ids]
@@ -114,14 +114,22 @@ def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
114
114
  def parse_yolo_output(output: np.ndarray, conf_threshold: float,
115
115
  img_w: int, img_h: int, input_size: int,
116
116
  scale: float, pad: tuple[int, int]) -> list[dict]:
117
- """Parse YOLO output tensor [1, 84, 8400] into normalised detections."""
117
+ """Parse YOLO output tensor [1, C, 8400] into normalised detections.
118
+
119
+ Supports both detection (C=84) and segmentation (C=116) outputs.
120
+ For seg models the extra 32 mask coefficients are ignored (detections only).
121
+ """
118
122
  if output.ndim == 3 and output.shape[0] == 1:
119
123
  output = output[0]
120
- if output.shape[0] == 84:
121
- output = output.T # [8400, 84]
124
+
125
+ # Transpose if channels-first: [C, N] -> [N, C]
126
+ num_channels = output.shape[0]
127
+ if num_channels in (84, 116, 144): # det=84, seg=116, etc.
128
+ output = output.T # [8400, C]
122
129
 
123
130
  cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
124
- class_scores = output[:, 4:]
131
+ # Take only the 80 class scores (skip mask coefficients if seg model)
132
+ class_scores = output[:, 4:84] # [num_boxes, 80] — works for both det and seg
125
133
 
126
134
  class_ids = np.argmax(class_scores, axis=1)
127
135
  scores = class_scores[np.arange(len(class_ids)), class_ids]
@@ -1,32 +0,0 @@
1
- import { IDetectorProvider, IDetectionAddon, AddonManifest, AddonContext, FrameInput, DetectorOutput, ConfigUISchema, ClassMapDefinition, ModelCatalogEntry, DetectionModel, LabelDefinition, ProbeResult } from '@camstack/types';
2
-
3
- /**
4
- * CameraNativeDetectionAddon
5
- *
6
- * A stub detector that wraps native camera event subscriptions (e.g. Frigate / Scrypted
7
- * webhooks). The detect() method always returns empty — real detections are delivered
8
- * asynchronously via external event subscriptions which should populate the pipeline
9
- * from outside this addon's detect() call.
10
- *
11
- * This addon exists so the pipeline can declare a 'detector' slot backed by camera events
12
- * without requiring any inference model.
13
- */
14
- declare class CameraNativeDetectionAddon implements IDetectorProvider, IDetectionAddon {
15
- readonly id = "camera-native-detection";
16
- readonly slot: "detector";
17
- readonly inputClasses: readonly string[] | null;
18
- readonly outputClasses: readonly ["person", "vehicle", "motion", "face"];
19
- readonly slotPriority = 5;
20
- readonly manifest: AddonManifest;
21
- initialize(_ctx: AddonContext): Promise<void>;
22
- detect(_frame: FrameInput): Promise<DetectorOutput>;
23
- shutdown(): Promise<void>;
24
- getConfigSchema(): ConfigUISchema;
25
- getClassMap(): ClassMapDefinition;
26
- getModelCatalog(): ModelCatalogEntry[];
27
- getAvailableModels(): DetectionModel[];
28
- getActiveLabels(): readonly LabelDefinition[];
29
- probe(): Promise<ProbeResult>;
30
- }
31
-
32
- export { CameraNativeDetectionAddon as default };