@camstack/addon-vision 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. package/dist/addons/animal-classifier/index.d.mts +25 -0
  2. package/dist/addons/animal-classifier/index.d.ts +25 -0
  3. package/dist/addons/animal-classifier/index.js +652 -0
  4. package/dist/addons/animal-classifier/index.js.map +1 -0
  5. package/dist/addons/animal-classifier/index.mjs +10 -0
  6. package/dist/addons/animal-classifier/index.mjs.map +1 -0
  7. package/dist/addons/audio-classification/index.d.mts +31 -0
  8. package/dist/addons/audio-classification/index.d.ts +31 -0
  9. package/dist/addons/audio-classification/index.js +572 -0
  10. package/dist/addons/audio-classification/index.js.map +1 -0
  11. package/dist/addons/audio-classification/index.mjs +8 -0
  12. package/dist/addons/audio-classification/index.mjs.map +1 -0
  13. package/dist/addons/bird-global-classifier/index.d.mts +26 -0
  14. package/dist/addons/bird-global-classifier/index.d.ts +26 -0
  15. package/dist/addons/bird-global-classifier/index.js +658 -0
  16. package/dist/addons/bird-global-classifier/index.js.map +1 -0
  17. package/dist/addons/bird-global-classifier/index.mjs +10 -0
  18. package/dist/addons/bird-global-classifier/index.mjs.map +1 -0
  19. package/dist/addons/bird-nabirds-classifier/index.d.mts +28 -0
  20. package/dist/addons/bird-nabirds-classifier/index.d.ts +28 -0
  21. package/dist/addons/bird-nabirds-classifier/index.js +700 -0
  22. package/dist/addons/bird-nabirds-classifier/index.js.map +1 -0
  23. package/dist/addons/bird-nabirds-classifier/index.mjs +10 -0
  24. package/dist/addons/bird-nabirds-classifier/index.mjs.map +1 -0
  25. package/dist/addons/camera-native-detection/index.d.mts +32 -0
  26. package/dist/addons/camera-native-detection/index.d.ts +32 -0
  27. package/dist/addons/camera-native-detection/index.js +99 -0
  28. package/dist/addons/camera-native-detection/index.js.map +1 -0
  29. package/dist/addons/camera-native-detection/index.mjs +7 -0
  30. package/dist/addons/camera-native-detection/index.mjs.map +1 -0
  31. package/dist/addons/face-detection/index.d.mts +24 -0
  32. package/dist/addons/face-detection/index.d.ts +24 -0
  33. package/dist/addons/face-detection/index.js +720 -0
  34. package/dist/addons/face-detection/index.js.map +1 -0
  35. package/dist/addons/face-detection/index.mjs +10 -0
  36. package/dist/addons/face-detection/index.mjs.map +1 -0
  37. package/dist/addons/face-recognition/index.d.mts +24 -0
  38. package/dist/addons/face-recognition/index.d.ts +24 -0
  39. package/dist/addons/face-recognition/index.js +603 -0
  40. package/dist/addons/face-recognition/index.js.map +1 -0
  41. package/dist/addons/face-recognition/index.mjs +9 -0
  42. package/dist/addons/face-recognition/index.mjs.map +1 -0
  43. package/dist/addons/motion-detection/index.d.mts +26 -0
  44. package/dist/addons/motion-detection/index.d.ts +26 -0
  45. package/dist/addons/motion-detection/index.js +273 -0
  46. package/dist/addons/motion-detection/index.js.map +1 -0
  47. package/dist/addons/motion-detection/index.mjs +8 -0
  48. package/dist/addons/motion-detection/index.mjs.map +1 -0
  49. package/dist/addons/object-detection/index.d.mts +26 -0
  50. package/dist/addons/object-detection/index.d.ts +26 -0
  51. package/dist/addons/object-detection/index.js +1214 -0
  52. package/dist/addons/object-detection/index.js.map +1 -0
  53. package/dist/addons/object-detection/index.mjs +10 -0
  54. package/dist/addons/object-detection/index.mjs.map +1 -0
  55. package/dist/addons/plate-detection/index.d.mts +25 -0
  56. package/dist/addons/plate-detection/index.d.ts +25 -0
  57. package/dist/addons/plate-detection/index.js +646 -0
  58. package/dist/addons/plate-detection/index.js.map +1 -0
  59. package/dist/addons/plate-detection/index.mjs +10 -0
  60. package/dist/addons/plate-detection/index.mjs.map +1 -0
  61. package/dist/addons/plate-recognition/index.d.mts +25 -0
  62. package/dist/addons/plate-recognition/index.d.ts +25 -0
  63. package/dist/addons/plate-recognition/index.js +648 -0
  64. package/dist/addons/plate-recognition/index.js.map +1 -0
  65. package/dist/addons/plate-recognition/index.mjs +9 -0
  66. package/dist/addons/plate-recognition/index.mjs.map +1 -0
  67. package/dist/chunk-3MQFUDRU.mjs +260 -0
  68. package/dist/chunk-3MQFUDRU.mjs.map +1 -0
  69. package/dist/chunk-5AIQSN32.mjs +227 -0
  70. package/dist/chunk-5AIQSN32.mjs.map +1 -0
  71. package/dist/chunk-5JJZGKL7.mjs +186 -0
  72. package/dist/chunk-5JJZGKL7.mjs.map +1 -0
  73. package/dist/chunk-6OR5TE7A.mjs +101 -0
  74. package/dist/chunk-6OR5TE7A.mjs.map +1 -0
  75. package/dist/chunk-AYBFB7ID.mjs +763 -0
  76. package/dist/chunk-AYBFB7ID.mjs.map +1 -0
  77. package/dist/chunk-B3R66MPF.mjs +219 -0
  78. package/dist/chunk-B3R66MPF.mjs.map +1 -0
  79. package/dist/chunk-DTOAB2CE.mjs +79 -0
  80. package/dist/chunk-DTOAB2CE.mjs.map +1 -0
  81. package/dist/chunk-ISOIDU4U.mjs +54 -0
  82. package/dist/chunk-ISOIDU4U.mjs.map +1 -0
  83. package/dist/chunk-J4WRYHHY.mjs +212 -0
  84. package/dist/chunk-J4WRYHHY.mjs.map +1 -0
  85. package/dist/chunk-KUO2BVFY.mjs +90 -0
  86. package/dist/chunk-KUO2BVFY.mjs.map +1 -0
  87. package/dist/chunk-LPI42WL6.mjs +324 -0
  88. package/dist/chunk-LPI42WL6.mjs.map +1 -0
  89. package/dist/chunk-MEVASN3P.mjs +305 -0
  90. package/dist/chunk-MEVASN3P.mjs.map +1 -0
  91. package/dist/chunk-PDSHDDPV.mjs +255 -0
  92. package/dist/chunk-PDSHDDPV.mjs.map +1 -0
  93. package/dist/chunk-Q3SQOYG6.mjs +218 -0
  94. package/dist/chunk-Q3SQOYG6.mjs.map +1 -0
  95. package/dist/chunk-QIMDG34B.mjs +229 -0
  96. package/dist/chunk-QIMDG34B.mjs.map +1 -0
  97. package/dist/index.d.mts +171 -0
  98. package/dist/index.d.ts +171 -0
  99. package/dist/index.js +3463 -0
  100. package/dist/index.js.map +1 -0
  101. package/dist/index.mjs +111 -0
  102. package/dist/index.mjs.map +1 -0
  103. package/package.json +49 -0
  104. package/python/__pycache__/coreml_inference.cpython-313.pyc +0 -0
  105. package/python/__pycache__/openvino_inference.cpython-313.pyc +0 -0
  106. package/python/__pycache__/pytorch_inference.cpython-313.pyc +0 -0
  107. package/python/coreml_inference.py +319 -0
  108. package/python/openvino_inference.py +247 -0
  109. package/python/pytorch_inference.py +255 -0
package/dist/index.mjs ADDED
@@ -0,0 +1,111 @@
1
+ import {
2
+ CameraNativeDetectionAddon
3
+ } from "./chunk-DTOAB2CE.mjs";
4
+ import {
5
+ BirdGlobalClassifierAddon
6
+ } from "./chunk-Q3SQOYG6.mjs";
7
+ import {
8
+ BirdNABirdsClassifierAddon
9
+ } from "./chunk-3MQFUDRU.mjs";
10
+ import {
11
+ AnimalClassifierAddon
12
+ } from "./chunk-J4WRYHHY.mjs";
13
+ import {
14
+ ANIMAL_TYPE_MODELS,
15
+ BIRD_NABIRDS_MODELS,
16
+ BIRD_SPECIES_MODELS
17
+ } from "./chunk-ISOIDU4U.mjs";
18
+ import {
19
+ MotionDetectionAddon,
20
+ detectMotion
21
+ } from "./chunk-QIMDG34B.mjs";
22
+ import {
23
+ OBJECT_DETECTION_MODELS,
24
+ ObjectDetectionAddon,
25
+ SEGMENTATION_MODELS
26
+ } from "./chunk-AYBFB7ID.mjs";
27
+ import {
28
+ FACE_DETECTION_MODELS,
29
+ FaceDetectionAddon,
30
+ scrfdPostprocess
31
+ } from "./chunk-MEVASN3P.mjs";
32
+ import {
33
+ FACE_RECOGNITION_MODELS,
34
+ FaceRecognitionAddon,
35
+ cosineSimilarity,
36
+ l2Normalize
37
+ } from "./chunk-B3R66MPF.mjs";
38
+ import {
39
+ PLATE_DETECTION_MODELS,
40
+ PlateDetectionAddon
41
+ } from "./chunk-5JJZGKL7.mjs";
42
+ import {
43
+ iou,
44
+ nms,
45
+ yoloPostprocess
46
+ } from "./chunk-KUO2BVFY.mjs";
47
+ import {
48
+ PLATE_RECOGNITION_MODELS,
49
+ PlateRecognitionAddon,
50
+ ctcDecode
51
+ } from "./chunk-PDSHDDPV.mjs";
52
+ import {
53
+ cropRegion,
54
+ jpegToRgb,
55
+ letterbox,
56
+ resizeAndNormalize,
57
+ rgbToGrayscale
58
+ } from "./chunk-6OR5TE7A.mjs";
59
+ import {
60
+ AUDIO_CLASSIFICATION_MODELS,
61
+ AudioClassificationAddon,
62
+ yamnetPostprocess
63
+ } from "./chunk-5AIQSN32.mjs";
64
+ import {
65
+ NodeInferenceEngine,
66
+ PythonInferenceEngine,
67
+ probeOnnxBackends,
68
+ resolveEngine
69
+ } from "./chunk-LPI42WL6.mjs";
70
+ export {
71
+ ANIMAL_TYPE_MODELS,
72
+ AUDIO_CLASSIFICATION_MODELS,
73
+ AnimalClassifierAddon,
74
+ AudioClassificationAddon,
75
+ BIRD_NABIRDS_MODELS,
76
+ BIRD_SPECIES_MODELS,
77
+ BirdGlobalClassifierAddon,
78
+ BirdNABirdsClassifierAddon,
79
+ CameraNativeDetectionAddon,
80
+ FACE_DETECTION_MODELS,
81
+ FACE_RECOGNITION_MODELS,
82
+ FaceDetectionAddon,
83
+ FaceRecognitionAddon,
84
+ MotionDetectionAddon,
85
+ NodeInferenceEngine,
86
+ OBJECT_DETECTION_MODELS,
87
+ ObjectDetectionAddon,
88
+ PLATE_DETECTION_MODELS,
89
+ PLATE_RECOGNITION_MODELS,
90
+ PlateDetectionAddon,
91
+ PlateRecognitionAddon,
92
+ PythonInferenceEngine,
93
+ SEGMENTATION_MODELS,
94
+ cosineSimilarity,
95
+ cropRegion,
96
+ ctcDecode,
97
+ detectMotion,
98
+ iou,
99
+ jpegToRgb,
100
+ l2Normalize,
101
+ letterbox,
102
+ nms,
103
+ probeOnnxBackends,
104
+ resizeAndNormalize,
105
+ resolveEngine,
106
+ rgbToGrayscale,
107
+ scrfdPostprocess,
108
+ yamnetPostprocess,
109
+ yoloPostprocess
110
+ };
111
+ //# sourceMappingURL=index.mjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "@camstack/addon-vision",
3
+ "version": "0.1.0",
4
+ "description": "Builtin detection addons for CamStack — object detection, face, plate, audio, motion",
5
+ "keywords": ["camstack", "addon", "camstack-addon", "vision", "object-detection", "face-detection", "motion-detection", "camera"],
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/camstack/server"
10
+ },
11
+ "main": "./dist/index.js",
12
+ "module": "./dist/index.mjs",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": { "types": "./dist/index.d.ts", "import": "./dist/index.mjs", "require": "./dist/index.js" },
16
+ "./addons/*": { "types": "./dist/addons/*/index.d.ts", "import": "./dist/addons/*/index.mjs", "require": "./dist/addons/*/index.js" }
17
+ },
18
+ "camstack": {
19
+ "addons": [
20
+ { "id": "motion-detection", "entry": "./dist/addons/motion-detection/index.js", "slot": "detector" },
21
+ { "id": "object-detection", "entry": "./dist/addons/object-detection/index.js", "slot": "detector" },
22
+ { "id": "face-detection", "entry": "./dist/addons/face-detection/index.js", "slot": "cropper" },
23
+ { "id": "face-recognition", "entry": "./dist/addons/face-recognition/index.js", "slot": "classifier" },
24
+ { "id": "plate-detection", "entry": "./dist/addons/plate-detection/index.js", "slot": "cropper" },
25
+ { "id": "plate-recognition", "entry": "./dist/addons/plate-recognition/index.js", "slot": "classifier" },
26
+ { "id": "audio-classification", "entry": "./dist/addons/audio-classification/index.js", "slot": "classifier" },
27
+ { "id": "camera-native-detection", "entry": "./dist/addons/camera-native-detection/index.js", "slot": "detector" },
28
+ { "id": "bird-global-classifier", "entry": "./dist/addons/bird-global-classifier/index.js", "slot": "classifier" },
29
+ { "id": "bird-nabirds-classifier", "entry": "./dist/addons/bird-nabirds-classifier/index.js", "slot": "classifier" },
30
+ { "id": "animal-classifier", "entry": "./dist/addons/animal-classifier/index.js", "slot": "classifier" }
31
+ ]
32
+ },
33
+ "files": ["dist", "python"],
34
+ "scripts": {
35
+ "build": "tsup",
36
+ "dev": "tsup --watch",
37
+ "typecheck": "tsc --noEmit",
38
+ "test": "vitest run",
39
+ "test:watch": "vitest"
40
+ },
41
+ "peerDependencies": { "@camstack/types": "^0.1.0" },
42
+ "dependencies": { "onnxruntime-node": "^1.24.3", "sharp": "^0.34.0" },
43
+ "devDependencies": {
44
+ "@camstack/types": "*",
45
+ "tsup": "^8.0.0",
46
+ "typescript": "~5.9.0",
47
+ "vitest": "^3.0.0"
48
+ }
49
+ }
@@ -0,0 +1,319 @@
1
+ #!/usr/bin/env python3
2
+ """CoreML inference for YOLO object detection.
3
+
4
+ Binary IPC protocol over stdin/stdout:
5
+ Input: [4 bytes LE uint32 length][JPEG bytes]
6
+ Output: [4 bytes LE uint32 length][JSON bytes]
7
+
8
+ JSON output format:
9
+ {
10
+ "detections": [
11
+ {"className": "person", "score": 0.92, "bbox": [x1, y1, x2, y2]},
12
+ ...
13
+ ],
14
+ "inferenceMs": 12.5
15
+ }
16
+
17
+ Bounding boxes are NORMALIZED (0-1 range).
18
+
19
+ Usage:
20
+ python coreml_inference.py <model_path> [--device cpu|ane|gpu|all] [--input-size 640] [--confidence 0.25]
21
+ """
22
+ import sys
23
+ import struct
24
+ import json
25
+ import argparse
26
+ import time
27
+ import io
28
+ import numpy as np
29
+ from PIL import Image
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Binary IPC helpers
34
+ # ---------------------------------------------------------------------------
35
+
36
def read_frame(stream) -> bytes:
    """Read one length-prefixed JPEG frame from the binary IPC *stream*.

    Wire format: 4-byte little-endian uint32 payload length, followed by
    the payload bytes. Returns ``b""`` when the header cannot be read in
    full (EOF / closed pipe), which the caller treats as shutdown.
    """
    prefix = stream.read(4)
    if len(prefix) != 4:
        # Short header: the writer closed the pipe mid-stream or at EOF.
        return b""
    (payload_len,) = struct.unpack("<I", prefix)
    return stream.read(payload_len)
43
+
44
+
45
def write_result(stream, result: dict) -> None:
    """Serialise *result* as JSON and emit it length-prefixed on *stream*.

    Mirrors :func:`read_frame`: 4-byte little-endian uint32 length header,
    then the UTF-8 JSON payload. Flushes so the reader sees it immediately.
    """
    body = json.dumps(result).encode("utf-8")
    stream.write(struct.pack("<I", len(body)))
    stream.write(body)
    stream.flush()
50
+
51
+
52
+ # ---------------------------------------------------------------------------
53
+ # Image preprocessing
54
+ # ---------------------------------------------------------------------------
55
+
56
def letterbox(img: Image.Image, size: int) -> tuple[np.ndarray, float, tuple[int, int]]:
    """Letterbox *img* into a ``size``×``size`` square, keeping aspect ratio.

    The image is scaled to fit, then centred on a grey (114, 114, 114)
    canvas — the conventional YOLO padding colour.

    Returns ``(array, scale, (pad_x, pad_y))`` where ``array`` is an HWC
    float32 array normalised to [0, 1], and ``scale``/``pad`` let callers
    map detections back into original-image coordinates.
    """
    src_w, src_h = img.size
    scale = min(size / src_w, size / src_h)
    new_w = int(src_w * scale)
    new_h = int(src_h * scale)

    pad_x = (size - new_w) // 2
    pad_y = (size - new_h) // 2

    canvas = Image.new("RGB", (size, size), (114, 114, 114))
    canvas.paste(img.resize((new_w, new_h), Image.BILINEAR), (pad_x, pad_y))

    return np.array(canvas, dtype=np.float32) / 255.0, scale, (pad_x, pad_y)
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # COCO 80 class names
76
+ # ---------------------------------------------------------------------------
77
+
78
# The 80 COCO category names, indexed by YOLO class id — list order is
# significant and must match the model's training label order.
COCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
    "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket",
    "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
    "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
90
+
91
+
92
+ # ---------------------------------------------------------------------------
93
+ # YOLO output parsing
94
+ # ---------------------------------------------------------------------------
95
+
96
def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
                x1b: float, y1b: float, x2b: float, y2b: float) -> float:
    """Return intersection-over-union of two corner-format (x1,y1,x2,y2) boxes.

    Returns 0.0 when the boxes do not overlap or the union is degenerate.
    """
    overlap_w = min(x2a, x2b) - max(x1a, x1b)
    overlap_h = min(y2a, y2b) - max(y1a, y1b)
    if overlap_w <= 0 or overlap_h <= 0:
        return 0.0

    intersection = overlap_w * overlap_h
    union = ((x2a - x1a) * (y2a - y1a)
             + (x2b - x1b) * (y2b - y1b)
             - intersection)
    return intersection / union if union > 0 else 0.0
108
+
109
+
110
def parse_yolo_output(output: np.ndarray, conf_threshold: float,
                      img_w: int, img_h: int, input_size: int,
                      scale: float, pad: tuple[int, int]) -> list[dict]:
    """Parse a raw YOLO output tensor into normalised detections.

    Args:
        output: model output, [1, 84, 8400] (or already [8400, 84]);
            rows are [cx, cy, w, h, 80 class scores] in letterboxed
            input-pixel coordinates.
        conf_threshold: minimum class score to keep a box.
        img_w, img_h: original image size, used to normalise boxes to 0-1.
        input_size: square model input size — accepted for interface
            symmetry with the caller; not read by this function.
        scale: letterbox scale factor returned by letterbox().
        pad: (pad_x, pad_y) letterbox padding returned by letterbox().

    Returns:
        List of {"className", "score", "bbox": [x1, y1, x2, y2]} dicts,
        boxes normalised to 0-1, at most 100 entries, highest score first.
    """
    # Squeeze batch dim and transpose to [num_boxes, 84]
    if output.ndim == 3 and output.shape[0] == 1:
        output = output[0]
    if output.shape[0] == 84:
        output = output.T  # [8400, 84]

    cx, cy, w, h = output[:, 0], output[:, 1], output[:, 2], output[:, 3]
    class_scores = output[:, 4:]  # [num_boxes, 80]

    # Best class per box; gather that class's score for each row.
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_ids)), class_ids]

    # Drop low-confidence boxes before the (quadratic) NMS below.
    mask = scores > conf_threshold
    cx, cy, w, h = cx[mask], cy[mask], w[mask], h[mask]
    scores, class_ids = scores[mask], class_ids[mask]

    # Centre-format -> corner-format
    x1 = cx - w / 2
    y1 = cy - h / 2
    x2 = cx + w / 2
    y2 = cy + h / 2

    # Undo letterbox padding and scale
    pad_x, pad_y = pad
    x1 = (x1 - pad_x) / scale
    y1 = (y1 - pad_y) / scale
    x2 = (x2 - pad_x) / scale
    y2 = (y2 - pad_y) / scale

    # Normalise to 0-1 range
    x1 = np.clip(x1 / img_w, 0, 1)
    y1 = np.clip(y1 / img_h, 0, 1)
    x2 = np.clip(x2 / img_w, 0, 1)
    y2 = np.clip(y2 / img_h, 0, 1)

    # Greedy NMS, class-agnostic (boxes of different classes suppress each
    # other) with a fixed IoU threshold of 0.45. Only the 100 top-scoring
    # candidates are considered as "keepers", so the result is capped at 100.
    detections: list[dict] = []
    indices = np.argsort(-scores)  # descending by score
    suppressed: set[int] = set()

    for i in indices[:100]:
        idx = int(i)
        if idx in suppressed:
            continue
        detections.append({
            "className": COCO_CLASSES[class_ids[idx]] if class_ids[idx] < len(COCO_CLASSES) else f"class_{class_ids[idx]}",
            "score": round(float(scores[idx]), 4),
            "bbox": [round(float(x1[idx]), 4), round(float(y1[idx]), 4),
                     round(float(x2[idx]), 4), round(float(y2[idx]), 4)],
        })
        # Suppress every remaining box that overlaps the kept one too much.
        for j in indices:
            jdx = int(j)
            if jdx in suppressed or jdx == idx:
                continue
            iou = compute_iou(float(x1[idx]), float(y1[idx]), float(x2[idx]), float(y2[idx]),
                              float(x1[jdx]), float(y1[jdx]), float(x2[jdx]), float(y2[jdx]))
            if iou > 0.45:
                suppressed.add(jdx)
        # Mark the kept box too, so later iterations skip it cheaply.
        suppressed.add(idx)

    return detections
175
+
176
+
177
+ # ---------------------------------------------------------------------------
178
+ # CoreML NMS output parser (model with built-in NMS)
179
+ # ---------------------------------------------------------------------------
180
+
181
+ def _parse_nms_output(coords: np.ndarray, confs: np.ndarray,
182
+ conf_threshold: float, img_w: int, img_h: int) -> list[dict]:
183
+ """Parse CoreML NMS output: (N, 4) coords [cx, cy, w, h] normalized + (N, 80) scores."""
184
+ if coords.shape[0] == 0:
185
+ return []
186
+
187
+ class_ids = np.argmax(confs, axis=1)
188
+ scores = confs[np.arange(len(class_ids)), class_ids]
189
+
190
+ mask = scores > conf_threshold
191
+ coords = coords[mask]
192
+ scores = scores[mask]
193
+ class_ids = class_ids[mask]
194
+
195
+ if len(scores) == 0:
196
+ return []
197
+
198
+ # CoreML coords are [cx, cy, w, h] already normalized 0-1
199
+ cx, cy, w, h = coords[:, 0], coords[:, 1], coords[:, 2], coords[:, 3]
200
+ x1 = np.clip(cx - w / 2, 0, 1)
201
+ y1 = np.clip(cy - h / 2, 0, 1)
202
+ x2 = np.clip(cx + w / 2, 0, 1)
203
+ y2 = np.clip(cy + h / 2, 0, 1)
204
+
205
+ detections = []
206
+ for i in range(len(scores)):
207
+ detections.append({
208
+ "className": COCO_CLASSES[class_ids[i]] if class_ids[i] < len(COCO_CLASSES) else f"class_{class_ids[i]}",
209
+ "score": round(float(scores[i]), 4),
210
+ "bbox": [round(float(x1[i]), 4), round(float(y1[i]), 4),
211
+ round(float(x2[i]), 4), round(float(y2[i]), 4)],
212
+ })
213
+
214
+ return sorted(detections, key=lambda d: d["score"], reverse=True)[:100]
215
+
216
+
217
+ # ---------------------------------------------------------------------------
218
+ # Main
219
+ # ---------------------------------------------------------------------------
220
+
221
def main() -> None:
    """Run the CoreML detection loop over the binary stdin/stdout IPC.

    Loads the model once, then reads length-prefixed JPEG frames from
    stdin, runs inference, and writes length-prefixed JSON results to
    stdout until EOF. Per-frame errors are reported in the result payload
    (never fatal) so the parent process keeps receiving one response per
    frame.
    """
    parser = argparse.ArgumentParser(description="CoreML inference via binary IPC")
    parser.add_argument("model_path", help="Path to .mlpackage or .mlmodel")
    parser.add_argument("--device", default="all", choices=["cpu", "ane", "gpu", "all"],
                        help="Compute unit: cpu, ane (Apple Neural Engine), gpu, or all")
    parser.add_argument("--input-size", type=int, default=640,
                        help="Model input size (square)")
    parser.add_argument("--confidence", type=float, default=0.25,
                        help="Confidence threshold")
    args = parser.parse_args()

    # Imported lazily so argument parsing / --help work without coremltools.
    import coremltools as ct

    compute_unit_map = {
        "cpu": ct.ComputeUnit.CPU_ONLY,
        "gpu": ct.ComputeUnit.CPU_AND_GPU,
        "ane": ct.ComputeUnit.CPU_AND_NE,
        "all": ct.ComputeUnit.ALL,
    }
    compute_units = compute_unit_map.get(args.device, ct.ComputeUnit.ALL)

    sys.stderr.write(f"[coreml] Loading model: {args.model_path}\n")
    sys.stderr.write(f"[coreml] Compute units: {args.device}\n")
    sys.stderr.flush()

    model = ct.models.MLModel(args.model_path, compute_units=compute_units)

    # Resolve model metadata ONCE. The spec is invariant after load, and
    # get_spec() copies the protobuf — the original code called it on every
    # frame (three times per frame) inside the hot loop.
    spec = model.get_spec()
    input_spec = spec.description.input[0]
    input_name = input_spec.name
    input_type = input_spec.type.WhichOneof("Type")
    input_names = [i.name for i in spec.description.input]
    output_names = [o.name for o in spec.description.output]
    # Models exported with built-in NMS expose 'confidence' + 'coordinates'.
    has_builtin_nms = 'confidence' in output_names and 'coordinates' in output_names

    sys.stderr.write(f"[coreml] Model loaded — input: {input_name}, ready for inference\n")
    sys.stderr.flush()

    stdin_binary = sys.stdin.buffer
    stdout_binary = sys.stdout.buffer

    while True:
        jpeg = read_frame(stdin_binary)
        if not jpeg:
            break  # EOF / pipe closed by the parent — normal shutdown

        try:
            start = time.perf_counter()

            img = Image.open(io.BytesIO(jpeg)).convert("RGB")
            orig_w, orig_h = img.size

            arr, scale, pad = letterbox(img, args.input_size)

            # CoreML models may expect a PIL Image (imageType) or an array.
            predict_input: dict = {}
            if input_type == "imageType":
                letterboxed_img = Image.fromarray((arr * 255).astype(np.uint8))
                predict_input[input_name] = letterboxed_img
            else:
                # HWC float -> NCHW batch of 1
                input_arr = arr.transpose(2, 0, 1)[np.newaxis]
                predict_input[input_name] = input_arr

            # Pass thresholds only if the model declares those inputs.
            if 'iouThreshold' in input_names:
                predict_input['iouThreshold'] = 0.45
            if 'confidenceThreshold' in input_names:
                predict_input['confidenceThreshold'] = args.confidence

            predictions = model.predict(predict_input)

            if has_builtin_nms:
                # Built-in NMS: (N, 4) [cx, cy, w, h] normalized + (N, 80) scores
                coords = np.array(predictions['coordinates'])
                confs = np.array(predictions['confidence'])
                detections = _parse_nms_output(coords, confs, args.confidence, orig_w, orig_h)
            else:
                # Raw YOLO output: [1, 84, 8400]
                output_key = list(predictions.keys())[0]
                output = np.array(predictions[output_key])
                detections = parse_yolo_output(
                    output, args.confidence, orig_w, orig_h,
                    args.input_size, scale, pad,
                )

            elapsed = (time.perf_counter() - start) * 1000
            result = {"detections": detections, "inferenceMs": round(elapsed, 2)}
        except Exception as exc:
            # Report per-frame failures to the parent instead of crashing —
            # the IPC contract is one response per frame.
            sys.stderr.write(f"[coreml] Inference error: {exc}\n")
            sys.stderr.flush()
            result = {"detections": [], "error": str(exc), "inferenceMs": 0}

        write_result(stdout_binary, result)
316
+
317
+
318
if __name__ == "__main__":
    # Script entry point: run the stdin/stdout IPC inference loop until EOF.
    main()