@camstack/addon-detection-pipeline 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,278 @@
1
+ """YOLO v8/v9/v11 postprocessor.
2
+
3
+ Input: raw predictions dict from CoreML/OpenVINO/ONNX
4
+ Output: {"kind": "detections", "detections": [{"class", "score", "bbox": [x1,y1,x2,y2]}]}
5
+
6
+ Handles both standard and built-in NMS outputs.
7
+ """
8
+ import numpy as np
9
+
10
# The 80 COCO class names in canonical order. Index-aligned with the
# class-id axis of the YOLO output tensor; used as the default label set
# whenever the model config does not supply "labels".
COCO_80 = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]
24
+
25
+
26
+ def _iou(a: tuple, b: tuple) -> float:
27
+ """IoU between two boxes (x1, y1, x2, y2)."""
28
+ ax1, ay1, ax2, ay2 = a
29
+ bx1, by1, bx2, by2 = b
30
+ ix1 = max(ax1, bx1)
31
+ iy1 = max(ay1, by1)
32
+ ix2 = min(ax2, bx2)
33
+ iy2 = min(ay2, by2)
34
+ inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
35
+ if inter == 0:
36
+ return 0.0
37
+ area_a = (ax2 - ax1) * (ay2 - ay1)
38
+ area_b = (bx2 - bx1) * (by2 - by1)
39
+ union = area_a + area_b - inter
40
+ return inter / union if union > 0 else 0.0
41
+
42
+
43
def _nms(boxes: list[dict], iou_threshold: float = 0.45) -> list[dict]:
    """Greedy non-maximum suppression.

    Input dicts must carry ``_xyxy`` (corner box) and ``score``; the
    highest-scoring box is kept and any remaining box overlapping it by
    more than ``iou_threshold`` IoU is discarded, repeatedly.
    """
    if not boxes:
        return []
    remaining = sorted(boxes, key=lambda b: b["score"], reverse=True)
    picked = []
    while remaining:
        best = remaining.pop(0)
        picked.append(best)
        # Drop everything that overlaps the winner too strongly.
        remaining = [
            cand for cand in remaining
            if _iou(best["_xyxy"], cand["_xyxy"]) <= iou_threshold
        ]
    return picked
60
+
61
+
62
def _postprocess_coreml_nms(
    predictions: dict,
    config: dict,
    orig_w: int,
    orig_h: int,
    input_size: int,
) -> dict:
    """Convert CoreML built-in-NMS outputs to the detections schema.

    Expects two tensors in ``predictions``:
      - 'confidence': (N, numClasses) — class scores, already post-NMS
      - 'coordinates': (N, 4) — [cx, cy, w, h] normalized to the input size

    Boxes are mapped back from the letterboxed model input to original
    image coordinates.
    """
    threshold = float(config.get("confidence", 0))
    labels = config.get("labels", COCO_80)

    conf = np.atleast_2d(np.array(predictions["confidence"]))
    coords = np.atleast_2d(np.array(predictions["coordinates"]))

    # Letterbox geometry is loop-invariant: the model saw the padded image,
    # so every box gets the same un-pad + un-scale transform.
    fit = input_size / max(orig_w, orig_h)
    pad_x = (input_size - int(orig_w * fit)) / 2
    pad_y = (input_size - int(orig_h * fit)) / 2

    found = []
    for i in range(conf.shape[0]):
        cls_idx = int(np.argmax(conf[i]))
        score = float(conf[i, cls_idx])
        if score < threshold:
            continue

        # Normalized [cx, cy, w, h] -> input-pixel corner coordinates.
        cx, cy, bw, bh = (float(v) * input_size for v in coords[i])
        corners = (cx - bw / 2, cy - bh / 2, cx + bw / 2, cy + bh / 2)

        # Undo the letterbox and clamp to the original image bounds.
        ox1 = max(0, min(orig_w, (corners[0] - pad_x) / fit))
        oy1 = max(0, min(orig_h, (corners[1] - pad_y) / fit))
        ox2 = max(0, min(orig_w, (corners[2] - pad_x) / fit))
        oy2 = max(0, min(orig_h, (corners[3] - pad_y) / fit))

        name = labels[cls_idx] if cls_idx < len(labels) else str(cls_idx)
        found.append({
            "class": name,
            "score": round(score, 4),
            "bbox": [round(ox1, 1), round(oy1, 1), round(ox2, 1), round(oy2, 1)],
        })

    return {"kind": "detections", "detections": found}
129
+
130
+
131
def _postprocess_yolo11_nms(
    output: np.ndarray,
    config: dict,
    orig_w: int,
    orig_h: int,
    scale: float,
    pad: tuple[int, int],
) -> dict:
    """Convert YOLO11/v26 post-NMS rows to the detections schema.

    Each row of ``output`` is (x1, y1, x2, y2, score, class_id) in model
    input pixel space; the tensor is already post-NMS. Rows with a
    negative class_id are padding and are skipped.
    """
    threshold = float(config.get("confidence", 0))
    labels = config.get("labels", COCO_80)
    pad_x, pad_y = pad

    def unletterbox_x(v) -> float:
        # Input-pixel x -> original-image x, clamped to [0, orig_w].
        return max(0, min(orig_w, (float(v) - pad_x) / scale))

    def unletterbox_y(v) -> float:
        # Input-pixel y -> original-image y, clamped to [0, orig_h].
        return max(0, min(orig_h, (float(v) - pad_y) / scale))

    found = []
    for row in output:
        score = float(row[4])
        if score < threshold:
            continue
        cls_idx = int(row[5])
        if cls_idx < 0:
            continue  # padding row

        box = [
            round(unletterbox_x(row[0]), 1),
            round(unletterbox_y(row[1]), 1),
            round(unletterbox_x(row[2]), 1),
            round(unletterbox_y(row[3]), 1),
        ]
        name = labels[cls_idx] if cls_idx < len(labels) else str(cls_idx)
        found.append({"class": name, "score": round(score, 4), "bbox": box})

    return {"kind": "detections", "detections": found}
170
+
171
+
172
def postprocess_yolo(
    predictions: dict,
    config: dict,
    orig_w: int,
    orig_h: int,
    scale: float,
    pad: tuple[int, int],
) -> dict:
    """Postprocess YOLO output to structured detections.

    Handles three formats:
    1. CoreML built-in NMS: 'confidence' + 'coordinates' tensors — already post-NMS
    2. YOLO11/v26 post-NMS: [batch, N, 6] rows of (x1, y1, x2, y2, score, class_id)
    3. Raw tensor (ONNX/OpenVINO): [4+numClasses, numBoxes] — NMS applied here

    Args:
        predictions: backend output dict mapping tensor name -> array-like.
        config: model config; reads "confidence", "labels", "numClasses",
            "inputSize".
        orig_w, orig_h: original image size in pixels.
        scale: letterbox scale factor (input px per original px).
        pad: (pad_x, pad_y) letterbox padding in input pixels.

    Returns:
        {"kind": "detections", "detections": [...]} with bboxes in original
        image coordinates.
    """
    # Empty output dict -> no detections (previously raised IndexError on
    # list(predictions.values())[0]).
    if not predictions:
        return {"kind": "detections", "detections": []}

    conf_threshold = float(config.get("confidence", 0))
    labels = config.get("labels", COCO_80)
    num_classes = config.get("numClasses", len(labels))

    # Format 1: CoreML NMS output
    if "confidence" in predictions and "coordinates" in predictions:
        input_size = config.get("inputSize", 640)
        return _postprocess_coreml_nms(predictions, config, orig_w, orig_h, input_size)

    # Format 2: YOLO11/v26 post-NMS: [batch, N, 6] — (x1, y1, x2, y2, score, class_id)
    first_arr = np.array(list(predictions.values())[0])
    if first_arr.ndim == 3:
        first_arr = first_arr[0]  # remove batch dim
    if first_arr.ndim == 2 and first_arr.shape[1] == 6:
        return _postprocess_yolo11_nms(first_arr, config, orig_w, orig_h, scale, pad)

    # Format 3: raw tensor (ONNX / OpenVINO / CoreML without NMS). Pick the
    # output whose shape contains a plausible channel count (84 = 4 + 80
    # classes; 116/144 are common seg/pose variants).
    output = None
    for val in predictions.values():
        arr = np.array(val)
        if arr.ndim >= 2 and any(d in (4 + num_classes, 84, 116, 144) for d in arr.shape):
            output = arr
            break
    if output is None:
        output = np.array(list(predictions.values())[0])

    if output.ndim == 3:
        output = output[0]  # remove batch dim

    # Normalise to channels-first [4+numClasses, numBoxes].
    if output.shape[0] == 4 + num_classes:
        pass  # already [C, N]
    elif output.shape[1] == 4 + num_classes:
        output = output.T  # transpose to [C, N]
    else:
        # Ambiguous shape: assume the longer axis is the box axis.
        if output.shape[0] >= output.shape[1]:
            output = output.T

    num_boxes = output.shape[1]
    # Zero boxes -> no detections (previously np.max below raised on an
    # empty slice).
    if num_boxes == 0:
        return {"kind": "detections", "detections": []}

    # Detect if scores are raw logits (YOLO11/v26) vs sigmoid-activated
    # (YOLOv8/v9): sigmoid output lives in [0, 1], so any sampled score
    # outside that range means logits and we apply sigmoid ourselves.
    sample_scores = output[4:4 + num_classes, :min(100, num_boxes)]
    needs_sigmoid = float(np.max(sample_scores)) > 1.0 or float(np.min(sample_scores)) < 0.0
    if needs_sigmoid:
        # Clip before exp so extreme logits cannot overflow.
        output[4:4 + num_classes, :] = 1.0 / (1.0 + np.exp(-np.clip(output[4:4 + num_classes, :], -50, 50)))

    # Vectorised candidate extraction — replaces a per-box Python loop
    # (~20ms on 8400 boxes) with pure numpy (~0.5ms).
    class_scores = output[4:4 + num_classes, :]                    # (C, N)
    best_classes = np.argmax(class_scores, axis=0)                 # (N,)
    best_scores = class_scores[best_classes, np.arange(num_boxes)]  # (N,)
    mask = best_scores >= conf_threshold
    if not np.any(mask):
        return {"kind": "detections", "detections": []}

    # Gather only above-threshold boxes.
    idxs = np.nonzero(mask)[0]
    cx = output[0, idxs]
    cy = output[1, idxs]
    bw = output[2, idxs]
    bh = output[3, idxs]
    sc = best_scores[idxs]
    cl = best_classes[idxs]

    # Centre -> corner, letterbox -> original coords, clamped to the image.
    x1 = np.clip((cx - bw / 2 - pad[0]) / scale, 0, orig_w)
    y1 = np.clip((cy - bh / 2 - pad[1]) / scale, 0, orig_h)
    x2 = np.clip((cx + bw / 2 - pad[0]) / scale, 0, orig_w)
    y2 = np.clip((cy + bh / 2 - pad[1]) / scale, 0, orig_h)

    candidates = []
    for k in range(len(idxs)):
        ci = int(cl[k])
        candidates.append({
            "class": labels[ci] if ci < len(labels) else str(ci),
            "score": round(float(sc[k]), 4),
            "bbox": [round(float(x1[k]), 1), round(float(y1[k]), 1), round(float(x2[k]), 1), round(float(y2[k]), 1)],
            "_xyxy": (float(x1[k]), float(y1[k]), float(x2[k]), float(y2[k])),
        })

    kept = _nms(candidates)
    # _xyxy is internal NMS state, not part of the public schema.
    for d in kept:
        del d["_xyxy"]

    return {"kind": "detections", "detections": kept}
@@ -0,0 +1,247 @@
1
+ """YOLO-seg postprocessor (YOLO26-seg instance segmentation).
2
+
3
+ Input: raw predictions dict with two outputs:
4
+ - output0: [1, 300, 38] — 300 NMS-filtered detections, each [x1,y1,x2,y2,conf,class_id,coeff_0..coeff_31]
5
+ - output1: [1, 32, 160, 160] — 32 prototype masks at 160x160
6
+
7
+ For raw (no NMS) models:
8
+ - output0: [1, 4+numClasses+32, numBoxes] — standard YOLO format with mask coefficients appended
9
+
10
+ Output: {"kind": "detections", "detections": [{..., "mask": "<b64>", "maskWidth": N, "maskHeight": N}]}
11
+ """
12
+ import base64
13
+
14
+ import numpy as np
15
+
16
+
17
# The 80 COCO class names in canonical order. Index-aligned with the
# class-id axis of the YOLO output tensor; used as the default label set
# whenever the model config does not supply "labels".
COCO_80 = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra",
    "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier",
    "toothbrush",
]

# Mask prototype coefficients per detection; matches the 32-channel
# prototype tensor described in the module docstring.
NUM_MASK_COEFFS = 32
# Spatial resolution of the prototype masks (output1 is [1, 32, 160, 160]).
PROTO_SIZE = 160
34
+
35
+
36
+ def _iou(a: tuple, b: tuple) -> float:
37
+ """IoU between two boxes (x1, y1, x2, y2)."""
38
+ ax1, ay1, ax2, ay2 = a
39
+ bx1, by1, bx2, by2 = b
40
+ ix1 = max(ax1, bx1)
41
+ iy1 = max(ay1, by1)
42
+ ix2 = min(ax2, bx2)
43
+ iy2 = min(ay2, by2)
44
+ inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
45
+ if inter == 0:
46
+ return 0.0
47
+ area_a = (ax2 - ax1) * (ay2 - ay1)
48
+ area_b = (bx2 - bx1) * (by2 - by1)
49
+ union = area_a + area_b - inter
50
+ return inter / union if union > 0 else 0.0
51
+
52
+
53
def _nms(boxes: list[dict], iou_threshold: float = 0.45) -> list[dict]:
    """Greedy non-maximum suppression over dicts carrying _xyxy and score.

    Highest score wins; anything overlapping a winner by more than
    ``iou_threshold`` IoU is dropped.
    """
    if not boxes:
        return []
    ordered = sorted(boxes, key=lambda b: b["score"], reverse=True)
    total = len(ordered)
    dropped = set()
    survivors = []
    for i, winner in enumerate(ordered):
        if i in dropped:
            continue
        survivors.append(winner)
        # Suppress all lower-ranked boxes that overlap this winner.
        for j in range(i + 1, total):
            if j not in dropped and _iou(winner["_xyxy"], ordered[j]["_xyxy"]) > iou_threshold:
                dropped.add(j)
    return survivors
70
+
71
+
72
+ def _sigmoid(x: np.ndarray) -> np.ndarray:
73
+ """Numerically stable sigmoid."""
74
+ return np.where(x >= 0, 1 / (1 + np.exp(-x)), np.exp(x) / (1 + np.exp(x)))
75
+
76
+
77
def _find_tensors(predictions: dict, num_classes: int) -> tuple[np.ndarray, np.ndarray]:
    """Locate the detection tensor and prototype tensor from model outputs.

    Identification is purely size/shape based because output names vary
    across backends (CoreML / ONNX / OpenVINO):
      - prototypes: the tensor with exactly 32 * 160 * 160 elements
      - detections: the first OTHER tensor with at least 10 elements
        (first in dict iteration order, so ties depend on backend order)

    Returns (detections, prototypes) as numpy arrays.
    detections: [num_dets, channels] row-major
    prototypes: [32, 160, 160]

    Raises:
        ValueError: if either tensor cannot be identified.
    """
    det_tensor = None
    proto_tensor = None

    for key, val in predictions.items():
        arr = np.array(val, dtype=np.float32)

        # Prototype: shape containing 32 * 160 * 160 = 819200
        # NOTE(review): a detection tensor that happened to contain exactly
        # 819200 elements would be misread as the prototypes — confirm this
        # cannot occur for supported exports.
        if arr.size == NUM_MASK_COEFFS * PROTO_SIZE * PROTO_SIZE:
            proto_tensor = arr.reshape(NUM_MASK_COEFFS, PROTO_SIZE, PROTO_SIZE)
            continue

        # Skip tiny tensors
        if arr.size < 10:
            continue

        # Detection tensor: the other large tensor
        if det_tensor is None:
            det_tensor = arr

    if det_tensor is None:
        raise ValueError("YOLO-seg postprocessor: could not find detection tensor")
    if proto_tensor is None:
        raise ValueError("YOLO-seg postprocessor: could not find prototype tensor")

    # Normalize detection tensor to [num_dets, channels]
    while det_tensor.ndim > 2:
        det_tensor = det_tensor[0]  # strip batch dim

    # NMS format: [300, 38] — rows are detections
    # Raw format: [4+nc+32, N] — cols are detections, needs transpose
    nms_cols = 6 + NUM_MASK_COEFFS  # 38
    raw_cols = 4 + num_classes + NUM_MASK_COEFFS

    # Branch order matters: when num_classes == 2, raw_cols == nms_cols == 38
    # and the NMS interpretation deliberately wins. The final elif is a
    # heuristic fallback: assume the longer axis holds the detections.
    if det_tensor.shape[1] == nms_cols:
        pass  # already [N, 38]
    elif det_tensor.shape[0] == nms_cols:
        det_tensor = det_tensor.T
    elif det_tensor.shape[1] == raw_cols:
        pass  # already [N, raw_cols]
    elif det_tensor.shape[0] == raw_cols:
        det_tensor = det_tensor.T
    elif det_tensor.shape[0] < det_tensor.shape[1]:
        det_tensor = det_tensor.T  # likely [C, N] -> [N, C]

    return det_tensor, proto_tensor
129
+
130
+
131
+ def _crop_mask_to_bbox(
132
+ mask_full: np.ndarray,
133
+ x1: float,
134
+ y1: float,
135
+ x2: float,
136
+ y2: float,
137
+ input_size: int,
138
+ mask_threshold: float = 0.5,
139
+ ) -> tuple[np.ndarray, int, int]:
140
+ """Crop sigmoid mask to bbox region, threshold to binary, return (binary, w, h)."""
141
+ proto_scale = PROTO_SIZE / input_size
142
+ px1 = max(0, int(np.floor(x1 * proto_scale)))
143
+ py1 = max(0, int(np.floor(y1 * proto_scale)))
144
+ px2 = min(PROTO_SIZE, int(np.ceil(x2 * proto_scale)))
145
+ py2 = min(PROTO_SIZE, int(np.ceil(y2 * proto_scale)))
146
+
147
+ crop_w = max(1, px2 - px1)
148
+ crop_h = max(1, py2 - py1)
149
+
150
+ cropped = mask_full[py1:py1 + crop_h, px1:px1 + crop_w]
151
+ binary = (cropped > mask_threshold).astype(np.uint8) * 255
152
+ return binary, crop_w, crop_h
153
+
154
+
155
def postprocess_yolo_seg(
    predictions: dict,
    config: dict,
    orig_w: int,
    orig_h: int,
    scale: float,
    pad: tuple[int, int],
) -> dict:
    """Postprocess YOLO-seg output to detections with per-instance masks.

    Handles:
    1. NMS-enabled export: [N, 6+32] with [x1,y1,x2,y2,conf,class_id,...coeffs]
    2. Raw export: [4+numClasses+32, N] standard YOLO transposed format

    Args:
        predictions: backend output dict; must contain a detection tensor
            and a 32x160x160 prototype tensor (see _find_tensors).
        config: reads "confidence", "labels", "numClasses", "inputSize",
            "maskThreshold".
        orig_w, orig_h: original image size in pixels.
        scale: letterbox scale factor (input px per original px).
        pad: (pad_x, pad_y) letterbox padding in input pixels.

    Returns:
        {"kind": "detections", "detections": [...]} where each detection
        carries a base64 binary mask plus maskWidth/maskHeight. Masks are
        cropped in PROTOTYPE resolution (160-grid), so the consumer must
        upscale them to the bbox — NOTE(review): confirm the consumer's
        expectation matches.

    Raises:
        ValueError: propagated from _find_tensors when tensors are missing.
    """
    conf_threshold = float(config.get("confidence", 0))
    labels = config.get("labels", COCO_80)
    num_classes = config.get("numClasses", len(labels))
    input_size = config.get("inputSize", 640)
    mask_threshold = config.get("maskThreshold", 0.5)

    det_tensor, proto_tensor = _find_tensors(predictions, num_classes)

    num_dets = det_tensor.shape[0]
    num_cols = det_tensor.shape[1]
    # 38 columns == NMS-enabled export; anything else is treated as raw.
    is_nms_format = num_cols == 6 + NUM_MASK_COEFFS

    candidates = []

    for i in range(num_dets):
        row = det_tensor[i]

        if is_nms_format:
            # [x1, y1, x2, y2, confidence, class_id, coeff_0..coeff_31]
            x1, y1, x2, y2 = float(row[0]), float(row[1]), float(row[2]), float(row[3])
            best_score = float(row[4])
            best_class = int(row[5])
            coeffs = row[6:6 + NUM_MASK_COEFFS]

            if best_score < conf_threshold:
                continue
            if best_class < 0:
                continue  # padding row (fixed-size NMS output)
        else:
            # [cx, cy, w, h, class_scores..., coeff_0..coeff_31]
            # NOTE(review): unlike the detection postprocessor, this path
            # assumes class scores are already sigmoid-activated — confirm
            # for raw YOLO26-seg exports.
            cx, cy, w, h = float(row[0]), float(row[1]), float(row[2]), float(row[3])
            class_scores = row[4:4 + num_classes]
            best_class = int(np.argmax(class_scores))
            best_score = float(class_scores[best_class])
            coeffs = row[4 + num_classes:4 + num_classes + NUM_MASK_COEFFS]

            if best_score < conf_threshold:
                continue

            # Centre format -> corner format (still in input pixel space).
            x1 = cx - w / 2
            y1 = cy - h / 2
            x2 = cx + w / 2
            y2 = cy + h / 2

        # Compute instance mask: sigmoid(coefficients @ prototypes)
        # coeffs: [32], proto_tensor: [32, 160, 160]
        mask_raw = _sigmoid(coeffs @ proto_tensor.reshape(NUM_MASK_COEFFS, -1)).reshape(
            PROTO_SIZE, PROTO_SIZE
        )

        # Crop mask to bbox (in prototype space), threshold to binary.
        binary, crop_w, crop_h = _crop_mask_to_bbox(
            mask_raw, x1, y1, x2, y2, input_size, mask_threshold
        )
        mask_b64 = base64.b64encode(binary.tobytes()).decode("ascii")

        # Transform bbox from model coords to original image coords,
        # clamped to the image bounds.
        ox1 = max(0, min(orig_w, (x1 - pad[0]) / scale))
        oy1 = max(0, min(orig_h, (y1 - pad[1]) / scale))
        ox2 = max(0, min(orig_w, (x2 - pad[0]) / scale))
        oy2 = max(0, min(orig_h, (y2 - pad[1]) / scale))

        # Class ids beyond the label list fall back to the numeric id.
        label = labels[best_class] if best_class < len(labels) else str(best_class)
        candidates.append({
            "class": label,
            "score": round(best_score, 4),
            "bbox": [round(ox1, 1), round(oy1, 1), round(ox2, 1), round(oy2, 1)],
            "mask": mask_b64,
            "maskWidth": crop_w,
            "maskHeight": crop_h,
            # Unrounded box kept for IoU computation inside _nms.
            "_xyxy": (ox1, oy1, ox2, oy2),
        })

    kept = _nms(candidates)
    # Remove internal _xyxy field — not part of the public schema.
    for d in kept:
        del d["_xyxy"]

    return {"kind": "detections", "detections": kept}
@@ -0,0 +1,4 @@
1
+ # CoreML backend (macOS only). coremltools 8.x supports Python 3.9-3.12
2
+ # and ships universal2 wheels for darwin. Pulls tensorflow as transitive,
3
+ # so first install is ~150 MB on disk.
4
+ coremltools>=8.0,<9
@@ -0,0 +1,3 @@
1
+ # ONNX Runtime backend. Ships wheels for darwin/linux/windows on
2
+ # arm64+x86_64 for Python 3.10-3.13.
3
+ onnxruntime>=1.20,<2
@@ -0,0 +1,3 @@
1
+ # OpenVINO backend (Linux/Windows; macOS support is x86_64 only).
2
+ # Wheels available for Python 3.10-3.12.
3
+ openvino>=2025,<2026
@@ -0,0 +1,9 @@
1
+ # Base requirements for inference_pool.py — installed by camstack
2
+ # at addon boot via ctx.deps.installPythonRequirements(...).
3
+ #
4
+ # Backend-specific deps (coremltools / onnxruntime / openvino) are NOT
5
+ # listed here — they are installed lazily from requirements-<backend>.txt
6
+ # the first time the user selects that runtime, to keep first-boot
7
+ # install footprint small.
8
+ numpy>=1.26,<3
9
+ Pillow>=10.0,<12