@camstack/addon-vision 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/addons/animal-classifier/index.d.mts +25 -0
- package/dist/addons/animal-classifier/index.d.ts +25 -0
- package/dist/addons/animal-classifier/index.js +652 -0
- package/dist/addons/animal-classifier/index.js.map +1 -0
- package/dist/addons/animal-classifier/index.mjs +10 -0
- package/dist/addons/animal-classifier/index.mjs.map +1 -0
- package/dist/addons/audio-classification/index.d.mts +31 -0
- package/dist/addons/audio-classification/index.d.ts +31 -0
- package/dist/addons/audio-classification/index.js +572 -0
- package/dist/addons/audio-classification/index.js.map +1 -0
- package/dist/addons/audio-classification/index.mjs +8 -0
- package/dist/addons/audio-classification/index.mjs.map +1 -0
- package/dist/addons/bird-global-classifier/index.d.mts +26 -0
- package/dist/addons/bird-global-classifier/index.d.ts +26 -0
- package/dist/addons/bird-global-classifier/index.js +658 -0
- package/dist/addons/bird-global-classifier/index.js.map +1 -0
- package/dist/addons/bird-global-classifier/index.mjs +10 -0
- package/dist/addons/bird-global-classifier/index.mjs.map +1 -0
- package/dist/addons/bird-nabirds-classifier/index.d.mts +28 -0
- package/dist/addons/bird-nabirds-classifier/index.d.ts +28 -0
- package/dist/addons/bird-nabirds-classifier/index.js +700 -0
- package/dist/addons/bird-nabirds-classifier/index.js.map +1 -0
- package/dist/addons/bird-nabirds-classifier/index.mjs +10 -0
- package/dist/addons/bird-nabirds-classifier/index.mjs.map +1 -0
- package/dist/addons/camera-native-detection/index.d.mts +32 -0
- package/dist/addons/camera-native-detection/index.d.ts +32 -0
- package/dist/addons/camera-native-detection/index.js +99 -0
- package/dist/addons/camera-native-detection/index.js.map +1 -0
- package/dist/addons/camera-native-detection/index.mjs +7 -0
- package/dist/addons/camera-native-detection/index.mjs.map +1 -0
- package/dist/addons/face-detection/index.d.mts +24 -0
- package/dist/addons/face-detection/index.d.ts +24 -0
- package/dist/addons/face-detection/index.js +720 -0
- package/dist/addons/face-detection/index.js.map +1 -0
- package/dist/addons/face-detection/index.mjs +10 -0
- package/dist/addons/face-detection/index.mjs.map +1 -0
- package/dist/addons/face-recognition/index.d.mts +24 -0
- package/dist/addons/face-recognition/index.d.ts +24 -0
- package/dist/addons/face-recognition/index.js +603 -0
- package/dist/addons/face-recognition/index.js.map +1 -0
- package/dist/addons/face-recognition/index.mjs +9 -0
- package/dist/addons/face-recognition/index.mjs.map +1 -0
- package/dist/addons/motion-detection/index.d.mts +26 -0
- package/dist/addons/motion-detection/index.d.ts +26 -0
- package/dist/addons/motion-detection/index.js +273 -0
- package/dist/addons/motion-detection/index.js.map +1 -0
- package/dist/addons/motion-detection/index.mjs +8 -0
- package/dist/addons/motion-detection/index.mjs.map +1 -0
- package/dist/addons/object-detection/index.d.mts +26 -0
- package/dist/addons/object-detection/index.d.ts +26 -0
- package/dist/addons/object-detection/index.js +1214 -0
- package/dist/addons/object-detection/index.js.map +1 -0
- package/dist/addons/object-detection/index.mjs +10 -0
- package/dist/addons/object-detection/index.mjs.map +1 -0
- package/dist/addons/plate-detection/index.d.mts +25 -0
- package/dist/addons/plate-detection/index.d.ts +25 -0
- package/dist/addons/plate-detection/index.js +646 -0
- package/dist/addons/plate-detection/index.js.map +1 -0
- package/dist/addons/plate-detection/index.mjs +10 -0
- package/dist/addons/plate-detection/index.mjs.map +1 -0
- package/dist/addons/plate-recognition/index.d.mts +25 -0
- package/dist/addons/plate-recognition/index.d.ts +25 -0
- package/dist/addons/plate-recognition/index.js +648 -0
- package/dist/addons/plate-recognition/index.js.map +1 -0
- package/dist/addons/plate-recognition/index.mjs +9 -0
- package/dist/addons/plate-recognition/index.mjs.map +1 -0
- package/dist/chunk-3MQFUDRU.mjs +260 -0
- package/dist/chunk-3MQFUDRU.mjs.map +1 -0
- package/dist/chunk-5AIQSN32.mjs +227 -0
- package/dist/chunk-5AIQSN32.mjs.map +1 -0
- package/dist/chunk-5JJZGKL7.mjs +186 -0
- package/dist/chunk-5JJZGKL7.mjs.map +1 -0
- package/dist/chunk-6OR5TE7A.mjs +101 -0
- package/dist/chunk-6OR5TE7A.mjs.map +1 -0
- package/dist/chunk-AYBFB7ID.mjs +763 -0
- package/dist/chunk-AYBFB7ID.mjs.map +1 -0
- package/dist/chunk-B3R66MPF.mjs +219 -0
- package/dist/chunk-B3R66MPF.mjs.map +1 -0
- package/dist/chunk-DTOAB2CE.mjs +79 -0
- package/dist/chunk-DTOAB2CE.mjs.map +1 -0
- package/dist/chunk-ISOIDU4U.mjs +54 -0
- package/dist/chunk-ISOIDU4U.mjs.map +1 -0
- package/dist/chunk-J4WRYHHY.mjs +212 -0
- package/dist/chunk-J4WRYHHY.mjs.map +1 -0
- package/dist/chunk-KUO2BVFY.mjs +90 -0
- package/dist/chunk-KUO2BVFY.mjs.map +1 -0
- package/dist/chunk-LPI42WL6.mjs +324 -0
- package/dist/chunk-LPI42WL6.mjs.map +1 -0
- package/dist/chunk-MEVASN3P.mjs +305 -0
- package/dist/chunk-MEVASN3P.mjs.map +1 -0
- package/dist/chunk-PDSHDDPV.mjs +255 -0
- package/dist/chunk-PDSHDDPV.mjs.map +1 -0
- package/dist/chunk-Q3SQOYG6.mjs +218 -0
- package/dist/chunk-Q3SQOYG6.mjs.map +1 -0
- package/dist/chunk-QIMDG34B.mjs +229 -0
- package/dist/chunk-QIMDG34B.mjs.map +1 -0
- package/dist/index.d.mts +171 -0
- package/dist/index.d.ts +171 -0
- package/dist/index.js +3463 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +111 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +49 -0
- package/python/__pycache__/coreml_inference.cpython-313.pyc +0 -0
- package/python/__pycache__/openvino_inference.cpython-313.pyc +0 -0
- package/python/__pycache__/pytorch_inference.cpython-313.pyc +0 -0
- package/python/coreml_inference.py +319 -0
- package/python/openvino_inference.py +247 -0
- package/python/pytorch_inference.py +255 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
// Generated bundle entry for @camstack/addon-vision (tsup/esbuild output).
// Do not edit by hand — this barrel only re-exports the package's public API
// (addon classes, model registries, image/audio helpers, inference engines)
// from the code-split chunk modules below.
import {
  CameraNativeDetectionAddon
} from "./chunk-DTOAB2CE.mjs";
import {
  BirdGlobalClassifierAddon
} from "./chunk-Q3SQOYG6.mjs";
import {
  BirdNABirdsClassifierAddon
} from "./chunk-3MQFUDRU.mjs";
import {
  AnimalClassifierAddon
} from "./chunk-J4WRYHHY.mjs";
import {
  ANIMAL_TYPE_MODELS,
  BIRD_NABIRDS_MODELS,
  BIRD_SPECIES_MODELS
} from "./chunk-ISOIDU4U.mjs";
import {
  MotionDetectionAddon,
  detectMotion
} from "./chunk-QIMDG34B.mjs";
import {
  OBJECT_DETECTION_MODELS,
  ObjectDetectionAddon,
  SEGMENTATION_MODELS
} from "./chunk-AYBFB7ID.mjs";
import {
  FACE_DETECTION_MODELS,
  FaceDetectionAddon,
  scrfdPostprocess
} from "./chunk-MEVASN3P.mjs";
import {
  FACE_RECOGNITION_MODELS,
  FaceRecognitionAddon,
  cosineSimilarity,
  l2Normalize
} from "./chunk-B3R66MPF.mjs";
import {
  PLATE_DETECTION_MODELS,
  PlateDetectionAddon
} from "./chunk-5JJZGKL7.mjs";
import {
  iou,
  nms,
  yoloPostprocess
} from "./chunk-KUO2BVFY.mjs";
import {
  PLATE_RECOGNITION_MODELS,
  PlateRecognitionAddon,
  ctcDecode
} from "./chunk-PDSHDDPV.mjs";
import {
  cropRegion,
  jpegToRgb,
  letterbox,
  resizeAndNormalize,
  rgbToGrayscale
} from "./chunk-6OR5TE7A.mjs";
import {
  AUDIO_CLASSIFICATION_MODELS,
  AudioClassificationAddon,
  yamnetPostprocess
} from "./chunk-5AIQSN32.mjs";
import {
  NodeInferenceEngine,
  PythonInferenceEngine,
  probeOnnxBackends,
  resolveEngine
} from "./chunk-LPI42WL6.mjs";
export {
  ANIMAL_TYPE_MODELS,
  AUDIO_CLASSIFICATION_MODELS,
  AnimalClassifierAddon,
  AudioClassificationAddon,
  BIRD_NABIRDS_MODELS,
  BIRD_SPECIES_MODELS,
  BirdGlobalClassifierAddon,
  BirdNABirdsClassifierAddon,
  CameraNativeDetectionAddon,
  FACE_DETECTION_MODELS,
  FACE_RECOGNITION_MODELS,
  FaceDetectionAddon,
  FaceRecognitionAddon,
  MotionDetectionAddon,
  NodeInferenceEngine,
  OBJECT_DETECTION_MODELS,
  ObjectDetectionAddon,
  PLATE_DETECTION_MODELS,
  PLATE_RECOGNITION_MODELS,
  PlateDetectionAddon,
  PlateRecognitionAddon,
  PythonInferenceEngine,
  SEGMENTATION_MODELS,
  cosineSimilarity,
  cropRegion,
  ctcDecode,
  detectMotion,
  iou,
  jpegToRgb,
  l2Normalize,
  letterbox,
  nms,
  probeOnnxBackends,
  resizeAndNormalize,
  resolveEngine,
  rgbToGrayscale,
  scrfdPostprocess,
  yamnetPostprocess,
  yoloPostprocess
};
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@camstack/addon-vision",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Builtin detection addons for CamStack — object detection, face, plate, audio, motion",
|
|
5
|
+
"keywords": ["camstack", "addon", "camstack-addon", "vision", "object-detection", "face-detection", "motion-detection", "camera"],
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/camstack/server"
|
|
10
|
+
},
|
|
11
|
+
"main": "./dist/index.js",
|
|
12
|
+
"module": "./dist/index.mjs",
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"exports": {
|
|
15
|
+
".": { "import": "./dist/index.mjs", "require": "./dist/index.js", "types": "./dist/index.d.ts" },
|
|
16
|
+
"./addons/*": { "import": "./dist/addons/*/index.mjs", "require": "./dist/addons/*/index.js", "types": "./dist/addons/*/index.d.ts" }
|
|
17
|
+
},
|
|
18
|
+
"camstack": {
|
|
19
|
+
"addons": [
|
|
20
|
+
{ "id": "motion-detection", "entry": "./dist/addons/motion-detection/index.js", "slot": "detector" },
|
|
21
|
+
{ "id": "object-detection", "entry": "./dist/addons/object-detection/index.js", "slot": "detector" },
|
|
22
|
+
{ "id": "face-detection", "entry": "./dist/addons/face-detection/index.js", "slot": "cropper" },
|
|
23
|
+
{ "id": "face-recognition", "entry": "./dist/addons/face-recognition/index.js", "slot": "classifier" },
|
|
24
|
+
{ "id": "plate-detection", "entry": "./dist/addons/plate-detection/index.js", "slot": "cropper" },
|
|
25
|
+
{ "id": "plate-recognition", "entry": "./dist/addons/plate-recognition/index.js", "slot": "classifier" },
|
|
26
|
+
{ "id": "audio-classification", "entry": "./dist/addons/audio-classification/index.js", "slot": "classifier" },
|
|
27
|
+
{ "id": "camera-native-detection", "entry": "./dist/addons/camera-native-detection/index.js", "slot": "detector" },
|
|
28
|
+
{ "id": "bird-global-classifier", "entry": "./dist/addons/bird-global-classifier/index.js", "slot": "classifier" },
|
|
29
|
+
{ "id": "bird-nabirds-classifier", "entry": "./dist/addons/bird-nabirds-classifier/index.js", "slot": "classifier" },
|
|
30
|
+
{ "id": "animal-classifier", "entry": "./dist/addons/animal-classifier/index.js", "slot": "classifier" }
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
"files": ["dist", "python"],
|
|
34
|
+
"scripts": {
|
|
35
|
+
"build": "tsup",
|
|
36
|
+
"dev": "tsup --watch",
|
|
37
|
+
"typecheck": "tsc --noEmit",
|
|
38
|
+
"test": "vitest run",
|
|
39
|
+
"test:watch": "vitest"
|
|
40
|
+
},
|
|
41
|
+
"peerDependencies": { "@camstack/types": "^0.1.0" },
|
|
42
|
+
"dependencies": { "onnxruntime-node": "^1.24.3", "sharp": "^0.34.0" },
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"@camstack/types": "*",
|
|
45
|
+
"tsup": "^8.0.0",
|
|
46
|
+
"typescript": "~5.9.0",
|
|
47
|
+
"vitest": "^3.0.0"
|
|
48
|
+
}
|
|
49
|
+
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,319 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""CoreML inference for YOLO object detection.
|
|
3
|
+
|
|
4
|
+
Binary IPC protocol over stdin/stdout:
|
|
5
|
+
Input: [4 bytes LE uint32 length][JPEG bytes]
|
|
6
|
+
Output: [4 bytes LE uint32 length][JSON bytes]
|
|
7
|
+
|
|
8
|
+
JSON output format:
|
|
9
|
+
{
|
|
10
|
+
"detections": [
|
|
11
|
+
{"className": "person", "score": 0.92, "bbox": [x1, y1, x2, y2]},
|
|
12
|
+
...
|
|
13
|
+
],
|
|
14
|
+
"inferenceMs": 12.5
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
Bounding boxes are NORMALIZED (0-1 range).
|
|
18
|
+
|
|
19
|
+
Usage:
|
|
20
|
+
python coreml_inference.py <model_path> [--device cpu|ane|gpu|all] [--input-size 640] [--confidence 0.25]
|
|
21
|
+
"""
|
|
22
|
+
import sys
|
|
23
|
+
import struct
|
|
24
|
+
import json
|
|
25
|
+
import argparse
|
|
26
|
+
import time
|
|
27
|
+
import io
|
|
28
|
+
import numpy as np
|
|
29
|
+
from PIL import Image
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# ---------------------------------------------------------------------------
|
|
33
|
+
# Binary IPC helpers
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
|
|
36
|
+
def read_frame(stream) -> bytes:
    """Read one length-prefixed JPEG frame from a binary IPC stream.

    Wire format: [4-byte little-endian uint32 length][payload bytes].

    Returns b"" on EOF or on a truncated header/payload; the caller treats
    an empty result as the shutdown signal.
    """
    header = _read_exact(stream, 4)
    if len(header) < 4:
        return b""
    (length,) = struct.unpack("<I", header)
    payload = _read_exact(stream, length)
    # A short payload means the peer died mid-frame; signal EOF rather than
    # handing a truncated JPEG to the decoder downstream.
    if len(payload) != length:
        return b""
    return payload


def _read_exact(stream, n: int) -> bytes:
    """Read exactly *n* bytes, looping over short reads.

    BufferedReader.read(n) may return fewer than *n* bytes for interactive
    or raw streams; only EOF legitimately ends the loop early.
    """
    chunks = []
    remaining = n
    while remaining > 0:
        chunk = stream.read(remaining)
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def write_result(stream, result: dict) -> None:
    """Serialise *result* as UTF-8 JSON and emit one length-prefixed frame.

    Wire format mirrors read_frame: [4-byte LE uint32 length][JSON bytes].
    """
    body = json.dumps(result).encode("utf-8")
    # Single write keeps header + payload in one buffered chunk.
    stream.write(struct.pack("<I", len(body)) + body)
    stream.flush()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
# Image preprocessing
|
|
54
|
+
# ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
def letterbox(img: Image.Image, size: int) -> tuple[np.ndarray, float, tuple[int, int]]:
    """Letterbox *img* into a size x size square, preserving aspect ratio.

    The image is scaled to fit and centred on a grey (114, 114, 114) canvas,
    the conventional YOLO padding colour.

    Returns (HWC float32 array scaled to 0-1, scale factor, (pad_x, pad_y)).
    """
    src_w, src_h = img.size
    ratio = min(size / src_w, size / src_h)
    fit_w = int(src_w * ratio)
    fit_h = int(src_h * ratio)
    scaled = img.resize((fit_w, fit_h), Image.BILINEAR)

    board = Image.new("RGB", (size, size), (114, 114, 114))
    off_x = (size - fit_w) // 2
    off_y = (size - fit_h) // 2
    board.paste(scaled, (off_x, off_y))

    pixels = np.array(board, dtype=np.float32) / 255.0
    return pixels, ratio, (off_x, off_y)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ---------------------------------------------------------------------------
|
|
75
|
+
# COCO 80 class names
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
|
|
78
|
+
# Class names for the 80-category COCO detection label set, index-aligned
# with the YOLO class-id output (0 = "person" ... 79 = "toothbrush").
COCO_CLASSES = [
    # 0-9
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    # 10-19
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    # 20-29
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    # 30-39
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    # 40-49
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    # 50-59
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    # 60-69
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    # 70-79
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
# YOLO output parsing
|
|
94
|
+
# ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
def compute_iou(x1a: float, y1a: float, x2a: float, y2a: float,
                x1b: float, y1b: float, x2b: float, y2b: float) -> float:
    """Intersection-over-union of two corner-format boxes (x1, y1, x2, y2).

    Returns 0.0 for disjoint boxes or a degenerate (non-positive) union.
    """
    overlap_w = min(x2a, x2b) - max(x1a, x1b)
    overlap_h = min(y2a, y2b) - max(y1a, y1b)
    if overlap_w <= 0 or overlap_h <= 0:
        # No overlap: IoU is zero regardless of box areas.
        return 0.0
    intersection = overlap_w * overlap_h
    union = ((x2a - x1a) * (y2a - y1a)
             + (x2b - x1b) * (y2b - y1b)
             - intersection)
    return intersection / union if union > 0 else 0.0
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def parse_yolo_output(output: np.ndarray, conf_threshold: float,
                      img_w: int, img_h: int, input_size: int,
                      scale: float, pad: tuple[int, int]) -> list[dict]:
    """Decode a raw YOLO head (e.g. [1, 84, 8400]) into normalised detections.

    Steps: squeeze/transpose to [n_anchors, 4 + n_classes], threshold by best
    class score, convert centre-format boxes to corners, undo the letterbox
    transform, normalise to 0-1 in ORIGINAL image coordinates, then greedy
    class-agnostic NMS (IoU > 0.45) over the top-100 candidates.

    NOTE(review): *input_size* is unused — the letterbox is undone via
    *scale* and *pad* alone; the parameter is kept for interface stability.
    """
    preds = output
    if preds.ndim == 3 and preds.shape[0] == 1:
        preds = preds[0]
    if preds.shape[0] == 84:
        # Channel-first [84, n] layout -> anchor-first [n, 84].
        preds = preds.T

    cx, cy, w, h = preds[:, 0], preds[:, 1], preds[:, 2], preds[:, 3]
    per_class = preds[:, 4:]  # [n_anchors, n_classes]

    best_class = np.argmax(per_class, axis=1)
    best_score = per_class[np.arange(len(best_class)), best_class]

    keep = best_score > conf_threshold
    cx, cy, w, h = cx[keep], cy[keep], w[keep], h[keep]
    best_score, best_class = best_score[keep], best_class[keep]

    # Centre format -> corners, still in letterboxed input pixels.
    x1 = cx - w / 2
    y1 = cy - h / 2
    x2 = cx + w / 2
    y2 = cy + h / 2

    # Undo letterbox padding and scaling back to original-image pixels.
    pad_x, pad_y = pad
    x1 = (x1 - pad_x) / scale
    y1 = (y1 - pad_y) / scale
    x2 = (x2 - pad_x) / scale
    y2 = (y2 - pad_y) / scale

    # Normalise to the 0-1 range of the original image.
    x1 = np.clip(x1 / img_w, 0, 1)
    y1 = np.clip(y1 / img_h, 0, 1)
    x2 = np.clip(x2 / img_w, 0, 1)
    y2 = np.clip(y2 / img_h, 0, 1)

    # Greedy NMS, highest score first, capped at 100 candidates.
    order = np.argsort(-best_score)
    dropped: set[int] = set()
    results: list[dict] = []

    for raw_i in order[:100]:
        i = int(raw_i)
        if i in dropped:
            continue
        cls = best_class[i]
        label = COCO_CLASSES[cls] if cls < len(COCO_CLASSES) else f"class_{cls}"
        results.append({
            "className": label,
            "score": round(float(best_score[i]), 4),
            "bbox": [round(float(x1[i]), 4), round(float(y1[i]), 4),
                     round(float(x2[i]), 4), round(float(y2[i]), 4)],
        })
        # Suppress every remaining box that overlaps the kept one
        # (class-agnostic: overlapping boxes of other classes drop too).
        for raw_j in order:
            j = int(raw_j)
            if j in dropped or j == i:
                continue
            overlap = compute_iou(
                float(x1[i]), float(y1[i]), float(x2[i]), float(y2[i]),
                float(x1[j]), float(y1[j]), float(x2[j]), float(y2[j]))
            if overlap > 0.45:
                dropped.add(j)
        dropped.add(i)

    return results
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ---------------------------------------------------------------------------
|
|
178
|
+
# CoreML NMS output parser (model with built-in NMS)
|
|
179
|
+
# ---------------------------------------------------------------------------
|
|
180
|
+
|
|
181
|
+
def _parse_nms_output(coords: np.ndarray, confs: np.ndarray,
                      conf_threshold: float, img_w: int, img_h: int) -> list[dict]:
    """Decode a CoreML built-in-NMS head into normalised detections.

    *coords* is (N, 4) [cx, cy, w, h] already normalised 0-1 relative to the
    original image; *confs* is (N, n_classes) per-class scores. The model has
    already applied NMS, so this only thresholds, converts to corner format,
    and sorts by score.

    NOTE(review): *img_w*/*img_h* are unused (boxes arrive normalised) but
    kept for interface stability with parse_yolo_output callers.
    """
    if coords.shape[0] == 0:
        return []

    top_class = np.argmax(confs, axis=1)
    top_score = confs[np.arange(len(top_class)), top_class]

    keep = top_score > conf_threshold
    kept = coords[keep]
    top_score = top_score[keep]
    top_class = top_class[keep]

    if len(top_score) == 0:
        return []

    # Centre format -> corners, clipped to the valid 0-1 range.
    cx, cy, w, h = kept[:, 0], kept[:, 1], kept[:, 2], kept[:, 3]
    x1 = np.clip(cx - w / 2, 0, 1)
    y1 = np.clip(cy - h / 2, 0, 1)
    x2 = np.clip(cx + w / 2, 0, 1)
    y2 = np.clip(cy + h / 2, 0, 1)

    out = []
    for i in range(len(top_score)):
        cls = top_class[i]
        out.append({
            "className": COCO_CLASSES[cls] if cls < len(COCO_CLASSES) else f"class_{cls}",
            "score": round(float(top_score[i]), 4),
            "bbox": [round(float(x1[i]), 4), round(float(y1[i]), 4),
                     round(float(x2[i]), 4), round(float(y2[i]), 4)],
        })

    # Highest-confidence first, capped at 100 detections.
    return sorted(out, key=lambda d: d["score"], reverse=True)[:100]
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
# Main
|
|
219
|
+
# ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
def main() -> None:
    """Run the CoreML inference loop over binary IPC (stdin/stdout).

    Loads the model once, then reads length-prefixed JPEG frames from stdin
    and writes length-prefixed JSON detection results to stdout until EOF.
    Per-frame failures are reported in the JSON result instead of crashing
    the process.
    """
    parser = argparse.ArgumentParser(description="CoreML inference via binary IPC")
    parser.add_argument("model_path", help="Path to .mlpackage or .mlmodel")
    parser.add_argument("--device", default="all", choices=["cpu", "ane", "gpu", "all"],
                        help="Compute unit: cpu, ane (Apple Neural Engine), gpu, or all")
    parser.add_argument("--input-size", type=int, default=640,
                        help="Model input size (square)")
    parser.add_argument("--confidence", type=float, default=0.25,
                        help="Confidence threshold")
    args = parser.parse_args()

    # Imported lazily so argparse errors don't pay the coremltools start-up cost.
    import coremltools as ct

    compute_unit_map = {
        "cpu": ct.ComputeUnit.CPU_ONLY,
        "gpu": ct.ComputeUnit.CPU_AND_GPU,
        "ane": ct.ComputeUnit.CPU_AND_NE,
        "all": ct.ComputeUnit.ALL,
    }
    compute_units = compute_unit_map.get(args.device, ct.ComputeUnit.ALL)

    sys.stderr.write(f"[coreml] Loading model: {args.model_path}\n")
    sys.stderr.write(f"[coreml] Compute units: {args.device}\n")
    sys.stderr.flush()

    model = ct.models.MLModel(args.model_path, compute_units=compute_units)

    # Resolve model metadata ONCE. get_spec() re-parses the model protobuf,
    # so calling it per frame (as the loop previously did, three times per
    # iteration) wastes time on every frame; all of these are invariant.
    spec_desc = model.get_spec().description
    input_spec = spec_desc.input[0]
    input_name = input_spec.name
    # CoreML inputs are either an imageType (expects a PIL image) or a
    # multiArrayType (expects a CHW float tensor).
    input_is_image = input_spec.type.WhichOneof("Type") == "imageType"
    input_names = [i.name for i in spec_desc.input]
    output_names = [o.name for o in spec_desc.output]
    # Models exported with built-in NMS expose 'confidence' + 'coordinates'.
    has_builtin_nms = 'confidence' in output_names and 'coordinates' in output_names
    accepts_iou = 'iouThreshold' in input_names
    accepts_conf = 'confidenceThreshold' in input_names

    sys.stderr.write(f"[coreml] Model loaded — input: {input_name}, ready for inference\n")
    sys.stderr.flush()

    stdin_binary = sys.stdin.buffer
    stdout_binary = sys.stdout.buffer

    while True:
        jpeg = read_frame(stdin_binary)
        if not jpeg:
            break  # EOF: parent closed the pipe.

        try:
            start = time.perf_counter()

            img = Image.open(io.BytesIO(jpeg)).convert("RGB")
            orig_w, orig_h = img.size

            arr, scale, pad = letterbox(img, args.input_size)

            predict_input: dict = {}
            if input_is_image:
                # Image input: round-trip the letterboxed array to a PIL image.
                predict_input[input_name] = Image.fromarray((arr * 255).astype(np.uint8))
            else:
                # Tensor input: HWC -> CHW with a leading batch dim.
                predict_input[input_name] = arr.transpose(2, 0, 1)[np.newaxis]

            # Pass thresholds only when the model declares those inputs.
            if accepts_iou:
                predict_input['iouThreshold'] = 0.45
            if accepts_conf:
                predict_input['confidenceThreshold'] = args.confidence

            predictions = model.predict(predict_input)

            if has_builtin_nms:
                # Built-in NMS: (N, 4) normalised [cx, cy, w, h] + (N, 80) scores.
                coords = np.array(predictions['coordinates'])
                confs = np.array(predictions['confidence'])
                detections = _parse_nms_output(coords, confs, args.confidence, orig_w, orig_h)
            else:
                # Raw YOLO head, e.g. [1, 84, 8400]; single output tensor.
                output_key = next(iter(predictions))
                output = np.array(predictions[output_key])
                detections = parse_yolo_output(
                    output, args.confidence, orig_w, orig_h,
                    args.input_size, scale, pad,
                )

            elapsed = (time.perf_counter() - start) * 1000
            result = {"detections": detections, "inferenceMs": round(elapsed, 2)}
        except Exception as exc:
            # Keep the loop alive: report the failure for this frame only.
            sys.stderr.write(f"[coreml] Inference error: {exc}\n")
            sys.stderr.flush()
            result = {"detections": [], "error": str(exc), "inferenceMs": 0}

        write_result(stdout_binary, result)


if __name__ == "__main__":
    main()
|