lvface 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lvface/__init__.py +37 -0
- lvface/detect/__init__.py +16 -0
- lvface/detect/align.py +196 -0
- lvface/detect/base.py +71 -0
- lvface/detect/insightface.py +191 -0
- lvface/embed/__init__.py +6 -0
- lvface/embed/base.py +126 -0
- lvface/embed/onnx.py +182 -0
- lvface/errors.py +9 -0
- lvface/hub.py +126 -0
- lvface/io.py +327 -0
- lvface/metrics.py +283 -0
- lvface/py.typed +1 -0
- lvface/recognizer.py +732 -0
- lvface/registry.py +209 -0
- lvface/types.py +193 -0
- lvface-0.1.0.dist-info/METADATA +357 -0
- lvface-0.1.0.dist-info/RECORD +20 -0
- lvface-0.1.0.dist-info/WHEEL +4 -0
- lvface-0.1.0.dist-info/licenses/LICENSE +21 -0
lvface/__init__.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Modern face-embedding framework."""
|
|
2
|
+
|
|
3
|
+
from . import metrics
|
|
4
|
+
from .detect import FaceDetector, InsightFaceDetector
|
|
5
|
+
from .embed import FaceEmbedder, LVFaceOnnxEmbedder
|
|
6
|
+
from .errors import AlignmentError, NoFaceError
|
|
7
|
+
from .hub import resolve_weights
|
|
8
|
+
from .io import load_image
|
|
9
|
+
from .recognizer import FaceRecognizer
|
|
10
|
+
from .registry import DEFAULT_MODEL, MODELS, Model, resolve_model_path
|
|
11
|
+
from .types import BBox, ComparisonResult, Embedding, Face, Match, MatchResult
|
|
12
|
+
|
|
13
|
+
__version__ = "0.1.0"
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"AlignmentError",
|
|
17
|
+
"BBox",
|
|
18
|
+
"ComparisonResult",
|
|
19
|
+
"DEFAULT_MODEL",
|
|
20
|
+
"Embedding",
|
|
21
|
+
"Face",
|
|
22
|
+
"FaceDetector",
|
|
23
|
+
"FaceEmbedder",
|
|
24
|
+
"FaceRecognizer",
|
|
25
|
+
"InsightFaceDetector",
|
|
26
|
+
"LVFaceOnnxEmbedder",
|
|
27
|
+
"MODELS",
|
|
28
|
+
"Match",
|
|
29
|
+
"MatchResult",
|
|
30
|
+
"Model",
|
|
31
|
+
"NoFaceError",
|
|
32
|
+
"__version__",
|
|
33
|
+
"load_image",
|
|
34
|
+
"metrics",
|
|
35
|
+
"resolve_model_path",
|
|
36
|
+
"resolve_weights",
|
|
37
|
+
]
|
lvface/detect/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Face detection and alignment backends."""
|
|
2
|
+
|
|
3
|
+
from lvface.errors import AlignmentError
|
|
4
|
+
|
|
5
|
+
from .align import ARCFACE_DST, estimate_norm, norm_crop
|
|
6
|
+
from .base import FaceDetector
|
|
7
|
+
from .insightface import InsightFaceDetector
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"ARCFACE_DST",
|
|
11
|
+
"AlignmentError",
|
|
12
|
+
"FaceDetector",
|
|
13
|
+
"InsightFaceDetector",
|
|
14
|
+
"estimate_norm",
|
|
15
|
+
"norm_crop",
|
|
16
|
+
]
|
lvface/detect/align.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Five-point ArcFace alignment without OpenCV."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
import numpy.typing as npt
|
|
7
|
+
from PIL import Image
|
|
8
|
+
|
|
9
|
+
from lvface.errors import AlignmentError
|
|
10
|
+
|
|
11
|
+
# Five-point destination template that ``estimate_norm`` fits landmarks to.
# Stored as float32 with one (x, y) pair per row (axis order presumed from the
# ``(5, 2)`` landmark contract -- confirm against the detector backend).
_ARCFACE_TEMPLATE_POINTS = (
    (38.2946, 51.6963),
    (73.5318, 51.5014),
    (56.0252, 71.7366),
    (41.5493, 92.3655),
    (70.7299, 92.2041),
)
ARCFACE_DST: npt.NDArray[np.float32] = np.array(_ARCFACE_TEMPLATE_POINTS, dtype=np.float32)
# The template is shared module state; make accidental in-place edits fail loudly.
ARCFACE_DST.flags.writeable = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _landmarks_array(kps: object) -> npt.NDArray[np.float64]:
    """Coerce landmarks to float64 and reject unusable geometry.

    Args:
        kps: Array-like landmark coordinates.

    Returns:
        A finite, nondegenerate array with shape ``(5, 2)``.

    Raises:
        AlignmentError: If the input is not numeric, is not shaped ``(5, 2)``,
            holds NaN/Inf values, or its centered points have rank below two.
    """
    try:
        points = np.asarray(kps, dtype=np.float64)
    except (TypeError, ValueError) as error:
        raise AlignmentError("landmarks must be a numeric array with shape (5, 2)") from error

    if points.shape != (5, 2):
        raise AlignmentError(f"need exactly 5 landmarks with shape (5, 2), got {points.shape}")

    if not np.all(np.isfinite(points)):
        raise AlignmentError("landmarks must contain only finite coordinates")

    # Coincident or collinear points leave the centered cloud with rank < 2.
    centered = points - points.mean(axis=0)
    if np.linalg.matrix_rank(centered) < 2:
        raise AlignmentError("degenerate landmark geometry; cannot align")

    return points
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _umeyama(
|
|
51
|
+
source: npt.NDArray[np.float64],
|
|
52
|
+
destination: npt.NDArray[np.float64],
|
|
53
|
+
) -> npt.NDArray[np.float64] | None:
|
|
54
|
+
"""Estimate a 2D similarity transform with Umeyama's method.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
source: Source points with shape ``(N, 2)``.
|
|
58
|
+
destination: Destination points with shape ``(N, 2)``.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
A ``(2, 3)`` affine matrix, or ``None`` for degenerate points.
|
|
62
|
+
"""
|
|
63
|
+
source_mean = source.mean(axis=0)
|
|
64
|
+
destination_mean = destination.mean(axis=0)
|
|
65
|
+
source_centered = source - source_mean
|
|
66
|
+
destination_centered = destination - destination_mean
|
|
67
|
+
source_variance = np.sum(source_centered**2) / source.shape[0]
|
|
68
|
+
if source_variance <= np.finfo(np.float64).eps:
|
|
69
|
+
return None
|
|
70
|
+
|
|
71
|
+
covariance = destination_centered.T @ source_centered / source.shape[0]
|
|
72
|
+
u, singular_values, vh = np.linalg.svd(covariance)
|
|
73
|
+
signs = np.ones(source.shape[1], dtype=np.float64)
|
|
74
|
+
if np.linalg.det(covariance) < 0:
|
|
75
|
+
signs[-1] = -1.0
|
|
76
|
+
|
|
77
|
+
rank = np.linalg.matrix_rank(covariance)
|
|
78
|
+
if rank == 0:
|
|
79
|
+
return None
|
|
80
|
+
|
|
81
|
+
if rank == source.shape[1] - 1:
|
|
82
|
+
if np.linalg.det(u) * np.linalg.det(vh) > 0:
|
|
83
|
+
rotation = u @ vh
|
|
84
|
+
else:
|
|
85
|
+
final_sign = signs[-1]
|
|
86
|
+
signs[-1] = -1.0
|
|
87
|
+
rotation = u @ np.diag(signs) @ vh
|
|
88
|
+
signs[-1] = final_sign
|
|
89
|
+
else:
|
|
90
|
+
rotation = u @ np.diag(signs) @ vh
|
|
91
|
+
|
|
92
|
+
scale = float(singular_values @ signs) / source_variance
|
|
93
|
+
matrix = np.empty((2, 3), dtype=np.float64)
|
|
94
|
+
matrix[:, :2] = scale * rotation
|
|
95
|
+
matrix[:, 2] = destination_mean - scale * rotation @ source_mean
|
|
96
|
+
return matrix
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _invert_affine(matrix: npt.NDArray[np.floating]) -> npt.NDArray[np.float64]:
    """Compute the inverse of a ``(2, 3)`` affine transform in float64.

    Args:
        matrix: Affine matrix with shape ``(2, 3)``.

    Returns:
        The inverse affine matrix.

    Raises:
        AlignmentError: If the linear part is singular or the inverse
            contains non-finite values.
    """
    linear_part = np.asarray(matrix[:, :2], dtype=np.float64)
    offset = np.asarray(matrix[:, 2], dtype=np.float64)

    try:
        linear_inverse = np.linalg.inv(linear_part)
    except np.linalg.LinAlgError as error:
        raise AlignmentError("alignment transform is not invertible") from error

    result = np.empty((2, 3), dtype=np.float64)
    result[:, :2] = linear_inverse
    # Inf * 0 style products are reported by the finiteness check below rather
    # than as NumPy floating-point warnings.
    with np.errstate(invalid="ignore"):
        result[:, 2] = -linear_inverse @ offset

    if np.isfinite(result).all():
        return result
    raise AlignmentError("alignment transform is not finite")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def estimate_norm(kps: object, size: int = 112) -> npt.NDArray[np.float32]:
    """Fit the similarity transform mapping landmarks onto ``ARCFACE_DST``.

    Args:
        kps: Five facial landmarks with shape ``(5, 2)``.
        size: Output crop size; LVFace supports only 112.

    Returns:
        A float32 affine matrix with shape ``(2, 3)``.

    Raises:
        AlignmentError: If ``size`` is not 112, the landmarks are invalid, or
            no finite transform can be estimated.
    """
    if size != 112:
        raise AlignmentError("only size=112 is supported for LVFace alignment")

    points = _landmarks_array(kps)
    template = ARCFACE_DST.astype(np.float64)
    transform = _umeyama(points, template)

    if transform is not None and np.isfinite(transform).all():
        return transform.astype(np.float32)
    raise AlignmentError("degenerate landmark geometry; cannot align")
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _validate_image(image: object) -> npt.NDArray[np.uint8]:
    """Check that ``image`` is a non-empty ``(H, W, 3)`` uint8 array.

    Args:
        image: Object expected to be a non-empty RGB image.

    Returns:
        The validated image without copying it.

    Raises:
        TypeError: If ``image`` is not a NumPy array.
        ValueError: If the shape is not a non-empty ``(H, W, 3)`` or the dtype
            is not uint8.
    """
    if not isinstance(image, np.ndarray):
        raise TypeError("image must be a NumPy array")

    three_channel = image.ndim == 3 and image.shape[2] == 3
    if not three_channel or 0 in image.shape[:2]:
        raise ValueError(f"image must have non-zero shape (H, W, 3), got {image.shape}")

    if image.dtype != np.uint8:
        raise ValueError(f"image must have dtype uint8, got {image.dtype}")

    return image
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def norm_crop(
    image: np.ndarray,
    kps: object,
    size: int = 112,
) -> npt.NDArray[np.uint8]:
    """Warp an RGB image onto the ArcFace template with Pillow.

    Args:
        image: Source RGB uint8 image.
        kps: Five facial landmarks with shape ``(5, 2)``.
        size: Output width and height; LVFace supports only 112.

    Returns:
        An owned aligned RGB uint8 crop.
    """
    rgb = _validate_image(image)
    forward = estimate_norm(kps, size)
    # Pillow's AFFINE transform takes the output -> input mapping.
    backward = _invert_affine(forward)

    # Pillow samples pixel centers, so shift the affine transform by half a pixel.
    backward[:, 2] += 0.5 - backward[:, :2] @ np.full(2, 0.5)

    # Row-major (a, b, c, d, e, f) as plain Python floats.
    coefficients = tuple(backward.ravel().tolist())
    warped = Image.fromarray(rgb, mode="RGB").transform(
        (size, size),
        Image.Transform.AFFINE,
        coefficients,
        resample=Image.Resampling.BILINEAR,
    )
    return np.array(warped, dtype=np.uint8, order="C", copy=True)
|
lvface/detect/base.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""Shared face detector and aligner adapter."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from lvface.types import Face
|
|
8
|
+
|
|
9
|
+
from .align import norm_crop
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FaceDetector(ABC):
    """Abstract adapter for face detection with a shared alignment step.

    Subclasses supply :meth:`load` and :meth:`detect`; alignment defaults to
    ``norm_crop`` and may be overridden.
    """

    @abstractmethod
    def load(self) -> None:
        """Lazily initialize the detection backend."""

    @abstractmethod
    def detect(self, image: np.ndarray) -> list[Face]:
        """Find faces and their five-point landmarks in an RGB image.

        Args:
            image: Source RGB uint8 image.

        Returns:
            Detected faces in backend order.
        """

    def align(self, image: np.ndarray, kps: np.ndarray, size: int = 112) -> np.ndarray:
        """Warp one face onto the canonical ArcFace template.

        Args:
            image: Source RGB uint8 image.
            kps: Five facial landmarks with shape ``(5, 2)``.
            size: Output crop size.

        Returns:
            The aligned RGB crop.
        """
        return norm_crop(image, kps, size)

    def crop(self, image: np.ndarray) -> list[np.ndarray]:
        """Return an aligned crop for each detected face that has landmarks.

        Args:
            image: Source RGB uint8 image.

        Returns:
            Aligned crops for faces with five-point landmarks.
        """
        self.load()
        crops: list[np.ndarray] = []
        for face in self.detect(image):
            if face.kps is None:
                # Nothing to align against; the face is skipped.
                continue
            crops.append(self.align(image, face.kps))
        return crops

    def detect_and_align(self, image: np.ndarray) -> list[Face]:
        """Detect faces and store an aligned crop on each one with landmarks.

        Args:
            image: Source RGB uint8 image.

        Returns:
            Detected faces, with aligned crops where possible.
        """
        self.load()
        detected = self.detect(image)

        for face in detected:
            if face.kps is None:
                continue
            # The Face object is updated in place.
            face.aligned = self.align(image, face.kps)

        return detected
|
lvface/detect/insightface.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""InsightFace detection and reference alignment backend."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import threading
|
|
6
|
+
import warnings
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
from lvface.embed.onnx import Device, _resolve_providers
|
|
12
|
+
from lvface.types import BBox, Face
|
|
13
|
+
|
|
14
|
+
from .align import _validate_image, estimate_norm
|
|
15
|
+
from .base import FaceDetector
|
|
16
|
+
|
|
17
|
+
# Text of the one-time warning about the InsightFace model-pack licence.
_LICENSE_WARNING = (
    "InsightFace bundled model packs are licensed for non-commercial research use only. "
    "Use a detector with appropriately licensed weights or pre-aligned crops for other uses."
)
# Guards the flag below so concurrent loaders emit the warning only once.
_license_warning_lock = threading.Lock()
_license_warning_emitted = False


def _warn_about_model_license() -> None:
    """Emit the InsightFace model-license warning once per process."""
    global _license_warning_emitted
    # Cheap unlocked check first; the flag is re-checked under the lock before warning.
    if not _license_warning_emitted:
        with _license_warning_lock:
            if not _license_warning_emitted:
                warnings.warn(_LICENSE_WARNING, UserWarning, stacklevel=3)
                # Set only after warn() returns, so a warning that raises
                # (warnings turned into errors) is attempted again next time.
                _license_warning_emitted = True
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _import_insightface() -> tuple[type[Any], Any]:
    """Import the optional InsightFace pieces needed for detection.

    Returns:
        The ``FaceAnalysis`` class and face-alignment module.

    Raises:
        ImportError: If the ``insightface`` package itself is not installed.
            A missing transitive module is re-raised unchanged.
    """
    try:
        from insightface.app import FaceAnalysis
        from insightface.utils import face_align
    except ModuleNotFoundError as missing:
        if missing.name != "insightface":
            # Some other dependency is absent; keep the original error.
            raise
        raise ImportError(
            'InsightFace detection requires `pip install "lvface[detect]"`'
        ) from missing
    return FaceAnalysis, face_align
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class InsightFaceDetector(FaceDetector):
    """Face detector using InsightFace detection landmarks.

    Construction only validates and stores configuration. The InsightFace
    backend is imported and prepared on the first call to :meth:`load`, which
    :meth:`detect` and :meth:`align` invoke themselves.
    """

    def __init__(
        self,
        name: str = "buffalo_l",
        *,
        device: Device = "auto",
        det_size: tuple[int, int] = (640, 640),
        min_score: float = 0.5,
    ) -> None:
        """Configure the InsightFace detector.

        Args:
            name: InsightFace model-pack name.
            device: Preferred inference device.
            det_size: Detection input width and height.
            min_score: Minimum confidence retained as a face.

        Raises:
            ValueError: If ``name`` is empty, ``det_size`` is not two positive
                integers, or ``min_score`` is not a finite value in ``[0, 1]``.
        """
        if not name:
            raise ValueError("name must not be empty")

        # bool is a subclass of int, so it is rejected explicitly.
        if (
            len(det_size) != 2
            or any(isinstance(value, bool) or not isinstance(value, int) for value in det_size)
            or any(value <= 0 for value in det_size)
        ):
            raise ValueError("det_size must contain two positive integers")

        if not np.isfinite(min_score) or not 0.0 <= min_score <= 1.0:
            raise ValueError("min_score must be finite and between 0 and 1")

        self.name = name
        self.device = device
        self.det_size = det_size
        self.min_score = float(min_score)
        # Both stay None until load() succeeds.
        self.app: Any | None = None
        self._face_align: Any | None = None
        self._load_lock = threading.Lock()

    def load(self) -> None:
        """Initialize the requested InsightFace detection pack once."""
        if self.app is not None:
            return
        with self._load_lock:
            # Re-check under the lock: another thread may have finished loading.
            if self.app is not None:
                return

            providers = _resolve_providers(self.device)
            face_analysis, face_align = _import_insightface()
            _warn_about_model_license()
            app = face_analysis(
                name=self.name,
                allowed_modules=["detection"],
                providers=providers,
            )
            # ctx_id 0 when CUDA is the first provider, otherwise -1.
            ctx_id = 0 if providers[0] == "CUDAExecutionProvider" else -1
            app.prepare(ctx_id=ctx_id, det_size=self.det_size)
            # Publish ``app`` last so a non-None ``app`` implies ``_face_align`` is set.
            self._face_align = face_align
            self.app = app

    def detect(self, image: np.ndarray) -> list[Face]:
        """Detect faces with InsightFace.

        Args:
            image: Source RGB uint8 image.

        Returns:
            Faces meeting the configured confidence threshold.

        Raises:
            RuntimeError: If the backend is still unset after :meth:`load`.
            ValueError: If InsightFace returns a malformed bounding box or
                malformed landmarks for a retained face.
        """
        source = _validate_image(image)
        self.load()
        if self.app is None:
            raise RuntimeError("detector failed to initialize")

        # Reverse the channel axis (RGB -> BGR) into a contiguous buffer for the backend.
        bgr = np.ascontiguousarray(source[:, :, ::-1])
        detected = self.app.get(bgr)
        faces: list[Face] = []

        for detected_face in detected:
            score = float(detected_face.det_score)
            if not np.isfinite(score) or score < self.min_score:
                continue

            coordinates = np.asarray(detected_face.bbox, dtype=np.float64)
            if coordinates.shape != (4,) or not np.isfinite(coordinates).all():
                raise ValueError("InsightFace returned an invalid bounding box")

            landmarks = getattr(detected_face, "kps", None)
            if landmarks is not None:
                landmarks = np.asarray(landmarks, dtype=np.float32)
                if landmarks.shape != (5, 2) or not np.isfinite(landmarks).all():
                    raise ValueError("InsightFace returned invalid five-point landmarks")

            faces.append(
                Face(
                    bbox=BBox(
                        float(coordinates[0]),
                        float(coordinates[1]),
                        float(coordinates[2]),
                        float(coordinates[3]),
                        score,
                    ),
                    kps=landmarks,
                    aligned=None,
                    embedding=None,
                    # Index among retained faces, not among raw detections.
                    face_index=len(faces),
                )
            )
        return faces

    def align(self, image: np.ndarray, kps: np.ndarray, size: int = 112) -> np.ndarray:
        """Align an RGB image with InsightFace's reference warp.

        Args:
            image: Source RGB uint8 image.
            kps: Five facial landmarks with shape ``(5, 2)``. Array-likes such
                as nested lists are accepted and converted to float32.
            size: Output crop size.

        Returns:
            An aligned RGB uint8 crop.

        Raises:
            RuntimeError: If the backend is still unset after :meth:`load`.
            ValueError: If the backend returns a crop of unexpected shape.
        """
        source = _validate_image(image)
        # Called only for its validation of ``kps`` and ``size``; the matrix is discarded.
        estimate_norm(kps, size)
        self.load()
        if self._face_align is None:
            raise RuntimeError("detector failed to initialize")

        # estimate_norm accepts any array-like, but InsightFace's reference
        # implementation reads ``landmark.shape``. Convert lists/tuples so that
        # validated input does not fail there; ndarrays pass through untouched.
        landmarks = kps if isinstance(kps, np.ndarray) else np.asarray(kps, dtype=np.float32)

        aligned = np.asarray(
            self._face_align.norm_crop(source, landmark=landmarks, image_size=size)
        )
        result = _validate_image(aligned)
        expected_shape = (size, size, 3)

        if result.shape != expected_shape:
            raise ValueError(
                f"InsightFace alignment returned shape {result.shape}, expected {expected_shape}"
            )
        return result
|
lvface/embed/__init__.py
ADDED
lvface/embed/base.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Shared face-embedding adapter."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
|
|
6
|
+
import numpy as np
|
|
7
|
+
import numpy.typing as npt
|
|
8
|
+
|
|
9
|
+
from lvface.types import Embedding
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class FaceEmbedder(ABC):
    """Abstract adapter that turns aligned face crops into embeddings.

    Subclasses supply :meth:`load` and :meth:`_forward`; preprocessing,
    batching, padding and output validation are shared here.
    """

    # (height, width) of the crops accepted by preprocess().
    input_size: tuple[int, int] = (112, 112)
    # Length of each vector that _forward() must return.
    embedding_dim: int = 512
    # When set, every _forward() call receives exactly this many items.
    _fixed_batch_size: int | None = None

    @abstractmethod
    def load(self) -> None:
        """Lazily initialize the embedding backend."""

    @abstractmethod
    def _forward(self, batch: npt.NDArray[np.float32]) -> npt.NDArray[np.floating]:
        """Run the backend on a preprocessed NCHW batch.

        Args:
            batch: Preprocessed float32 images with shape ``(N, C, H, W)``.

        Returns:
            Raw embedding vectors with shape ``(N, embedding_dim)``.
        """

    def preprocess(self, crop: np.ndarray) -> npt.NDArray[np.float32]:
        """Turn an aligned RGB crop into a normalized CHW float32 tensor.

        Args:
            crop: RGB uint8 crop matching ``input_size``.

        Returns:
            A contiguous normalized float32 CHW tensor.

        Raises:
            TypeError: If ``crop`` is not a NumPy array.
            ValueError: If the shape or dtype does not match the contract.
        """
        wanted_shape = (*self.input_size, 3)
        if not isinstance(crop, np.ndarray):
            raise TypeError("crop must be a NumPy array")

        if crop.shape != wanted_shape:
            raise ValueError(f"crop must have shape {wanted_shape}, got {crop.shape}")

        if crop.dtype != np.uint8:
            raise ValueError(f"crop must have dtype uint8, got {crop.dtype}")

        # HWC -> CHW, then map [0, 255] onto [-1, 1].
        channels_first = crop.transpose(2, 0, 1)
        unit_range = channels_first / 255.0
        centered = (unit_range - 0.5) / 0.5
        return np.ascontiguousarray(centered, dtype=np.float32)

    def embed(self, crop: np.ndarray, *, normalize: bool = True) -> Embedding:
        """Embed a single aligned RGB crop.

        Args:
            crop: RGB uint8 crop matching ``input_size``.
            normalize: Whether to L2-normalize the embedding.

        Returns:
            The generated face embedding.
        """
        results = self.embed_batch([crop], normalize=normalize, batch_size=1)
        return results[0]

    def embed_batch(
        self,
        crops: Sequence[np.ndarray],
        *,
        normalize: bool = True,
        batch_size: int = 32,
    ) -> list[Embedding]:
        """Embed aligned crops in chunks that satisfy the backend batch contract.

        Args:
            crops: Aligned RGB uint8 crops.
            normalize: Whether to L2-normalize each embedding.
            batch_size: Maximum batch size for dynamic-batch backends.

        Returns:
            Embeddings in the same order as ``crops``.

        Raises:
            ValueError: If ``batch_size`` is not positive, or the backend
                returns output of the wrong shape, a non-floating dtype, or
                NaN/Inf values.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be greater than zero")

        pending = list(crops)
        if not pending:
            return []

        # Every crop is validated and preprocessed before the backend is loaded.
        prepared = [self.preprocess(item) for item in pending]
        self.load()
        fixed = self._fixed_batch_size
        step = fixed if fixed is not None else batch_size
        rows: list[npt.NDArray[np.floating]] = []

        for offset in range(0, len(prepared), step):
            group = prepared[offset : offset + step]
            kept = len(group)

            # Some exported models only accept one fixed batch size.
            if fixed is not None and kept < fixed:
                # Pad with repeats of the last tensor; padded rows are dropped below.
                group.extend([group[-1]] * (fixed - kept))

            raw = np.asarray(self._forward(np.stack(group)))
            expected = (len(group), self.embedding_dim)
            if raw.shape != expected:
                raise ValueError(
                    "backend returned invalid embedding shape "
                    f"{raw.shape}; expected {expected}"
                )

            if not np.issubdtype(raw.dtype, np.floating):
                raise ValueError(f"backend returned non-floating embeddings: {raw.dtype}")

            if not np.isfinite(raw).all():
                raise ValueError("backend returned an embedding containing NaN/Inf")

            rows.extend(raw[:kept])

        results = [Embedding(row) for row in rows]
        if not normalize:
            return results
        return [item.normalize() for item in results]
|