pyfaceau 1.0.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyfaceau/__init__.py +19 -0
- pyfaceau/alignment/__init__.py +0 -0
- pyfaceau/alignment/calc_params.py +671 -0
- pyfaceau/alignment/face_aligner.py +352 -0
- pyfaceau/alignment/numba_calcparams_accelerator.py +244 -0
- pyfaceau/cython_histogram_median.cp313-win_amd64.pyd +0 -0
- pyfaceau/cython_rotation_update.cp313-win_amd64.pyd +0 -0
- pyfaceau/detectors/__init__.py +0 -0
- pyfaceau/detectors/pfld.py +128 -0
- pyfaceau/detectors/retinaface.py +352 -0
- pyfaceau/download_weights.py +134 -0
- pyfaceau/features/__init__.py +0 -0
- pyfaceau/features/histogram_median_tracker.py +335 -0
- pyfaceau/features/pdm.py +269 -0
- pyfaceau/features/triangulation.py +64 -0
- pyfaceau/parallel_pipeline.py +462 -0
- pyfaceau/pipeline.py +1083 -0
- pyfaceau/prediction/__init__.py +0 -0
- pyfaceau/prediction/au_predictor.py +434 -0
- pyfaceau/prediction/batched_au_predictor.py +269 -0
- pyfaceau/prediction/model_parser.py +337 -0
- pyfaceau/prediction/running_median.py +318 -0
- pyfaceau/prediction/running_median_fallback.py +200 -0
- pyfaceau/processor.py +270 -0
- pyfaceau/refinement/__init__.py +12 -0
- pyfaceau/refinement/svr_patch_expert.py +361 -0
- pyfaceau/refinement/targeted_refiner.py +362 -0
- pyfaceau/utils/__init__.py +0 -0
- pyfaceau/utils/cython_extensions/cython_histogram_median.c +35391 -0
- pyfaceau/utils/cython_extensions/cython_histogram_median.pyx +316 -0
- pyfaceau/utils/cython_extensions/cython_rotation_update.c +32262 -0
- pyfaceau/utils/cython_extensions/cython_rotation_update.pyx +211 -0
- pyfaceau/utils/cython_extensions/setup.py +47 -0
- pyfaceau-1.0.3.data/scripts/pyfaceau_gui.py +302 -0
- pyfaceau-1.0.3.dist-info/METADATA +466 -0
- pyfaceau-1.0.3.dist-info/RECORD +40 -0
- pyfaceau-1.0.3.dist-info/WHEEL +5 -0
- pyfaceau-1.0.3.dist-info/entry_points.txt +3 -0
- pyfaceau-1.0.3.dist-info/licenses/LICENSE +40 -0
- pyfaceau-1.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
OpenFace 2.2 Face Alignment - Pure Python Implementation
|
|
4
|
+
|
|
5
|
+
This module provides face alignment to a canonical 112×112 reference frame
|
|
6
|
+
using similarity transform (scale + rotation + translation) based on 68 facial landmarks.
|
|
7
|
+
|
|
8
|
+
Replicates the OpenFace 2.2 C++ alignment algorithm from:
|
|
9
|
+
- Face_utils.cpp::AlignFace() (lines 109-146)
|
|
10
|
+
- RotationHelpers.h::AlignShapesWithScale() (lines 195-242)
|
|
11
|
+
- RotationHelpers.h::AlignShapesKabsch2D() (lines 168-191)
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
aligner = OpenFace22FaceAligner("pdm_68_multi_pie.txt")
|
|
15
|
+
aligned_face = aligner.align_face(image, landmarks_68, pose_tx, pose_ty)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
import numpy as np
|
|
19
|
+
import cv2
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Tuple, Optional
|
|
22
|
+
from pyfaceau.features.pdm import PDMParser
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class OpenFace22FaceAligner:
    """
    Pure Python implementation of OpenFace 2.2 face alignment.

    Aligns faces from 68 landmarks to a 112×112 canonical reference frame
    using a similarity transform (scale + rotation + translation).

    Replicates the OpenFace 2.2 C++ alignment algorithm from:
    - Face_utils.cpp::AlignFace() (lines 109-146)
    - RotationHelpers.h::AlignShapesWithScale() (lines 195-242)
    - RotationHelpers.h::AlignShapesKabsch2D() (lines 168-191)
    """

    # Rigid landmark indices (0-indexed) from OpenFace C++.
    # These correspond to rigid facial structures: forehead, nose bridge,
    # eye corners.  Excludes soft tissue like lips and cheeks.
    # NOTE: Includes 8 eye landmarks (36,39,40,41,42,45,46,47) which affect
    # rotation.  Testing shows removing eyes improves STABILITY but ruins
    # MAGNITUDE (31° vs 5°).
    RIGID_INDICES = [1, 2, 3, 4, 12, 13, 14, 15, 27, 28, 29, 31, 32, 33, 34,
                     35, 36, 39, 40, 41, 42, 45, 46, 47]

    # Landmarks shifted upward before masking so the mask includes the
    # forehead: jaw corners (0, 16) and eyebrows (17-26).
    _FOREHEAD_INDICES = [0, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]

    def __init__(self, pdm_file: str, sim_scale: float = 0.7, output_size: Tuple[int, int] = (112, 112)):
        """
        Initialize face aligner with PDM reference shape.

        Args:
            pdm_file: Path to PDM model file (e.g., "pdm_68_multi_pie.txt")
            sim_scale: Scaling factor for reference shape (default: 0.7 for AU analysis)
            output_size: Output aligned face size in pixels (default: 112×112)
        """
        self.sim_scale = sim_scale
        self.output_width, self.output_height = output_size

        # Load PDM and extract mean shape
        print(f"Loading PDM from: {pdm_file}")
        pdm = PDMParser(pdm_file)

        # Preprocess mean shape: 204 values (68 landmarks × 3D) → 68 landmarks × 2D
        # OpenFace C++ logic (Face_utils.cpp:112-119):
        #   1. Scale mean shape by sim_scale
        #   2. Discard Z component (take first 136 values = all X,Y coords)
        #   3. Reshape to (68, 2) format
        #
        # CRITICAL: PDM stores as: [x0, y0, x1, y1, ..., x67, y67, z0, z1, ..., z67]
        # NOT as: [x0, y0, z0, x1, y1, z1, ...]
        # So we must: take first 136 values (all X,Y), then reshape.
        mean_shape_scaled = pdm.mean_shape * sim_scale  # (204, 1)
        mean_shape_2d = mean_shape_scaled[:136]  # First 136 = all X,Y values
        self.reference_shape = mean_shape_2d.reshape(68, 2)  # (68, 2)

        print(f"Face aligner initialized")
        print(f" Sim scale: {sim_scale}")
        print(f" Output size: {output_size}")
        print(f" Reference shape: {self.reference_shape.shape}")
        print(f" Rigid points: {len(self.RIGID_INDICES)}")

    def align_face(self, image: np.ndarray, landmarks_68: np.ndarray,
                   pose_tx: float, pose_ty: float, p_rz: float = 0.0,
                   apply_mask: bool = False, triangulation=None) -> np.ndarray:
        """
        Align face to canonical 112×112 reference frame.

        Args:
            image: Input BGR image (any size)
            landmarks_68: 68 facial landmarks as (68, 2) array or (136,) flat array
            pose_tx: Pose translation X (from OpenFace pose estimation)
            pose_ty: Pose translation Y (from OpenFace pose estimation)
            p_rz: Pose rotation Z in radians (from OpenFace params_global[3])
            apply_mask: If True, mask out regions outside the face (like OpenFace C++)
            triangulation: TriangulationParser object (required if apply_mask=True)

        Returns:
            aligned_face: 112×112 aligned face image (BGR format)

        Raises:
            ValueError: If landmarks have an unexpected shape, or if
                apply_mask=True without a triangulation.
        """
        # Normalize landmark layout to (68, 2)
        if landmarks_68.shape == (136,):
            landmarks_68 = landmarks_68.reshape(68, 2)
        elif landmarks_68.shape != (68, 2):
            raise ValueError(f"landmarks_68 must be (68, 2) or (136,), got {landmarks_68.shape}")

        # Fail fast: validate mask inputs before doing any expensive work
        if apply_mask and triangulation is None:
            raise ValueError("triangulation required when apply_mask=True")

        # Compute scale between detected rigid points and the reference shape
        # (no rotation is computed here -- see _align_shapes_with_scale)
        source_rigid = self._extract_rigid_points(landmarks_68)
        dest_rigid = self._extract_rigid_points(self.reference_shape)
        scale_identity = self._align_shapes_with_scale(source_rigid, dest_rigid)
        scale = scale_identity[0, 0]  # Extract scale from identity matrix

        # Apply INVERSE of CSV p_rz rotation:
        # CSV p_rz describes rotation FROM canonical TO tilted, so the
        # tilted → canonical rotation is -p_rz.
        angle = -p_rz
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)
        R = np.array([[cos_a, -sin_a],
                      [sin_a, cos_a]], dtype=np.float32)

        # Combine scale and rotation into the 2×2 similarity block
        scale_rot_matrix = scale * R

        # Build 2×3 affine warp matrix and warp the image
        warp_matrix = self._build_warp_matrix(scale_rot_matrix, pose_tx, pose_ty)
        aligned_face = cv2.warpAffine(
            image,
            warp_matrix,
            (self.output_width, self.output_height),
            flags=cv2.INTER_LINEAR
        )

        if apply_mask:
            aligned_face = self._apply_face_mask(
                aligned_face, landmarks_68, warp_matrix, triangulation
            )

        return aligned_face

    def _apply_face_mask(self, aligned_face: np.ndarray, landmarks_68: np.ndarray,
                         warp_matrix: np.ndarray, triangulation) -> np.ndarray:
        """
        Mask out pixels outside the (forehead-extended) face region.

        Args:
            aligned_face: Warped output image (output_height × output_width)
            landmarks_68: (68, 2) landmarks in the ORIGINAL image
            warp_matrix: (2, 3) affine matrix used for the warp
            triangulation: object providing create_face_mask(landmarks, w, h)

        Returns:
            Masked copy of aligned_face.
        """
        # Transform landmarks to aligned space
        aligned_landmarks = self._transform_landmarks(landmarks_68, warp_matrix)

        # Shift eyebrow / jaw-corner landmarks upward so the mask includes
        # the forehead, matching OpenFace C++ behaviour.  The C++ constant 30
        # assumes sim_scale 0.7, hence the (30 / 0.7) normalization.
        forehead_offset = (30 / 0.7) * self.sim_scale
        aligned_landmarks[self._FOREHEAD_INDICES, 1] -= forehead_offset

        mask = triangulation.create_face_mask(
            aligned_landmarks,
            self.output_width,
            self.output_height
        )

        # Zero out all pixels outside the mask (applies to every channel)
        return cv2.bitwise_and(aligned_face, aligned_face, mask=mask)

    def _transform_landmarks(self, landmarks: np.ndarray, warp_matrix: np.ndarray) -> np.ndarray:
        """
        Transform landmarks using an affine warp matrix.

        Args:
            landmarks: (68, 2) landmark coordinates in original image
            warp_matrix: (2, 3) affine transformation matrix

        Returns:
            (68, 2) transformed landmark coordinates
        """
        # Homogeneous coordinates: append a column of ones, then apply the
        # 2×3 affine matrix in one matrix product.
        ones = np.ones((landmarks.shape[0], 1))
        landmarks_hom = np.concatenate([landmarks, ones], axis=1)  # (68, 3)
        return (warp_matrix @ landmarks_hom.T).T  # (68, 2)

    def _compute_scale_only(self, src: np.ndarray, dst: np.ndarray) -> float:
        """
        Compute the RMS scale factor between two point sets (no rotation).

        Args:
            src: (N, 2) source points
            dst: (N, 2) destination points

        Returns:
            Scale factor s_dst / s_src (translation-invariant).
        """
        n = src.shape[0]

        # Mean-normalize both point sets
        src_centered = src - src.mean(axis=0)
        dst_centered = dst - dst.mean(axis=0)

        # RMS scale, matching OpenFace C++: sqrt(sum(points^2) / n)
        s_src = np.sqrt(np.sum(src_centered ** 2) / n)
        s_dst = np.sqrt(np.sum(dst_centered ** 2) / n)

        return s_dst / s_src

    def _build_warp_matrix(self, scale_rot: np.ndarray, pose_tx: float, pose_ty: float) -> np.ndarray:
        """
        Build a 2×3 affine warp matrix from a 2×2 scale-rotation matrix and
        the pose translation.

        Implementation matches Face_utils.cpp::AlignFace (lines 127-143).

        Critical details:
            1. Copy scale-rotation matrix directly (NO transpose)
            2. Transform pose translation THROUGH the scale-rotation matrix
            3. Simple centering with out_width/2, out_height/2 (NO empirical shifts)

        Args:
            scale_rot: (2, 2) similarity transform matrix (scale × rotation)
            pose_tx: Pose translation X (from params_global[4])
            pose_ty: Pose translation Y (from params_global[5])

        Returns:
            (2, 3) affine warp matrix for cv2.warpAffine
        """
        warp_matrix = np.zeros((2, 3), dtype=np.float32)

        # Copy scale-rotation to the 2×2 block (NO transpose)
        warp_matrix[:2, :2] = scale_rot

        # Transform pose translation through scale-rotation.
        # C++ (lines 138-139):  cv::Vec2f T(tx, ty);  T = scale_rot_matrix * T;
        T = np.array([pose_tx, pose_ty], dtype=np.float32)
        T_transformed = scale_rot @ T

        # Center the face in the output image.
        # C++ (lines 142-143):
        #   warp_matrix(0,2) = -T(0) + out_width/2;
        #   warp_matrix(1,2) = -T(1) + out_height/2;
        # NO empirical shifts (+2, -2) - those were incorrect!
        warp_matrix[0, 2] = -T_transformed[0] + self.output_width / 2
        warp_matrix[1, 2] = -T_transformed[1] + self.output_height / 2

        return warp_matrix

    def _extract_rigid_points(self, landmarks: np.ndarray) -> np.ndarray:
        """
        Extract the 24 rigid points from 68 landmarks.

        Rigid points correspond to stable facial structures (forehead, nose,
        eye corners) that don't deform much during facial expressions.

        Args:
            landmarks: (68, 2) array of facial landmarks

        Returns:
            (24, 2) array of rigid landmarks
        """
        return landmarks[self.RIGID_INDICES]

    def _align_shapes_kabsch_2d(self, src: np.ndarray, dst: np.ndarray) -> np.ndarray:
        """
        Compute a 2D rotation matrix using the Kabsch algorithm.

        Assumes src and dst are already mean-normalized.
        Based on RotationHelpers.h::AlignShapesKabsch2D (lines 168-191).

        Args:
            src: (N, 2) mean-normalized source points
            dst: (N, 2) mean-normalized destination points

        Returns:
            (2, 2) rotation matrix
        """
        # SVD decomposition of src^T × dst.
        # OpenFace C++: cv::SVD svd(align_from.t() * align_to)
        U, S, Vt = np.linalg.svd(src.T @ dst)

        # Determinant check prevents a reflection being returned as a
        # rotation (OpenFace C++ does the same correction).
        d = np.linalg.det(Vt.T @ U.T)
        corr = np.eye(2)
        corr[1, 1] = 1.0 if d > 0 else -1.0

        # R = V^T^T × corr × U^T  (C++: R = svd.vt.t() * corr * svd.u.t())
        return Vt.T @ corr @ U.T

    def _align_shapes_with_scale(self, src: np.ndarray, dst: np.ndarray) -> np.ndarray:
        """
        Compute a similarity transform (scale only, NO rotation) between two
        point sets.

        CRITICAL: Since CSV landmarks are PDM-reconstructed (via CalcShape2D),
        they are already in canonical orientation.  Only scale + translation
        are needed here, NOT rotation via Kabsch.

        Background: FaceAnalyser.cpp calls CalcParams TWICE:
            1. On raw landmarks → params_global₁ → CalcShape2D → reconstructed
               landmarks (CSV output)
            2. On reconstructed landmarks → params_global₂ → AlignFace
        The second CalcParams produces near-zero rotation because the
        reconstructed landmarks are already canonical.  Our Python uses CSV
        landmarks (already canonical), so rotation is skipped entirely and
        supplied externally from the CSV p_rz instead.

        Args:
            src: (N, 2) source points (detected landmarks)
            dst: (N, 2) destination points (reference shape)

        Returns:
            (2, 2) similarity transform matrix (scale × identity)
        """
        # RMS-scale ratio, delegated to the shared helper (same math as
        # OpenFace's AlignShapesWithScale up to the rotation step).
        scale = self._compute_scale_only(src, dst)
        return scale * np.eye(2, dtype=np.float32)
|
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Numba JIT-Accelerated Functions for CalcParams
|
|
4
|
+
|
|
5
|
+
Provides 2-5x speedup for pose estimation by compiling hot loops to machine code.
|
|
6
|
+
Maintains 100% accuracy (identical math, just compiled).
|
|
7
|
+
|
|
8
|
+
Performance targets:
|
|
9
|
+
- CalcParams: 42.5ms → 14-21ms (2-3x speedup)
|
|
10
|
+
- Overall FPS: 17.99 → 30-40 FPS
|
|
11
|
+
|
|
12
|
+
Author: Optimized for Apple Silicon MacBook
|
|
13
|
+
Date: 2025-11-01
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
import numba
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@numba.jit(nopython=True, cache=True, fastmath=False)
def euler_to_rotation_matrix_jit(rx, ry, rz):
    """
    JIT-compiled conversion of Euler angles to a 3x3 rotation matrix.

    Args:
        rx, ry, rz: Euler angles in radians

    Returns:
        3x3 rotation matrix (float32)
    """
    cx, cy, cz = np.cos(rx), np.cos(ry), np.cos(rz)
    sx, sy, sz = np.sin(rx), np.sin(ry), np.sin(rz)

    rot = np.empty((3, 3), dtype=np.float32)

    # Row 0
    rot[0, 0] = cy * cz
    rot[0, 1] = -cy * sz
    rot[0, 2] = sy

    # Row 1
    rot[1, 0] = cx * sz + cz * sx * sy
    rot[1, 1] = cx * cz - sx * sy * sz
    rot[1, 2] = -cy * sx

    # Row 2
    rot[2, 0] = sx * sz - cx * cz * sy
    rot[2, 1] = cz * sx + cx * sy * sz
    rot[2, 2] = cx * cy

    return rot
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@numba.jit(nopython=True, cache=True, fastmath=False)
def project_shape_to_2d_jit(shape_3d, R, s, tx, ty, n_vis):
    """
    JIT-compiled weak-perspective projection of a 3D shape to 2D.

    Replaces the slow Python projection loop in calc_params.

    Args:
        shape_3d: (3, n_vis) 3D coordinates [X, Y, Z] rows
        R: (3, 3) rotation matrix
        s: scale
        tx, ty: translation
        n_vis: number of visible landmarks

    Returns:
        (n_vis*2,) 2D projected landmarks [X0...Xn, Y0...Yn]
    """
    projected = np.empty(n_vis * 2, dtype=np.float32)

    # Only the first two rotation rows contribute to the 2D projection.
    a11, a12, a13 = R[0, 0], R[0, 1], R[0, 2]
    a21, a22, a23 = R[1, 0], R[1, 1], R[1, 2]

    for k in range(n_vis):
        x = shape_3d[0, k]
        y = shape_3d[1, k]
        z = shape_3d[2, k]

        # X coordinates in the first half, Y coordinates in the second.
        projected[k] = s * (a11 * x + a12 * y + a13 * z) + tx
        projected[k + n_vis] = s * (a21 * x + a22 * y + a23 * z) + ty

    return projected
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@numba.jit(nopython=True, cache=True, fastmath=False)
def compute_jacobian_jit(shape_3d, R, s, princ_comp_vis, n_vis, m):
    """
    JIT-compiled Jacobian computation - the main pose-fitting bottleneck.

    Computes partial derivatives of the 2D projection w.r.t.:
    - 6 global params: scale, rx, ry, rz, tx, ty (columns 0-5)
    - m local params: PCA mode coefficients (columns 6 to 6+m-1)

    Args:
        shape_3d: (3, n_vis) 3D shape [X, Y, Z] rows
        R: (3, 3) rotation matrix
        s: scale
        princ_comp_vis: (n_vis*3, m) principal components for visible landmarks
        n_vis: number of visible landmarks
        m: number of PCA modes (34)

    Returns:
        J: (n_vis*2, 6+m) Jacobian matrix
    """
    J = np.zeros((n_vis * 2, 6 + m), dtype=np.float32)

    # Only the first two rotation rows appear in the projection and in every
    # derivative below; the third row (r31..r33) is never used here, so it is
    # not extracted.
    r11, r12, r13 = R[0, 0], R[0, 1], R[0, 2]
    r21, r22, r23 = R[1, 0], R[1, 1], R[1, 2]

    # Iterate over each landmark
    for i in range(n_vis):
        X = shape_3d[0, i]
        Y = shape_3d[1, i]
        Z = shape_3d[2, i]

        row_x = i            # x-projection row for landmark i
        row_y = i + n_vis    # y-projection row for landmark i

        # --- Global parameter derivatives ---

        # Scaling term (column 0)
        J[row_x, 0] = X * r11 + Y * r12 + Z * r13
        J[row_y, 0] = X * r21 + Y * r22 + Z * r23

        # Rotation terms (columns 1-3: rx, ry, rz)
        J[row_x, 1] = s * (Y * r13 - Z * r12)
        J[row_y, 1] = s * (Y * r23 - Z * r22)

        J[row_x, 2] = -s * (X * r13 - Z * r11)
        J[row_y, 2] = -s * (X * r23 - Z * r21)

        J[row_x, 3] = s * (X * r12 - Y * r11)
        J[row_y, 3] = s * (X * r22 - Y * r21)

        # Translation terms (columns 4-5: tx, ty)
        J[row_x, 4] = 1.0
        J[row_y, 4] = 0.0

        J[row_x, 5] = 0.0
        J[row_y, 5] = 1.0

        # --- Local parameter derivatives (columns 6 to 6+m-1) ---
        for j in range(m):
            # Mode j displacement of landmark i; components are stored as
            # [all X, all Y, all Z] blocks of length n_vis.
            Vx = princ_comp_vis[i, j]
            Vy = princ_comp_vis[i + n_vis, j]
            Vz = princ_comp_vis[i + 2*n_vis, j]

            # Derivative of the 2D projection w.r.t. local parameter j
            J[row_x, 6 + j] = s * (r11*Vx + r12*Vy + r13*Vz)
            J[row_y, 6 + j] = s * (r21*Vx + r22*Vy + r23*Vz)

    return J
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
@numba.jit(nopython=True, cache=True, fastmath=False)
def apply_jacobian_weighting_jit(J, weight_diag, n_vis):
    """
    JIT-compiled row-weighting of the Jacobian.

    Scales each landmark's x-row and y-row of J by the corresponding entries
    of the (diagonal) weight matrix.

    Args:
        J: (n_vis*2, 6+m) Jacobian matrix
        weight_diag: (n_vis*2,) diagonal of weight matrix
        n_vis: number of visible landmarks

    Returns:
        J_weighted: (n_vis*2, 6+m) weighted Jacobian
    """
    weighted = J.copy()
    n_cols = J.shape[1]

    for row in range(n_vis):
        wx = weight_diag[row]
        wy = weight_diag[row + n_vis]

        # Apply this landmark's weights across every column of its two rows.
        for col in range(n_cols):
            weighted[row, col] *= wx
            weighted[row + n_vis, col] *= wy

    return weighted
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def compute_jacobian_accelerated(params_local, params_global, princ_comp_vis, mean_shape_vis, weight_matrix, n_vis, m):
    """
    Python wrapper for the JIT-compiled Jacobian computation.

    Drop-in replacement for CalcParams.compute_jacobian() with the hot loops
    delegated to Numba-compiled kernels for a 2-5x speedup.

    Args:
        params_local: (34,) local parameters
        params_global: (6,) global parameters [scale, rx, ry, rz, tx, ty]
        princ_comp_vis: (n_vis*3, 34) principal components for visible landmarks
        mean_shape_vis: (n_vis*3, 1) mean shape for visible landmarks
        weight_matrix: (n_vis*2, n_vis*2) diagonal weight matrix
        n_vis: number of visible landmarks (68 or less)
        m: number of PCA modes (34)

    Returns:
        J: (n_vis*2, 6+m) Jacobian matrix
        J_w_t: (6+m, n_vis*2) weighted Jacobian transpose
    """
    scale = params_global[0]
    rx, ry, rz = params_global[1], params_global[2], params_global[3]

    # Reconstruct the 3D shape for the current local (PCA) parameters:
    # mean shape plus the PCA deformation.
    deformation = princ_comp_vis @ params_local.reshape(-1, 1)
    flat_shape = mean_shape_vis.flatten() + deformation.flatten()
    shape_3d = flat_shape.reshape(3, n_vis).astype(np.float32)

    # JIT kernels: rotation matrix, then the full Jacobian.
    rotation = euler_to_rotation_matrix_jit(rx, ry, rz)
    J = compute_jacobian_jit(shape_3d, rotation, scale, princ_comp_vis, n_vis, m)

    # Skip the weighting kernel entirely when all weights are 1.
    diag = np.diag(weight_matrix).astype(np.float32)
    if np.allclose(diag, 1.0):
        J_w_t = J.T
    else:
        J_w_t = apply_jacobian_weighting_jit(J, diag, n_vis).T

    return J, J_w_t
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# Warmup JIT compilation on import (prevents first-call overhead)
|
|
230
|
+
def _warmup_jit():
    """Pre-compile all JIT kernels with dummy inputs to avoid first-call overhead."""
    rot = euler_to_rotation_matrix_jit(0.1, 0.2, 0.3)

    # Representative shapes: 68 landmarks, 34 PCA modes.
    shape = np.random.randn(3, 68).astype(np.float32)
    project_shape_to_2d_jit(shape, rot, 1.0, 0.0, 0.0, 68)

    modes = np.random.randn(68 * 3, 34).astype(np.float32)
    jac = compute_jacobian_jit(shape, rot, 1.0, modes, 68, 34)

    weights = np.ones(68 * 2, dtype=np.float32)
    apply_jacobian_weighting_jit(jac, weights, 68)
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# Warm the JIT cache at import time so the first real frame pays no
# compilation cost.  NOTE(review): this executes non-trivial work (and a
# print) as an import side effect; confirm that is acceptable for all
# consumers of this module.
_warmup_jit()
print("Numba CalcParams accelerator loaded - targeting 2-5x speedup")
|
|
Binary file
|
|
Binary file
|
|
File without changes
|