landmarkdiff 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. landmarkdiff/__init__.py +40 -0
  2. landmarkdiff/__main__.py +207 -0
  3. landmarkdiff/api_client.py +316 -0
  4. landmarkdiff/arcface_torch.py +583 -0
  5. landmarkdiff/audit.py +338 -0
  6. landmarkdiff/augmentation.py +293 -0
  7. landmarkdiff/benchmark.py +213 -0
  8. landmarkdiff/checkpoint_manager.py +361 -0
  9. landmarkdiff/cli.py +252 -0
  10. landmarkdiff/clinical.py +223 -0
  11. landmarkdiff/conditioning.py +278 -0
  12. landmarkdiff/config.py +358 -0
  13. landmarkdiff/curriculum.py +191 -0
  14. landmarkdiff/data.py +405 -0
  15. landmarkdiff/data_version.py +301 -0
  16. landmarkdiff/displacement_model.py +745 -0
  17. landmarkdiff/ensemble.py +330 -0
  18. landmarkdiff/evaluation.py +415 -0
  19. landmarkdiff/experiment_tracker.py +231 -0
  20. landmarkdiff/face_verifier.py +947 -0
  21. landmarkdiff/fid.py +244 -0
  22. landmarkdiff/hyperparam.py +347 -0
  23. landmarkdiff/inference.py +754 -0
  24. landmarkdiff/landmarks.py +432 -0
  25. landmarkdiff/log.py +90 -0
  26. landmarkdiff/losses.py +348 -0
  27. landmarkdiff/manipulation.py +651 -0
  28. landmarkdiff/masking.py +316 -0
  29. landmarkdiff/metrics_agg.py +313 -0
  30. landmarkdiff/metrics_viz.py +464 -0
  31. landmarkdiff/model_registry.py +362 -0
  32. landmarkdiff/morphometry.py +342 -0
  33. landmarkdiff/postprocess.py +600 -0
  34. landmarkdiff/py.typed +0 -0
  35. landmarkdiff/safety.py +395 -0
  36. landmarkdiff/synthetic/__init__.py +23 -0
  37. landmarkdiff/synthetic/augmentation.py +188 -0
  38. landmarkdiff/synthetic/pair_generator.py +208 -0
  39. landmarkdiff/synthetic/tps_warp.py +273 -0
  40. landmarkdiff/validation.py +324 -0
  41. landmarkdiff-0.2.3.dist-info/METADATA +1173 -0
  42. landmarkdiff-0.2.3.dist-info/RECORD +46 -0
  43. landmarkdiff-0.2.3.dist-info/WHEEL +5 -0
  44. landmarkdiff-0.2.3.dist-info/entry_points.txt +2 -0
  45. landmarkdiff-0.2.3.dist-info/licenses/LICENSE +21 -0
  46. landmarkdiff-0.2.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,432 @@
1
+ """Facial landmark extraction using MediaPipe Face Mesh v2."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+
9
+ import cv2
10
+ import mediapipe as mp
11
+ import numpy as np
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
# Per-region drawing colors used by the visualization helpers.
# Tuples are in OpenCV channel order (blue, green, red).
REGION_COLORS: dict[str, tuple[int, int, int]] = {
    # face outline
    "jawline": (255, 255, 255),  # white
    # brows
    "eyebrow_left": (0, 255, 0),  # green
    "eyebrow_right": (0, 255, 0),  # green
    # eyes
    "eye_left": (255, 255, 0),  # cyan
    "eye_right": (255, 255, 0),  # cyan
    # mid-face
    "nose": (0, 255, 255),  # yellow
    "lips": (0, 0, 255),  # red
    # irises
    "iris_left": (255, 0, 255),  # magenta
    "iris_right": (255, 0, 255),  # magenta
}
27
+
28
# MediaPipe landmark indices grouped by named region.
# Insertion order matters to consumers that build an index -> color lookup:
# when an index appears in several regions, the region listed last wins.
LANDMARK_REGIONS: dict[str, list[int]] = {
    "jawline": [
        10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288,
        397, 365, 379, 378, 400, 377, 152, 148, 176, 149, 150, 136,
        172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
    ],
    "eye_left": [
        33, 7, 163, 144, 145, 153, 154, 155,
        133, 173, 157, 158, 159, 160, 161, 246,
    ],
    "eye_right": [
        362, 382, 381, 380, 374, 373, 390, 249,
        263, 466, 388, 387, 386, 385, 384, 398,
    ],
    "eyebrow_left": [70, 63, 105, 66, 107, 55, 65, 52, 53, 46],
    "eyebrow_right": [300, 293, 334, 296, 336, 285, 295, 282, 283, 276],
    "nose": [
        1, 2, 4, 5, 6, 19, 94, 141, 168, 195, 197, 236, 240,
        274, 275, 278, 279, 294, 326, 327, 360, 363, 370, 456, 460,
    ],
    "lips": [
        61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291,
        308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78,
    ],
    # Iris points occupy the last ten indices of the 478-point layout.
    "iris_left": list(range(468, 473)),
    "iris_right": list(range(473, 478)),
}
160
+
161
+
162
@dataclass(frozen=True)
class FaceLandmarks:
    """Extracted facial landmarks with metadata.

    Landmarks are stored in normalized image coordinates; the image size is
    kept alongside so they can be converted to pixels on demand.
    """

    landmarks: np.ndarray  # (478, 3) normalized (x, y, z)
    image_width: int  # width, in pixels, used by ``pixel_coords``
    image_height: int  # height, in pixels, used by ``pixel_coords``
    confidence: float  # detection confidence reported by the extractor

    @property
    def pixel_coords(self) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates (478, 2).

        Uses the stored ``image_width`` / ``image_height``.
        """
        # Single source of truth for the scaling logic.
        return self.pixel_coords_at(self.image_width, self.image_height)

    def pixel_coords_at(self, width: int, height: int) -> np.ndarray:
        """Convert normalized landmarks to pixel coordinates at a given size.

        Use this when the image has been resized after landmark extraction.

        Args:
            width: Target image width in pixels.
            height: Target image height in pixels.

        Returns:
            A new array holding the scaled (x, y) columns; the stored
            landmarks are not modified.
        """
        # Copy first: the in-place scaling below must not touch self.landmarks.
        coords = self.landmarks[:, :2].copy()
        coords[:, 0] *= width
        coords[:, 1] *= height
        return coords

    def rescale(self, width: int, height: int) -> FaceLandmarks:
        """Return a copy with updated image dimensions.

        Landmarks stay in normalized [0,1] space; only the stored
        width/height change, so ``pixel_coords`` returns values at
        the new resolution.
        """
        return FaceLandmarks(
            landmarks=self.landmarks.copy(),
            image_width=width,
            image_height=height,
            confidence=self.confidence,
        )

    def get_region(self, region: str) -> np.ndarray:
        """Return the landmark rows belonging to a named region.

        Args:
            region: A key of ``LANDMARK_REGIONS`` (e.g. ``"nose"``).

        Returns:
            The rows of ``landmarks`` selected by the region's index list,
            in that list's order. An unknown region name selects no rows
            rather than raising.
        """
        indices = LANDMARK_REGIONS.get(region, [])
        return self.landmarks[indices]
207
+
208
+
209
def extract_landmarks(
    image: np.ndarray,
    min_detection_confidence: float = 0.5,
    min_tracking_confidence: float = 0.5,
) -> FaceLandmarks | None:
    """Extract 478 facial landmarks from an image using MediaPipe Face Mesh.

    The Tasks API is tried first; if it raises for any reason the legacy
    Solutions API is tried instead.

    Args:
        image: BGR image as numpy array. Single-channel (grayscale) input is
            also accepted and expanded to three channels.
        min_detection_confidence: Minimum face detection confidence.
        min_tracking_confidence: Minimum landmark tracking confidence
            (only forwarded to the Solutions API fallback).

    Returns:
        FaceLandmarks if a face is detected, None otherwise. None is also
        returned when both MediaPipe backends raise; that case is logged at
        WARNING level so it can be told apart from "no face in the image".
    """
    h, w = image.shape[:2]

    # COLOR_BGR2RGB rejects single-channel data, so grayscale input is
    # expanded instead of letting cvtColor raise.
    if image.ndim == 2 or image.shape[2] == 1:
        rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    else:
        rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Try new Tasks API first (mediapipe >= 0.10.20), fall back to legacy solutions API
    try:
        landmarks, confidence = _extract_tasks_api(rgb, min_detection_confidence)
    except Exception:
        logger.debug("Tasks API unavailable, trying Solutions API", exc_info=True)
        try:
            landmarks, confidence = _extract_solutions_api(
                rgb, min_detection_confidence, min_tracking_confidence
            )
        except Exception:
            # Best effort is kept (no crash), but a broken backend must not be
            # silently indistinguishable from an image without a face.
            logger.warning("Both MediaPipe APIs failed", exc_info=True)
            return None

    if landmarks is None:
        return None

    return FaceLandmarks(
        landmarks=landmarks,
        image_width=w,
        image_height=h,
        confidence=confidence,
    )
249
+
250
+
251
def _extract_tasks_api(
    rgb: np.ndarray,
    min_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Extract landmarks using MediaPipe Tasks API (>= 0.10.20).

    The face landmarker model is cached in the system temp directory and
    downloaded on first use.

    Args:
        rgb: RGB image array handed to ``mp.Image``.
        min_confidence: Minimum face detection confidence.

    Returns:
        ``(landmarks, 1.0)`` with one ``(x, y, z)`` row per landmark when a
        face is found, ``(None, 0.0)`` otherwise.

    Raises:
        AttributeError: If the installed mediapipe lacks the Tasks API.
        OSError: If the model cannot be downloaded or written
            (``urllib.error.URLError`` is an ``OSError`` subclass).
    """
    import contextlib
    import os
    import shutil
    import tempfile
    import urllib.request

    # Resolve the API symbols before any network access so an installation
    # without the Tasks API fails fast and does not trigger a download.
    FaceLandmarker = mp.tasks.vision.FaceLandmarker
    FaceLandmarkerOptions = mp.tasks.vision.FaceLandmarkerOptions
    RunningMode = mp.tasks.vision.RunningMode
    BaseOptions = mp.tasks.BaseOptions

    # Download model if not cached.
    # NOTE(review): the cache lives at a predictable path in a shared temp
    # directory; on multi-user hosts consider a per-user cache directory.
    model_path = Path(tempfile.gettempdir()) / "face_landmarker_v2_with_blendshapes.task"
    try:
        cached = model_path.stat().st_size > 0
    except OSError:
        cached = False

    if not cached:
        url = "https://storage.googleapis.com/mediapipe-models/face_landmarker/face_landmarker/float16/1/face_landmarker.task"
        # Write to a sibling temp file and rename it into place: an
        # interrupted download must never leave a truncated file at
        # model_path, because that file would be treated as a valid cache.
        fd, partial_name = tempfile.mkstemp(
            prefix=f"{model_path.name}.",
            suffix=".part",
            dir=str(model_path.parent),
        )
        try:
            with os.fdopen(fd, "wb") as sink, urllib.request.urlopen(
                url, timeout=60
            ) as response:
                shutil.copyfileobj(response, sink)
            # mkstemp creates the file owner-only; restore ordinary read
            # permissions so the cache stays usable like a plain download.
            os.chmod(partial_name, 0o644)
            os.replace(partial_name, model_path)
        except BaseException:
            with contextlib.suppress(OSError):
                os.unlink(partial_name)
            raise

    options = FaceLandmarkerOptions(
        base_options=BaseOptions(model_asset_path=str(model_path)),
        running_mode=RunningMode.IMAGE,
        num_faces=1,
        min_face_detection_confidence=min_confidence,
        output_face_blendshapes=False,
        output_facial_transformation_matrixes=False,
    )

    with FaceLandmarker.create_from_options(options) as landmarker:
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb)
        result = landmarker.detect(mp_image)

    if not result.face_landmarks:
        return None, 0.0

    face_lms = result.face_landmarks[0]
    landmarks = np.array(
        [(lm.x, lm.y, lm.z) for lm in face_lms],
        dtype=np.float32,
    )

    # MediaPipe Tasks API doesn't expose per-landmark detection confidence;
    # return 1.0 to indicate successful detection
    return landmarks, 1.0
294
+
295
+
296
def _extract_solutions_api(
    rgb: np.ndarray,
    min_detection_confidence: float,
    min_tracking_confidence: float,
) -> tuple[np.ndarray | None, float]:
    """Run the legacy MediaPipe Solutions FaceMesh on a single RGB image.

    Returns ``(landmarks, 1.0)`` for the first detected face, or
    ``(None, 0.0)`` when no face is reported.
    """
    mesh_options = {
        "static_image_mode": True,
        "max_num_faces": 1,
        "refine_landmarks": True,
        "min_detection_confidence": min_detection_confidence,
        "min_tracking_confidence": min_tracking_confidence,
    }
    with mp.solutions.face_mesh.FaceMesh(**mesh_options) as detector:
        output = detector.process(rgb)

    detected_faces = output.multi_face_landmarks
    if not detected_faces:
        return None, 0.0

    points = [(pt.x, pt.y, pt.z) for pt in detected_faces[0].landmark]
    # The legacy API reports no detection score, so success is signalled as 1.0.
    return np.array(points, dtype=np.float32), 1.0
321
+
322
+
323
def visualize_landmarks(
    image: np.ndarray,
    face: FaceLandmarks,
    radius: int = 1,
    draw_regions: bool = True,
) -> np.ndarray:
    """Overlay one filled dot per landmark on a copy of ``image``.

    Args:
        image: BGR image used as the background; it is left untouched.
        face: Landmarks to draw, positioned via ``face.pixel_coords``.
        radius: Radius of each dot in pixels.
        draw_regions: When True, dots take their region's color and
            landmarks outside every region are gray; when False every
            dot is white.

    Returns:
        The annotated copy.
    """
    annotated = image.copy()
    positions = face.pixel_coords

    if not draw_regions:
        for px, py in positions:
            cv2.circle(annotated, (int(px), int(py)), radius, (255, 255, 255), -1)
        return annotated

    # Flatten region -> indices into index -> color; later regions override
    # earlier ones for shared indices.
    palette: dict[int, tuple[int, int, int]] = {
        landmark_idx: REGION_COLORS.get(region_name, (255, 255, 255))
        for region_name, members in LANDMARK_REGIONS.items()
        for landmark_idx in members
    }

    for landmark_idx, (px, py) in enumerate(positions):
        dot_color = palette.get(landmark_idx, (128, 128, 128))
        cv2.circle(annotated, (int(px), int(py)), radius, dot_color, -1)

    return annotated
359
+
360
+
361
def render_landmark_image(
    face: FaceLandmarks,
    width: int | None = None,
    height: int | None = None,
    radius: int = 2,
) -> np.ndarray:
    """Render MediaPipe face mesh tessellation on black canvas.

    Draws the full 2556-edge tessellation mesh that CrucibleAI/ControlNetMediaPipeFace
    was pre-trained on. This is critical -- the ControlNet expects dense triangulated
    wireframes, not sparse dots.

    Falls back to colored dots if tessellation connections aren't available.

    Args:
        face: Extracted face landmarks.
        width: Canvas width (defaults to face.image_width).
        height: Canvas height (defaults to face.image_height).
        radius: Dot radius; only used by the colored-dot fallback.

    Returns:
        BGR image with face mesh on black background.
    """
    w = width or face.image_width
    h = height or face.image_height
    canvas = np.zeros((h, w, 3), dtype=np.uint8)

    # Reuse the dataclass helper rather than duplicating the scaling, then
    # convert once to plain Python ints so OpenCV receives native int tuples.
    pts = face.pixel_coords_at(w, h).astype(np.int32).tolist()

    # Only the lookup of the connection tables is guarded: an error raised
    # while drawing is a real bug and must not silently degrade to dots.
    try:
        from mediapipe.tasks.python.vision.face_landmarker import FaceLandmarksConnections

        tessellation = FaceLandmarksConnections.FACE_LANDMARKS_TESSELATION
        contours = FaceLandmarksConnections.FACE_LANDMARKS_CONTOURS
    except (ImportError, AttributeError):
        tessellation = None
        contours = None

    if tessellation is not None and contours is not None:
        # Tessellation first (gray-white), contours on top (brighter, key features).
        for edges, color in (
            (tessellation, (192, 192, 192)),
            (contours, (255, 255, 255)),
        ):
            for conn in edges:
                cv2.line(
                    canvas,
                    tuple(pts[conn.start]),
                    tuple(pts[conn.end]),
                    color,
                    1,
                    cv2.LINE_AA,
                )
        return canvas

    # Fallback: draw colored dots if tessellation not available
    idx_to_color: dict[int, tuple[int, int, int]] = {}
    for region, indices in LANDMARK_REGIONS.items():
        color = REGION_COLORS.get(region, (128, 128, 128))
        for idx in indices:
            idx_to_color[idx] = color

    for i, (x, y) in enumerate(pts):
        cv2.circle(canvas, (x, y), radius, idx_to_color.get(i, (128, 128, 128)), -1)

    return canvas
425
+
426
+
427
def load_image(path: str | Path) -> np.ndarray:
    """Read the image at ``path`` with OpenCV and return the decoded array.

    Raises:
        FileNotFoundError: If OpenCV returns no image for ``path``.
    """
    decoded = cv2.imread(str(path))
    if decoded is not None:
        return decoded
    raise FileNotFoundError(f"Could not load image: {path}")
landmarkdiff/log.py ADDED
@@ -0,0 +1,90 @@
1
+ """Centralized logging configuration for LandmarkDiff.
2
+
3
+ Usage:
4
+ from landmarkdiff.log import get_logger
5
+ logger = get_logger(__name__)
6
+ logger.info("Training started")
7
+
8
+ Configure globally:
9
+ from landmarkdiff.log import setup_logging
10
+ setup_logging(level="DEBUG") # affects all LandmarkDiff loggers
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import sys
17
+
18
_CONFIGURED = False

# Name of the package-level logger that owns the handlers.
_PACKAGE_LOGGER = "landmarkdiff"

# Handlers installed by setup_logging(); tracked so an explicit
# reconfiguration replaces them instead of stacking duplicates.
_INSTALLED_HANDLERS: list[logging.Handler] = []

# Default format
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def _resolve_level(level: str | int) -> int | str:
    """Translate a level name into its numeric value; unknown names map to INFO."""
    if not isinstance(level, str):
        return level
    candidate = getattr(logging, level.strip().upper(), None)
    # Guard against upper-case attributes of ``logging`` that are not levels
    # (e.g. BASIC_FORMAT), which would make setLevel() raise.
    if isinstance(candidate, int) and not isinstance(candidate, bool):
        return candidate
    return logging.INFO


def setup_logging(
    level: str | int = "INFO",
    fmt: str | None = None,
    stream: object = None,
    log_file: str | None = None,
) -> None:
    """Configure logging for the landmarkdiff package.

    Call once at application startup. Subsequent calls always update the
    level. They re-install the handlers only when ``fmt``, ``stream`` or
    ``log_file`` is passed explicitly, so the implicit default configuration
    performed by ``get_logger()`` never locks out a later explicit setup.

    Args:
        level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
            Unrecognized names fall back to INFO.
        fmt: Custom format string. None uses the default.
        stream: Output stream. None defaults to stderr.
        log_file: Optional file path for logging to a file.
    """
    global _CONFIGURED

    root_logger = logging.getLogger(_PACKAGE_LOGGER)
    root_logger.setLevel(_resolve_level(level))

    explicit_handlers = fmt is not None or stream is not None or bool(log_file)
    if _CONFIGURED and not explicit_handlers:
        # Level-only update: keep the existing handlers untouched.
        return

    # Drop only the handlers this module installed earlier.
    for stale in _INSTALLED_HANDLERS:
        root_logger.removeHandler(stale)
        stale.close()
    _INSTALLED_HANDLERS.clear()

    formatter = logging.Formatter(
        fmt or LOG_FORMAT,
        datefmt=LOG_DATE_FORMAT,
    )

    handlers: list[logging.Handler] = [logging.StreamHandler(stream or sys.stderr)]
    if log_file:
        # Explicit UTF-8 so non-ASCII messages do not depend on the locale.
        handlers.append(logging.FileHandler(log_file, encoding="utf-8"))

    for handler in handlers:
        handler.setFormatter(formatter)
        root_logger.addHandler(handler)
        _INSTALLED_HANDLERS.append(handler)

    # Prevent propagation to root logger to avoid duplicate messages
    root_logger.propagate = False
    _CONFIGURED = True


def get_logger(name: str) -> logging.Logger:
    """Get a logger for a LandmarkDiff module.

    The returned logger is always a child of the 'landmarkdiff' root logger
    (or that logger itself), so setup_logging() controls its level and
    formatting. Names outside the package namespace, such as ``"__main__"``
    when called from a script, are prefixed with ``"landmarkdiff."``.

    Args:
        name: Module name (typically __name__).

    Returns:
        Configured logging.Logger instance.
    """
    # Ensure base configuration exists
    if not _CONFIGURED:
        setup_logging()

    if not name or name == _PACKAGE_LOGGER:
        return logging.getLogger(_PACKAGE_LOGGER)
    if not name.startswith(_PACKAGE_LOGGER + "."):
        name = f"{_PACKAGE_LOGGER}.{name}"
    return logging.getLogger(name)