javiface 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
+ Metadata-Version: 2.1
+ Name: javiface
+ Version: 0.0.1
+ Summary: Efficient and accurate image-based head pose estimation
+ Home-page: https://github.com/thohemp/6DRepNet
+ Author: Javier Javier Daza
+ Author-email: javierjdaza@gmail.com
+ License: MIT
+ Platform: UNKNOWN
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Description-Content-Type: text/markdown
+
+ UNKNOWN
+
File without changes
@@ -0,0 +1,45 @@
+ import onnxruntime as ort
+ import numpy as np
+ from torchvision import transforms
+
+
+ class FaceVerifier:
+
+     # Execution providers in order of preference, with a display name for each.
+     PROVIDERS = [
+         ('CUDAExecutionProvider', 'CUDA'),
+         ('CoreMLExecutionProvider', 'CoreML'),
+         ('CPUExecutionProvider', 'CPU'),
+     ]
+
+     # Standard ImageNet preprocessing: resize, to-tensor, normalize.
+     TRANSFORM = transforms.Compose([
+         transforms.Resize((224, 224)),
+         transforms.ToTensor(),
+         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+     ])
+
+     def __init__(self, onnx_path):
+         available = ort.get_available_providers()
+         providers = [p for p, _ in self.PROVIDERS if p in available]
+         device = next(name for p, name in self.PROVIDERS if p in available)
+
+         self.sess = ort.InferenceSession(onnx_path, providers=providers)
+         self.device = device
+         print(f'FaceVerifier loaded — provider: {device}')
+
+     def get_embedding(self, image_pillow):
+         img = image_pillow.convert('RGB')
+         tensor = self.TRANSFORM(img).unsqueeze(0).numpy()  # (1, 3, 224, 224)
+         return self.sess.run(['embedding'], {'image': tensor})[0][0]
+
+     def compare(self, image_pillow_1, image_pillow_2, threshold):
+         embedding_1 = self.get_embedding(image_pillow_1)
+         embedding_2 = self.get_embedding(image_pillow_2)
+
+         # Cosine similarity; the explicit norms make this correct even if
+         # the model does not emit unit-length embeddings.
+         norm = np.linalg.norm(embedding_1) * np.linalg.norm(embedding_2)
+         similarity = float(np.dot(embedding_1, embedding_2) / norm)
+
+         return {
+             'similarity': similarity,
+             'same_person': similarity >= threshold,
+         }
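
Usage is not documented anywhere in the package, so here is a minimal sketch of how the class above would be called. The input name `image` and output name `embedding` come from the hard-coded values in `get_embedding`; the model path, image files, and threshold are hypothetical and would need tuning against the actual model:

    from PIL import Image

    verifier = FaceVerifier('face_verifier.onnx')  # hypothetical local model path
    result = verifier.compare(
        Image.open('person_a.jpg'),  # hypothetical sample images
        Image.open('person_b.jpg'),
        threshold=0.5,               # assumed value, not prescribed by the package
    )
    print(result['similarity'], result['same_person'])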
@@ -0,0 +1,270 @@
+ """
+ ===================================================================================================================
+ This is my own implementation of the RetinaFace model, using a ResNet34 ONNX file.
+ Based on: https://github.com/yakhyo/retinaface-pytorch/tree/main
+ Pre-Trained Model: https://github.com/yakhyo/retinaface-pytorch/releases/download/v0.0.1/retinaface_r34.onnx
+
+ @Author: javier.daza@mercadolibre.com.co
+ ===================================================================================================================
+ """
+
+ import math
+ from itertools import product
+ from typing import Tuple
+
+ import numpy as np
+ import onnxruntime as ort
+ import torch
+
+
+ class RetinaFaceONNXInference:
+     def __init__(
+         self,
+         model_path,
+         conf_threshold=0.02,
+         pre_nms_topk=5000,
+         nms_threshold=0.4,
+         post_nms_topk=750,
+         vis_threshold=0.9
+     ) -> None:
+         self.model_path = model_path
+         self.conf_threshold = conf_threshold
+         self.pre_nms_topk = pre_nms_topk
+         self.nms_threshold = nms_threshold
+         self.post_nms_topk = post_nms_topk
+         self.vis_threshold = vis_threshold
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+         # Load the ONNX model, preferring CUDA when available.
+         self.ort_session = ort.InferenceSession(model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+         self.provider = self.ort_session.get_providers()[0]
+         print(f'RetinaFace loaded — provider: {"CUDA" if self.provider == "CUDAExecutionProvider" else "CPU"}')
+
+         # Config for prior boxes
+         self.cfg = {
+             'name': 'resnet34',
+             'min_sizes': [[16, 32], [64, 128], [256, 512]],
+             'steps': [8, 16, 32],
+             'variance': [0.1, 0.2],
+             'clip': False,
+             'loc_weight': 2.0,
+             'batch_size': 32,
+             'epochs': 100,
+             'milestones': [70, 90],
+             'image_size': 640,
+             'pretrain': True,
+             'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3},
+             'in_channel': 64,
+             'out_channel': 128
+         }
+
+     @staticmethod
+     def preprocess_image(image, rgb_mean=(104, 117, 123)):
+         image = np.float32(image)
+         image -= rgb_mean
+         image = image.transpose(2, 0, 1)  # HWC to CHW
+         image = np.expand_dims(image, axis=0)  # Add batch dimension (1, C, H, W)
+         return image
+
+     @staticmethod
+     def nms(dets, threshold):
+         """
+         Apply Non-Maximum Suppression (NMS) to reduce overlapping bounding boxes based on a threshold.
+
+         Args:
+             dets (numpy.ndarray): Array of detections with each row as [x1, y1, x2, y2, score].
+             threshold (float): IoU threshold for suppression.
+
+         Returns:
+             list: Indices of bounding boxes retained after suppression.
+         """
+         x1 = dets[:, 0]
+         y1 = dets[:, 1]
+         x2 = dets[:, 2]
+         y2 = dets[:, 3]
+         scores = dets[:, 4]
+
+         areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+         order = scores.argsort()[::-1]
+
+         keep = []
+         while order.size > 0:
+             i = order[0]
+             keep.append(i)
+             # Intersection of the current box with all remaining boxes.
+             xx1 = np.maximum(x1[i], x1[order[1:]])
+             yy1 = np.maximum(y1[i], y1[order[1:]])
+             xx2 = np.minimum(x2[i], x2[order[1:]])
+             yy2 = np.minimum(y2[i], y2[order[1:]])
+
+             w = np.maximum(0.0, xx2 - xx1 + 1)
+             h = np.maximum(0.0, yy2 - yy1 + 1)
+             inter = w * h
+             ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+             # Keep only boxes whose IoU with the current box is below the threshold.
+             inds = np.where(ovr <= threshold)[0]
+             order = order[inds + 1]
+
+         return keep
+
+     @staticmethod
+     def decode(loc, priors, variances):
+         """
+         Decode locations from predictions using priors to undo
+         the encoding done for offset regression at train time.
+
+         Args:
+             loc (tensor): Location predictions for loc layers, shape: [num_priors, 4]
+             priors (tensor): Prior boxes in center-offset form, shape: [num_priors, 4]
+             variances (list[float]): Variances of prior boxes
+
+         Returns:
+             tensor: Decoded bounding box predictions
+         """
+         # Compute centers of predicted boxes
+         cxcy = priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:]
+
+         # Compute widths and heights of predicted boxes
+         wh = priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])
+
+         # Convert center, size to corner coordinates
+         boxes = torch.empty_like(loc)
+         boxes[:, :2] = cxcy - wh / 2  # xmin, ymin
+         boxes[:, 2:] = cxcy + wh / 2  # xmax, ymax
+
+         return boxes
+
+     @staticmethod
+     def decode_landmarks(predictions, priors, variances):
+         """
+         Decode landmarks from predictions using prior boxes to reverse the encoding done during training.
+
+         Args:
+             predictions (tensor): Landmark predictions for localization layers.
+                 Shape: [num_priors, 10] where each prior contains 5 landmark (x, y) pairs.
+             priors (tensor): Prior boxes in center-offset form.
+                 Shape: [num_priors, 4], where each prior has (cx, cy, width, height).
+             variances (list[float]): Variances of the prior boxes to scale the decoded values.
+
+         Returns:
+             landmarks (tensor): Decoded landmark predictions.
+                 Shape: [num_priors, 10] where each row contains the decoded (x, y) pairs for 5 landmarks.
+         """
+         # Reshape predictions to [num_priors, 5, 2] to handle each (x, y) pair in a batch
+         predictions = predictions.view(predictions.size(0), 5, 2)
+
+         # Perform the same operation on all landmark pairs at once
+         landmarks = priors[:, :2].unsqueeze(1) + predictions * variances[0] * priors[:, 2:].unsqueeze(1)
+
+         # Flatten back to [num_priors, 10]
+         landmarks = landmarks.view(landmarks.size(0), -1)
+
+         return landmarks
+
+     def infer(self, image_array):
+         # Preprocess the (H, W, C) input array
+         original_image = image_array
+         img_height, img_width, _ = original_image.shape
+         image = self.preprocess_image(original_image)
+
+         # Run ONNX model inference
+         outputs = self.ort_session.run(None, {'input': image})
+         loc, conf, landmarks = outputs[0].squeeze(0), outputs[1].squeeze(0), outputs[2].squeeze(0)
+
+         # Generate anchor boxes
+         priorbox = PriorBox(self.cfg, image_size=(img_height, img_width))
+         priors = priorbox.generate_anchors()
+
+         # Decode boxes and landmarks
+         boxes = self.decode(torch.tensor(loc), priors, self.cfg['variance']).to(self.device)
+         landmarks = self.decode_landmarks(torch.tensor(landmarks), priors, self.cfg['variance']).to(self.device)
+
+         # Scale boxes and landmarks back to pixel coordinates
+         bbox_scale = torch.tensor([img_width, img_height] * 2, device=self.device)
+         boxes = (boxes * bbox_scale).cpu().numpy()
+
+         landmark_scale = torch.tensor([img_width, img_height] * 5, device=self.device)
+         landmarks = (landmarks * landmark_scale).cpu().numpy()
+
+         scores = conf[:, 1]  # Confidence scores for class 1 (face)
+
+         # Filter by confidence threshold
+         inds = scores > self.conf_threshold
+         boxes, landmarks, scores = boxes[inds], landmarks[inds], scores[inds]
+
+         # Sort by scores
+         order = scores.argsort()[::-1][:self.pre_nms_topk]
+         boxes, landmarks, scores = boxes[order], landmarks[order], scores[order]
+
+         # Apply NMS
+         detections = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
+         keep = self.nms(detections, self.nms_threshold)
+         detections, landmarks = detections[keep], landmarks[keep]
+
+         # Keep top-k detections
+         detections, landmarks = detections[:self.post_nms_topk], landmarks[:self.post_nms_topk]
+
+         # Each returned row is [x1, y1, x2, y2, score, 5 landmark (x, y) pairs]
+         return np.concatenate((detections, landmarks), axis=1), original_image
+
+
+ class PriorBox:
+     def __init__(self, cfg: dict, image_size: Tuple[int, int]) -> None:
+         super().__init__()
+         self.image_size = image_size
+         self.clip = cfg['clip']
+         self.steps = cfg['steps']
+         self.min_sizes = cfg['min_sizes']
+         self.feature_maps = [
+             [math.ceil(self.image_size[0] / step), math.ceil(self.image_size[1] / step)] for step in self.steps
+         ]
+
+     def generate_anchors(self) -> torch.Tensor:
+         """Generate anchor boxes based on configuration and image size."""
+         anchors = []
+         for k, (map_height, map_width) in enumerate(self.feature_maps):
+             step = self.steps[k]
+             for i, j in product(range(map_height), range(map_width)):
+                 for min_size in self.min_sizes[k]:
+                     # Anchor size and center, normalized to [0, 1]
+                     s_kx = min_size / self.image_size[1]
+                     s_ky = min_size / self.image_size[0]
+                     cx = (j + 0.5) * step / self.image_size[1]
+                     cy = (i + 0.5) * step / self.image_size[0]
+                     anchors += [cx, cy, s_kx, s_ky]
+
+         # back to torch land
+         output = torch.tensor(anchors).view(-1, 4)
+         if self.clip:
+             output.clamp_(max=1, min=0)
+         return output
+
+
+ # --- Get Face Function ---
+ def crop_face_rf(detector, image_pillow, vis_threshold, expand_face_area=0.2):
+     image_array = np.array(image_pillow)
+     detections, _ = detector.infer(image_array)
+
+     # Use the caller's threshold when given, otherwise the detector default.
+     threshold = vis_threshold if vis_threshold is not None else detector.vis_threshold
+     detections = [d for d in detections if d[4] >= threshold]
+
+     if len(detections) > 0:
+         # Keep the largest detected face.
+         best = max(detections, key=lambda d: (d[2] - d[0]) * (d[3] - d[1]))
+         left, top, right, bottom = int(best[0]), int(best[1]), int(best[2]), int(best[3])
+
+         # Expand the box by expand_face_area on each side, clamped to the image.
+         w, h = image_pillow.size
+         pw = (right - left) * expand_face_area
+         ph = (bottom - top) * expand_face_area
+         left = max(0, int(left - pw))
+         top = max(0, int(top - ph))
+         right = min(w, int(right + pw))
+         bottom = min(h, int(bottom + ph))
+
+         return image_pillow.crop((left, top, right, bottom))
+     else:
+         return None
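
For reference, a minimal end-to-end sketch (not part of the package) wiring the detector into the verifier. It assumes `retinaface_r34.onnx` has been downloaded from the release URL in the module docstring; `face_verifier.onnx` and the image files are hypothetical, as in the earlier example:

    from PIL import Image

    detector = RetinaFaceONNXInference('retinaface_r34.onnx')
    face_1 = crop_face_rf(detector, Image.open('person_a.jpg'), vis_threshold=0.9)
    face_2 = crop_face_rf(detector, Image.open('person_b.jpg'), vis_threshold=0.9)
    if face_1 is not None and face_2 is not None:
        verifier = FaceVerifier('face_verifier.onnx')  # hypothetical path
        print(verifier.compare(face_1, face_2, threshold=0.5))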
@@ -0,0 +1,12 @@
+ """
+ Javi Face.
+
+ Accurate face comparison.
+ """
+
+ import os
+ import sys
+
+ sys.path.append(os.path.dirname(os.path.realpath(__file__)))
+
+ from javiface.JaviFace import JaviFace as JaviFace
+
+ __version__ = "0.0.1"
+ __author__ = 'Javier Javier Daza Olivella'
@@ -0,0 +1,17 @@
+ Metadata-Version: 2.1
+ Name: javiface
+ Version: 0.0.1
+ Summary: Efficient and accurate image-based head pose estimation
+ Home-page: https://github.com/thohemp/6DRepNet
+ Author: Javier Javier Daza
+ Author-email: javierjdaza@gmail.com
+ License: MIT
+ Platform: UNKNOWN
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Description-Content-Type: text/markdown
+
+ UNKNOWN
+
@@ -0,0 +1,10 @@
+ README.md
+ setup.py
+ javiface/JaviFace.py
+ javiface/RetinaFace.py
+ javiface/__init__.py
+ javiface.egg-info/PKG-INFO
+ javiface.egg-info/SOURCES.txt
+ javiface.egg-info/dependency_links.txt
+ javiface.egg-info/requires.txt
+ javiface.egg-info/top_level.txt
@@ -0,0 +1,22 @@
+ coloredlogs==15.0.1
+ filelock==3.25.2
+ flatbuffers==25.12.19
+ fsspec==2026.2.0
+ humanfriendly==10.0
+ Jinja2==3.1.6
+ MarkupSafe==3.0.3
+ ml_dtypes==0.5.4
+ mpmath==1.3.0
+ networkx==3.4.2
+ numpy==2.2.6
+ onnx==1.20.1
+ onnx-ir==0.2.0
+ onnxruntime==1.23.2
+ onnxscript==0.6.2
+ packaging==26.0
+ pillow==12.1.1
+ protobuf==7.34.1
+ sympy==1.14.0
+ torch==2.11.0
+ torchvision==0.26.0
+ typing_extensions==4.15.0
@@ -0,0 +1 @@
+ javiface
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -0,0 +1,36 @@
+ from setuptools import setup, find_packages
+ import codecs
+ import os.path
+
+
+ def read(rel_path):
+     here = os.path.abspath(os.path.dirname(__file__))
+     with codecs.open(os.path.join(here, rel_path), 'r') as fp:
+         return fp.read()
+
+
+ def get_version(rel_path):
+     # Scan for the __version__ line instead of importing the package,
+     # which would pull in its runtime dependencies at build time.
+     for line in read(rel_path).splitlines():
+         if line.startswith('__version__'):
+             delim = '"' if '"' in line else "'"
+             return line.split(delim)[1]
+     raise RuntimeError("Unable to find version string.")
+
+
+ setup(
+     name='javiface',
+     version=get_version("javiface/__init__.py"),
+     description='Efficient and accurate image-based head pose estimation',
+     long_description=read("README.md"),
+     long_description_content_type="text/markdown",
+     url='https://github.com/thohemp/6DRepNet',
+     author='Javier Javier Daza',
+     author_email='javierjdaza@gmail.com',
+     license='MIT',
+     packages=find_packages(),
+     install_requires=read('requirements.txt').splitlines(),
+     classifiers=[
+         'Programming Language :: Python :: 3',
+         'Intended Audience :: Science/Research',
+         'License :: OSI Approved :: MIT License',
+         'Operating System :: OS Independent',
+     ],
+ )
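
`get_version` above follows the single-source-version pattern: the version string lives only in `javiface/__init__.py`, and setup.py extracts it textually. A short illustration (not in the package) of the string parsing it relies on:

    line = '__version__ = "0.0.1"'       # the line it scans for in javiface/__init__.py
    delim = '"' if '"' in line else "'"  # tolerate either quote style
    print(line.split(delim)[1])          # prints: 0.0.1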