modelinhos 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. modelinhos-0.0.0/.github/workflows/publish.yml +33 -0
  2. modelinhos-0.0.0/.github/workflows/tests.yml +40 -0
  3. modelinhos-0.0.0/.gitignore +10 -0
  4. modelinhos-0.0.0/.pre-commit-config.yaml +31 -0
  5. modelinhos-0.0.0/PKG-INFO +22 -0
  6. modelinhos-0.0.0/README.rst +4 -0
  7. modelinhos-0.0.0/TODO.md +21 -0
  8. modelinhos-0.0.0/modelinhos/__init__.py +0 -0
  9. modelinhos-0.0.0/modelinhos/blaze/__init__.py +0 -0
  10. modelinhos-0.0.0/modelinhos/blaze/blazenet.py +278 -0
  11. modelinhos-0.0.0/modelinhos/blaze/infer.py +93 -0
  12. modelinhos-0.0.0/modelinhos/blaze/postprocessing.py +302 -0
  13. modelinhos-0.0.0/modelinhos/blaze/to_onnx.py +53 -0
  14. modelinhos-0.0.0/modelinhos/coco.py +172 -0
  15. modelinhos-0.0.0/modelinhos/evaluation.py +284 -0
  16. modelinhos-0.0.0/modelinhos/infer/Makefile +10 -0
  17. modelinhos-0.0.0/modelinhos/infer/infer.cpp +76 -0
  18. modelinhos-0.0.0/modelinhos/plot.py +18 -0
  19. modelinhos-0.0.0/modelinhos/processing.py +36 -0
  20. modelinhos-0.0.0/modelinhos/sample.py +49 -0
  21. modelinhos-0.0.0/modelinhos/ssd/__init__.py +0 -0
  22. modelinhos-0.0.0/modelinhos/ssd/anchors.py +71 -0
  23. modelinhos-0.0.0/modelinhos/ssd/evaluate.py +96 -0
  24. modelinhos-0.0.0/modelinhos/ssd/inference.py +160 -0
  25. modelinhos-0.0.0/modelinhos/ssd/lite.py +226 -0
  26. modelinhos-0.0.0/modelinhos/ssd/load.py +34 -0
  27. modelinhos-0.0.0/modelinhos/ssd/retinanet.py +102 -0
  28. modelinhos-0.0.0/modelinhos.egg-info/PKG-INFO +22 -0
  29. modelinhos-0.0.0/modelinhos.egg-info/SOURCES.txt +43 -0
  30. modelinhos-0.0.0/modelinhos.egg-info/dependency_links.txt +1 -0
  31. modelinhos-0.0.0/modelinhos.egg-info/requires.txt +12 -0
  32. modelinhos-0.0.0/modelinhos.egg-info/top_level.txt +1 -0
  33. modelinhos-0.0.0/pyproject.toml +67 -0
  34. modelinhos-0.0.0/requirements.txt +3 -0
  35. modelinhos-0.0.0/setup.cfg +4 -0
  36. modelinhos-0.0.0/tests/__init__.py +0 -0
  37. modelinhos-0.0.0/tests/assets/annotations.json +27 -0
  38. modelinhos-0.0.0/tests/assets/person.jpg +0 -0
  39. modelinhos-0.0.0/tests/assets/person_800_1088.jpg +0 -0
  40. modelinhos-0.0.0/tests/ssd/conftest.py +14 -0
  41. modelinhos-0.0.0/tests/ssd/test_anchors.py +84 -0
  42. modelinhos-0.0.0/tests/ssd/test_inference.py +123 -0
  43. modelinhos-0.0.0/tests/ssd/test_models.py +59 -0
  44. modelinhos-0.0.0/tests/ssd/test_pipeline.py +55 -0
  45. modelinhos-0.0.0/tests/test_all.py +2 -0
@@ -0,0 +1,33 @@
1
+ name: publish
2
+
3
+ on:
4
+ release:
5
+ types: [created]
6
+
7
+ jobs:
8
+ deploy:
9
+
10
+ runs-on: ubuntu-latest
11
+
12
+ permissions:
13
+ id-token: write # Required for OIDC authentication
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v4
19
+ with:
20
+ python-version: '3.x'
21
+
22
+ - name: Install dependencies
23
+ run: |
24
+ python -m pip install --upgrade pip
25
+ pip install build
26
+
27
+ - name: Build package
28
+ run: python -m build
29
+
30
+ - name: Publish to PyPI
31
+ uses: pypa/gh-action-pypi-publish@release/v1
32
+ with:
33
+ verbose: true
@@ -0,0 +1,40 @@
1
+ name: tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - "**"
7
+
8
+ jobs:
9
+ build:
10
+ runs-on: ubuntu-latest
11
+
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v3
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v4
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ pip install -r requirements.txt
27
+ pip install .
28
+
29
+ - name: Run the tests
30
+ run: |
31
+ pytest tests/
32
+
33
+ - name: Run pre-commit checks
34
+ uses: pre-commit/action@v3.0.1
35
+
36
+ - name: Check package metadata rendering (twine check)
37
+ run: |
38
+ pip install build twine
39
+ python -m build --sdist --wheel
40
+ twine check dist/*
@@ -0,0 +1,10 @@
1
+ *.egg-*
2
+ *.pyc
3
+ .coverage*
4
+ .python-version
5
+ .vscode
6
+ *.png
7
+ *.onnx
8
+ *.bin
9
+ .DS_Store
10
+ datasets
@@ -0,0 +1,31 @@
1
+ repos:
2
+ - repo: https://github.com/pre-commit/pre-commit-hooks
3
+ rev: v4.0.1
4
+ hooks:
5
+ - id: check-added-large-files
6
+ args: ["--maxkb=1000"]
7
+ - id: check-json
8
+ - id: check-docstring-first
9
+ - id: end-of-file-fixer
10
+ - id: trailing-whitespace
11
+
12
+ - repo: https://github.com/astral-sh/ruff-pre-commit
13
+ rev: v0.4.4
14
+ hooks:
15
+ - id: ruff
16
+ args: [
17
+ "--config=pyproject.toml",
18
+ "--fix",
19
+ ]
20
+
21
+ - id: ruff-format
22
+ args: [
23
+ "--config=pyproject.toml"
24
+ ]
25
+
26
+ - repo: https://github.com/pre-commit/mirrors-mypy
27
+ rev: v0.921
28
+ hooks:
29
+ - id: mypy
30
+ additional_dependencies: ["types-PyYAML"]
31
+ args: ["--config-file", "pyproject.toml"]
@@ -0,0 +1,22 @@
1
+ Metadata-Version: 2.4
2
+ Name: modelinhos
3
+ Version: 0.0.0
4
+ Summary: Model zoo for very small models
5
+ Description-Content-Type: text/x-rst
6
+ Requires-Dist: opencv-python
7
+ Requires-Dist: numpy<2
8
+ Requires-Dist: torch
9
+ Requires-Dist: torchvision
10
+ Requires-Dist: onnx
11
+ Requires-Dist: dacite
12
+ Requires-Dist: dataclasses-json
13
+ Requires-Dist: mean_average_precision
14
+ Requires-Dist: tqdm
15
+ Requires-Dist: requests
16
+ Requires-Dist: matplotlib
17
+ Requires-Dist: joblib
18
+
19
+ Detection models
20
+ ================
21
+
22
+ This is a small model zoo of small detection models.
@@ -0,0 +1,4 @@
1
+ Detection models
2
+ ================
3
+
4
+ This is a small model zoo of small detection models.
@@ -0,0 +1,21 @@
1
+ Things to do:
2
+
3
+ - [x] See what happens when we remove anchors from retina-net -> man is not detected, ties are kinda detected
4
+ - [x] Add the ssd model
5
+ - [x] Simplify the anchor tests
6
+ - [x] Fix the load with mismatch for when new(n_classes) > old(n_classes)
7
+ - [x] Add the fit/transform/predict wrappers
8
+ - [x] Add the notion of samples instead of operating on dicts
9
+ - [x] Fix plotting with Samples
10
+ - [x] Add the toy-example
11
+ - [x] Adapt the mAP calculation
12
+ - [x] Add per-sample mAP calculation.
13
+ - [x] Download the COCO evaluation dataset
14
+ - [ ] Add a tool to select the thresholds
15
+ - [x] Add FP / FN calculation @ threshold for each image to detect the failures
16
+ - [x] Plot PR curve -- looking at these plots select the threshold
17
+ - [x] Add FP / FN calculation per image @ threshold -- already in evaluation
18
+ - [ ] Use the batched version by default
19
+ - [ ] Add the fcos inference
20
+ - [ ] Add the BlazeNet inference (anchors parametrization)
21
+ - [ ] Implement the BlazeNet tests
File without changes
File without changes
@@ -0,0 +1,278 @@
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+
6
+
7
class BlazeBlock(nn.Module):
    """Residual block: a depthwise conv followed by a 1x1 conv, added to a shortcut.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the block.
        kernel_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution. With ``stride == 2`` the
            shortcut is max-pooled so both branches end up the same size.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1):
        super().__init__()

        self.stride = stride
        # The shortcut is zero-padded by this many channels in ``forward``.
        self.channel_pad = out_channels - in_channels

        # TFLite pads the stride-2 depthwise conv slightly differently than
        # PyTorch, so in that case the conv itself gets no padding and the
        # input is padded by hand in ``forward``.
        downsampling = stride == 2
        if downsampling:
            self.max_pool = nn.MaxPool2d(kernel_size=stride, stride=stride)
        conv_padding = 0 if downsampling else (kernel_size - 1) // 2

        depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size,
            stride=stride,
            padding=conv_padding,
            groups=in_channels,
            bias=True,
        )
        pointwise = nn.Conv2d(
            in_channels,
            out_channels,
            1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.convs = nn.Sequential(depthwise, pointwise)

        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the block to ``x`` and return the activated sum of both branches."""
        if self.stride == 2:
            # Pad two zeros on the right/bottom for the conv branch and
            # downsample the shortcut branch.
            conv_input = F.pad(x, (0, 2, 0, 2), "constant", 0)
            shortcut = self.max_pool(x)
        else:
            conv_input = shortcut = x

        if self.channel_pad > 0:
            # Append zero channels so the shortcut matches the conv output.
            shortcut = F.pad(
                shortcut, (0, 0, 0, 0, 0, self.channel_pad), "constant", 0
            )

        return self.act(self.convs(conv_input) + shortcut)
55
+
56
+
57
class FinalBlazeBlock(nn.Module):
    """Stride-2 depthwise conv followed by a 1x1 conv, without a shortcut branch.

    Args:
        channels: Number of input and output channels.
        kernel_size: Kernel size of the depthwise convolution.
    """

    def __init__(self, channels, kernel_size=3):
        super().__init__()

        # TFLite pads the stride-2 depthwise conv slightly differently than
        # PyTorch, so the conv gets no padding here and the input is padded
        # by hand in ``forward``.
        depthwise = nn.Conv2d(
            channels,
            channels,
            kernel_size,
            stride=2,
            padding=0,
            groups=channels,
            bias=True,
        )
        pointwise = nn.Conv2d(
            channels,
            channels,
            1,
            stride=1,
            padding=0,
            bias=True,
        )
        self.convs = nn.Sequential(depthwise, pointwise)

        self.act = nn.ReLU(inplace=True)

    def forward(self, x):
        """Pad ``x`` on the right/bottom, run both convs and apply the ReLU."""
        padded = F.pad(x, (0, 2, 0, 2), "constant", 0)
        return self.act(self.convs(padded))
88
+
89
+
90
class BlazeNet(nn.Module):
    """The BlazeFace face detection model from MediaPipe.

    The MediaPipe version is simpler than the one in the paper: it does not
    use the "double" BlazeBlocks.

    The model is not meant to be trained here, so it has no batchnorm
    layers; TFLite has already "folded" them into the conv weights.

    The port to PyTorch is fairly direct. The main difference is how TFLite
    and PyTorch pad stride-2 conv layers, which is why padding is applied
    manually in a few places.

    Unlike the MediaPipe version, which handles a single image at a time,
    this one works on batches.

    Based on code from https://github.com/tkat0/PyTorch_BlazeFace/ and
    https://github.com/google/mediapipe/

    Args:
        back_model: Build the "back" variant (single deep backbone plus a
            final downsampling block) instead of the front variant.
    """

    def __init__(self, back_model=False):
        super().__init__()

        # Settings from the MediaPipe example graphs
        # mediapipe/graphs/face_detection/face_detection_mobile_gpu.pbtxt
        # and
        # mediapipe/graphs/face_detection/face_detection_back_mobile_gpu.pbtxt
        self.num_classes = 1
        self.num_anchors = 896
        self.num_coords = 16
        self.score_clipping_thresh = 100.0
        self.back_model = back_model

        # All four coordinate scales share one value per variant.
        scale = 256.0 if back_model else 128.0
        self.x_scale = scale
        self.y_scale = scale
        self.h_scale = scale
        self.w_scale = scale
        self.min_score_thresh = 0.65 if back_model else 0.75
        self.min_suppression_threshold = 0.3

        self._define_layers()

    def _define_layers(self):
        """Create the backbone(s) and the four 1x1 prediction heads."""

        def stem():
            # First conv has no padding; ``forward`` pads the image manually.
            return [
                nn.Conv2d(3, 24, kernel_size=5, stride=2, padding=0, bias=True),
                nn.ReLU(inplace=True),
            ]

        if self.back_model:
            # Four stages of seven stride-1 blocks, separated by stride-2
            # blocks that (after the first) also double the channel count.
            layers = stem()
            layers += [BlazeBlock(24, 24) for _ in range(7)]
            layers.append(BlazeBlock(24, 24, stride=2))
            layers += [BlazeBlock(24, 24) for _ in range(7)]
            layers.append(BlazeBlock(24, 48, stride=2))
            layers += [BlazeBlock(48, 48) for _ in range(7)]
            layers.append(BlazeBlock(48, 96, stride=2))
            layers += [BlazeBlock(96, 96) for _ in range(7)]
            self.backbone = nn.Sequential(*layers)
            self.final = FinalBlazeBlock(96)
            fine_channels = 96
        else:
            # (in_channels, out_channels, stride) for every block, in order.
            first_stage = [
                (24, 24, 1),
                (24, 28, 1),
                (28, 32, 2),
                (32, 36, 1),
                (36, 42, 1),
                (42, 48, 2),
                (48, 56, 1),
                (56, 64, 1),
                (64, 72, 1),
                (72, 80, 1),
                (80, 88, 1),
            ]
            layers = stem()
            layers += [
                BlazeBlock(c_in, c_out, stride=step)
                for c_in, c_out, step in first_stage
            ]
            self.backbone1 = nn.Sequential(*layers)

            second_stage = [BlazeBlock(88, 96, stride=2)]
            second_stage += [BlazeBlock(96, 96) for _ in range(4)]
            self.backbone2 = nn.Sequential(*second_stage)
            fine_channels = 88

        # The heads on the finer feature map emit 2 anchors per cell, the
        # ones on the coarser map 6 anchors per cell (16 coords per anchor).
        self.classifier_8 = nn.Conv2d(fine_channels, 2, 1, bias=True)
        self.classifier_16 = nn.Conv2d(96, 6, 1, bias=True)

        self.regressor_8 = nn.Conv2d(fine_channels, 32, 1, bias=True)
        self.regressor_16 = nn.Conv2d(96, 96, 1, bias=True)

    def forward(self, image):
        """Return ``[regressions, scores]`` for a batch of images.

        The shape comments below assume the spatial sizes noted in the
        original code (16x16 for the finer map, 8x8 for the coarser one),
        which yields 512 + 384 = 896 anchors -- TODO confirm the expected
        input resolution against the caller.
        """
        # TFLite uses slightly different padding on the first conv layer
        # than PyTorch, so do it manually.
        x = F.pad(image, (1, 2, 1, 2), "constant", 0)

        b = x.shape[0]  # batch size, needed for reshaping later

        if self.back_model:
            x = self.backbone(x)  # (b, 96, 16, 16)
            h = self.final(x)  # (b, 96, 8, 8)
        else:
            x = self.backbone1(x)  # (b, 88, 16, 16)
            h = self.backbone2(x)  # (b, 96, 8, 8)

        def per_anchor(head, features, width):
            # PyTorch is NCHW but TFLite is NHWC, so move the channels last
            # before splitting them into one row per anchor.
            return head(features).permute(0, 2, 3, 1).reshape(b, -1, width)

        c = torch.cat(
            (
                per_anchor(self.classifier_8, x, 1),  # (b, 512, 1)
                per_anchor(self.classifier_16, h, 1),  # (b, 384, 1)
            ),
            dim=1,
        )  # (b, 896, 1)

        r = torch.cat(
            (
                per_anchor(self.regressor_8, x, 16),  # (b, 512, 16)
                per_anchor(self.regressor_16, h, 16),  # (b, 384, 16)
            ),
            dim=1,
        )  # (b, 896, 16)
        return [r, c]
263
+
264
+
265
def load_weights(model: nn.Module, path):
    """Load a saved state dict from ``path`` into ``model`` and switch it to eval mode.

    Args:
        model: Module whose parameters are overwritten in place.
        path: Location of a checkpoint written with ``torch.save``.
    """
    # Deserialize onto the CPU so a checkpoint saved on a GPU can still be
    # loaded on a machine without one; ``load_state_dict`` then copies the
    # values onto whatever device the model's parameters live on.
    # NOTE(review): ``torch.load`` unpickles the file -- only use it on
    # checkpoints from a trusted source.
    state = torch.load(path, map_location="cpu")
    model.load_state_dict(state)
    model.eval()
268
+
269
+
270
def load_anchors(model: "BlazeNet", path):
    """Load the anchor array stored at ``path`` and attach it as ``model.anchors``.

    The array is converted to a float32 tensor on the same device as the
    model's ``classifier_8`` weights.

    Args:
        model: Model providing ``num_anchors`` and ``classifier_8``.
        path: Location of a NumPy file readable by ``np.load``.

    Raises:
        ValueError: If the loaded array is not of shape
            ``(model.num_anchors, 4)``. ``model.anchors`` is left untouched
            in that case.
    """
    anchors = torch.tensor(
        np.load(path),
        dtype=torch.float32,
        device=model.classifier_8.weight.device,
    )

    # Validate before assigning so a bad file never ends up on the model, and
    # raise explicitly because ``assert`` is stripped when running with -O.
    expected_shape = (model.num_anchors, 4)
    if anchors.ndimension() != 2 or tuple(anchors.shape) != expected_shape:
        raise ValueError(
            f"anchors must have shape {expected_shape}, "
            f"got {tuple(anchors.shape)}"
        )

    model.anchors = anchors
@@ -0,0 +1,93 @@
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+
5
+ from modelinhos.blazenet import BlazeNet
6
+
7
# Reference prediction that ``main`` compares the model output against: a
# single row of 17 values. ``plot`` reads the first four as a box and the next
# twelve as six (x, y) keypoints; the last value is presumably the detection
# score -- TODO confirm.
EXPECTED = np.array(
    [
        [
            0.2763, 0.3182, 0.4465, 0.4884,
            0.3830, 0.3150, 0.4561, 0.3202,
            0.4309, 0.3526, 0.4229, 0.3913,
            0.3182, 0.3373, 0.4769, 0.3464,
            0.9308,
        ]
    ],
)
30
+
31
+
32
def plot(image, detections, with_keypoints=True):
    """Draw detection boxes (and optionally keypoints) on a copy of ``image``.

    Each detection row is read as ``ymin, xmin, ymax, xmax`` followed by six
    ``(x, y)`` keypoint pairs, all multiplied by the image height/width
    before drawing.

    Args:
        image: Array whose first two dimensions are height and width.
        detections: Tensor or array with one row per detection; a single
            1-D row is accepted as well.
        with_keypoints: Also draw the six keypoints of every detection.

    Returns:
        The annotated copy; ``image`` itself is not modified.
    """
    canvas = image.copy()

    if isinstance(detections, torch.Tensor):
        detections = detections.cpu().numpy()

    # A lone detection may arrive as a flat vector; make it a 1-row matrix.
    if detections.ndim == 1:
        detections = detections[np.newaxis, :]

    print(f"Found {detections.shape[0]} faces")

    height = image.shape[0]
    width = image.shape[1]

    for det in detections:
        top = int(det[0] * height)
        left = int(det[1] * width)
        bottom = int(det[2] * height)
        right = int(det[3] * width)

        cv2.rectangle(
            canvas,
            (left, top),
            (right, bottom),
            color=(0, 0, 255),  # red in BGR
            thickness=1,
        )

        if not with_keypoints:
            continue

        # Keypoints occupy columns 4..15 as interleaved x, y values.
        for offset in range(4, 16, 2):
            point = (int(det[offset] * width), int(det[offset + 1] * height))
            cv2.circle(
                canvas,
                point,
                radius=2,
                color=(255, 200, 100),  # light-sky-blue-ish in BGR
                thickness=1,
            )

    return canvas
71
+
72
+
73
def main():
    """Run the front model on ``1face.png``, check the output and show it.

    Loads ``blazeface.pth`` and ``anchors.npy`` from the working directory,
    compares the prediction against ``EXPECTED`` and opens a window with the
    drawn detections until a key is pressed.

    Raises:
        FileNotFoundError: If ``1face.png`` cannot be read.
    """
    # ``load_weights`` / ``load_anchors`` are module-level functions taking
    # the model as first argument, not methods of ``BlazeNet``.
    # NOTE(review): the package ships the model as
    # modelinhos/blaze/blazenet.py, while this file's top-level import uses
    # ``modelinhos.blazenet`` -- confirm and align that import as well.
    from modelinhos.blaze.blazenet import load_anchors, load_weights

    front_net = BlazeNet()
    load_weights(front_net, "blazeface.pth")
    load_anchors(front_net, "anchors.npy")
    front_net.min_score_thresh = 0.75
    front_net.min_suppression_threshold = 0.3

    image = cv2.imread("1face.png")
    if image is None:
        # ``cv2.imread`` signals failure by returning None instead of raising.
        raise FileNotFoundError("1face.png")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # NOTE(review): ``predict_on_image`` is not defined on ``BlazeNet`` in
    # blazenet.py -- confirm where it is provided before relying on this.
    predictions = front_net.predict_on_image(image)
    np.testing.assert_almost_equal(
        predictions.cpu().numpy(),
        EXPECTED,
    )

    visualized = plot(image, predictions)
    cv2.imshow("Detections", visualized)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
90
+
91
+
92
+ if __name__ == "__main__":
93
+ main()