circe 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data/face_detection_yunet_2023mar.onnx +0 -0
- data/data/{yolov5s.onnx → yolov5su-sim.onnx} +0 -0
- data/data/yolov5su.onnx +0 -0
- data/data/yolov8s-sim.onnx +0 -0
- data/data/yolov8s.onnx +0 -0
- data/ext/circe.cpp +171 -107
- data/ext/extconf.rb +1 -1
- data/ext/yolo.cpp +124 -58
- data/ext/yolo.h +9 -5
- data/ext/yunet.h +22 -22
- data/lib/circe/version.rb +1 -1
- data/lib/circe.rb +3 -3
- metadata +8 -6
- data/data/face_detection_yunet_2022mar.onnx +0 -0
- data/ext/yunet.cpp +0 -132
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d735b7de777b161f27ca0b0d50282eca7e18f1cd9e5aa6e298e089f6ccd2b89
|
4
|
+
data.tar.gz: 5ba4acf8e3c3ef5950c981de11a31f4bdcdb7a0fb1cc4c534b1e62f96a99fb3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ac69c1f5b7d3af53db342306629fc880ae8b6386ba0ee4ee02fcec391fcab29b9acc3aa972a01f2d99b31cf0267861d6553d2cdc54b839e2a36648801b83fd3
|
7
|
+
data.tar.gz: 76a3084d3b0fc496d39e9efc04733b17a64a31e5b4285ad450bf1dc681db132671febe821ae3ae717b68771a20901f28247ae62c89a2ec57a013ef4ca7bb6c4b
|
Binary file
|
Binary file
|
data/data/yolov5su.onnx
ADDED
Binary file
|
Binary file
|
data/data/yolov8s.onnx
ADDED
Binary file
|
data/ext/circe.cpp
CHANGED
@@ -91,6 +91,7 @@ static VALUE eCirceError = Qundef;
|
|
91
91
|
#include <chrono>
|
92
92
|
#include <opencv2/highgui.hpp>
|
93
93
|
#include <opencv2/imgproc.hpp>
|
94
|
+
#include <opencv2/core/mat.hpp>
|
94
95
|
|
95
96
|
#include "yolo.h"
|
96
97
|
#include "yunet.h"
|
@@ -119,6 +120,10 @@ static ID id_class;
|
|
119
120
|
static ID id_png;
|
120
121
|
static ID id_jpg;
|
121
122
|
|
123
|
+
static ID id_label;
|
124
|
+
static ID id_thickness;
|
125
|
+
static ID id_extra;
|
126
|
+
static ID id_color;
|
122
127
|
|
123
128
|
static Yolo *yolo;
|
124
129
|
static YuNet *yunet;
|
@@ -143,85 +148,157 @@ draw_label(cv::Mat& img, string label, Point& origin,
|
|
143
148
|
}
|
144
149
|
|
145
150
|
static void
|
146
|
-
|
147
|
-
|
148
|
-
int thickness = 1) {
|
151
|
+
draw_box(cv::Mat& img, Rect& box,
|
152
|
+
Scalar& framecolor = BLUE, int thickness = 1) {
|
149
153
|
|
150
154
|
Point a = { box.x, box.y };
|
151
155
|
Point b = { box.x + box.width, box.y + box.height };
|
152
156
|
|
153
157
|
cv::rectangle(img, a, b, framecolor, thickness);
|
154
|
-
draw_label(img, label, a, textcolor, framecolor);
|
155
158
|
}
|
156
159
|
|
160
|
+
static void
|
161
|
+
draw_labelbox(cv::Mat& img, string label, Rect& box,
|
162
|
+
Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
|
163
|
+
int thickness = 1) {
|
157
164
|
|
165
|
+
Point o = { box.x, box.y };
|
166
|
+
|
167
|
+
draw_box(img, box, framecolor, thickness);
|
168
|
+
draw_label(img, label, o, textcolor, framecolor);
|
169
|
+
}
|
170
|
+
|
171
|
+
|
172
|
+
VALUE
|
173
|
+
circe_annotate(Mat& img, Rect& box, VALUE v_annotation, int *state) {
|
174
|
+
if (img.empty() || NIL_P(v_annotation))
|
175
|
+
return Qnil;
|
176
|
+
|
177
|
+
VALUE v_label = Qnil;
|
178
|
+
VALUE v_color = ULONG2NUM(0x0000ff);
|
179
|
+
VALUE v_thickness = INT2NUM(1);
|
180
|
+
VALUE v_extra = Qtrue;
|
181
|
+
|
182
|
+
VALUE s_label = rb_id2sym(id_label);
|
183
|
+
VALUE s_color = rb_id2sym(id_color);
|
184
|
+
VALUE s_thickness = rb_id2sym(id_thickness);
|
185
|
+
VALUE s_extra = rb_id2sym(id_extra);
|
186
|
+
|
187
|
+
switch (TYPE(v_annotation)) {
|
188
|
+
case T_NIL:
|
189
|
+
break;
|
190
|
+
case T_HASH:
|
191
|
+
v_thickness = rb_hash_lookup2(v_annotation, s_thickness, v_thickness);
|
192
|
+
v_color = rb_hash_lookup2(v_annotation, s_color, v_color );
|
193
|
+
v_label = rb_hash_lookup2(v_annotation, s_label, v_label );
|
194
|
+
v_extra = rb_hash_lookup2(v_annotation, s_extra, v_extra );
|
195
|
+
break;
|
196
|
+
case T_ARRAY:
|
197
|
+
switch(RARRAY_LENINT(v_annotation)) {
|
198
|
+
default:
|
199
|
+
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
200
|
+
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
201
|
+
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
202
|
+
case 0: break;
|
203
|
+
}
|
204
|
+
break;
|
205
|
+
case T_STRING:
|
206
|
+
v_label = v_annotation;
|
207
|
+
break;
|
208
|
+
}
|
209
|
+
|
210
|
+
// No color, no rendering
|
211
|
+
if (NIL_P(v_color))
|
212
|
+
return Qnil;
|
213
|
+
|
214
|
+
long rgb = NUM2ULONG(v_color);
|
215
|
+
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
216
|
+
(rgb >> 8) & 0xFF,
|
217
|
+
(rgb >> 16) & 0xFF);
|
218
|
+
|
219
|
+
if (! NIL_P(v_thickness)) {
|
220
|
+
int thickness = NUM2INT(v_thickness);
|
221
|
+
draw_box(img, box, color, thickness);
|
222
|
+
}
|
223
|
+
if (! NIL_P(v_label)) {
|
224
|
+
string label = StringValueCStr(v_label);
|
225
|
+
Point o = { box.x, box.y };
|
226
|
+
draw_label(img, label, o, BLACK, color);
|
227
|
+
}
|
228
|
+
|
229
|
+
// Return normalized parameters
|
230
|
+
VALUE r = rb_hash_new();
|
231
|
+
rb_hash_aset(r, s_label, v_label );
|
232
|
+
rb_hash_aset(r, s_color, v_color );
|
233
|
+
rb_hash_aset(r, s_thickness, v_thickness);
|
234
|
+
rb_hash_aset(r, s_extra, v_extra );
|
235
|
+
return r;
|
236
|
+
}
|
158
237
|
|
159
238
|
|
160
239
|
|
161
240
|
void
|
162
|
-
yunet_process_features(
|
163
|
-
Mat& img, VALUE v_features, int *state)
|
241
|
+
yunet_process_features(cv::Mat& faces, Mat& img, VALUE v_features, int *state)
|
164
242
|
{
|
165
|
-
for (int i = 0; i < faces.
|
166
|
-
|
167
|
-
|
243
|
+
for (int i = 0; i < faces.rows; i++) {
|
244
|
+
// Face
|
245
|
+
int x_f = static_cast<int>(faces.at<float>(i, 0));
|
246
|
+
int y_f = static_cast<int>(faces.at<float>(i, 1));
|
247
|
+
int w_f = static_cast<int>(faces.at<float>(i, 2));
|
248
|
+
int h_f = static_cast<int>(faces.at<float>(i, 3));
|
249
|
+
// Right eye
|
250
|
+
int x_re = static_cast<int>(faces.at<float>(i, 4));
|
251
|
+
int y_re = static_cast<int>(faces.at<float>(i, 5));
|
252
|
+
// Left eye
|
253
|
+
int x_le = static_cast<int>(faces.at<float>(i, 6));
|
254
|
+
int y_le = static_cast<int>(faces.at<float>(i, 7));
|
255
|
+
// Nose tip
|
256
|
+
int x_nt = static_cast<int>(faces.at<float>(i, 8));
|
257
|
+
int y_nt = static_cast<int>(faces.at<float>(i, 9));
|
258
|
+
// Right corner mouth
|
259
|
+
int x_rcm = static_cast<int>(faces.at<float>(i, 10));
|
260
|
+
int y_rcm = static_cast<int>(faces.at<float>(i, 11));
|
261
|
+
// Left corner mouth
|
262
|
+
int x_lcm = static_cast<int>(faces.at<float>(i, 12));
|
263
|
+
int y_lcm = static_cast<int>(faces.at<float>(i, 13));
|
264
|
+
// Confidence
|
265
|
+
float confidence = faces.at<float>(i, 14);
|
168
266
|
|
169
|
-
|
267
|
+
VALUE v_type = ID2SYM(id_face);
|
170
268
|
VALUE v_box = rb_ary_new_from_args(4,
|
171
|
-
|
172
|
-
|
269
|
+
INT2NUM(x_f), INT2NUM(y_f),
|
270
|
+
INT2NUM(w_f), INT2NUM(h_f));
|
173
271
|
VALUE v_landmark = rb_ary_new_from_args(5,
|
174
|
-
rb_ary_new_from_args(2, INT2NUM(
|
175
|
-
INT2NUM(
|
176
|
-
|
177
|
-
INT2NUM(
|
178
|
-
|
179
|
-
INT2NUM(
|
180
|
-
|
181
|
-
INT2NUM(
|
182
|
-
rb_ary_new_from_args(2, INT2NUM(
|
183
|
-
|
184
|
-
VALUE
|
185
|
-
|
272
|
+
rb_ary_new_from_args(2, INT2NUM(x_re),
|
273
|
+
INT2NUM(y_re)),
|
274
|
+
rb_ary_new_from_args(2, INT2NUM(x_le),
|
275
|
+
INT2NUM(y_le)),
|
276
|
+
rb_ary_new_from_args(2, INT2NUM(x_nt),
|
277
|
+
INT2NUM(y_nt)),
|
278
|
+
rb_ary_new_from_args(2, INT2NUM(x_rcm),
|
279
|
+
INT2NUM(y_rcm)),
|
280
|
+
rb_ary_new_from_args(2, INT2NUM(x_lcm),
|
281
|
+
INT2NUM(y_lcm)));
|
282
|
+
VALUE v_confidence = DBL2NUM(confidence);
|
283
|
+
VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box, v_landmark,
|
284
|
+
v_confidence);
|
285
|
+
|
186
286
|
rb_ary_push(v_features, v_feature);
|
287
|
+
|
187
288
|
|
188
289
|
if (!img.empty() && rb_block_given_p()) {
|
189
|
-
|
190
|
-
VALUE
|
191
|
-
VALUE
|
192
|
-
VALUE
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
default:
|
202
|
-
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
203
|
-
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
204
|
-
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
205
|
-
case 0: break;
|
206
|
-
}
|
207
|
-
break;
|
208
|
-
case T_STRING:
|
209
|
-
v_label = v_annotation;
|
210
|
-
break;
|
211
|
-
}
|
212
|
-
|
213
|
-
if (! NIL_P(v_label)) {
|
214
|
-
string label = StringValueCStr(v_label);
|
215
|
-
long rgb = NUM2ULONG(v_color);
|
216
|
-
int thickness = NUM2INT(v_thickness);
|
217
|
-
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
218
|
-
(rgb >> 8) & 0xFF,
|
219
|
-
(rgb >> 16) & 0xFF);
|
220
|
-
draw_labelbox(img, label, box, color, BLACK, thickness);
|
221
|
-
|
222
|
-
for (const auto& p : lmark) {
|
223
|
-
cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
|
224
|
-
}
|
290
|
+
cv::Rect box = cv::Rect(x_f, y_f, w_f, h_f);
|
291
|
+
VALUE v_annotation = rb_yield_splat(v_feature);
|
292
|
+
VALUE cfg = circe_annotate(img, box, v_annotation, state);
|
293
|
+
VALUE s_extra = rb_id2sym(id_extra);
|
294
|
+
|
295
|
+
if (!NIL_P(cfg) && RTEST(rb_hash_aref(cfg, s_extra))) {
|
296
|
+
cv::Scalar color = cv::Scalar(255, 0, 0);
|
297
|
+
cv::circle(img, cv::Point(x_le, y_le ), 3, color, 2);
|
298
|
+
cv::circle(img, cv::Point(x_re, y_re ), 3, color, 2);
|
299
|
+
cv::circle(img, cv::Point(x_nt, y_nt ), 3, color, 2);
|
300
|
+
cv::circle(img, cv::Point(x_rcm, y_rcm), 3, color, 2);
|
301
|
+
cv::circle(img, cv::Point(x_lcm, y_lcm), 3, color, 2);
|
225
302
|
}
|
226
303
|
}
|
227
304
|
}
|
@@ -239,50 +316,18 @@ yolo_process_features(vector<Yolo::Item>& items,
|
|
239
316
|
Rect box = std::get<2>(items[i]);
|
240
317
|
|
241
318
|
VALUE v_type = ID2SYM(id_class);
|
242
|
-
VALUE v_name = rb_str_new(name.c_str(), name.size());
|
243
|
-
VALUE v_confidence = DBL2NUM(confidence);
|
244
319
|
VALUE v_box = rb_ary_new_from_args(4,
|
245
320
|
INT2NUM(box.x ), INT2NUM(box.y ),
|
246
321
|
INT2NUM(box.width), INT2NUM(box.height));
|
322
|
+
VALUE v_name = rb_str_new(name.c_str(), name.size());
|
323
|
+
VALUE v_confidence = DBL2NUM(confidence);
|
247
324
|
VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box,
|
248
325
|
v_name, v_confidence);
|
249
326
|
rb_ary_push(v_features, v_feature);
|
250
327
|
|
251
|
-
if (rb_block_given_p()) {
|
328
|
+
if (!img.empty() && rb_block_given_p()) {
|
252
329
|
VALUE v_annotation = rb_yield_splat(v_feature);
|
253
|
-
|
254
|
-
VALUE v_label = Qnil;
|
255
|
-
VALUE v_color = ULONG2NUM(0x0000ff);
|
256
|
-
VALUE v_thickness = INT2NUM(1);
|
257
|
-
|
258
|
-
switch (TYPE(v_annotation)) {
|
259
|
-
case T_NIL:
|
260
|
-
break;
|
261
|
-
case T_HASH:
|
262
|
-
break;
|
263
|
-
case T_ARRAY:
|
264
|
-
switch(RARRAY_LENINT(v_annotation)) {
|
265
|
-
default:
|
266
|
-
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
267
|
-
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
268
|
-
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
269
|
-
case 0: break;
|
270
|
-
}
|
271
|
-
break;
|
272
|
-
case T_STRING:
|
273
|
-
v_label = v_annotation;
|
274
|
-
break;
|
275
|
-
}
|
276
|
-
|
277
|
-
if (! NIL_P(v_label)) {
|
278
|
-
string label = StringValueCStr(v_label);
|
279
|
-
long rgb = NUM2ULONG(v_color);
|
280
|
-
int thickness = NUM2INT(v_thickness);
|
281
|
-
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
282
|
-
(rgb >> 8) & 0xFF,
|
283
|
-
(rgb >> 16) & 0xFF);
|
284
|
-
draw_labelbox(img, label, box, color, BLACK, thickness);
|
285
|
-
}
|
330
|
+
circe_annotate(img, box, v_annotation, state);
|
286
331
|
}
|
287
332
|
}
|
288
333
|
}
|
@@ -337,9 +382,10 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
|
|
337
382
|
}
|
338
383
|
|
339
384
|
if (RTEST(v_face)) {
|
340
|
-
|
385
|
+
cv::Mat faces;
|
341
386
|
yunet->process(i_img, faces);
|
342
387
|
yunet_process_features(faces, o_img, v_features, &state);
|
388
|
+
faces.release();
|
343
389
|
if (state) goto exception;
|
344
390
|
}
|
345
391
|
|
@@ -363,7 +409,12 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
|
|
363
409
|
std::vector<uchar> buf;
|
364
410
|
cv::imencode(format, o_img, buf);
|
365
411
|
v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
|
412
|
+
buf.clear();
|
366
413
|
}
|
414
|
+
|
415
|
+
i_img.release();
|
416
|
+
o_img.release();
|
417
|
+
|
367
418
|
return rb_ary_new_from_args(2, v_features, v_image);
|
368
419
|
|
369
420
|
exception:
|
@@ -382,22 +433,35 @@ void Init_core(void) {
|
|
382
433
|
eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
|
383
434
|
// myclass = rb_const_get(mymodule, sym_myclass);
|
384
435
|
|
385
|
-
VALUE v_onnx_yolo
|
386
|
-
VALUE
|
436
|
+
VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
|
437
|
+
VALUE v_yolo_path = RARRAY_AREF(v_onnx_yolo, 0);
|
438
|
+
VALUE v_yolo_height = RARRAY_AREF(v_onnx_yolo, 1);
|
439
|
+
VALUE v_yolo_width = RARRAY_AREF(v_onnx_yolo, 2);
|
440
|
+
|
441
|
+
VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
|
442
|
+
VALUE v_yunet_path = RARRAY_AREF(v_onnx_yunet, 0);
|
387
443
|
|
388
|
-
|
389
|
-
|
444
|
+
|
445
|
+
|
446
|
+
static Yolo _yolo = { StringValueCStr(v_yolo_path ),
|
447
|
+
{ NUM2INT(v_yolo_width),
|
448
|
+
NUM2INT(v_yolo_height) }};
|
449
|
+
static YuNet _yunet = { StringValueCStr(v_yunet_path) };
|
390
450
|
|
391
451
|
yolo = &_yolo;
|
392
452
|
yunet = &_yunet;
|
393
453
|
|
394
454
|
|
395
|
-
id_debug = rb_intern_const("debug"
|
396
|
-
id_face = rb_intern_const("face"
|
397
|
-
id_classify = rb_intern_const("classify");
|
398
|
-
id_class = rb_intern_const("class"
|
399
|
-
id_png = rb_intern_const("png"
|
400
|
-
id_jpg = rb_intern_const("jpg"
|
455
|
+
id_debug = rb_intern_const("debug" );
|
456
|
+
id_face = rb_intern_const("face" );
|
457
|
+
id_classify = rb_intern_const("classify" );
|
458
|
+
id_class = rb_intern_const("class" );
|
459
|
+
id_png = rb_intern_const("png" );
|
460
|
+
id_jpg = rb_intern_const("jpg" );
|
461
|
+
id_label = rb_intern_const("label" );
|
462
|
+
id_thickness = rb_intern_const("thickness");
|
463
|
+
id_extra = rb_intern_const("extra" );
|
464
|
+
id_color = rb_intern_const("color" );
|
401
465
|
|
402
466
|
|
403
467
|
rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
|
data/ext/extconf.rb
CHANGED
data/ext/yolo.cpp
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
/*
|
2
|
+
* https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-CPP-Inference/inference.cpp
|
3
|
+
* https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp
|
4
|
+
*
|
5
|
+
* yolov5 has an output of shape:
|
6
|
+
* (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
|
7
|
+
* yolov8 has an output of shape:
|
8
|
+
* (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
|
9
|
+
*
|
10
|
+
* yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
|
11
|
+
*/
|
1
12
|
#include <tuple>
|
2
13
|
#include <string>
|
3
14
|
#include <opencv2/imgproc.hpp>
|
@@ -6,89 +17,144 @@
|
|
6
17
|
#include "yolo.h"
|
7
18
|
|
8
19
|
|
9
|
-
Yolo::Yolo(const std::string& model) {
|
10
|
-
net
|
20
|
+
Yolo::Yolo(const std::string& model, cv::Size size) {
|
21
|
+
this->net = cv::dnn::readNetFromONNX(model);
|
22
|
+
this->size = size;
|
11
23
|
}
|
12
24
|
|
13
25
|
|
14
26
|
void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {
|
27
|
+
int version = 5;
|
15
28
|
|
29
|
+
cv::Mat input = img;
|
30
|
+
if (letterBoxForSquare && size.width == size.height)
|
31
|
+
input = formatToSquare(input);
|
32
|
+
|
16
33
|
// Pre-process
|
17
34
|
cv::Mat blob;
|
18
|
-
|
19
|
-
|
20
|
-
cv::dnn::blobFromImage(img, blob, 1./255.,
|
21
|
-
cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
|
22
|
-
cv::Scalar(), true, false);
|
35
|
+
cv::dnn::blobFromImage(input, blob, 1./255.,
|
36
|
+
this->size, cv::Scalar(), true, false);
|
23
37
|
|
24
38
|
// Process
|
39
|
+
std::vector<cv::Mat> outputs;
|
25
40
|
net.setInput(blob);
|
26
41
|
net.forward(outputs, net.getUnconnectedOutLayersNames());
|
27
42
|
|
28
|
-
//
|
29
|
-
|
30
|
-
|
31
|
-
|
43
|
+
// Output
|
44
|
+
int rows = outputs[0].size[1];
|
45
|
+
int dimensions = outputs[0].size[2];
|
46
|
+
|
47
|
+
// Infer yolo version
|
48
|
+
// -> Check if the shape[2] is more than shape[1] (yolov8)
|
49
|
+
if (dimensions > rows) {
|
50
|
+
version = 8;
|
51
|
+
}
|
52
|
+
|
53
|
+
// Adjust according to version
|
54
|
+
if (version == 8) {
|
55
|
+
// cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
|
56
|
+
rows = outputs[0].size[2];
|
57
|
+
dimensions = outputs[0].size[1];
|
58
|
+
outputs[0] = outputs[0].reshape(1, dimensions);
|
59
|
+
cv::transpose(outputs[0], outputs[0]);
|
60
|
+
}
|
61
|
+
|
62
|
+
// Output
|
63
|
+
float *data = (float *)outputs[0].data;
|
32
64
|
|
33
65
|
// Resizing factor.
|
34
|
-
float x_factor =
|
35
|
-
float y_factor =
|
66
|
+
float x_factor = input.cols / size.width;
|
67
|
+
float y_factor = input.rows / size.height;
|
36
68
|
|
37
|
-
|
38
|
-
|
69
|
+
// Post-process
|
70
|
+
std::vector<int> class_ids;
|
71
|
+
std::vector<float> confidences;
|
72
|
+
std::vector<cv::Rect> boxes;
|
39
73
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
74
|
+
|
75
|
+
if (version == 5) {
|
76
|
+
for (int i = 0; i < rows; ++i) {
|
77
|
+
float confidence = data[4];
|
78
|
+
|
79
|
+
if (confidence >= CONFIDENCE_THRESHOLD) {
|
80
|
+
float *classes_scores = data + 5;
|
81
|
+
|
82
|
+
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
|
83
|
+
cv::Point class_id;
|
84
|
+
double max_class_score;
|
85
|
+
|
86
|
+
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
|
87
|
+
|
88
|
+
if (max_class_score > SCORE_THRESHOLD) {
|
89
|
+
confidences.push_back(confidence);
|
90
|
+
class_ids.push_back(class_id.x);
|
91
|
+
|
92
|
+
float x = data[0];
|
93
|
+
float y = data[1];
|
94
|
+
float w = data[2];
|
95
|
+
float h = data[3];
|
96
|
+
|
97
|
+
int left = int((x - 0.5 * w) * x_factor);
|
98
|
+
int top = int((y - 0.5 * h) * y_factor);
|
99
|
+
int width = int(w * x_factor);
|
100
|
+
int height = int(h * y_factor);
|
101
|
+
|
102
|
+
boxes.push_back(cv::Rect(left, top, width, height));
|
103
|
+
}
|
104
|
+
}
|
105
|
+
data += dimensions;
|
106
|
+
}
|
107
|
+
} else if (version == 8) {
|
108
|
+
for (int i = 0; i < rows; ++i) {
|
109
|
+
float *classes_scores = data + 4;
|
110
|
+
|
111
|
+
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
|
112
|
+
cv::Point class_id;
|
113
|
+
double maxClassScore;
|
114
|
+
|
115
|
+
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
|
116
|
+
|
117
|
+
if (maxClassScore > SCORE_THRESHOLD) {
|
118
|
+
confidences.push_back(maxClassScore);
|
75
119
|
class_ids.push_back(class_id.x);
|
120
|
+
|
121
|
+
float x = data[0];
|
122
|
+
float y = data[1];
|
123
|
+
float w = data[2];
|
124
|
+
float h = data[3];
|
125
|
+
|
126
|
+
int left = int((x - 0.5 * w) * x_factor);
|
127
|
+
int top = int((y - 0.5 * h) * y_factor);
|
128
|
+
int width = int(w * x_factor);
|
129
|
+
int height = int(h * y_factor);
|
130
|
+
|
76
131
|
boxes.push_back(cv::Rect(left, top, width, height));
|
77
132
|
}
|
78
|
-
|
79
|
-
|
80
|
-
data += 85;
|
133
|
+
data += dimensions;
|
134
|
+
}
|
81
135
|
}
|
136
|
+
|
82
137
|
|
83
138
|
// Perform Non-Maximum Suppression and draw predictions.
|
84
|
-
std::vector<int>
|
139
|
+
std::vector<int> nms_result;
|
85
140
|
cv::dnn::NMSBoxes(boxes, confidences,
|
86
|
-
SCORE_THRESHOLD, NMS_THRESHOLD,
|
141
|
+
SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
|
87
142
|
|
88
143
|
items.clear();
|
89
144
|
|
90
|
-
for (int i = 0; i <
|
91
|
-
int idx =
|
92
|
-
items.push_back({
|
145
|
+
for (int i = 0; i < nms_result.size(); i++) {
|
146
|
+
int idx = nms_result[i];
|
147
|
+
items.push_back({ classes[class_ids[idx]],confidences[idx],boxes[idx] });
|
93
148
|
}
|
94
149
|
}
|
150
|
+
|
151
|
+
|
152
|
+
cv::Mat Yolo::formatToSquare(const cv::Mat &source)
|
153
|
+
{
|
154
|
+
int col = source.cols;
|
155
|
+
int row = source.rows;
|
156
|
+
int _max = MAX(col, row);
|
157
|
+
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
|
158
|
+
source.copyTo(result(cv::Rect(0, 0, col, row)));
|
159
|
+
return result;
|
160
|
+
}
|
data/ext/yolo.h
CHANGED
@@ -16,12 +16,14 @@ public:
|
|
16
16
|
private:
|
17
17
|
static constexpr float INPUT_WIDTH = 640.0;
|
18
18
|
static constexpr float INPUT_HEIGHT = 640.0;
|
19
|
-
static constexpr float
|
20
|
-
static constexpr float
|
21
|
-
static constexpr float
|
19
|
+
static constexpr float CONFIDENCE_THRESHOLD = 0.25;
|
20
|
+
static constexpr float SCORE_THRESHOLD = 0.50;
|
21
|
+
static constexpr float NMS_THRESHOLD = 0.50;
|
22
22
|
|
23
|
+
bool letterBoxForSquare = true;
|
24
|
+
|
23
25
|
public:
|
24
|
-
const std::vector<std::string>
|
26
|
+
const std::vector<std::string> classes = {
|
25
27
|
"person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
|
26
28
|
"truck", "boat", "traffic light", "fire hydrant", "stop sign",
|
27
29
|
"parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
|
@@ -38,11 +40,13 @@ private:
|
|
38
40
|
};
|
39
41
|
|
40
42
|
public:
|
41
|
-
Yolo(const std::string& model);
|
43
|
+
Yolo(const std::string& model, cv::Size size);
|
42
44
|
void process(cv::Mat &img, std::vector<Item> &items);
|
43
45
|
|
44
46
|
private:
|
45
47
|
cv::dnn::Net net;
|
48
|
+
cv::Size size;
|
49
|
+
cv::Mat formatToSquare(const cv::Mat &source);
|
46
50
|
};
|
47
51
|
|
48
52
|
#endif
|
data/ext/yunet.h
CHANGED
@@ -2,37 +2,37 @@
|
|
2
2
|
#define __YUNET__
|
3
3
|
|
4
4
|
#include <string>
|
5
|
-
#include <
|
6
|
-
#include <
|
7
|
-
#include <utility>
|
8
|
-
|
9
|
-
#include <opencv2/dnn.hpp>
|
10
|
-
|
5
|
+
#include <opencv2/objdetect/face.hpp>
|
6
|
+
#include <opencv2/core.hpp>
|
11
7
|
|
12
8
|
class YuNet
|
13
9
|
{
|
14
10
|
|
15
11
|
public:
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
12
|
+
YuNet(const std::string& model_path,
|
13
|
+
const cv::Size& input_size = cv::Size(320, 320),
|
14
|
+
float conf_threshold = 0.6f,
|
15
|
+
float nms_threshold = 0.3f,
|
16
|
+
int top_k = 5000,
|
17
|
+
int backend_id = cv::dnn::DNN_BACKEND_OPENCV,
|
18
|
+
int target_id = cv::dnn::DNN_TARGET_CPU)
|
19
|
+
{
|
20
|
+
model = cv::FaceDetectorYN::create(model_path, "", input_size,
|
21
|
+
conf_threshold,
|
22
|
+
nms_threshold, top_k,
|
23
|
+
backend_id, target_id);
|
24
|
+
}
|
28
25
|
|
29
|
-
public:
|
30
|
-
YuNet(const std::string& model);
|
31
26
|
~YuNet() {};
|
32
|
-
|
27
|
+
|
28
|
+
void process(const cv::Mat& img, cv::Mat& faces) {
|
29
|
+
model->setInputSize(img.size());
|
30
|
+
model->detect(img, faces);
|
31
|
+
}
|
33
32
|
|
34
33
|
private:
|
35
|
-
|
34
|
+
|
35
|
+
cv::Ptr<cv::FaceDetectorYN> model;
|
36
36
|
};
|
37
37
|
|
38
38
|
#endif
|
data/lib/circe/version.rb
CHANGED
data/lib/circe.rb
CHANGED
@@ -3,9 +3,9 @@ class Circe
|
|
3
3
|
private
|
4
4
|
|
5
5
|
# Don't know how to do it inside the c extension
|
6
|
-
DATA_DIR
|
7
|
-
ONNX_YOLO
|
8
|
-
ONNX_YUNET
|
6
|
+
DATA_DIR = File.join(__dir__, '..', 'data').freeze
|
7
|
+
ONNX_YOLO = [ File.join(DATA_DIR, 'yolov8s.onnx'), 480, 640 ]
|
8
|
+
ONNX_YUNET = [ File.join(DATA_DIR, 'face_detection_yunet_2023mar.onnx') ]
|
9
9
|
|
10
10
|
end
|
11
11
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: circe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stéphane D'Alu
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |2+
|
14
14
|
|
@@ -22,14 +22,16 @@ extensions:
|
|
22
22
|
extra_rdoc_files: []
|
23
23
|
files:
|
24
24
|
- circe.gemspec
|
25
|
-
- data/
|
26
|
-
- data/
|
25
|
+
- data/face_detection_yunet_2023mar.onnx
|
26
|
+
- data/yolov5su-sim.onnx
|
27
|
+
- data/yolov5su.onnx
|
28
|
+
- data/yolov8s-sim.onnx
|
29
|
+
- data/yolov8s.onnx
|
27
30
|
- ext/camera_model.h
|
28
31
|
- ext/circe.cpp
|
29
32
|
- ext/extconf.rb
|
30
33
|
- ext/yolo.cpp
|
31
34
|
- ext/yolo.h
|
32
|
-
- ext/yunet.cpp
|
33
35
|
- ext/yunet.h
|
34
36
|
- lib/circe.rb
|
35
37
|
- lib/circe/version.rb
|
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
54
|
- !ruby/object:Gem::Version
|
53
55
|
version: '0'
|
54
56
|
requirements: []
|
55
|
-
rubygems_version: 3.
|
57
|
+
rubygems_version: 3.5.9
|
56
58
|
signing_key:
|
57
59
|
specification_version: 4
|
58
60
|
summary: Face and object recognition
|
Binary file
|
data/ext/yunet.cpp
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
#include <cmath>
|
2
|
-
#include <string>
|
3
|
-
#include <vector>
|
4
|
-
#include <numeric>
|
5
|
-
#include <algorithm>
|
6
|
-
|
7
|
-
#include <opencv2/dnn.hpp>
|
8
|
-
|
9
|
-
#include "yunet.h"
|
10
|
-
|
11
|
-
|
12
|
-
YuNet::YuNet(const std::string& model_filename)
|
13
|
-
{
|
14
|
-
net = cv::dnn::readNetFromONNX(model_filename);
|
15
|
-
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
16
|
-
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
17
|
-
}
|
18
|
-
|
19
|
-
|
20
|
-
void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
|
21
|
-
{
|
22
|
-
/* -- Preparing for image size -- */
|
23
|
-
cv::Size model_size;
|
24
|
-
model_size.width = MODEL_WIDTH;
|
25
|
-
model_size.height = MODEL_WIDTH * img.rows / img.cols;
|
26
|
-
model_size.height = (model_size.height / 32) * 32;
|
27
|
-
|
28
|
-
std::pair<int32_t, int32_t> feature_map_2th = {
|
29
|
-
(model_size.height + 1) / 2 / 2,
|
30
|
-
(model_size.width + 1) / 2 / 2
|
31
|
-
};
|
32
|
-
|
33
|
-
std::vector<std::pair<int32_t, int32_t>> feature_map_list;
|
34
|
-
feature_map_list.push_back({ (feature_map_2th.first + 1) / 2 ,
|
35
|
-
(feature_map_2th.second + 1) / 2 });
|
36
|
-
|
37
|
-
for (int32_t i = 0; i < 3; i++) {
|
38
|
-
const auto& previous = feature_map_list.back();
|
39
|
-
feature_map_list.push_back({ (previous.first + 1) / 2 ,
|
40
|
-
(previous.second + 1) / 2 });
|
41
|
-
}
|
42
|
-
|
43
|
-
std::vector<std::vector<float>> prior_list;
|
44
|
-
for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
|
45
|
-
const auto& min_sizes = MIN_SIZES[i];
|
46
|
-
const auto& feature_map = feature_map_list[i];
|
47
|
-
for (int y = 0; y < feature_map.first; y++) {
|
48
|
-
for (int x = 0; x < feature_map.second; x++) {
|
49
|
-
for (const auto& min_size : min_sizes) {
|
50
|
-
float s_kx = static_cast<float>(min_size) / model_size.width;
|
51
|
-
float s_ky = static_cast<float>(min_size) / model_size.height;
|
52
|
-
float cx = (x + 0.5f) * STEPS[i] / model_size.width;
|
53
|
-
float cy = (y + 0.5f) * STEPS[i] / model_size.height;
|
54
|
-
prior_list.push_back({ cx, cy, s_kx, s_ky });
|
55
|
-
}
|
56
|
-
}
|
57
|
-
}
|
58
|
-
}
|
59
|
-
|
60
|
-
|
61
|
-
/* -- Pre-process -- */
|
62
|
-
cv::Mat blob;
|
63
|
-
cv::dnn::blobFromImage(img, blob, 1.0, model_size);
|
64
|
-
|
65
|
-
/* -- Inference -- */
|
66
|
-
std::vector<cv::Mat> outputs;
|
67
|
-
net.setInput(blob);
|
68
|
-
net.forward(outputs, { "conf", "iou", "loc" });
|
69
|
-
|
70
|
-
/* -- Post Process -- */
|
71
|
-
const cv::Mat& mat_conf = outputs[0];
|
72
|
-
const cv::Mat& mat_iou = outputs[1];
|
73
|
-
const cv::Mat& mat_loc = outputs[2];
|
74
|
-
const cv::Size image_size = img.size();
|
75
|
-
|
76
|
-
// Get score list
|
77
|
-
std::vector<float> cls_score;
|
78
|
-
for (int32_t row = 0; row < mat_conf.rows; row++) {
|
79
|
-
float val = mat_conf.at<float>(cv::Point(1, row));
|
80
|
-
cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
|
81
|
-
}
|
82
|
-
|
83
|
-
std::vector<float> iou_score;
|
84
|
-
for (int32_t row = 0; row < mat_iou.rows; row++) {
|
85
|
-
float val = mat_conf.at<float>(cv::Point(0, row));
|
86
|
-
iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
|
87
|
-
}
|
88
|
-
|
89
|
-
std::vector<float> score;
|
90
|
-
for (int32_t row = 0; row < mat_conf.rows; row++) {
|
91
|
-
score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
|
92
|
-
}
|
93
|
-
|
94
|
-
// All bbox
|
95
|
-
std::vector<cv::Rect> bbox_all;
|
96
|
-
for (int row = 0; row < mat_loc.rows; row++) {
|
97
|
-
float cx = mat_loc.at<float>(cv::Point(0, row));
|
98
|
-
float cy = mat_loc.at<float>(cv::Point(1, row));
|
99
|
-
float w = mat_loc.at<float>(cv::Point(2, row));
|
100
|
-
float h = mat_loc.at<float>(cv::Point(3, row));
|
101
|
-
|
102
|
-
cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
|
103
|
-
cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
|
104
|
-
w = prior_list[row][2] * std::exp(w * VARIANCES[0]);
|
105
|
-
h = prior_list[row][3] * std::exp(h * VARIANCES[1]);
|
106
|
-
|
107
|
-
bbox_all.push_back({
|
108
|
-
static_cast<int32_t>((cx - w / 2) * image_size.width),
|
109
|
-
static_cast<int32_t>((cy - h / 2) * image_size.height),
|
110
|
-
static_cast<int32_t>(w * image_size.width),
|
111
|
-
static_cast<int32_t>(h * image_size.height) });
|
112
|
-
}
|
113
|
-
|
114
|
-
// Non-Maximum Suppression
|
115
|
-
std::vector<int> indices;
|
116
|
-
cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);
|
117
|
-
|
118
|
-
// Get valid bbox and landmark
|
119
|
-
faces.clear();
|
120
|
-
for (int idx : indices) {
|
121
|
-
Landmark landmark; // (landmark is 5 points)
|
122
|
-
for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
|
123
|
-
cv::Point& p = landmark[i];
|
124
|
-
float x = mat_loc.at<float>(cv::Point(4 + i * 2, idx));
|
125
|
-
float y = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
|
126
|
-
p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
|
127
|
-
p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
|
128
|
-
}
|
129
|
-
|
130
|
-
faces.push_back({ bbox_all[idx], landmark });
|
131
|
-
}
|
132
|
-
}
|