circe 0.1.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data/face_detection_yunet_2023mar.onnx +0 -0
- data/data/{yolov5s.onnx → yolov5su-sim.onnx} +0 -0
- data/data/yolov5su.onnx +0 -0
- data/data/yolov8s-sim.onnx +0 -0
- data/data/yolov8s.onnx +0 -0
- data/ext/circe.cpp +171 -107
- data/ext/extconf.rb +1 -1
- data/ext/yolo.cpp +124 -58
- data/ext/yolo.h +9 -5
- data/ext/yunet.h +22 -22
- data/lib/circe/version.rb +1 -1
- data/lib/circe.rb +3 -3
- metadata +8 -6
- data/data/face_detection_yunet_2022mar.onnx +0 -0
- data/ext/yunet.cpp +0 -132
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d735b7de777b161f27ca0b0d50282eca7e18f1cd9e5aa6e298e089f6ccd2b89
|
4
|
+
data.tar.gz: 5ba4acf8e3c3ef5950c981de11a31f4bdcdb7a0fb1cc4c534b1e62f96a99fb3a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8ac69c1f5b7d3af53db342306629fc880ae8b6386ba0ee4ee02fcec391fcab29b9acc3aa972a01f2d99b31cf0267861d6553d2cdc54b839e2a36648801b83fd3
|
7
|
+
data.tar.gz: 76a3084d3b0fc496d39e9efc04733b17a64a31e5b4285ad450bf1dc681db132671febe821ae3ae717b68771a20901f28247ae62c89a2ec57a013ef4ca7bb6c4b
|
Binary file
|
Binary file
|
data/data/yolov5su.onnx
ADDED
Binary file
|
Binary file
|
data/data/yolov8s.onnx
ADDED
Binary file
|
data/ext/circe.cpp
CHANGED
@@ -91,6 +91,7 @@ static VALUE eCirceError = Qundef;
|
|
91
91
|
#include <chrono>
|
92
92
|
#include <opencv2/highgui.hpp>
|
93
93
|
#include <opencv2/imgproc.hpp>
|
94
|
+
#include <opencv2/core/mat.hpp>
|
94
95
|
|
95
96
|
#include "yolo.h"
|
96
97
|
#include "yunet.h"
|
@@ -119,6 +120,10 @@ static ID id_class;
|
|
119
120
|
static ID id_png;
|
120
121
|
static ID id_jpg;
|
121
122
|
|
123
|
+
static ID id_label;
|
124
|
+
static ID id_thickness;
|
125
|
+
static ID id_extra;
|
126
|
+
static ID id_color;
|
122
127
|
|
123
128
|
static Yolo *yolo;
|
124
129
|
static YuNet *yunet;
|
@@ -143,85 +148,157 @@ draw_label(cv::Mat& img, string label, Point& origin,
|
|
143
148
|
}
|
144
149
|
|
145
150
|
static void
|
146
|
-
|
147
|
-
|
148
|
-
int thickness = 1) {
|
151
|
+
draw_box(cv::Mat& img, Rect& box,
|
152
|
+
Scalar& framecolor = BLUE, int thickness = 1) {
|
149
153
|
|
150
154
|
Point a = { box.x, box.y };
|
151
155
|
Point b = { box.x + box.width, box.y + box.height };
|
152
156
|
|
153
157
|
cv::rectangle(img, a, b, framecolor, thickness);
|
154
|
-
draw_label(img, label, a, textcolor, framecolor);
|
155
158
|
}
|
156
159
|
|
160
|
+
static void
|
161
|
+
draw_labelbox(cv::Mat& img, string label, Rect& box,
|
162
|
+
Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
|
163
|
+
int thickness = 1) {
|
157
164
|
|
165
|
+
Point o = { box.x, box.y };
|
166
|
+
|
167
|
+
draw_box(img, box, framecolor, thickness);
|
168
|
+
draw_label(img, label, o, textcolor, framecolor);
|
169
|
+
}
|
170
|
+
|
171
|
+
|
172
|
+
VALUE
|
173
|
+
circe_annotate(Mat& img, Rect& box, VALUE v_annotation, int *state) {
|
174
|
+
if (img.empty() || NIL_P(v_annotation))
|
175
|
+
return Qnil;
|
176
|
+
|
177
|
+
VALUE v_label = Qnil;
|
178
|
+
VALUE v_color = ULONG2NUM(0x0000ff);
|
179
|
+
VALUE v_thickness = INT2NUM(1);
|
180
|
+
VALUE v_extra = Qtrue;
|
181
|
+
|
182
|
+
VALUE s_label = rb_id2sym(id_label);
|
183
|
+
VALUE s_color = rb_id2sym(id_color);
|
184
|
+
VALUE s_thickness = rb_id2sym(id_thickness);
|
185
|
+
VALUE s_extra = rb_id2sym(id_extra);
|
186
|
+
|
187
|
+
switch (TYPE(v_annotation)) {
|
188
|
+
case T_NIL:
|
189
|
+
break;
|
190
|
+
case T_HASH:
|
191
|
+
v_thickness = rb_hash_lookup2(v_annotation, s_thickness, v_thickness);
|
192
|
+
v_color = rb_hash_lookup2(v_annotation, s_color, v_color );
|
193
|
+
v_label = rb_hash_lookup2(v_annotation, s_label, v_label );
|
194
|
+
v_extra = rb_hash_lookup2(v_annotation, s_extra, v_extra );
|
195
|
+
break;
|
196
|
+
case T_ARRAY:
|
197
|
+
switch(RARRAY_LENINT(v_annotation)) {
|
198
|
+
default:
|
199
|
+
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
200
|
+
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
201
|
+
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
202
|
+
case 0: break;
|
203
|
+
}
|
204
|
+
break;
|
205
|
+
case T_STRING:
|
206
|
+
v_label = v_annotation;
|
207
|
+
break;
|
208
|
+
}
|
209
|
+
|
210
|
+
// No color, no rendering
|
211
|
+
if (NIL_P(v_color))
|
212
|
+
return Qnil;
|
213
|
+
|
214
|
+
long rgb = NUM2ULONG(v_color);
|
215
|
+
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
216
|
+
(rgb >> 8) & 0xFF,
|
217
|
+
(rgb >> 16) & 0xFF);
|
218
|
+
|
219
|
+
if (! NIL_P(v_thickness)) {
|
220
|
+
int thickness = NUM2INT(v_thickness);
|
221
|
+
draw_box(img, box, color, thickness);
|
222
|
+
}
|
223
|
+
if (! NIL_P(v_label)) {
|
224
|
+
string label = StringValueCStr(v_label);
|
225
|
+
Point o = { box.x, box.y };
|
226
|
+
draw_label(img, label, o, BLACK, color);
|
227
|
+
}
|
228
|
+
|
229
|
+
// Return normalized parameters
|
230
|
+
VALUE r = rb_hash_new();
|
231
|
+
rb_hash_aset(r, s_label, v_label );
|
232
|
+
rb_hash_aset(r, s_color, v_color );
|
233
|
+
rb_hash_aset(r, s_thickness, v_thickness);
|
234
|
+
rb_hash_aset(r, s_extra, v_extra );
|
235
|
+
return r;
|
236
|
+
}
|
158
237
|
|
159
238
|
|
160
239
|
|
161
240
|
void
|
162
|
-
yunet_process_features(
|
163
|
-
Mat& img, VALUE v_features, int *state)
|
241
|
+
yunet_process_features(cv::Mat& faces, Mat& img, VALUE v_features, int *state)
|
164
242
|
{
|
165
|
-
for (int i = 0; i < faces.
|
166
|
-
|
167
|
-
|
243
|
+
for (int i = 0; i < faces.rows; i++) {
|
244
|
+
// Face
|
245
|
+
int x_f = static_cast<int>(faces.at<float>(i, 0));
|
246
|
+
int y_f = static_cast<int>(faces.at<float>(i, 1));
|
247
|
+
int w_f = static_cast<int>(faces.at<float>(i, 2));
|
248
|
+
int h_f = static_cast<int>(faces.at<float>(i, 3));
|
249
|
+
// Right eye
|
250
|
+
int x_re = static_cast<int>(faces.at<float>(i, 4));
|
251
|
+
int y_re = static_cast<int>(faces.at<float>(i, 5));
|
252
|
+
// Left eye
|
253
|
+
int x_le = static_cast<int>(faces.at<float>(i, 6));
|
254
|
+
int y_le = static_cast<int>(faces.at<float>(i, 7));
|
255
|
+
// Nose tip
|
256
|
+
int x_nt = static_cast<int>(faces.at<float>(i, 8));
|
257
|
+
int y_nt = static_cast<int>(faces.at<float>(i, 9));
|
258
|
+
// Right corner mouth
|
259
|
+
int x_rcm = static_cast<int>(faces.at<float>(i, 10));
|
260
|
+
int y_rcm = static_cast<int>(faces.at<float>(i, 11));
|
261
|
+
// Left corner mouth
|
262
|
+
int x_lcm = static_cast<int>(faces.at<float>(i, 12));
|
263
|
+
int y_lcm = static_cast<int>(faces.at<float>(i, 13));
|
264
|
+
// Confidence
|
265
|
+
float confidence = faces.at<float>(i, 14);
|
168
266
|
|
169
|
-
|
267
|
+
VALUE v_type = ID2SYM(id_face);
|
170
268
|
VALUE v_box = rb_ary_new_from_args(4,
|
171
|
-
|
172
|
-
|
269
|
+
INT2NUM(x_f), INT2NUM(y_f),
|
270
|
+
INT2NUM(w_f), INT2NUM(h_f));
|
173
271
|
VALUE v_landmark = rb_ary_new_from_args(5,
|
174
|
-
rb_ary_new_from_args(2, INT2NUM(
|
175
|
-
INT2NUM(
|
176
|
-
|
177
|
-
INT2NUM(
|
178
|
-
|
179
|
-
INT2NUM(
|
180
|
-
|
181
|
-
INT2NUM(
|
182
|
-
rb_ary_new_from_args(2, INT2NUM(
|
183
|
-
|
184
|
-
VALUE
|
185
|
-
|
272
|
+
rb_ary_new_from_args(2, INT2NUM(x_re),
|
273
|
+
INT2NUM(y_re)),
|
274
|
+
rb_ary_new_from_args(2, INT2NUM(x_le),
|
275
|
+
INT2NUM(y_le)),
|
276
|
+
rb_ary_new_from_args(2, INT2NUM(x_nt),
|
277
|
+
INT2NUM(y_nt)),
|
278
|
+
rb_ary_new_from_args(2, INT2NUM(x_rcm),
|
279
|
+
INT2NUM(y_rcm)),
|
280
|
+
rb_ary_new_from_args(2, INT2NUM(x_lcm),
|
281
|
+
INT2NUM(y_lcm)));
|
282
|
+
VALUE v_confidence = DBL2NUM(confidence);
|
283
|
+
VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box, v_landmark,
|
284
|
+
v_confidence);
|
285
|
+
|
186
286
|
rb_ary_push(v_features, v_feature);
|
287
|
+
|
187
288
|
|
188
289
|
if (!img.empty() && rb_block_given_p()) {
|
189
|
-
|
190
|
-
VALUE
|
191
|
-
VALUE
|
192
|
-
VALUE
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
default:
|
202
|
-
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
203
|
-
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
204
|
-
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
205
|
-
case 0: break;
|
206
|
-
}
|
207
|
-
break;
|
208
|
-
case T_STRING:
|
209
|
-
v_label = v_annotation;
|
210
|
-
break;
|
211
|
-
}
|
212
|
-
|
213
|
-
if (! NIL_P(v_label)) {
|
214
|
-
string label = StringValueCStr(v_label);
|
215
|
-
long rgb = NUM2ULONG(v_color);
|
216
|
-
int thickness = NUM2INT(v_thickness);
|
217
|
-
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
218
|
-
(rgb >> 8) & 0xFF,
|
219
|
-
(rgb >> 16) & 0xFF);
|
220
|
-
draw_labelbox(img, label, box, color, BLACK, thickness);
|
221
|
-
|
222
|
-
for (const auto& p : lmark) {
|
223
|
-
cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
|
224
|
-
}
|
290
|
+
cv::Rect box = cv::Rect(x_f, y_f, w_f, h_f);
|
291
|
+
VALUE v_annotation = rb_yield_splat(v_feature);
|
292
|
+
VALUE cfg = circe_annotate(img, box, v_annotation, state);
|
293
|
+
VALUE s_extra = rb_id2sym(id_extra);
|
294
|
+
|
295
|
+
if (!NIL_P(cfg) && RTEST(rb_hash_aref(cfg, s_extra))) {
|
296
|
+
cv::Scalar color = cv::Scalar(255, 0, 0);
|
297
|
+
cv::circle(img, cv::Point(x_le, y_le ), 3, color, 2);
|
298
|
+
cv::circle(img, cv::Point(x_re, y_re ), 3, color, 2);
|
299
|
+
cv::circle(img, cv::Point(x_nt, y_nt ), 3, color, 2);
|
300
|
+
cv::circle(img, cv::Point(x_rcm, y_rcm), 3, color, 2);
|
301
|
+
cv::circle(img, cv::Point(x_lcm, y_lcm), 3, color, 2);
|
225
302
|
}
|
226
303
|
}
|
227
304
|
}
|
@@ -239,50 +316,18 @@ yolo_process_features(vector<Yolo::Item>& items,
|
|
239
316
|
Rect box = std::get<2>(items[i]);
|
240
317
|
|
241
318
|
VALUE v_type = ID2SYM(id_class);
|
242
|
-
VALUE v_name = rb_str_new(name.c_str(), name.size());
|
243
|
-
VALUE v_confidence = DBL2NUM(confidence);
|
244
319
|
VALUE v_box = rb_ary_new_from_args(4,
|
245
320
|
INT2NUM(box.x ), INT2NUM(box.y ),
|
246
321
|
INT2NUM(box.width), INT2NUM(box.height));
|
322
|
+
VALUE v_name = rb_str_new(name.c_str(), name.size());
|
323
|
+
VALUE v_confidence = DBL2NUM(confidence);
|
247
324
|
VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box,
|
248
325
|
v_name, v_confidence);
|
249
326
|
rb_ary_push(v_features, v_feature);
|
250
327
|
|
251
|
-
if (rb_block_given_p()) {
|
328
|
+
if (!img.empty() && rb_block_given_p()) {
|
252
329
|
VALUE v_annotation = rb_yield_splat(v_feature);
|
253
|
-
|
254
|
-
VALUE v_label = Qnil;
|
255
|
-
VALUE v_color = ULONG2NUM(0x0000ff);
|
256
|
-
VALUE v_thickness = INT2NUM(1);
|
257
|
-
|
258
|
-
switch (TYPE(v_annotation)) {
|
259
|
-
case T_NIL:
|
260
|
-
break;
|
261
|
-
case T_HASH:
|
262
|
-
break;
|
263
|
-
case T_ARRAY:
|
264
|
-
switch(RARRAY_LENINT(v_annotation)) {
|
265
|
-
default:
|
266
|
-
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
267
|
-
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
268
|
-
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
269
|
-
case 0: break;
|
270
|
-
}
|
271
|
-
break;
|
272
|
-
case T_STRING:
|
273
|
-
v_label = v_annotation;
|
274
|
-
break;
|
275
|
-
}
|
276
|
-
|
277
|
-
if (! NIL_P(v_label)) {
|
278
|
-
string label = StringValueCStr(v_label);
|
279
|
-
long rgb = NUM2ULONG(v_color);
|
280
|
-
int thickness = NUM2INT(v_thickness);
|
281
|
-
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
282
|
-
(rgb >> 8) & 0xFF,
|
283
|
-
(rgb >> 16) & 0xFF);
|
284
|
-
draw_labelbox(img, label, box, color, BLACK, thickness);
|
285
|
-
}
|
330
|
+
circe_annotate(img, box, v_annotation, state);
|
286
331
|
}
|
287
332
|
}
|
288
333
|
}
|
@@ -337,9 +382,10 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
|
|
337
382
|
}
|
338
383
|
|
339
384
|
if (RTEST(v_face)) {
|
340
|
-
|
385
|
+
cv::Mat faces;
|
341
386
|
yunet->process(i_img, faces);
|
342
387
|
yunet_process_features(faces, o_img, v_features, &state);
|
388
|
+
faces.release();
|
343
389
|
if (state) goto exception;
|
344
390
|
}
|
345
391
|
|
@@ -363,7 +409,12 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
|
|
363
409
|
std::vector<uchar> buf;
|
364
410
|
cv::imencode(format, o_img, buf);
|
365
411
|
v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
|
412
|
+
buf.clear();
|
366
413
|
}
|
414
|
+
|
415
|
+
i_img.release();
|
416
|
+
o_img.release();
|
417
|
+
|
367
418
|
return rb_ary_new_from_args(2, v_features, v_image);
|
368
419
|
|
369
420
|
exception:
|
@@ -382,22 +433,35 @@ void Init_core(void) {
|
|
382
433
|
eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
|
383
434
|
// myclass = rb_const_get(mymodule, sym_myclass);
|
384
435
|
|
385
|
-
VALUE v_onnx_yolo
|
386
|
-
VALUE
|
436
|
+
VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
|
437
|
+
VALUE v_yolo_path = RARRAY_AREF(v_onnx_yolo, 0);
|
438
|
+
VALUE v_yolo_height = RARRAY_AREF(v_onnx_yolo, 1);
|
439
|
+
VALUE v_yolo_width = RARRAY_AREF(v_onnx_yolo, 2);
|
440
|
+
|
441
|
+
VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
|
442
|
+
VALUE v_yunet_path = RARRAY_AREF(v_onnx_yunet, 0);
|
387
443
|
|
388
|
-
|
389
|
-
|
444
|
+
|
445
|
+
|
446
|
+
static Yolo _yolo = { StringValueCStr(v_yolo_path ),
|
447
|
+
{ NUM2INT(v_yolo_width),
|
448
|
+
NUM2INT(v_yolo_height) }};
|
449
|
+
static YuNet _yunet = { StringValueCStr(v_yunet_path) };
|
390
450
|
|
391
451
|
yolo = &_yolo;
|
392
452
|
yunet = &_yunet;
|
393
453
|
|
394
454
|
|
395
|
-
id_debug = rb_intern_const("debug"
|
396
|
-
id_face = rb_intern_const("face"
|
397
|
-
id_classify = rb_intern_const("classify");
|
398
|
-
id_class = rb_intern_const("class"
|
399
|
-
id_png = rb_intern_const("png"
|
400
|
-
id_jpg = rb_intern_const("jpg"
|
455
|
+
id_debug = rb_intern_const("debug" );
|
456
|
+
id_face = rb_intern_const("face" );
|
457
|
+
id_classify = rb_intern_const("classify" );
|
458
|
+
id_class = rb_intern_const("class" );
|
459
|
+
id_png = rb_intern_const("png" );
|
460
|
+
id_jpg = rb_intern_const("jpg" );
|
461
|
+
id_label = rb_intern_const("label" );
|
462
|
+
id_thickness = rb_intern_const("thickness");
|
463
|
+
id_extra = rb_intern_const("extra" );
|
464
|
+
id_color = rb_intern_const("color" );
|
401
465
|
|
402
466
|
|
403
467
|
rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
|
data/ext/extconf.rb
CHANGED
data/ext/yolo.cpp
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
/*
|
2
|
+
* https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-CPP-Inference/inference.cpp
|
3
|
+
* https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp
|
4
|
+
*
|
5
|
+
* yolov5 has an output of shape:
|
6
|
+
* (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
|
7
|
+
* yolov8 has an output of shape:
|
8
|
+
* (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
|
9
|
+
*
|
10
|
+
* yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
|
11
|
+
*/
|
1
12
|
#include <tuple>
|
2
13
|
#include <string>
|
3
14
|
#include <opencv2/imgproc.hpp>
|
@@ -6,89 +17,144 @@
|
|
6
17
|
#include "yolo.h"
|
7
18
|
|
8
19
|
|
9
|
-
Yolo::Yolo(const std::string& model) {
|
10
|
-
net
|
20
|
+
Yolo::Yolo(const std::string& model, cv::Size size) {
|
21
|
+
this->net = cv::dnn::readNetFromONNX(model);
|
22
|
+
this->size = size;
|
11
23
|
}
|
12
24
|
|
13
25
|
|
14
26
|
void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {
|
27
|
+
int version = 5;
|
15
28
|
|
29
|
+
cv::Mat input = img;
|
30
|
+
if (letterBoxForSquare && size.width == size.height)
|
31
|
+
input = formatToSquare(input);
|
32
|
+
|
16
33
|
// Pre-process
|
17
34
|
cv::Mat blob;
|
18
|
-
|
19
|
-
|
20
|
-
cv::dnn::blobFromImage(img, blob, 1./255.,
|
21
|
-
cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
|
22
|
-
cv::Scalar(), true, false);
|
35
|
+
cv::dnn::blobFromImage(input, blob, 1./255.,
|
36
|
+
this->size, cv::Scalar(), true, false);
|
23
37
|
|
24
38
|
// Process
|
39
|
+
std::vector<cv::Mat> outputs;
|
25
40
|
net.setInput(blob);
|
26
41
|
net.forward(outputs, net.getUnconnectedOutLayersNames());
|
27
42
|
|
28
|
-
//
|
29
|
-
|
30
|
-
|
31
|
-
|
43
|
+
// Output
|
44
|
+
int rows = outputs[0].size[1];
|
45
|
+
int dimensions = outputs[0].size[2];
|
46
|
+
|
47
|
+
// Infer yolo version
|
48
|
+
// -> Check if the shape[2] is more than shape[1] (yolov8)
|
49
|
+
if (dimensions > rows) {
|
50
|
+
version = 8;
|
51
|
+
}
|
52
|
+
|
53
|
+
// Adjust according to version
|
54
|
+
if (version == 8) {
|
55
|
+
// cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
|
56
|
+
rows = outputs[0].size[2];
|
57
|
+
dimensions = outputs[0].size[1];
|
58
|
+
outputs[0] = outputs[0].reshape(1, dimensions);
|
59
|
+
cv::transpose(outputs[0], outputs[0]);
|
60
|
+
}
|
61
|
+
|
62
|
+
// Output
|
63
|
+
float *data = (float *)outputs[0].data;
|
32
64
|
|
33
65
|
// Resizing factor.
|
34
|
-
float x_factor =
|
35
|
-
float y_factor =
|
66
|
+
float x_factor = input.cols / size.width;
|
67
|
+
float y_factor = input.rows / size.height;
|
36
68
|
|
37
|
-
|
38
|
-
|
69
|
+
// Post-process
|
70
|
+
std::vector<int> class_ids;
|
71
|
+
std::vector<float> confidences;
|
72
|
+
std::vector<cv::Rect> boxes;
|
39
73
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
74
|
+
|
75
|
+
if (version == 5) {
|
76
|
+
for (int i = 0; i < rows; ++i) {
|
77
|
+
float confidence = data[4];
|
78
|
+
|
79
|
+
if (confidence >= CONFIDENCE_THRESHOLD) {
|
80
|
+
float *classes_scores = data + 5;
|
81
|
+
|
82
|
+
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
|
83
|
+
cv::Point class_id;
|
84
|
+
double max_class_score;
|
85
|
+
|
86
|
+
minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
|
87
|
+
|
88
|
+
if (max_class_score > SCORE_THRESHOLD) {
|
89
|
+
confidences.push_back(confidence);
|
90
|
+
class_ids.push_back(class_id.x);
|
91
|
+
|
92
|
+
float x = data[0];
|
93
|
+
float y = data[1];
|
94
|
+
float w = data[2];
|
95
|
+
float h = data[3];
|
96
|
+
|
97
|
+
int left = int((x - 0.5 * w) * x_factor);
|
98
|
+
int top = int((y - 0.5 * h) * y_factor);
|
99
|
+
int width = int(w * x_factor);
|
100
|
+
int height = int(h * y_factor);
|
101
|
+
|
102
|
+
boxes.push_back(cv::Rect(left, top, width, height));
|
103
|
+
}
|
104
|
+
}
|
105
|
+
data += dimensions;
|
106
|
+
}
|
107
|
+
} else if (version == 8) {
|
108
|
+
for (int i = 0; i < rows; ++i) {
|
109
|
+
float *classes_scores = data + 4;
|
110
|
+
|
111
|
+
cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
|
112
|
+
cv::Point class_id;
|
113
|
+
double maxClassScore;
|
114
|
+
|
115
|
+
minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
|
116
|
+
|
117
|
+
if (maxClassScore > SCORE_THRESHOLD) {
|
118
|
+
confidences.push_back(maxClassScore);
|
75
119
|
class_ids.push_back(class_id.x);
|
120
|
+
|
121
|
+
float x = data[0];
|
122
|
+
float y = data[1];
|
123
|
+
float w = data[2];
|
124
|
+
float h = data[3];
|
125
|
+
|
126
|
+
int left = int((x - 0.5 * w) * x_factor);
|
127
|
+
int top = int((y - 0.5 * h) * y_factor);
|
128
|
+
int width = int(w * x_factor);
|
129
|
+
int height = int(h * y_factor);
|
130
|
+
|
76
131
|
boxes.push_back(cv::Rect(left, top, width, height));
|
77
132
|
}
|
78
|
-
|
79
|
-
|
80
|
-
data += 85;
|
133
|
+
data += dimensions;
|
134
|
+
}
|
81
135
|
}
|
136
|
+
|
82
137
|
|
83
138
|
// Perform Non-Maximum Suppression and draw predictions.
|
84
|
-
std::vector<int>
|
139
|
+
std::vector<int> nms_result;
|
85
140
|
cv::dnn::NMSBoxes(boxes, confidences,
|
86
|
-
SCORE_THRESHOLD, NMS_THRESHOLD,
|
141
|
+
SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
|
87
142
|
|
88
143
|
items.clear();
|
89
144
|
|
90
|
-
for (int i = 0; i <
|
91
|
-
int idx =
|
92
|
-
items.push_back({
|
145
|
+
for (int i = 0; i < nms_result.size(); i++) {
|
146
|
+
int idx = nms_result[i];
|
147
|
+
items.push_back({ classes[class_ids[idx]],confidences[idx],boxes[idx] });
|
93
148
|
}
|
94
149
|
}
|
150
|
+
|
151
|
+
|
152
|
+
cv::Mat Yolo::formatToSquare(const cv::Mat &source)
|
153
|
+
{
|
154
|
+
int col = source.cols;
|
155
|
+
int row = source.rows;
|
156
|
+
int _max = MAX(col, row);
|
157
|
+
cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
|
158
|
+
source.copyTo(result(cv::Rect(0, 0, col, row)));
|
159
|
+
return result;
|
160
|
+
}
|
data/ext/yolo.h
CHANGED
@@ -16,12 +16,14 @@ public:
|
|
16
16
|
private:
|
17
17
|
static constexpr float INPUT_WIDTH = 640.0;
|
18
18
|
static constexpr float INPUT_HEIGHT = 640.0;
|
19
|
-
static constexpr float
|
20
|
-
static constexpr float
|
21
|
-
static constexpr float
|
19
|
+
static constexpr float CONFIDENCE_THRESHOLD = 0.25;
|
20
|
+
static constexpr float SCORE_THRESHOLD = 0.50;
|
21
|
+
static constexpr float NMS_THRESHOLD = 0.50;
|
22
22
|
|
23
|
+
bool letterBoxForSquare = true;
|
24
|
+
|
23
25
|
public:
|
24
|
-
const std::vector<std::string>
|
26
|
+
const std::vector<std::string> classes = {
|
25
27
|
"person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
|
26
28
|
"truck", "boat", "traffic light", "fire hydrant", "stop sign",
|
27
29
|
"parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
|
@@ -38,11 +40,13 @@ private:
|
|
38
40
|
};
|
39
41
|
|
40
42
|
public:
|
41
|
-
Yolo(const std::string& model);
|
43
|
+
Yolo(const std::string& model, cv::Size size);
|
42
44
|
void process(cv::Mat &img, std::vector<Item> &items);
|
43
45
|
|
44
46
|
private:
|
45
47
|
cv::dnn::Net net;
|
48
|
+
cv::Size size;
|
49
|
+
cv::Mat formatToSquare(const cv::Mat &source);
|
46
50
|
};
|
47
51
|
|
48
52
|
#endif
|
data/ext/yunet.h
CHANGED
@@ -2,37 +2,37 @@
|
|
2
2
|
#define __YUNET__
|
3
3
|
|
4
4
|
#include <string>
|
5
|
-
#include <
|
6
|
-
#include <
|
7
|
-
#include <utility>
|
8
|
-
|
9
|
-
#include <opencv2/dnn.hpp>
|
10
|
-
|
5
|
+
#include <opencv2/objdetect/face.hpp>
|
6
|
+
#include <opencv2/core.hpp>
|
11
7
|
|
12
8
|
class YuNet
|
13
9
|
{
|
14
10
|
|
15
11
|
public:
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
12
|
+
YuNet(const std::string& model_path,
|
13
|
+
const cv::Size& input_size = cv::Size(320, 320),
|
14
|
+
float conf_threshold = 0.6f,
|
15
|
+
float nms_threshold = 0.3f,
|
16
|
+
int top_k = 5000,
|
17
|
+
int backend_id = cv::dnn::DNN_BACKEND_OPENCV,
|
18
|
+
int target_id = cv::dnn::DNN_TARGET_CPU)
|
19
|
+
{
|
20
|
+
model = cv::FaceDetectorYN::create(model_path, "", input_size,
|
21
|
+
conf_threshold,
|
22
|
+
nms_threshold, top_k,
|
23
|
+
backend_id, target_id);
|
24
|
+
}
|
28
25
|
|
29
|
-
public:
|
30
|
-
YuNet(const std::string& model);
|
31
26
|
~YuNet() {};
|
32
|
-
|
27
|
+
|
28
|
+
void process(const cv::Mat& img, cv::Mat& faces) {
|
29
|
+
model->setInputSize(img.size());
|
30
|
+
model->detect(img, faces);
|
31
|
+
}
|
33
32
|
|
34
33
|
private:
|
35
|
-
|
34
|
+
|
35
|
+
cv::Ptr<cv::FaceDetectorYN> model;
|
36
36
|
};
|
37
37
|
|
38
38
|
#endif
|
data/lib/circe/version.rb
CHANGED
data/lib/circe.rb
CHANGED
@@ -3,9 +3,9 @@ class Circe
|
|
3
3
|
private
|
4
4
|
|
5
5
|
# Don't know how to do it inside the c extension
|
6
|
-
DATA_DIR
|
7
|
-
ONNX_YOLO
|
8
|
-
ONNX_YUNET
|
6
|
+
DATA_DIR = File.join(__dir__, '..', 'data').freeze
|
7
|
+
ONNX_YOLO = [ File.join(DATA_DIR, 'yolov8s.onnx'), 480, 640 ]
|
8
|
+
ONNX_YUNET = [ File.join(DATA_DIR, 'face_detection_yunet_2023mar.onnx') ]
|
9
9
|
|
10
10
|
end
|
11
11
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: circe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stéphane D'Alu
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: |2+
|
14
14
|
|
@@ -22,14 +22,16 @@ extensions:
|
|
22
22
|
extra_rdoc_files: []
|
23
23
|
files:
|
24
24
|
- circe.gemspec
|
25
|
-
- data/
|
26
|
-
- data/
|
25
|
+
- data/face_detection_yunet_2023mar.onnx
|
26
|
+
- data/yolov5su-sim.onnx
|
27
|
+
- data/yolov5su.onnx
|
28
|
+
- data/yolov8s-sim.onnx
|
29
|
+
- data/yolov8s.onnx
|
27
30
|
- ext/camera_model.h
|
28
31
|
- ext/circe.cpp
|
29
32
|
- ext/extconf.rb
|
30
33
|
- ext/yolo.cpp
|
31
34
|
- ext/yolo.h
|
32
|
-
- ext/yunet.cpp
|
33
35
|
- ext/yunet.h
|
34
36
|
- lib/circe.rb
|
35
37
|
- lib/circe/version.rb
|
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
54
|
- !ruby/object:Gem::Version
|
53
55
|
version: '0'
|
54
56
|
requirements: []
|
55
|
-
rubygems_version: 3.
|
57
|
+
rubygems_version: 3.5.9
|
56
58
|
signing_key:
|
57
59
|
specification_version: 4
|
58
60
|
summary: Face and object recognition
|
Binary file
|
data/ext/yunet.cpp
DELETED
@@ -1,132 +0,0 @@
|
|
1
|
-
#include <cmath>
|
2
|
-
#include <string>
|
3
|
-
#include <vector>
|
4
|
-
#include <numeric>
|
5
|
-
#include <algorithm>
|
6
|
-
|
7
|
-
#include <opencv2/dnn.hpp>
|
8
|
-
|
9
|
-
#include "yunet.h"
|
10
|
-
|
11
|
-
|
12
|
-
YuNet::YuNet(const std::string& model_filename)
|
13
|
-
{
|
14
|
-
net = cv::dnn::readNetFromONNX(model_filename);
|
15
|
-
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
16
|
-
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
17
|
-
}
|
18
|
-
|
19
|
-
|
20
|
-
void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
|
21
|
-
{
|
22
|
-
/* -- Preparing for image size -- */
|
23
|
-
cv::Size model_size;
|
24
|
-
model_size.width = MODEL_WIDTH;
|
25
|
-
model_size.height = MODEL_WIDTH * img.rows / img.cols;
|
26
|
-
model_size.height = (model_size.height / 32) * 32;
|
27
|
-
|
28
|
-
std::pair<int32_t, int32_t> feature_map_2th = {
|
29
|
-
(model_size.height + 1) / 2 / 2,
|
30
|
-
(model_size.width + 1) / 2 / 2
|
31
|
-
};
|
32
|
-
|
33
|
-
std::vector<std::pair<int32_t, int32_t>> feature_map_list;
|
34
|
-
feature_map_list.push_back({ (feature_map_2th.first + 1) / 2 ,
|
35
|
-
(feature_map_2th.second + 1) / 2 });
|
36
|
-
|
37
|
-
for (int32_t i = 0; i < 3; i++) {
|
38
|
-
const auto& previous = feature_map_list.back();
|
39
|
-
feature_map_list.push_back({ (previous.first + 1) / 2 ,
|
40
|
-
(previous.second + 1) / 2 });
|
41
|
-
}
|
42
|
-
|
43
|
-
std::vector<std::vector<float>> prior_list;
|
44
|
-
for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
|
45
|
-
const auto& min_sizes = MIN_SIZES[i];
|
46
|
-
const auto& feature_map = feature_map_list[i];
|
47
|
-
for (int y = 0; y < feature_map.first; y++) {
|
48
|
-
for (int x = 0; x < feature_map.second; x++) {
|
49
|
-
for (const auto& min_size : min_sizes) {
|
50
|
-
float s_kx = static_cast<float>(min_size) / model_size.width;
|
51
|
-
float s_ky = static_cast<float>(min_size) / model_size.height;
|
52
|
-
float cx = (x + 0.5f) * STEPS[i] / model_size.width;
|
53
|
-
float cy = (y + 0.5f) * STEPS[i] / model_size.height;
|
54
|
-
prior_list.push_back({ cx, cy, s_kx, s_ky });
|
55
|
-
}
|
56
|
-
}
|
57
|
-
}
|
58
|
-
}
|
59
|
-
|
60
|
-
|
61
|
-
/* -- Pre-process -- */
|
62
|
-
cv::Mat blob;
|
63
|
-
cv::dnn::blobFromImage(img, blob, 1.0, model_size);
|
64
|
-
|
65
|
-
/* -- Inference -- */
|
66
|
-
std::vector<cv::Mat> outputs;
|
67
|
-
net.setInput(blob);
|
68
|
-
net.forward(outputs, { "conf", "iou", "loc" });
|
69
|
-
|
70
|
-
/* -- Post Process -- */
|
71
|
-
const cv::Mat& mat_conf = outputs[0];
|
72
|
-
const cv::Mat& mat_iou = outputs[1];
|
73
|
-
const cv::Mat& mat_loc = outputs[2];
|
74
|
-
const cv::Size image_size = img.size();
|
75
|
-
|
76
|
-
// Get score list
|
77
|
-
std::vector<float> cls_score;
|
78
|
-
for (int32_t row = 0; row < mat_conf.rows; row++) {
|
79
|
-
float val = mat_conf.at<float>(cv::Point(1, row));
|
80
|
-
cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
|
81
|
-
}
|
82
|
-
|
83
|
-
std::vector<float> iou_score;
|
84
|
-
for (int32_t row = 0; row < mat_iou.rows; row++) {
|
85
|
-
float val = mat_conf.at<float>(cv::Point(0, row));
|
86
|
-
iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
|
87
|
-
}
|
88
|
-
|
89
|
-
std::vector<float> score;
|
90
|
-
for (int32_t row = 0; row < mat_conf.rows; row++) {
|
91
|
-
score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
|
92
|
-
}
|
93
|
-
|
94
|
-
// All bbox
|
95
|
-
std::vector<cv::Rect> bbox_all;
|
96
|
-
for (int row = 0; row < mat_loc.rows; row++) {
|
97
|
-
float cx = mat_loc.at<float>(cv::Point(0, row));
|
98
|
-
float cy = mat_loc.at<float>(cv::Point(1, row));
|
99
|
-
float w = mat_loc.at<float>(cv::Point(2, row));
|
100
|
-
float h = mat_loc.at<float>(cv::Point(3, row));
|
101
|
-
|
102
|
-
cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
|
103
|
-
cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
|
104
|
-
w = prior_list[row][2] * std::exp(w * VARIANCES[0]);
|
105
|
-
h = prior_list[row][3] * std::exp(h * VARIANCES[1]);
|
106
|
-
|
107
|
-
bbox_all.push_back({
|
108
|
-
static_cast<int32_t>((cx - w / 2) * image_size.width),
|
109
|
-
static_cast<int32_t>((cy - h / 2) * image_size.height),
|
110
|
-
static_cast<int32_t>(w * image_size.width),
|
111
|
-
static_cast<int32_t>(h * image_size.height) });
|
112
|
-
}
|
113
|
-
|
114
|
-
// Non-Maximum Suppression
|
115
|
-
std::vector<int> indices;
|
116
|
-
cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);
|
117
|
-
|
118
|
-
// Get valid bbox and landmark
|
119
|
-
faces.clear();
|
120
|
-
for (int idx : indices) {
|
121
|
-
Landmark landmark; // (landmark is 5 points)
|
122
|
-
for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
|
123
|
-
cv::Point& p = landmark[i];
|
124
|
-
float x = mat_loc.at<float>(cv::Point(4 + i * 2, idx));
|
125
|
-
float y = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
|
126
|
-
p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
|
127
|
-
p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
|
128
|
-
}
|
129
|
-
|
130
|
-
faces.push_back({ bbox_all[idx], landmark });
|
131
|
-
}
|
132
|
-
}
|