circe 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/circe.gemspec +24 -0
- data/data/face_detection_yunet_2022mar.onnx +0 -0
- data/data/yolov5s.onnx +0 -0
- data/ext/camera_model.h +560 -0
- data/ext/circe.cpp +404 -0
- data/ext/extconf.rb +15 -0
- data/ext/yolo.cpp +94 -0
- data/ext/yolo.h +48 -0
- data/ext/yunet.cpp +132 -0
- data/ext/yunet.h +38 -0
- data/lib/circe/version.rb +3 -0
- data/lib/circe.rb +13 -0
- metadata +60 -0
data/ext/circe.cpp
ADDED
@@ -0,0 +1,404 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
#if 0
|
4
|
+
void estimateHeadPose(cv::Mat& image, CameraModel& camera, const FaceDetection::Landmark& landmark)
|
5
|
+
{
|
6
|
+
/* reference: https://qiita.com/TaroYamada/items/e3f3d0ea4ecc0a832fac */
|
7
|
+
/* reference: https://github.com/spmallick/learnopencv/blob/master/HeadPose/headPose.cpp */
|
8
|
+
static const std::vector<cv::Point3f> face_object_point_list = {
|
9
|
+
{ 0.0f, 0.0f, 0.0f }, // Nose
|
10
|
+
{ -225.0f, 170.0f, -135.0f }, // Left eye
|
11
|
+
{ 225.0f, 170.0f, -135.0f }, // Right eye
|
12
|
+
{ -150.0f, -150.0f, -125.0f }, // Left lip
|
13
|
+
{ 150.0f, -150.0f, -125.0f }, // Right lip
|
14
|
+
};
|
15
|
+
|
16
|
+
static const std::vector<cv::Point3f> face_object_point_for_pnp_list = {
|
17
|
+
face_object_point_list[0], face_object_point_list[0], // Nose
|
18
|
+
face_object_point_list[1], face_object_point_list[1], // Left eye
|
19
|
+
face_object_point_list[2], face_object_point_list[2], // Right eye
|
20
|
+
face_object_point_list[3], face_object_point_list[3], // Left lip
|
21
|
+
face_object_point_list[4], face_object_point_list[4], // Righ lip
|
22
|
+
};
|
23
|
+
|
24
|
+
std::vector<cv::Point2f> face_image_point_list;
|
25
|
+
face_image_point_list.push_back(landmark[2]); // Nose
|
26
|
+
face_image_point_list.push_back(landmark[2]); // Nose
|
27
|
+
face_image_point_list.push_back(landmark[0]); // Left eye
|
28
|
+
face_image_point_list.push_back(landmark[0]); // Left eye
|
29
|
+
face_image_point_list.push_back(landmark[1]); // Right eye
|
30
|
+
face_image_point_list.push_back(landmark[1]); // Right eye
|
31
|
+
face_image_point_list.push_back(landmark[3]); // Left lip
|
32
|
+
face_image_point_list.push_back(landmark[3]); // Left lip
|
33
|
+
face_image_point_list.push_back(landmark[4]); // Right lip
|
34
|
+
face_image_point_list.push_back(landmark[4]); // Right lip
|
35
|
+
|
36
|
+
cv::Mat rvec = cv::Mat_<float>(3, 1);
|
37
|
+
cv::Mat tvec = cv::Mat_<float>(3, 1);
|
38
|
+
cv::solvePnP(face_object_point_for_pnp_list, face_image_point_list, camera.K, camera.dist_coeff, rvec, tvec, false, cv::SOLVEPNP_ITERATIVE);
|
39
|
+
|
40
|
+
char text[128];
|
41
|
+
snprintf(text, sizeof(text), "Pitch = %-+4.0f, Yaw = %-+4.0f, Roll = %-+4.0f", Rad2Deg(rvec.at<float>(0, 0)), Rad2Deg(rvec.at<float>(1, 0)), Rad2Deg(rvec.at<float>(2, 0)));
|
42
|
+
CommonHelper::drawText(image, text, cv::Point(10, 10), 0.7, 3, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255), false);
|
43
|
+
|
44
|
+
std::vector<cv::Point3f> nose_end_point3D = { { 0.0f, 0.0f, 500.0f } };
|
45
|
+
std::vector<cv::Point2f> nose_end_point2D;
|
46
|
+
cv::projectPoints(nose_end_point3D, rvec, tvec,
|
47
|
+
camera.K, camera.dist_coeff, nose_end_point2D);
|
48
|
+
cv::arrowedLine(image, face_image_point_list[0], nose_end_point2D[0],
|
49
|
+
cv::Scalar(0, 255, 0), 5);
|
50
|
+
|
51
|
+
|
52
|
+
/* Calculate Euler Angle */
|
53
|
+
cv::Mat R;
|
54
|
+
cv::Rodrigues(rvec, R);
|
55
|
+
cv::Mat projMat = (cv::Mat_<double>(3, 4) <<
|
56
|
+
R.at<float>(0, 0), R.at<float>(0, 1), R.at<float>(0, 2), 0,
|
57
|
+
R.at<float>(1, 0), R.at<float>(1, 1), R.at<float>(1, 2), 0,
|
58
|
+
R.at<float>(2, 0), R.at<float>(2, 1), R.at<float>(2, 2), 0);
|
59
|
+
|
60
|
+
cv::Mat K, R2, tvec2, R_x, R_y, R_z, eulerAngles;
|
61
|
+
cv::decomposeProjectionMatrix(projMat, K, R, tvec2,
|
62
|
+
R_x, R_y, R_z, eulerAngles);
|
63
|
+
double pitch = eulerAngles.at<double>(0, 0);
|
64
|
+
double yaw = eulerAngles.at<double>(1, 0);
|
65
|
+
double roll = eulerAngles.at<double>(2, 0);
|
66
|
+
snprintf(text, sizeof(text), "X = %-+4.0f, Y = %-+4.0f, Z = %-+4.0f", pitch, yaw, roll);
|
67
|
+
CommonHelper::drawText(image, text, cv::Point(10, 40), 0.7, 3, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255), false);
|
68
|
+
}
|
69
|
+
#endif
|
70
|
+
|
71
|
+
#if 0
|
72
|
+
CameraModel camera;
|
73
|
+
camera.SetIntrinsic(image_input.cols, image_input.rows, FocalLength(image_input.cols, kFovDeg));
|
74
|
+
camera.SetDist({ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f });
|
75
|
+
camera.SetExtrinsic({ 0.0f, 0.0f, 0.0f }, /* rvec [deg] */
|
76
|
+
{ 0.0f, 0.0f, 0.0f }, true); /* tvec (in world coordinate) */
|
77
|
+
|
78
|
+
estimateHeadPose(image_input, camera, face_list[i].second);
|
79
|
+
|
80
|
+
#endif
|
81
|
+
|
82
|
+
// Evaluate to `a`, or to the fallback `b` when `a` is Qundef — i.e. when
// the corresponding keyword argument was not supplied to rb_get_kwargs().
#define IF_UNDEF(a, b) \
    ((a) == Qundef) ? (b) : (a)


static VALUE cCirce      = Qundef;  // class Circe (defined in Init_core)
static VALUE eCirceError = Qundef;  // class Circe::Error < StandardError
|
88
|
+
|
89
|
+
#include <tuple>
|
90
|
+
#include <string>
|
91
|
+
#include <chrono>
|
92
|
+
#include <opencv2/highgui.hpp>
|
93
|
+
#include <opencv2/imgproc.hpp>
|
94
|
+
|
95
|
+
#include "yolo.h"
|
96
|
+
#include "yunet.h"
|
97
|
+
|
98
|
+
using namespace std;
|
99
|
+
using namespace cv;
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
// Text parameters.
const float FONT_SCALE = 0.7;
const int   FONT_FACE  = FONT_HERSHEY_SIMPLEX;
const int   THICKNESS  = 1;

// Colors (BGR channel order, as used by OpenCV).
cv::Scalar BLACK  = cv::Scalar(0,0,0);
cv::Scalar BLUE   = cv::Scalar(255, 178, 50);
cv::Scalar YELLOW = cv::Scalar(0, 255, 255);
cv::Scalar RED    = cv::Scalar(0,0,255);


// Interned Ruby symbol IDs, populated once in Init_core().
static ID id_debug;
static ID id_face;
static ID id_classify;
static ID id_class;
static ID id_png;
static ID id_jpg;


// Detector singletons; point at function-local statics set up in
// Init_core() and live for the whole process.
static Yolo  *yolo;
static YuNet *yunet;
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
static void
|
129
|
+
draw_label(cv::Mat& img, string label, Point& origin,
|
130
|
+
Scalar& fgcolor, Scalar& bgcolor, int thickness = 1)
|
131
|
+
{
|
132
|
+
int baseLine;
|
133
|
+
cv::Size label_size =
|
134
|
+
cv::getTextSize(label, FONT_FACE, FONT_SCALE, thickness, &baseLine);
|
135
|
+
|
136
|
+
Point a = { origin.x, max(origin.y, label_size.height) };
|
137
|
+
Point b = { a.x + label_size.width,
|
138
|
+
a.y + label_size.height + baseLine };
|
139
|
+
Point t = { a.x, a.y + label_size.height };
|
140
|
+
|
141
|
+
cv::rectangle(img, a, b, bgcolor, cv::FILLED);
|
142
|
+
cv::putText(img, label, t, FONT_FACE, FONT_SCALE, fgcolor, THICKNESS);
|
143
|
+
}
|
144
|
+
|
145
|
+
/*
 * Draw `box` on `img` with a `framecolor` outline and a filled label tag
 * (via draw_label) anchored at the box's top-left corner.
 *
 * NOTE: the Scalar default arguments bind non-const references to the
 * global color objects above; the label itself is always drawn with
 * draw_label's default thickness (1), independent of the frame
 * `thickness`.
 */
static void
draw_labelbox(cv::Mat& img, string label, Rect& box,
              Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
              int thickness = 1) {

    Point a = { box.x, box.y };
    Point b = { box.x + box.width, box.y + box.height };

    cv::rectangle(img, a, b, framecolor, thickness);
    draw_label(img, label, a, textcolor, framecolor);
}
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
/*
 * Convert YuNet face detections into Ruby feature arrays appended to
 * `v_features`, and annotate `img` as directed by the caller's block.
 *
 * Each feature is [:face, [x, y, w, h], [[x, y] * 5 landmarks]].
 * When `img` is non-empty and a block was given, the block is yielded the
 * feature (splatted) and its return value selects the annotation:
 *   nil / Hash              -> nothing drawn (Hash currently ignored)
 *   String                  -> label with default color/thickness
 *   [label, rgb, thickness] -> full control; rgb is 0xRRGGBB
 *
 * NOTE(review): `state` is never written by this function even though the
 * caller checks it afterwards; an exception raised inside the yielded
 * block will longjmp straight out of here (no rb_protect) — confirm this
 * is intended.
 */
void
yunet_process_features(vector<YuNet::Face>& faces,
                       Mat& img, VALUE v_features, int *state)
{
    for (int i = 0; i < faces.size(); i++) {
        Rect box = faces[i].first;
        YuNet::Landmark lmark = faces[i].second;

        // Build [:face, box, landmarks] for this detection.
        VALUE v_type = ID2SYM(id_face);
        VALUE v_box = rb_ary_new_from_args(4,
                          INT2NUM(box.x    ), INT2NUM(box.y     ),
                          INT2NUM(box.width), INT2NUM(box.height));
        VALUE v_landmark = rb_ary_new_from_args(5,
                               rb_ary_new_from_args(2, INT2NUM(lmark[0].x),
                                                       INT2NUM(lmark[0].y)),
                               rb_ary_new_from_args(2, INT2NUM(lmark[1].x),
                                                       INT2NUM(lmark[1].y)),
                               rb_ary_new_from_args(2, INT2NUM(lmark[2].x),
                                                       INT2NUM(lmark[2].y)),
                               rb_ary_new_from_args(2, INT2NUM(lmark[3].x),
                                                       INT2NUM(lmark[3].y)),
                               rb_ary_new_from_args(2, INT2NUM(lmark[4].x),
                                                       INT2NUM(lmark[4].y)));
        VALUE v_feature = rb_ary_new_from_args(3, v_type, v_box,
                                               v_landmark);
        rb_ary_push(v_features, v_feature);

        // Only annotate when an output image was actually requested.
        if (!img.empty() && rb_block_given_p()) {
            VALUE v_annotation= rb_yield_splat(v_feature);
            VALUE v_label     = Qnil;
            VALUE v_color     = ULONG2NUM(0x0000ff); // 0xRRGGBB default
            VALUE v_thickness = INT2NUM(1);

            switch (TYPE(v_annotation)) {
            case T_NIL:
                break;
            case T_HASH:
                break;
            case T_ARRAY:
                // Deliberate fallthrough: longer arrays override defaults
                // for the trailing fields first.
                switch(RARRAY_LENINT(v_annotation)) {
                default:
                case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
                case 2: v_color     = RARRAY_AREF(v_annotation, 1);
                case 1: v_label     = RARRAY_AREF(v_annotation, 0);
                case 0: break;
                }
                break;
            case T_STRING:
                v_label = v_annotation;
                break;
            }

            if (! NIL_P(v_label)) {
                string label     = StringValueCStr(v_label);
                long   rgb       = NUM2ULONG(v_color);
                int    thickness = NUM2INT(v_thickness);
                // Map 0xRRGGBB to OpenCV's BGR scalar order.
                Scalar color     = cv::Scalar((rgb >>  0) & 0xFF,
                                              (rgb >>  8) & 0xFF,
                                              (rgb >> 16) & 0xFF);
                draw_labelbox(img, label, box, color, BLACK, thickness);

                // Mark the 5 landmark points.
                for (const auto& p : lmark) {
                    cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
                }
            }
        }
    }
}
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
/*
 * Convert Yolo detections into Ruby feature arrays appended to
 * `v_features`, and annotate `img` as directed by the caller's block.
 *
 * Each feature is [:class, [x, y, w, h], name, confidence].
 * Block return value handling matches yunet_process_features:
 *   nil / Hash              -> nothing drawn (Hash currently ignored)
 *   String                  -> label with default color/thickness
 *   [label, rgb, thickness] -> full control; rgb is 0xRRGGBB
 *
 * NOTE(review): `state` is never written here although the caller checks
 * it afterwards; and unlike yunet_process_features there is no
 * img.empty() guard before drawing — confirm behaviour when no output
 * image was requested (img is an empty Mat in that case).
 */
void
yolo_process_features(vector<Yolo::Item>& items,
                      Mat& img, VALUE v_features, int *state)
{
    for (int i = 0; i < items.size(); i++) {
        string name       = std::get<0>(items[i]);
        float  confidence = std::get<1>(items[i]);
        Rect   box        = std::get<2>(items[i]);

        // Build [:class, box, name, confidence] for this detection.
        VALUE v_type       = ID2SYM(id_class);
        VALUE v_name       = rb_str_new(name.c_str(), name.size());
        VALUE v_confidence = DBL2NUM(confidence);
        VALUE v_box        = rb_ary_new_from_args(4,
                                 INT2NUM(box.x    ), INT2NUM(box.y     ),
                                 INT2NUM(box.width), INT2NUM(box.height));
        VALUE v_feature    = rb_ary_new_from_args(4, v_type, v_box,
                                                  v_name, v_confidence);
        rb_ary_push(v_features, v_feature);

        if (rb_block_given_p()) {
            VALUE v_annotation = rb_yield_splat(v_feature);

            VALUE v_label     = Qnil;
            VALUE v_color     = ULONG2NUM(0x0000ff); // 0xRRGGBB default
            VALUE v_thickness = INT2NUM(1);

            switch (TYPE(v_annotation)) {
            case T_NIL:
                break;
            case T_HASH:
                break;
            case T_ARRAY:
                // Deliberate fallthrough: longer arrays override defaults
                // for the trailing fields first.
                switch(RARRAY_LENINT(v_annotation)) {
                default:
                case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
                case 2: v_color     = RARRAY_AREF(v_annotation, 1);
                case 1: v_label     = RARRAY_AREF(v_annotation, 0);
                case 0: break;
                }
                break;
            case T_STRING:
                v_label = v_annotation;
                break;
            }

            if (! NIL_P(v_label)) {
                string label     = StringValueCStr(v_label);
                long   rgb       = NUM2ULONG(v_color);
                int    thickness = NUM2INT(v_thickness);
                // Map 0xRRGGBB to OpenCV's BGR scalar order.
                Scalar color     = cv::Scalar((rgb >>  0) & 0xFF,
                                              (rgb >>  8) & 0xFF,
                                              (rgb >> 16) & 0xFF);
                draw_labelbox(img, label, box, color, BLACK, thickness);
            }
        }
    }
}
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
/*
 * Circe#analyze(imgstr, format = nil, debug: false, face: false,
 *               classify: false) {|type, box, ...| annotation }
 *   -> [features, image_or_nil]
 *
 * imgstr   : encoded image bytes (String); decoded with cv::imdecode.
 * format   : :png, :jpg, or nil — when non-nil, an annotated copy of the
 *            image is re-encoded in that format and returned as the
 *            second array element (otherwise nil).
 * debug    : overlay the inference time on the annotated image.
 * face     : run YuNet face detection.
 * classify : run Yolo object detection.
 *            When neither flag is set, both analyses run.
 *
 * The optional block is yielded each feature and may return annotation
 * instructions (see yunet_process_features / yolo_process_features).
 */
static VALUE
circe_m_analyze(int argc, VALUE* argv, VALUE self) {
    // Retrieve arguments
    VALUE v_imgstr, v_format, v_opts;
    VALUE kwargs[3];
    rb_scan_args(argc, argv, "11:", &v_imgstr, &v_format, &v_opts);
    rb_get_kwargs(v_opts, (ID[]){ id_debug, id_face, id_classify },
                  0, 3, kwargs);
    VALUE v_debug    = IF_UNDEF(kwargs[0], Qfalse);
    VALUE v_face     = IF_UNDEF(kwargs[1], Qfalse);
    VALUE v_classify = IF_UNDEF(kwargs[2], Qfalse);

    VALUE v_features = rb_ary_new();
    VALUE v_image    = Qnil;

    // Validate the requested output format before doing any work.
    if (! NIL_P(v_format)) {
        Check_Type(v_format, T_SYMBOL);
        ID i_format = rb_sym2id(v_format);
        if ((i_format != id_png) && (i_format != id_jpg))
            rb_raise(rb_eArgError, "format must be :png, :jpg or nil");
    }

    // Default: run every analysis when none was explicitly requested.
    if (!RTEST(v_face) && !RTEST(v_classify)) {
        v_face = v_classify = Qtrue;
    }


    // Load image.
    // NOTE(review): v_imgstr is used as a String without a Check_Type —
    // confirm callers always pass a String.
    Mat i_img = cv::imdecode(cv::Mat(1, RSTRING_LEN(v_imgstr), CV_8UC1,
                                     (unsigned char *)RSTRING_PTR(v_imgstr)),
                             IMREAD_UNCHANGED);
    // o_img is the annotated copy; left empty when no output format was
    // requested (the *_process_features helpers check for that).
    Mat o_img = NIL_P(v_format) ? cv::Mat() : i_img.clone();

    // Processing
    std::chrono::time_point<std::chrono::system_clock> start_time, end_time;
    std::chrono::duration<double> duration;
    int state = 0;   // Ruby tag state; non-zero means a pending jump

    start_time = std::chrono::system_clock::now();

    if (RTEST(v_classify)) {
        vector<Yolo::Item> items;
        yolo->process(i_img, items);
        yolo_process_features(items, o_img, v_features, &state);
        if (state) goto exception;
    }

    if (RTEST(v_face)) {
        vector<YuNet::Face> faces;
        yunet->process(i_img, faces);
        yunet_process_features(faces, o_img, v_features, &state);
        if (state) goto exception;
    }

    end_time = std::chrono::system_clock::now();
    duration = end_time - start_time;


    // Re-encode the annotated image when a format was requested.
    if (! NIL_P(v_format)) {
        if (RTEST(v_debug)) {
            // 1.0ms literal: requires the chrono literals brought in by
            // `using namespace std` above.
            double ms = duration / 1.0ms;
            string label = cv::format("Inference time : %0.2f ms", ms);
            cv::putText(o_img, label, Point(20, 40),
                        FONT_FACE, FONT_SCALE, RED);
        }

        ID i_format = rb_sym2id(v_format);
        string format;
        if      (i_format == id_png) { format = ".png"; }
        else if (i_format == id_jpg) { format = ".jpg"; }

        std::vector<uchar> buf;
        cv::imencode(format, o_img, buf);
        v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
    }
    return rb_ary_new_from_args(2, v_features, v_image);

 exception:
    // Release OpenCV buffers before resuming the captured Ruby jump
    // (rb_jump_tag does not return).
    i_img.release();
    o_img.release();
    rb_jump_tag(state);
}
|
374
|
+
|
375
|
+
|
376
|
+
|
377
|
+
|
378
|
+
/*
 * Extension entry point: defines the Circe class and Circe::Error, loads
 * the two ONNX models from the paths held in the Ruby constants
 * ONNX_YOLO / ONNX_YUNET (presumably set by lib/circe.rb before this
 * extension is required — confirm), interns the symbol IDs used
 * throughout, and registers Circe#analyze.
 */
extern "C"
void Init_core(void) {
    /* Main classes */
    cCirce      = rb_define_class("Circe", rb_cObject);
    eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
    // myclass = rb_const_get(mymodule, sym_myclass);

    VALUE v_onnx_yolo  = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
    VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));

    // Function-local statics keep the detector instances alive for the
    // whole process; the file-scope pointers refer to them.
    static Yolo  _yolo  = { StringValueCStr(v_onnx_yolo ) };
    static YuNet _yunet = { StringValueCStr(v_onnx_yunet) };

    yolo  = &_yolo;
    yunet = &_yunet;


    id_debug    = rb_intern_const("debug"   );
    id_face     = rb_intern_const("face"    );
    id_classify = rb_intern_const("classify");
    id_class    = rb_intern_const("class"   );
    id_png      = rb_intern_const("png"     );
    id_jpg      = rb_intern_const("jpg"     );


    rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
}
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'mkmf'

# Build configuration for the circe/core native extension (OpenCV 4).

# On *BSD the pkg-config shim lives outside mkmf's default search path.
$PKGCONFIG =
  case RbConfig::CONFIG['host_os']
  when /bsd/ then '/usr/local/bin/pkgconf'
  end


cflags, ldflags, libs = pkg_config('opencv4')

$LDFLAGS  += " #{ldflags} #{libs}"
$INCFLAGS += " #{cflags}"
# Fix: the original appended "-std=c++17" without a leading space, fusing
# it onto the previous flag and silently dropping the C++17 requirement.
$CXXFLAGS += " -std=c++17"

create_makefile("circe/core")
|
data/ext/yolo.cpp
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#include <tuple>
|
2
|
+
#include <string>
|
3
|
+
#include <opencv2/imgproc.hpp>
|
4
|
+
#include <opencv2/dnn.hpp>
|
5
|
+
|
6
|
+
#include "yolo.h"
|
7
|
+
|
8
|
+
|
9
|
+
// Construct a detector by loading the YOLOv5 network from an ONNX file.
// readNetFromONNX raises a cv::Exception if the model cannot be read.
Yolo::Yolo(const std::string& model) {
    net = cv::dnn::readNetFromONNX(model);
}
|
12
|
+
|
13
|
+
|
14
|
+
/*
 * Run YOLOv5 object detection on `img` and fill `items` with the
 * surviving detections as (class name, confidence, box-in-image-coords)
 * tuples.  `items` is cleared first.
 */
void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {

    // Pre-process: resize to the fixed network input, scale pixels to
    // [0,1], swap BGR -> RGB.
    cv::Mat blob;
    std::vector<cv::Mat> outputs;

    cv::dnn::blobFromImage(img, blob, 1./255.,
                           cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
                           cv::Scalar(), true, false);

    // Process
    net.setInput(blob);
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // Post-process
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Factors mapping network coordinates back to the original image.
    float x_factor = img.cols / INPUT_WIDTH;
    float y_factor = img.rows / INPUT_HEIGHT;

    float *data = (float *)outputs[0].data;
    // Each detection row: cx, cy, w, h, objectness, then one score per
    // class (85 for the 80-class COCO model).  The original hard-coded
    // both 85 and the 25200-row count; derive them instead so a model
    // with a different class list or input size still parses correctly.
    const int dimensions = 5 + static_cast<int>(names.size());
    const int rows = static_cast<int>(outputs[0].total() / dimensions);

    // Iterate through the candidate detections.
    for (int i = 0; i < rows; ++i) {
        float confidence = data[4];  // objectness score

        // Discard bad detections and continue.
        if (confidence >= CONFIDENCE_THRESHOLD) {
            float *classes_scores = data + 5;

            // Wrap the per-class scores without copying.
            cv::Mat scores(1, names.size(), CV_32FC1, classes_scores);

            // Index of the best class score.
            cv::Point class_id;
            double max_class_score;
            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

            // Keep only detections whose best class clears the threshold.
            if (max_class_score > SCORE_THRESHOLD) {
                float cx = data[0]; // Center
                float cy = data[1];
                float w  = data[2]; // Box dimension
                float h  = data[3];

                // Center/size -> top-left corner box in image coordinates.
                int left   = int((cx - 0.5 * w) * x_factor);
                int top    = int((cy - 0.5 * h) * y_factor);
                int width  = int(w * x_factor);
                int height = int(h * y_factor);

                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        // Jump to the next detection row.
        data += dimensions;
    }

    // Non-Maximum Suppression keeps the best box per object.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences,
                      SCORE_THRESHOLD, NMS_THRESHOLD, indices);

    items.clear();

    for (size_t i = 0; i < indices.size(); i++) {
        int idx = indices[i];
        items.push_back({ names[class_ids[idx]],confidences[idx],boxes[idx] });
    }
}
|
data/ext/yolo.h
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
// Fix: the original guard `__YOLO__` uses an identifier reserved for the
// implementation (leading double underscore); renamed to YOLO_H.
#ifndef YOLO_H
#define YOLO_H

#include <string>
#include <vector>
#include <tuple>

#include <opencv2/dnn.hpp>


/*
 * YOLOv5 object detector backed by cv::dnn.
 * Construct with the path to an ONNX model, then call process() per image.
 */
class Yolo
{
public:
    // One detection: (class name, confidence, bounding box in image coords).
    typedef std::tuple<std::string, float, cv::Rect> Item;

private:
    // Fixed network input size and detection thresholds.
    static constexpr float INPUT_WIDTH          = 640.0;
    static constexpr float INPUT_HEIGHT         = 640.0;
    static constexpr float SCORE_THRESHOLD      = 0.5;
    static constexpr float NMS_THRESHOLD        = 0.45;
    static constexpr float CONFIDENCE_THRESHOLD = 0.45;

public:
    // COCO class names, indexed by the network's class id.
    const std::vector<std::string> names = {
        "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
        "truck", "boat", "traffic light", "fire hydrant", "stop sign",
        "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
        "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
        "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
        "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
        "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
        "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
        "sofa", "potted plant", "bed", "dining table", "toilet", "tvmonitor",
        "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
        "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy bear", "hair drier", "toothbrush"
    };

public:
    Yolo(const std::string& model);
    void process(cv::Mat &img, std::vector<Item> &items);

private:
    cv::dnn::Net net;
};

#endif /* YOLO_H */
|
data/ext/yunet.cpp
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
#include <cmath>
|
2
|
+
#include <string>
|
3
|
+
#include <vector>
|
4
|
+
#include <numeric>
|
5
|
+
#include <algorithm>
|
6
|
+
|
7
|
+
#include <opencv2/dnn.hpp>
|
8
|
+
|
9
|
+
#include "yunet.h"
|
10
|
+
|
11
|
+
|
12
|
+
// Construct a face detector by loading the YuNet network from an ONNX
// file, pinned to the OpenCV backend on CPU.
YuNet::YuNet(const std::string& model_filename)
{
    net = cv::dnn::readNetFromONNX(model_filename);
    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
|
18
|
+
|
19
|
+
|
20
|
+
/*
 * Run YuNet face detection on `img` and fill `faces` with the surviving
 * detections as (bounding box, 5-point landmark) pairs, in original image
 * coordinates.  `faces` is cleared first.
 */
void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
{
    /* -- Preparing for image size -- */
    // Fixed model width; height follows the image aspect ratio, rounded
    // down to a multiple of 32 (network stride requirement).
    cv::Size model_size;
    model_size.width  = MODEL_WIDTH;
    model_size.height = MODEL_WIDTH * img.rows / img.cols;
    model_size.height = (model_size.height / 32) * 32;

    std::pair<int32_t, int32_t> feature_map_2th = {
        (model_size.height + 1) / 2 / 2,
        (model_size.width  + 1) / 2 / 2
    };

    // Feature-map sizes for the successive strides.
    std::vector<std::pair<int32_t, int32_t>> feature_map_list;
    feature_map_list.push_back({ (feature_map_2th.first  + 1) / 2 ,
                                 (feature_map_2th.second + 1) / 2 });

    for (int32_t i = 0; i < 3; i++) {
        const auto& previous = feature_map_list.back();
        feature_map_list.push_back({ (previous.first  + 1) / 2 ,
                                     (previous.second + 1) / 2 });
    }

    // Anchor (prior) boxes, { cx, cy, s_kx, s_ky } in normalized coords.
    std::vector<std::vector<float>> prior_list;
    for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
        const auto& min_sizes   = MIN_SIZES[i];
        const auto& feature_map = feature_map_list[i];
        for (int y = 0; y < feature_map.first; y++) {
            for (int x = 0; x < feature_map.second; x++) {
                for (const auto& min_size : min_sizes) {
                    float s_kx = static_cast<float>(min_size) / model_size.width;
                    float s_ky = static_cast<float>(min_size) / model_size.height;
                    float cx   = (x + 0.5f) * STEPS[i] / model_size.width;
                    float cy   = (y + 0.5f) * STEPS[i] / model_size.height;
                    prior_list.push_back({ cx, cy, s_kx, s_ky });
                }
            }
        }
    }


    /* -- Pre-process -- */
    cv::Mat blob;
    cv::dnn::blobFromImage(img, blob, 1.0, model_size);

    /* -- Inference -- */
    std::vector<cv::Mat> outputs;
    net.setInput(blob);
    net.forward(outputs, { "conf", "iou", "loc" });

    /* -- Post Process -- */
    const cv::Mat& mat_conf = outputs[0];
    const cv::Mat& mat_iou  = outputs[1];
    const cv::Mat& mat_loc  = outputs[2];
    const cv::Size image_size = img.size();

    // Face-class scores (column 1 of "conf"), clamped to [0, 1].
    std::vector<float> cls_score;
    for (int32_t row = 0; row < mat_conf.rows; row++) {
        float val = mat_conf.at<float>(cv::Point(1, row));
        cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
    }

    // IoU-awareness scores, clamped to [0, 1].
    // Fix: the original read this column from mat_conf instead of
    // mat_iou, so the "iou" output was never actually used.
    std::vector<float> iou_score;
    for (int32_t row = 0; row < mat_iou.rows; row++) {
        float val = mat_iou.at<float>(cv::Point(0, row));
        iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
    }

    // Final score: geometric mean of class score and IoU score.
    std::vector<float> score;
    for (int32_t row = 0; row < mat_conf.rows; row++) {
        score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
    }

    // Decode every bounding box from the "loc" deltas and the priors.
    std::vector<cv::Rect> bbox_all;
    for (int row = 0; row < mat_loc.rows; row++) {
        float cx = mat_loc.at<float>(cv::Point(0, row));
        float cy = mat_loc.at<float>(cv::Point(1, row));
        float w  = mat_loc.at<float>(cv::Point(2, row));
        float h  = mat_loc.at<float>(cv::Point(3, row));

        cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
        cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
        // NOTE(review): w scales by VARIANCES[0] while h uses
        // VARIANCES[1]; reference YuNet decoders use VARIANCES[1] for
        // both — left as-is here, confirm against the model.
        w  = prior_list[row][2] * std::exp(w * VARIANCES[0]);
        h  = prior_list[row][3] * std::exp(h * VARIANCES[1]);

        bbox_all.push_back({
            static_cast<int32_t>((cx - w / 2) * image_size.width),
            static_cast<int32_t>((cy - h / 2) * image_size.height),
            static_cast<int32_t>(w * image_size.width),
            static_cast<int32_t>(h * image_size.height) });
    }

    // Non-Maximum Suppression
    std::vector<int> indices;
    cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);

    // Collect surviving boxes and decode their 5-point landmarks.
    faces.clear();
    for (int idx : indices) {
        Landmark landmark; // (landmark is 5 points)
        for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
            cv::Point& p = landmark[i];
            float x = mat_loc.at<float>(cv::Point(4 + i * 2,     idx));
            float y = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
            p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
            p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
        }

        faces.push_back({ bbox_all[idx], landmark });
    }
}
|