circe 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/circe.gemspec +24 -0
- data/data/face_detection_yunet_2022mar.onnx +0 -0
- data/data/yolov5s.onnx +0 -0
- data/ext/camera_model.h +560 -0
- data/ext/circe.cpp +404 -0
- data/ext/extconf.rb +15 -0
- data/ext/yolo.cpp +94 -0
- data/ext/yolo.h +48 -0
- data/ext/yunet.cpp +132 -0
- data/ext/yunet.h +38 -0
- data/lib/circe/version.rb +3 -0
- data/lib/circe.rb +13 -0
- metadata +60 -0
data/ext/circe.cpp
ADDED
@@ -0,0 +1,404 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
|
3
|
+
#if 0
|
4
|
+
void estimateHeadPose(cv::Mat& image, CameraModel& camera, const FaceDetection::Landmark& landmark)
|
5
|
+
{
|
6
|
+
/* reference: https://qiita.com/TaroYamada/items/e3f3d0ea4ecc0a832fac */
|
7
|
+
/* reference: https://github.com/spmallick/learnopencv/blob/master/HeadPose/headPose.cpp */
|
8
|
+
static const std::vector<cv::Point3f> face_object_point_list = {
|
9
|
+
{ 0.0f, 0.0f, 0.0f }, // Nose
|
10
|
+
{ -225.0f, 170.0f, -135.0f }, // Left eye
|
11
|
+
{ 225.0f, 170.0f, -135.0f }, // Right eye
|
12
|
+
{ -150.0f, -150.0f, -125.0f }, // Left lip
|
13
|
+
{ 150.0f, -150.0f, -125.0f }, // Right lip
|
14
|
+
};
|
15
|
+
|
16
|
+
static const std::vector<cv::Point3f> face_object_point_for_pnp_list = {
|
17
|
+
face_object_point_list[0], face_object_point_list[0], // Nose
|
18
|
+
face_object_point_list[1], face_object_point_list[1], // Left eye
|
19
|
+
face_object_point_list[2], face_object_point_list[2], // Right eye
|
20
|
+
face_object_point_list[3], face_object_point_list[3], // Left lip
|
21
|
+
face_object_point_list[4], face_object_point_list[4], // Righ lip
|
22
|
+
};
|
23
|
+
|
24
|
+
std::vector<cv::Point2f> face_image_point_list;
|
25
|
+
face_image_point_list.push_back(landmark[2]); // Nose
|
26
|
+
face_image_point_list.push_back(landmark[2]); // Nose
|
27
|
+
face_image_point_list.push_back(landmark[0]); // Left eye
|
28
|
+
face_image_point_list.push_back(landmark[0]); // Left eye
|
29
|
+
face_image_point_list.push_back(landmark[1]); // Right eye
|
30
|
+
face_image_point_list.push_back(landmark[1]); // Right eye
|
31
|
+
face_image_point_list.push_back(landmark[3]); // Left lip
|
32
|
+
face_image_point_list.push_back(landmark[3]); // Left lip
|
33
|
+
face_image_point_list.push_back(landmark[4]); // Right lip
|
34
|
+
face_image_point_list.push_back(landmark[4]); // Right lip
|
35
|
+
|
36
|
+
cv::Mat rvec = cv::Mat_<float>(3, 1);
|
37
|
+
cv::Mat tvec = cv::Mat_<float>(3, 1);
|
38
|
+
cv::solvePnP(face_object_point_for_pnp_list, face_image_point_list, camera.K, camera.dist_coeff, rvec, tvec, false, cv::SOLVEPNP_ITERATIVE);
|
39
|
+
|
40
|
+
char text[128];
|
41
|
+
snprintf(text, sizeof(text), "Pitch = %-+4.0f, Yaw = %-+4.0f, Roll = %-+4.0f", Rad2Deg(rvec.at<float>(0, 0)), Rad2Deg(rvec.at<float>(1, 0)), Rad2Deg(rvec.at<float>(2, 0)));
|
42
|
+
CommonHelper::drawText(image, text, cv::Point(10, 10), 0.7, 3, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255), false);
|
43
|
+
|
44
|
+
std::vector<cv::Point3f> nose_end_point3D = { { 0.0f, 0.0f, 500.0f } };
|
45
|
+
std::vector<cv::Point2f> nose_end_point2D;
|
46
|
+
cv::projectPoints(nose_end_point3D, rvec, tvec,
|
47
|
+
camera.K, camera.dist_coeff, nose_end_point2D);
|
48
|
+
cv::arrowedLine(image, face_image_point_list[0], nose_end_point2D[0],
|
49
|
+
cv::Scalar(0, 255, 0), 5);
|
50
|
+
|
51
|
+
|
52
|
+
/* Calculate Euler Angle */
|
53
|
+
cv::Mat R;
|
54
|
+
cv::Rodrigues(rvec, R);
|
55
|
+
cv::Mat projMat = (cv::Mat_<double>(3, 4) <<
|
56
|
+
R.at<float>(0, 0), R.at<float>(0, 1), R.at<float>(0, 2), 0,
|
57
|
+
R.at<float>(1, 0), R.at<float>(1, 1), R.at<float>(1, 2), 0,
|
58
|
+
R.at<float>(2, 0), R.at<float>(2, 1), R.at<float>(2, 2), 0);
|
59
|
+
|
60
|
+
cv::Mat K, R2, tvec2, R_x, R_y, R_z, eulerAngles;
|
61
|
+
cv::decomposeProjectionMatrix(projMat, K, R, tvec2,
|
62
|
+
R_x, R_y, R_z, eulerAngles);
|
63
|
+
double pitch = eulerAngles.at<double>(0, 0);
|
64
|
+
double yaw = eulerAngles.at<double>(1, 0);
|
65
|
+
double roll = eulerAngles.at<double>(2, 0);
|
66
|
+
snprintf(text, sizeof(text), "X = %-+4.0f, Y = %-+4.0f, Z = %-+4.0f", pitch, yaw, roll);
|
67
|
+
CommonHelper::drawText(image, text, cv::Point(10, 40), 0.7, 3, cv::Scalar(0, 0, 0), cv::Scalar(255, 255, 255), false);
|
68
|
+
}
|
69
|
+
#endif
|
70
|
+
|
71
|
+
#if 0
|
72
|
+
CameraModel camera;
|
73
|
+
camera.SetIntrinsic(image_input.cols, image_input.rows, FocalLength(image_input.cols, kFovDeg));
|
74
|
+
camera.SetDist({ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f });
|
75
|
+
camera.SetExtrinsic({ 0.0f, 0.0f, 0.0f }, /* rvec [deg] */
|
76
|
+
{ 0.0f, 0.0f, 0.0f }, true); /* tvec (in world coordinate) */
|
77
|
+
|
78
|
+
estimateHeadPose(image_input, camera, face_list[i].second);
|
79
|
+
|
80
|
+
#endif
|
81
|
+
|
82
|
+
// Return keyword value `a` unless it was absent (Qundef), else default `b`.
#define IF_UNDEF(a, b) \
    ((a) == Qundef) ? (b) : (a)


// Ruby class handles; resolved once in Init_core().
static VALUE cCirce = Qundef;
static VALUE eCirceError = Qundef;

#include <tuple>
#include <string>
#include <chrono>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>

#include "yolo.h"
#include "yunet.h"

using namespace std;
using namespace cv;



// Text parameters.
const float FONT_SCALE = 0.7;
const int FONT_FACE = FONT_HERSHEY_SIMPLEX;
const int THICKNESS = 1;

// Colors. NOTE: OpenCV drawing uses BGR channel order.
cv::Scalar BLACK = cv::Scalar(0,0,0);
cv::Scalar BLUE = cv::Scalar(255, 178, 50);
cv::Scalar YELLOW = cv::Scalar(0, 255, 255);
cv::Scalar RED = cv::Scalar(0,0,255);


// Interned Ruby IDs, filled in by Init_core().
static ID id_debug;
static ID id_face;
static ID id_classify;
static ID id_class;
static ID id_png;
static ID id_jpg;


// Detector singletons; point at function-local statics created in Init_core().
static Yolo *yolo;
static YuNet *yunet;
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
static void
|
129
|
+
draw_label(cv::Mat& img, string label, Point& origin,
|
130
|
+
Scalar& fgcolor, Scalar& bgcolor, int thickness = 1)
|
131
|
+
{
|
132
|
+
int baseLine;
|
133
|
+
cv::Size label_size =
|
134
|
+
cv::getTextSize(label, FONT_FACE, FONT_SCALE, thickness, &baseLine);
|
135
|
+
|
136
|
+
Point a = { origin.x, max(origin.y, label_size.height) };
|
137
|
+
Point b = { a.x + label_size.width,
|
138
|
+
a.y + label_size.height + baseLine };
|
139
|
+
Point t = { a.x, a.y + label_size.height };
|
140
|
+
|
141
|
+
cv::rectangle(img, a, b, bgcolor, cv::FILLED);
|
142
|
+
cv::putText(img, label, t, FONT_FACE, FONT_SCALE, fgcolor, THICKNESS);
|
143
|
+
}
|
144
|
+
|
145
|
+
static void
|
146
|
+
draw_labelbox(cv::Mat& img, string label, Rect& box,
|
147
|
+
Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
|
148
|
+
int thickness = 1) {
|
149
|
+
|
150
|
+
Point a = { box.x, box.y };
|
151
|
+
Point b = { box.x + box.width, box.y + box.height };
|
152
|
+
|
153
|
+
cv::rectangle(img, a, b, framecolor, thickness);
|
154
|
+
draw_label(img, label, a, textcolor, framecolor);
|
155
|
+
}
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
// Convert YuNet face detections into Ruby feature arrays, and optionally
// let a Ruby block drive annotation drawing on `img`.
//
// For each face, appends [:face, [x, y, w, h], landmark] to v_features,
// where landmark is five [x, y] pairs as produced by YuNet.
// If `img` is non-empty and a block is given, the feature is yielded to the
// block; the block's return value selects the annotation:
//   nil            -> no annotation
//   Hash           -> currently ignored
//   Array          -> [label, rgb_color, thickness] (trailing items optional)
//   String         -> label with default color/thickness
//
// NOTE(review): `state` is accepted but never written here; rb_yield_splat /
// StringValueCStr can raise and longjmp past this frame — confirm whether
// these calls should be wrapped with rb_protect so `state` is actually set.
void
yunet_process_features(vector<YuNet::Face>& faces,
                       Mat& img, VALUE v_features, int *state)
{
    for (int i = 0; i < faces.size(); i++) {
        Rect box = faces[i].first;
        YuNet::Landmark lmark = faces[i].second;

        // Build the Ruby-side representation of this detection.
        VALUE v_type = ID2SYM(id_face);
        VALUE v_box = rb_ary_new_from_args(4,
                          INT2NUM(box.x ), INT2NUM(box.y ),
                          INT2NUM(box.width), INT2NUM(box.height));
        VALUE v_landmark = rb_ary_new_from_args(5,
                          rb_ary_new_from_args(2, INT2NUM(lmark[0].x),
                                                  INT2NUM(lmark[0].y)),
                          rb_ary_new_from_args(2, INT2NUM(lmark[1].x),
                                                  INT2NUM(lmark[1].y)),
                          rb_ary_new_from_args(2, INT2NUM(lmark[2].x),
                                                  INT2NUM(lmark[2].y)),
                          rb_ary_new_from_args(2, INT2NUM(lmark[3].x),
                                                  INT2NUM(lmark[3].y)),
                          rb_ary_new_from_args(2, INT2NUM(lmark[4].x),
                                                  INT2NUM(lmark[4].y)));
        VALUE v_feature = rb_ary_new_from_args(3, v_type, v_box,
                                               v_landmark);
        rb_ary_push(v_features, v_feature);

        // Annotation pass: only when an output image exists and the caller
        // supplied a block.
        if (!img.empty() && rb_block_given_p()) {
            VALUE v_annotation= rb_yield_splat(v_feature);
            VALUE v_label = Qnil;
            VALUE v_color = ULONG2NUM(0x0000ff);   // default: red (RGB)
            VALUE v_thickness = INT2NUM(1);

            switch (TYPE(v_annotation)) {
            case T_NIL:
                break;
            case T_HASH:
                break;
            case T_ARRAY:
                // Intentional fallthrough: longer arrays fill all slots,
                // shorter ones keep the defaults.
                switch(RARRAY_LENINT(v_annotation)) {
                default:
                case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
                case 2: v_color = RARRAY_AREF(v_annotation, 1);
                case 1: v_label = RARRAY_AREF(v_annotation, 0);
                case 0: break;
                }
                break;
            case T_STRING:
                v_label = v_annotation;
                break;
            }

            if (! NIL_P(v_label)) {
                string label = StringValueCStr(v_label);
                long rgb = NUM2ULONG(v_color);
                int thickness = NUM2INT(v_thickness);
                // Convert packed RGB to OpenCV's BGR Scalar.
                Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
                                          (rgb >> 8) & 0xFF,
                                          (rgb >> 16) & 0xFF);
                draw_labelbox(img, label, box, color, BLACK, thickness);

                // Mark each of the five landmark points.
                for (const auto& p : lmark) {
                    cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
                }
            }
        }
    }
}
|
229
|
+
|
230
|
+
|
231
|
+
|
232
|
+
void
|
233
|
+
yolo_process_features(vector<Yolo::Item>& items,
|
234
|
+
Mat& img, VALUE v_features, int *state)
|
235
|
+
{
|
236
|
+
for (int i = 0; i < items.size(); i++) {
|
237
|
+
string name = std::get<0>(items[i]);
|
238
|
+
float confidence = std::get<1>(items[i]);
|
239
|
+
Rect box = std::get<2>(items[i]);
|
240
|
+
|
241
|
+
VALUE v_type = ID2SYM(id_class);
|
242
|
+
VALUE v_name = rb_str_new(name.c_str(), name.size());
|
243
|
+
VALUE v_confidence = DBL2NUM(confidence);
|
244
|
+
VALUE v_box = rb_ary_new_from_args(4,
|
245
|
+
INT2NUM(box.x ), INT2NUM(box.y ),
|
246
|
+
INT2NUM(box.width), INT2NUM(box.height));
|
247
|
+
VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box,
|
248
|
+
v_name, v_confidence);
|
249
|
+
rb_ary_push(v_features, v_feature);
|
250
|
+
|
251
|
+
if (rb_block_given_p()) {
|
252
|
+
VALUE v_annotation = rb_yield_splat(v_feature);
|
253
|
+
|
254
|
+
VALUE v_label = Qnil;
|
255
|
+
VALUE v_color = ULONG2NUM(0x0000ff);
|
256
|
+
VALUE v_thickness = INT2NUM(1);
|
257
|
+
|
258
|
+
switch (TYPE(v_annotation)) {
|
259
|
+
case T_NIL:
|
260
|
+
break;
|
261
|
+
case T_HASH:
|
262
|
+
break;
|
263
|
+
case T_ARRAY:
|
264
|
+
switch(RARRAY_LENINT(v_annotation)) {
|
265
|
+
default:
|
266
|
+
case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
|
267
|
+
case 2: v_color = RARRAY_AREF(v_annotation, 1);
|
268
|
+
case 1: v_label = RARRAY_AREF(v_annotation, 0);
|
269
|
+
case 0: break;
|
270
|
+
}
|
271
|
+
break;
|
272
|
+
case T_STRING:
|
273
|
+
v_label = v_annotation;
|
274
|
+
break;
|
275
|
+
}
|
276
|
+
|
277
|
+
if (! NIL_P(v_label)) {
|
278
|
+
string label = StringValueCStr(v_label);
|
279
|
+
long rgb = NUM2ULONG(v_color);
|
280
|
+
int thickness = NUM2INT(v_thickness);
|
281
|
+
Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
|
282
|
+
(rgb >> 8) & 0xFF,
|
283
|
+
(rgb >> 16) & 0xFF);
|
284
|
+
draw_labelbox(img, label, box, color, BLACK, thickness);
|
285
|
+
}
|
286
|
+
}
|
287
|
+
}
|
288
|
+
}
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
// Circe#analyze(imgstr, format = nil, debug: false, face: false, classify: false)
//
// Decode the image bytes in `imgstr`, run the requested detectors, and
// return [features, image]:
//   - features : array of per-detection arrays built by the *_process_features
//                helpers (block, if given, controls annotations);
//   - image    : re-encoded annotated image (String) when `format` is
//                :png / :jpg, otherwise nil.
// When neither face: nor classify: is requested, both are enabled.
// Raises ArgumentError for an unsupported format symbol.
static VALUE
circe_m_analyze(int argc, VALUE* argv, VALUE self) {
    // Retrieve arguments: 1 required, 1 optional, plus keyword options.
    VALUE v_imgstr, v_format, v_opts;
    VALUE kwargs[3];
    rb_scan_args(argc, argv, "11:", &v_imgstr, &v_format, &v_opts);
    rb_get_kwargs(v_opts, (ID[]){ id_debug, id_face, id_classify },
                  0, 3, kwargs);
    VALUE v_debug = IF_UNDEF(kwargs[0], Qfalse);
    VALUE v_face = IF_UNDEF(kwargs[1], Qfalse);
    VALUE v_classify = IF_UNDEF(kwargs[2], Qfalse);

    VALUE v_features = rb_ary_new();
    VALUE v_image = Qnil;

    // Validate the output format early, before any heavy work.
    if (! NIL_P(v_format)) {
        Check_Type(v_format, T_SYMBOL);
        ID i_format = rb_sym2id(v_format);
        if ((i_format != id_png) && (i_format != id_jpg))
            rb_raise(rb_eArgError, "format must be :png, :jpg or nil");
    }

    // Default: run both detectors when none was explicitly requested.
    if (!RTEST(v_face) && !RTEST(v_classify)) {
        v_face = v_classify = Qtrue;
    }


    // Load image: wrap the Ruby string bytes in a 1xN Mat and decode.
    Mat i_img = cv::imdecode(cv::Mat(1, RSTRING_LEN(v_imgstr), CV_8UC1,
                                     (unsigned char *)RSTRING_PTR(v_imgstr)),
                             IMREAD_UNCHANGED);
    // o_img is the annotation target; left empty when no output is wanted.
    Mat o_img = NIL_P(v_format) ? cv::Mat() : i_img.clone();

    // Processing (timed for the debug overlay).
    std::chrono::time_point<std::chrono::system_clock> start_time, end_time;
    std::chrono::duration<double> duration;
    int state = 0;   // Ruby tag for rb_jump_tag on the error path

    start_time = std::chrono::system_clock::now();

    if (RTEST(v_classify)) {
        vector<Yolo::Item> items;
        yolo->process(i_img, items);
        yolo_process_features(items, o_img, v_features, &state);
        if (state) goto exception;
    }

    if (RTEST(v_face)) {
        vector<YuNet::Face> faces;
        yunet->process(i_img, faces);
        yunet_process_features(faces, o_img, v_features, &state);
        if (state) goto exception;
    }

    end_time = std::chrono::system_clock::now();
    duration = end_time - start_time;


    if (! NIL_P(v_format)) {
        if (RTEST(v_debug)) {
            // `1.0ms` resolves through std::chrono_literals (via
            // `using namespace std`); yields elapsed milliseconds.
            double ms = duration / 1.0ms;
            string label = cv::format("Inference time : %0.2f ms", ms);
            cv::putText(o_img, label, Point(20, 40),
                        FONT_FACE, FONT_SCALE, RED);
        }

        ID i_format = rb_sym2id(v_format);
        string format;
        if (i_format == id_png) { format = ".png"; }
        else if (i_format == id_jpg) { format = ".jpg"; }

        std::vector<uchar> buf;
        cv::imencode(format, o_img, buf);
        v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
    }
    return rb_ary_new_from_args(2, v_features, v_image);

exception:
    // NOTE(review): `state` is never set by the helpers today, so this path
    // looks unreachable — confirm intent. Release Mats before longjmp'ing
    // back into the Ruby VM, since rb_jump_tag skips C++ destructors.
    i_img.release();
    o_img.release();
    rb_jump_tag(state);
}
|
374
|
+
|
375
|
+
|
376
|
+
|
377
|
+
|
378
|
+
// Extension entry point, invoked by Ruby when `circe/core` is required.
// Defines the Circe class, loads both ONNX models (paths taken from the
// Circe::ONNX_YOLO / Circe::ONNX_YUNET constants set by lib/circe.rb),
// interns the symbols used at analyze() time, and binds #analyze.
extern "C"
void Init_core(void) {
    /* Main classes */
    cCirce = rb_define_class("Circe", rb_cObject);
    eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
    // myclass = rb_const_get(mymodule, sym_myclass);

    VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
    VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));

    // Function-local statics: models are loaded exactly once, at require
    // time, and live for the remainder of the process.
    static Yolo _yolo = { StringValueCStr(v_onnx_yolo ) };
    static YuNet _yunet = { StringValueCStr(v_onnx_yunet) };

    yolo = &_yolo;
    yunet = &_yunet;


    // Pre-intern the IDs used on every analyze() call.
    id_debug = rb_intern_const("debug" );
    id_face = rb_intern_const("face" );
    id_classify = rb_intern_const("classify");
    id_class = rb_intern_const("class" );
    id_png = rb_intern_const("png" );
    id_jpg = rb_intern_const("jpg" );


    // -1 argc: analyze() parses its own arguments via rb_scan_args.
    rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
}
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# extconf.rb -- generates the Makefile for the circe/core native extension.
require 'mkmf'

# On *BSD systems the pkg-config binary ships as `pkgconf` outside mkmf's
# default lookup; point mkmf at it explicitly.
$PKGCONFIG =
    case RbConfig::CONFIG['host_os']
    when /bsd/ then '/usr/local/bin/pkgconf'
    end


cflags, ldflags, libs = pkg_config('opencv4')

# Fail fast with a clear message instead of producing a Makefile that
# cannot compile.
abort 'ERROR: opencv4 not found (pkg-config lookup failed)' if
  cflags.nil? && ldflags.nil? && libs.nil?

$LDFLAGS += " #{ldflags} #{libs}"
$INCFLAGS += " #{cflags}"
# FIX: leading space so the flag is not glued onto the previous token
# already present in $CXXFLAGS.
$CXXFLAGS += " -std=c++17"

create_makefile("circe/core")
|
data/ext/yolo.cpp
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#include <tuple>
|
2
|
+
#include <string>
|
3
|
+
#include <opencv2/imgproc.hpp>
|
4
|
+
#include <opencv2/dnn.hpp>
|
5
|
+
|
6
|
+
#include "yolo.h"
|
7
|
+
|
8
|
+
|
9
|
+
Yolo::Yolo(const std::string& model) {
|
10
|
+
net = cv::dnn::readNetFromONNX(model);
|
11
|
+
}
|
12
|
+
|
13
|
+
|
14
|
+
// Run YOLOv5 object detection on `img` and fill `items` with
// (class name, confidence, bounding box) tuples after NMS.
void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {

    // Pre-process: resize to the fixed network input, scale to [0,1],
    // swap BGR->RGB, no crop.
    cv::Mat blob;
    std::vector<cv::Mat> outputs;

    cv::dnn::blobFromImage(img, blob, 1./255.,
                           cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
                           cv::Scalar(), true, false);

    // Process
    net.setInput(blob);
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // Post-process
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Factors mapping network coordinates back onto the original image.
    float x_factor = img.cols / INPUT_WIDTH;
    float y_factor = img.rows / INPUT_HEIGHT;

    float *data = (float *)outputs[0].data;

    // Each candidate row is [cx, cy, w, h, objectness, per-class scores].
    const int dimensions = 5 + static_cast<int>(names.size());

    // FIX: derive the candidate count from the output blob instead of
    // hard-coding 25200 (which only holds for the default 640x640 input).
    const int rows = static_cast<int>(outputs[0].total()) / dimensions;

    // Iterate through the candidate detections.
    for (int i = 0; i < rows; ++i) {
        float confidence = data[4];

        // Discard bad detections and continue.
        if (confidence >= CONFIDENCE_THRESHOLD) {
            float *classes_scores = data + 5;

            // Wrap the class scores in a 1xN Mat (no copy).
            cv::Mat scores(1, names.size(), CV_32FC1, classes_scores);

            // Index of the best class score.
            cv::Point class_id;
            double max_class_score;
            cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

            // Keep only candidates whose best class clears the threshold.
            if (max_class_score > SCORE_THRESHOLD) {
                float cx = data[0]; // Center
                float cy = data[1];
                float w = data[2];  // Box dimension
                float h = data[3];

                // Bounding box in original-image coordinates.
                int left = int((cx - 0.5 * w) * x_factor);
                int top = int((cy - 0.5 * h) * y_factor);
                int width = int(w * x_factor);
                int height = int(h * y_factor);

                confidences.push_back(confidence);
                class_ids.push_back(class_id.x);
                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        // Jump to the next candidate row (FIX: was a magic `85` even
        // though `dimensions` existed for exactly this purpose).
        data += dimensions;
    }

    // Perform Non-Maximum Suppression.
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences,
                      SCORE_THRESHOLD, NMS_THRESHOLD, indices);

    items.clear();
    items.reserve(indices.size());
    for (int i = 0; i < indices.size(); i++) {
        int idx = indices[i];
        items.push_back({ names[class_ids[idx]],confidences[idx],boxes[idx] });
    }
}
|
data/ext/yolo.h
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#ifndef __YOLO__
|
2
|
+
#define __YOLO__
|
3
|
+
|
4
|
+
#include <string>
|
5
|
+
#include <vector>
|
6
|
+
#include <tuple>
|
7
|
+
|
8
|
+
#include <opencv2/dnn.hpp>
|
9
|
+
|
10
|
+
|
11
|
+
// Thin wrapper around a YOLOv5 ONNX model run through OpenCV's DNN module.
class Yolo
{
public:
    // One detection: (class name, confidence, bounding box in image coords).
    typedef std::tuple<std::string, float, cv::Rect> Item;

private:
    // Fixed network input size (YOLOv5s default).
    static constexpr float INPUT_WIDTH = 640.0;
    static constexpr float INPUT_HEIGHT = 640.0;
    // Minimum best-class score for a candidate to be kept.
    static constexpr float SCORE_THRESHOLD = 0.5;
    // IoU threshold used by non-maximum suppression.
    static constexpr float NMS_THRESHOLD = 0.45;
    // Minimum objectness confidence for a candidate row.
    static constexpr float CONFIDENCE_THRESHOLD = 0.45;

public:
    // COCO class names, indexed by the network's class id.
    const std::vector<std::string> names = {
        "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
        "truck", "boat", "traffic light", "fire hydrant", "stop sign",
        "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
        "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
        "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
        "sports ball", "kite", "baseball bat", "baseball glove", "skateboard",
        "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork",
        "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange",
        "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair",
        "sofa", "potted plant", "bed", "dining table", "toilet", "tvmonitor",
        "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
        "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy bear", "hair drier", "toothbrush"
    };

public:
    // Loads the ONNX model; throws cv::Exception on failure.
    Yolo(const std::string& model);
    // Runs detection on `img`; clears and refills `items`.
    void process(cv::Mat &img, std::vector<Item> &items);

private:
    cv::dnn::Net net;
};
|
47
|
+
|
48
|
+
#endif
|
data/ext/yunet.cpp
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
#include <cmath>
|
2
|
+
#include <string>
|
3
|
+
#include <vector>
|
4
|
+
#include <numeric>
|
5
|
+
#include <algorithm>
|
6
|
+
|
7
|
+
#include <opencv2/dnn.hpp>
|
8
|
+
|
9
|
+
#include "yunet.h"
|
10
|
+
|
11
|
+
|
12
|
+
YuNet::YuNet(const std::string& model_filename)
|
13
|
+
{
|
14
|
+
net = cv::dnn::readNetFromONNX(model_filename);
|
15
|
+
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
|
16
|
+
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
|
17
|
+
}
|
18
|
+
|
19
|
+
|
20
|
+
// Run YuNet face detection on `img` and fill `faces` with
// (bounding box, 5-point landmark) pairs after NMS.
void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
{
    /* -- Preparing for image size -- */
    // Width is fixed; height follows the image aspect ratio, rounded down
    // to a multiple of 32 (network stride requirement).
    cv::Size model_size;
    model_size.width = MODEL_WIDTH;
    model_size.height = MODEL_WIDTH * img.rows / img.cols;
    model_size.height = (model_size.height / 32) * 32;

    std::pair<int32_t, int32_t> feature_map_2th = {
        (model_size.height + 1) / 2 / 2,
        (model_size.width + 1) / 2 / 2
    };

    // Feature-map sizes for strides 8/16/32/64 (each level halves again).
    std::vector<std::pair<int32_t, int32_t>> feature_map_list;
    feature_map_list.push_back({ (feature_map_2th.first + 1) / 2 ,
                                 (feature_map_2th.second + 1) / 2 });

    for (int32_t i = 0; i < 3; i++) {
        const auto& previous = feature_map_list.back();
        feature_map_list.push_back({ (previous.first + 1) / 2 ,
                                     (previous.second + 1) / 2 });
    }

    // Prior (anchor) boxes: one per (cell, min_size), stored normalized
    // as [cx, cy, s_kx, s_ky].
    std::vector<std::vector<float>> prior_list;
    for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
        const auto& min_sizes = MIN_SIZES[i];
        const auto& feature_map = feature_map_list[i];
        for (int y = 0; y < feature_map.first; y++) {
            for (int x = 0; x < feature_map.second; x++) {
                for (const auto& min_size : min_sizes) {
                    float s_kx = static_cast<float>(min_size) / model_size.width;
                    float s_ky = static_cast<float>(min_size) / model_size.height;
                    float cx = (x + 0.5f) * STEPS[i] / model_size.width;
                    float cy = (y + 0.5f) * STEPS[i] / model_size.height;
                    prior_list.push_back({ cx, cy, s_kx, s_ky });
                }
            }
        }
    }


    /* -- Pre-process -- */
    cv::Mat blob;
    cv::dnn::blobFromImage(img, blob, 1.0, model_size);

    /* -- Inference -- */
    std::vector<cv::Mat> outputs;
    net.setInput(blob);
    net.forward(outputs, { "conf", "iou", "loc" });

    /* -- Post Process -- */
    const cv::Mat& mat_conf = outputs[0];
    const cv::Mat& mat_iou = outputs[1];
    const cv::Mat& mat_loc = outputs[2];
    const cv::Size image_size = img.size();

    // Face-class confidence (column 1 of "conf"), clamped to [0,1].
    std::vector<float> cls_score;
    for (int32_t row = 0; row < mat_conf.rows; row++) {
        float val = mat_conf.at<float>(cv::Point(1, row));
        cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
    }

    // IoU-awareness score, clamped to [0,1].
    // FIX: read from mat_iou — the original read mat_conf here (copy-paste
    // bug), which silently distorted every combined score below.
    std::vector<float> iou_score;
    for (int32_t row = 0; row < mat_iou.rows; row++) {
        float val = mat_iou.at<float>(cv::Point(0, row));
        iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
    }

    // Combined score: geometric mean of class and IoU scores.
    std::vector<float> score;
    for (int32_t row = 0; row < mat_conf.rows; row++) {
        score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
    }

    // Decode all candidate boxes from prior + regressed offsets.
    // NOTE(review): w uses VARIANCES[0] while h uses VARIANCES[1]; this
    // matches the sample code this was derived from, but confirm against
    // the model's training prior variances.
    std::vector<cv::Rect> bbox_all;
    for (int row = 0; row < mat_loc.rows; row++) {
        float cx = mat_loc.at<float>(cv::Point(0, row));
        float cy = mat_loc.at<float>(cv::Point(1, row));
        float w = mat_loc.at<float>(cv::Point(2, row));
        float h = mat_loc.at<float>(cv::Point(3, row));

        cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
        cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
        w = prior_list[row][2] * std::exp(w * VARIANCES[0]);
        h = prior_list[row][3] * std::exp(h * VARIANCES[1]);

        bbox_all.push_back({
            static_cast<int32_t>((cx - w / 2) * image_size.width),
            static_cast<int32_t>((cy - h / 2) * image_size.height),
            static_cast<int32_t>(w * image_size.width),
            static_cast<int32_t>(h * image_size.height) });
    }

    // Non-Maximum Suppression
    std::vector<int> indices;
    cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);

    // Keep surviving boxes; decode their 5-point landmarks the same way
    // (loc columns 4.. hold landmark offsets as x/y pairs).
    faces.clear();
    for (int idx : indices) {
        Landmark landmark; // (landmark is 5 points)
        for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
            cv::Point& p = landmark[i];
            float x = mat_loc.at<float>(cv::Point(4 + i * 2, idx));
            float y = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
            p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
            p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
        }

        faces.push_back({ bbox_all[idx], landmark });
    }
}
|