circe 0.1.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 683391e9186e4a20233cb25ce28f37b7bf56c88586b1ac72f1312081e4e48c7d
4
- data.tar.gz: 680a92f2318df871cdd2f6f98732f85adab5098e067fe7c4fa986832e5f32caf
3
+ metadata.gz: 8d735b7de777b161f27ca0b0d50282eca7e18f1cd9e5aa6e298e089f6ccd2b89
4
+ data.tar.gz: 5ba4acf8e3c3ef5950c981de11a31f4bdcdb7a0fb1cc4c534b1e62f96a99fb3a
5
5
  SHA512:
6
- metadata.gz: 84422ff365869d1615c4cf17bbc035426e7a4cbfc5bd501104d5e1b2a5ea12535150de13e192b0a2849102fb0a49f7e174b2f9b804027614fc50f68858d139de
7
- data.tar.gz: d679d1b61f155a5170426d007e8d06e73b5e01c12fd4dc12ab70ceddb29fc1c80a35ab178b83986c32f0f05e1b508d9e6cf56ce8fd4840bacbf1d67f96dc6312
6
+ metadata.gz: 8ac69c1f5b7d3af53db342306629fc880ae8b6386ba0ee4ee02fcec391fcab29b9acc3aa972a01f2d99b31cf0267861d6553d2cdc54b839e2a36648801b83fd3
7
+ data.tar.gz: 76a3084d3b0fc496d39e9efc04733b17a64a31e5b4285ad450bf1dc681db132671febe821ae3ae717b68771a20901f28247ae62c89a2ec57a013ef4ca7bb6c4b
Binary file
Binary file
data/data/yolov8s.onnx ADDED
Binary file
data/ext/circe.cpp CHANGED
@@ -91,6 +91,7 @@ static VALUE eCirceError = Qundef;
91
91
  #include <chrono>
92
92
  #include <opencv2/highgui.hpp>
93
93
  #include <opencv2/imgproc.hpp>
94
+ #include <opencv2/core/mat.hpp>
94
95
 
95
96
  #include "yolo.h"
96
97
  #include "yunet.h"
@@ -119,6 +120,10 @@ static ID id_class;
119
120
  static ID id_png;
120
121
  static ID id_jpg;
121
122
 
123
+ static ID id_label;
124
+ static ID id_thickness;
125
+ static ID id_extra;
126
+ static ID id_color;
122
127
 
123
128
  static Yolo *yolo;
124
129
  static YuNet *yunet;
@@ -143,85 +148,157 @@ draw_label(cv::Mat& img, string label, Point& origin,
143
148
  }
144
149
 
145
150
  static void
146
- draw_labelbox(cv::Mat& img, string label, Rect& box,
147
- Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
148
- int thickness = 1) {
151
+ draw_box(cv::Mat& img, Rect& box,
152
+ Scalar& framecolor = BLUE, int thickness = 1) {
149
153
 
150
154
  Point a = { box.x, box.y };
151
155
  Point b = { box.x + box.width, box.y + box.height };
152
156
 
153
157
  cv::rectangle(img, a, b, framecolor, thickness);
154
- draw_label(img, label, a, textcolor, framecolor);
155
158
  }
156
159
 
160
+ static void
161
+ draw_labelbox(cv::Mat& img, string label, Rect& box,
162
+ Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
163
+ int thickness = 1) {
157
164
 
165
+ Point o = { box.x, box.y };
166
+
167
+ draw_box(img, box, framecolor, thickness);
168
+ draw_label(img, label, o, textcolor, framecolor);
169
+ }
170
+
171
+
172
+ VALUE
173
+ circe_annotate(Mat& img, Rect& box, VALUE v_annotation, int *state) {
174
+ if (img.empty() || NIL_P(v_annotation))
175
+ return Qnil;
176
+
177
+ VALUE v_label = Qnil;
178
+ VALUE v_color = ULONG2NUM(0x0000ff);
179
+ VALUE v_thickness = INT2NUM(1);
180
+ VALUE v_extra = Qtrue;
181
+
182
+ VALUE s_label = rb_id2sym(id_label);
183
+ VALUE s_color = rb_id2sym(id_color);
184
+ VALUE s_thickness = rb_id2sym(id_thickness);
185
+ VALUE s_extra = rb_id2sym(id_extra);
186
+
187
+ switch (TYPE(v_annotation)) {
188
+ case T_NIL:
189
+ break;
190
+ case T_HASH:
191
+ v_thickness = rb_hash_lookup2(v_annotation, s_thickness, v_thickness);
192
+ v_color = rb_hash_lookup2(v_annotation, s_color, v_color );
193
+ v_label = rb_hash_lookup2(v_annotation, s_label, v_label );
194
+ v_extra = rb_hash_lookup2(v_annotation, s_extra, v_extra );
195
+ break;
196
+ case T_ARRAY:
197
+ switch(RARRAY_LENINT(v_annotation)) {
198
+ default:
199
+ case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
200
+ case 2: v_color = RARRAY_AREF(v_annotation, 1);
201
+ case 1: v_label = RARRAY_AREF(v_annotation, 0);
202
+ case 0: break;
203
+ }
204
+ break;
205
+ case T_STRING:
206
+ v_label = v_annotation;
207
+ break;
208
+ }
209
+
210
+ // No color, no rendering
211
+ if (NIL_P(v_color))
212
+ return Qnil;
213
+
214
+ long rgb = NUM2ULONG(v_color);
215
+ Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
216
+ (rgb >> 8) & 0xFF,
217
+ (rgb >> 16) & 0xFF);
218
+
219
+ if (! NIL_P(v_thickness)) {
220
+ int thickness = NUM2INT(v_thickness);
221
+ draw_box(img, box, color, thickness);
222
+ }
223
+ if (! NIL_P(v_label)) {
224
+ string label = StringValueCStr(v_label);
225
+ Point o = { box.x, box.y };
226
+ draw_label(img, label, o, BLACK, color);
227
+ }
228
+
229
+ // Return normalized parameters
230
+ VALUE r = rb_hash_new();
231
+ rb_hash_aset(r, s_label, v_label );
232
+ rb_hash_aset(r, s_color, v_color );
233
+ rb_hash_aset(r, s_thickness, v_thickness);
234
+ rb_hash_aset(r, s_extra, v_extra );
235
+ return r;
236
+ }
158
237
 
159
238
 
160
239
 
161
240
  void
162
- yunet_process_features(vector<YuNet::Face>& faces,
163
- Mat& img, VALUE v_features, int *state)
241
+ yunet_process_features(cv::Mat& faces, Mat& img, VALUE v_features, int *state)
164
242
  {
165
- for (int i = 0; i < faces.size(); i++) {
166
- Rect box = faces[i].first;
167
- YuNet::Landmark lmark = faces[i].second;
243
+ for (int i = 0; i < faces.rows; i++) {
244
+ // Face
245
+ int x_f = static_cast<int>(faces.at<float>(i, 0));
246
+ int y_f = static_cast<int>(faces.at<float>(i, 1));
247
+ int w_f = static_cast<int>(faces.at<float>(i, 2));
248
+ int h_f = static_cast<int>(faces.at<float>(i, 3));
249
+ // Right eye
250
+ int x_re = static_cast<int>(faces.at<float>(i, 4));
251
+ int y_re = static_cast<int>(faces.at<float>(i, 5));
252
+ // Left eye
253
+ int x_le = static_cast<int>(faces.at<float>(i, 6));
254
+ int y_le = static_cast<int>(faces.at<float>(i, 7));
255
+ // Nose tip
256
+ int x_nt = static_cast<int>(faces.at<float>(i, 8));
257
+ int y_nt = static_cast<int>(faces.at<float>(i, 9));
258
+ // Right corner mouth
259
+ int x_rcm = static_cast<int>(faces.at<float>(i, 10));
260
+ int y_rcm = static_cast<int>(faces.at<float>(i, 11));
261
+ // Left corner mouth
262
+ int x_lcm = static_cast<int>(faces.at<float>(i, 12));
263
+ int y_lcm = static_cast<int>(faces.at<float>(i, 13));
264
+ // Confidence
265
+ float confidence = faces.at<float>(i, 14);
168
266
 
169
- VALUE v_type = ID2SYM(id_face);
267
+ VALUE v_type = ID2SYM(id_face);
170
268
  VALUE v_box = rb_ary_new_from_args(4,
171
- INT2NUM(box.x ), INT2NUM(box.y ),
172
- INT2NUM(box.width), INT2NUM(box.height));
269
+ INT2NUM(x_f), INT2NUM(y_f),
270
+ INT2NUM(w_f), INT2NUM(h_f));
173
271
  VALUE v_landmark = rb_ary_new_from_args(5,
174
- rb_ary_new_from_args(2, INT2NUM(lmark[0].x),
175
- INT2NUM(lmark[0].y)),
176
- rb_ary_new_from_args(2, INT2NUM(lmark[1].x),
177
- INT2NUM(lmark[1].y)),
178
- rb_ary_new_from_args(2, INT2NUM(lmark[2].x),
179
- INT2NUM(lmark[2].y)),
180
- rb_ary_new_from_args(2, INT2NUM(lmark[3].x),
181
- INT2NUM(lmark[3].y)),
182
- rb_ary_new_from_args(2, INT2NUM(lmark[4].x),
183
- INT2NUM(lmark[4].y)));
184
- VALUE v_feature = rb_ary_new_from_args(3, v_type, v_box,
185
- v_landmark);
272
+ rb_ary_new_from_args(2, INT2NUM(x_re),
273
+ INT2NUM(y_re)),
274
+ rb_ary_new_from_args(2, INT2NUM(x_le),
275
+ INT2NUM(y_le)),
276
+ rb_ary_new_from_args(2, INT2NUM(x_nt),
277
+ INT2NUM(y_nt)),
278
+ rb_ary_new_from_args(2, INT2NUM(x_rcm),
279
+ INT2NUM(y_rcm)),
280
+ rb_ary_new_from_args(2, INT2NUM(x_lcm),
281
+ INT2NUM(y_lcm)));
282
+ VALUE v_confidence = DBL2NUM(confidence);
283
+ VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box, v_landmark,
284
+ v_confidence);
285
+
186
286
  rb_ary_push(v_features, v_feature);
287
+
187
288
 
188
289
  if (!img.empty() && rb_block_given_p()) {
189
- VALUE v_annotation= rb_yield_splat(v_feature);
190
- VALUE v_label = Qnil;
191
- VALUE v_color = ULONG2NUM(0x0000ff);
192
- VALUE v_thickness = INT2NUM(1);
193
-
194
- switch (TYPE(v_annotation)) {
195
- case T_NIL:
196
- break;
197
- case T_HASH:
198
- break;
199
- case T_ARRAY:
200
- switch(RARRAY_LENINT(v_annotation)) {
201
- default:
202
- case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
203
- case 2: v_color = RARRAY_AREF(v_annotation, 1);
204
- case 1: v_label = RARRAY_AREF(v_annotation, 0);
205
- case 0: break;
206
- }
207
- break;
208
- case T_STRING:
209
- v_label = v_annotation;
210
- break;
211
- }
212
-
213
- if (! NIL_P(v_label)) {
214
- string label = StringValueCStr(v_label);
215
- long rgb = NUM2ULONG(v_color);
216
- int thickness = NUM2INT(v_thickness);
217
- Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
218
- (rgb >> 8) & 0xFF,
219
- (rgb >> 16) & 0xFF);
220
- draw_labelbox(img, label, box, color, BLACK, thickness);
221
-
222
- for (const auto& p : lmark) {
223
- cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
224
- }
290
+ cv::Rect box = cv::Rect(x_f, y_f, w_f, h_f);
291
+ VALUE v_annotation = rb_yield_splat(v_feature);
292
+ VALUE cfg = circe_annotate(img, box, v_annotation, state);
293
+ VALUE s_extra = rb_id2sym(id_extra);
294
+
295
+ if (!NIL_P(cfg) && RTEST(rb_hash_aref(cfg, s_extra))) {
296
+ cv::Scalar color = cv::Scalar(255, 0, 0);
297
+ cv::circle(img, cv::Point(x_le, y_le ), 3, color, 2);
298
+ cv::circle(img, cv::Point(x_re, y_re ), 3, color, 2);
299
+ cv::circle(img, cv::Point(x_nt, y_nt ), 3, color, 2);
300
+ cv::circle(img, cv::Point(x_rcm, y_rcm), 3, color, 2);
301
+ cv::circle(img, cv::Point(x_lcm, y_lcm), 3, color, 2);
225
302
  }
226
303
  }
227
304
  }
@@ -239,50 +316,18 @@ yolo_process_features(vector<Yolo::Item>& items,
239
316
  Rect box = std::get<2>(items[i]);
240
317
 
241
318
  VALUE v_type = ID2SYM(id_class);
242
- VALUE v_name = rb_str_new(name.c_str(), name.size());
243
- VALUE v_confidence = DBL2NUM(confidence);
244
319
  VALUE v_box = rb_ary_new_from_args(4,
245
320
  INT2NUM(box.x ), INT2NUM(box.y ),
246
321
  INT2NUM(box.width), INT2NUM(box.height));
322
+ VALUE v_name = rb_str_new(name.c_str(), name.size());
323
+ VALUE v_confidence = DBL2NUM(confidence);
247
324
  VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box,
248
325
  v_name, v_confidence);
249
326
  rb_ary_push(v_features, v_feature);
250
327
 
251
- if (rb_block_given_p()) {
328
+ if (!img.empty() && rb_block_given_p()) {
252
329
  VALUE v_annotation = rb_yield_splat(v_feature);
253
-
254
- VALUE v_label = Qnil;
255
- VALUE v_color = ULONG2NUM(0x0000ff);
256
- VALUE v_thickness = INT2NUM(1);
257
-
258
- switch (TYPE(v_annotation)) {
259
- case T_NIL:
260
- break;
261
- case T_HASH:
262
- break;
263
- case T_ARRAY:
264
- switch(RARRAY_LENINT(v_annotation)) {
265
- default:
266
- case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
267
- case 2: v_color = RARRAY_AREF(v_annotation, 1);
268
- case 1: v_label = RARRAY_AREF(v_annotation, 0);
269
- case 0: break;
270
- }
271
- break;
272
- case T_STRING:
273
- v_label = v_annotation;
274
- break;
275
- }
276
-
277
- if (! NIL_P(v_label)) {
278
- string label = StringValueCStr(v_label);
279
- long rgb = NUM2ULONG(v_color);
280
- int thickness = NUM2INT(v_thickness);
281
- Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
282
- (rgb >> 8) & 0xFF,
283
- (rgb >> 16) & 0xFF);
284
- draw_labelbox(img, label, box, color, BLACK, thickness);
285
- }
330
+ circe_annotate(img, box, v_annotation, state);
286
331
  }
287
332
  }
288
333
  }
@@ -337,9 +382,10 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
337
382
  }
338
383
 
339
384
  if (RTEST(v_face)) {
340
- vector<YuNet::Face> faces;
385
+ cv::Mat faces;
341
386
  yunet->process(i_img, faces);
342
387
  yunet_process_features(faces, o_img, v_features, &state);
388
+ faces.release();
343
389
  if (state) goto exception;
344
390
  }
345
391
 
@@ -363,7 +409,12 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
363
409
  std::vector<uchar> buf;
364
410
  cv::imencode(format, o_img, buf);
365
411
  v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
412
+ buf.clear();
366
413
  }
414
+
415
+ i_img.release();
416
+ o_img.release();
417
+
367
418
  return rb_ary_new_from_args(2, v_features, v_image);
368
419
 
369
420
  exception:
@@ -382,22 +433,35 @@ void Init_core(void) {
382
433
  eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
383
434
  // myclass = rb_const_get(mymodule, sym_myclass);
384
435
 
385
- VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
386
- VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
436
+ VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
437
+ VALUE v_yolo_path = RARRAY_AREF(v_onnx_yolo, 0);
438
+ VALUE v_yolo_height = RARRAY_AREF(v_onnx_yolo, 1);
439
+ VALUE v_yolo_width = RARRAY_AREF(v_onnx_yolo, 2);
440
+
441
+ VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
442
+ VALUE v_yunet_path = RARRAY_AREF(v_onnx_yunet, 0);
387
443
 
388
- static Yolo _yolo = { StringValueCStr(v_onnx_yolo ) };
389
- static YuNet _yunet = { StringValueCStr(v_onnx_yunet) };
444
+
445
+
446
+ static Yolo _yolo = { StringValueCStr(v_yolo_path ),
447
+ { NUM2INT(v_yolo_width),
448
+ NUM2INT(v_yolo_height) }};
449
+ static YuNet _yunet = { StringValueCStr(v_yunet_path) };
390
450
 
391
451
  yolo = &_yolo;
392
452
  yunet = &_yunet;
393
453
 
394
454
 
395
- id_debug = rb_intern_const("debug" );
396
- id_face = rb_intern_const("face" );
397
- id_classify = rb_intern_const("classify");
398
- id_class = rb_intern_const("class" );
399
- id_png = rb_intern_const("png" );
400
- id_jpg = rb_intern_const("jpg" );
455
+ id_debug = rb_intern_const("debug" );
456
+ id_face = rb_intern_const("face" );
457
+ id_classify = rb_intern_const("classify" );
458
+ id_class = rb_intern_const("class" );
459
+ id_png = rb_intern_const("png" );
460
+ id_jpg = rb_intern_const("jpg" );
461
+ id_label = rb_intern_const("label" );
462
+ id_thickness = rb_intern_const("thickness");
463
+ id_extra = rb_intern_const("extra" );
464
+ id_color = rb_intern_const("color" );
401
465
 
402
466
 
403
467
  rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
data/ext/extconf.rb CHANGED
@@ -10,6 +10,6 @@ cflags, ldflags, libs = pkg_config('opencv4')
10
10
 
11
11
  $LDFLAGS += " #{ldflags} #{libs}"
12
12
  $INCFLAGS += " #{cflags}"
13
- $CXXFLAGS += "-std=c++17"
13
+ $CXXFLAGS += " -std=c++17"
14
14
 
15
15
  create_makefile("circe/core")
data/ext/yolo.cpp CHANGED
@@ -1,3 +1,14 @@
1
+ /*
2
+ * https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-CPP-Inference/inference.cpp
3
+ * https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp
4
+ *
5
+ * yolov5 has an output of shape:
6
+ * (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
7
+ * yolov8 has an output of shape:
8
+ * (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
9
+ *
10
+ * yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
11
+ */
1
12
  #include <tuple>
2
13
  #include <string>
3
14
  #include <opencv2/imgproc.hpp>
@@ -6,89 +17,144 @@
6
17
  #include "yolo.h"
7
18
 
8
19
 
9
- Yolo::Yolo(const std::string& model) {
10
- net = cv::dnn::readNetFromONNX(model);
20
+ Yolo::Yolo(const std::string& model, cv::Size size) {
21
+ this->net = cv::dnn::readNetFromONNX(model);
22
+ this->size = size;
11
23
  }
12
24
 
13
25
 
14
26
  void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {
27
+ int version = 5;
15
28
 
29
+ cv::Mat input = img;
30
+ if (letterBoxForSquare && size.width == size.height)
31
+ input = formatToSquare(input);
32
+
16
33
  // Pre-process
17
34
  cv::Mat blob;
18
- std::vector<cv::Mat> outputs;
19
-
20
- cv::dnn::blobFromImage(img, blob, 1./255.,
21
- cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
22
- cv::Scalar(), true, false);
35
+ cv::dnn::blobFromImage(input, blob, 1./255.,
36
+ this->size, cv::Scalar(), true, false);
23
37
 
24
38
  // Process
39
+ std::vector<cv::Mat> outputs;
25
40
  net.setInput(blob);
26
41
  net.forward(outputs, net.getUnconnectedOutLayersNames());
27
42
 
28
- // Post-process
29
- std::vector<int> class_ids;
30
- std::vector<float> confidences;
31
- std::vector<cv::Rect> boxes;
43
+ // Output
44
+ int rows = outputs[0].size[1];
45
+ int dimensions = outputs[0].size[2];
46
+
47
+ // Infer yolo version
48
+ // -> Check if the shape[2] is more than shape[1] (yolov8)
49
+ if (dimensions > rows) {
50
+ version = 8;
51
+ }
52
+
53
+ // Adjust according to version
54
+ if (version == 8) {
55
+ // cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
56
+ rows = outputs[0].size[2];
57
+ dimensions = outputs[0].size[1];
58
+ outputs[0] = outputs[0].reshape(1, dimensions);
59
+ cv::transpose(outputs[0], outputs[0]);
60
+ }
61
+
62
+ // Output
63
+ float *data = (float *)outputs[0].data;
32
64
 
33
65
  // Resizing factor.
34
- float x_factor = img.cols / INPUT_WIDTH;
35
- float y_factor = img.rows / INPUT_HEIGHT;
66
+ float x_factor = input.cols / size.width;
67
+ float y_factor = input.rows / size.height;
36
68
 
37
- float *data = (float *)outputs[0].data;
38
- const int dimensions = 85;
69
+ // Post-process
70
+ std::vector<int> class_ids;
71
+ std::vector<float> confidences;
72
+ std::vector<cv::Rect> boxes;
39
73
 
40
- // 25200 for default size 640.
41
- const int rows = 25200;
42
- // Iterate through 25200 detections.
43
- for (int i = 0; i < rows; ++i) {
44
- float confidence = data[4];
45
-
46
- // Discard bad detections and continue.
47
- if (confidence >= CONFIDENCE_THRESHOLD) {
48
- float *classes_scores = data + 5;
49
-
50
- // Create a 1x85 Mat and store class scores of 80 classes.
51
- cv::Mat scores(1, names.size(), CV_32FC1, classes_scores);
52
-
53
- // Perform minMaxLoc and acquire the index of best class score.
54
- cv::Point class_id;
55
- double max_class_score;
56
- minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
57
-
58
- // Continue if the class score is above the threshold.
59
- if (max_class_score > SCORE_THRESHOLD) {
60
- // Store class ID and confidence in the pre-defined
61
- // respective vectors.
62
- float cx = data[0]; // Center
63
- float cy = data[1];
64
- float w = data[2]; // Box dimension
65
- float h = data[3];
66
-
67
- // Bounding box coordinates.
68
- int left = int((cx - 0.5 * w) * x_factor);
69
- int top = int((cy - 0.5 * h) * y_factor);
70
- int width = int(w * x_factor);
71
- int height = int(h * y_factor);
72
-
73
- // Store good detections in the boxes vector.
74
- confidences.push_back(confidence);
74
+
75
+ if (version == 5) {
76
+ for (int i = 0; i < rows; ++i) {
77
+ float confidence = data[4];
78
+
79
+ if (confidence >= CONFIDENCE_THRESHOLD) {
80
+ float *classes_scores = data + 5;
81
+
82
+ cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
83
+ cv::Point class_id;
84
+ double max_class_score;
85
+
86
+ minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
87
+
88
+ if (max_class_score > SCORE_THRESHOLD) {
89
+ confidences.push_back(confidence);
90
+ class_ids.push_back(class_id.x);
91
+
92
+ float x = data[0];
93
+ float y = data[1];
94
+ float w = data[2];
95
+ float h = data[3];
96
+
97
+ int left = int((x - 0.5 * w) * x_factor);
98
+ int top = int((y - 0.5 * h) * y_factor);
99
+ int width = int(w * x_factor);
100
+ int height = int(h * y_factor);
101
+
102
+ boxes.push_back(cv::Rect(left, top, width, height));
103
+ }
104
+ }
105
+ data += dimensions;
106
+ }
107
+ } else if (version == 8) {
108
+ for (int i = 0; i < rows; ++i) {
109
+ float *classes_scores = data + 4;
110
+
111
+ cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
112
+ cv::Point class_id;
113
+ double maxClassScore;
114
+
115
+ minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
116
+
117
+ if (maxClassScore > SCORE_THRESHOLD) {
118
+ confidences.push_back(maxClassScore);
75
119
  class_ids.push_back(class_id.x);
120
+
121
+ float x = data[0];
122
+ float y = data[1];
123
+ float w = data[2];
124
+ float h = data[3];
125
+
126
+ int left = int((x - 0.5 * w) * x_factor);
127
+ int top = int((y - 0.5 * h) * y_factor);
128
+ int width = int(w * x_factor);
129
+ int height = int(h * y_factor);
130
+
76
131
  boxes.push_back(cv::Rect(left, top, width, height));
77
132
  }
78
- }
79
- // Jump to the next row.
80
- data += 85;
133
+ data += dimensions;
134
+ }
81
135
  }
136
+
82
137
 
83
138
  // Perform Non-Maximum Suppression and draw predictions.
84
- std::vector<int> indices;
139
+ std::vector<int> nms_result;
85
140
  cv::dnn::NMSBoxes(boxes, confidences,
86
- SCORE_THRESHOLD, NMS_THRESHOLD, indices);
141
+ SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
87
142
 
88
143
  items.clear();
89
144
 
90
- for (int i = 0; i < indices.size(); i++) {
91
- int idx = indices[i];
92
- items.push_back({ names[class_ids[idx]],confidences[idx],boxes[idx] });
145
+ for (int i = 0; i < nms_result.size(); i++) {
146
+ int idx = nms_result[i];
147
+ items.push_back({ classes[class_ids[idx]],confidences[idx],boxes[idx] });
93
148
  }
94
149
  }
150
+
151
+
152
+ cv::Mat Yolo::formatToSquare(const cv::Mat &source)
153
+ {
154
+ int col = source.cols;
155
+ int row = source.rows;
156
+ int _max = MAX(col, row);
157
+ cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
158
+ source.copyTo(result(cv::Rect(0, 0, col, row)));
159
+ return result;
160
+ }
data/ext/yolo.h CHANGED
@@ -16,12 +16,14 @@ public:
16
16
  private:
17
17
  static constexpr float INPUT_WIDTH = 640.0;
18
18
  static constexpr float INPUT_HEIGHT = 640.0;
19
- static constexpr float SCORE_THRESHOLD = 0.5;
20
- static constexpr float NMS_THRESHOLD = 0.45;
21
- static constexpr float CONFIDENCE_THRESHOLD = 0.45;
19
+ static constexpr float CONFIDENCE_THRESHOLD = 0.25;
20
+ static constexpr float SCORE_THRESHOLD = 0.50;
21
+ static constexpr float NMS_THRESHOLD = 0.50;
22
22
 
23
+ bool letterBoxForSquare = true;
24
+
23
25
  public:
24
- const std::vector<std::string> names = {
26
+ const std::vector<std::string> classes = {
25
27
  "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
26
28
  "truck", "boat", "traffic light", "fire hydrant", "stop sign",
27
29
  "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
@@ -38,11 +40,13 @@ private:
38
40
  };
39
41
 
40
42
  public:
41
- Yolo(const std::string& model);
43
+ Yolo(const std::string& model, cv::Size size);
42
44
  void process(cv::Mat &img, std::vector<Item> &items);
43
45
 
44
46
  private:
45
47
  cv::dnn::Net net;
48
+ cv::Size size;
49
+ cv::Mat formatToSquare(const cv::Mat &source);
46
50
  };
47
51
 
48
52
  #endif
data/ext/yunet.h CHANGED
@@ -2,37 +2,37 @@
2
2
  #define __YUNET__
3
3
 
4
4
  #include <string>
5
- #include <vector>
6
- #include <array>
7
- #include <utility>
8
-
9
- #include <opencv2/dnn.hpp>
10
-
5
+ #include <opencv2/objdetect/face.hpp>
6
+ #include <opencv2/core.hpp>
11
7
 
12
8
  class YuNet
13
9
  {
14
10
 
15
11
  public:
16
- typedef std::array<cv::Point, 5> Landmark;
17
- typedef std::pair<cv::Rect, Landmark> Face;
18
-
19
- private:
20
- static constexpr int MODEL_WIDTH = 512;
21
- static constexpr float CONF_THRESHOLD = 0.4f;
22
- static constexpr float NMS_THRESHOLD = 0.3f;
23
-
24
- const std::vector<float> VARIANCES = { 0.1f, 0.2f };
25
- const std::vector<int> STEPS = { 8, 16, 32, 64 };
26
- const std::vector<std::vector<int>> MIN_SIZES = {
27
- { 10, 16, 24 }, { 32, 48 }, { 64, 96 }, { 128, 192, 256 } };
12
+ YuNet(const std::string& model_path,
13
+ const cv::Size& input_size = cv::Size(320, 320),
14
+ float conf_threshold = 0.6f,
15
+ float nms_threshold = 0.3f,
16
+ int top_k = 5000,
17
+ int backend_id = cv::dnn::DNN_BACKEND_OPENCV,
18
+ int target_id = cv::dnn::DNN_TARGET_CPU)
19
+ {
20
+ model = cv::FaceDetectorYN::create(model_path, "", input_size,
21
+ conf_threshold,
22
+ nms_threshold, top_k,
23
+ backend_id, target_id);
24
+ }
28
25
 
29
- public:
30
- YuNet(const std::string& model);
31
26
  ~YuNet() {};
32
- void process(const cv::Mat& img, std::vector<Face>& faces);
27
+
28
+ void process(const cv::Mat& img, cv::Mat& faces) {
29
+ model->setInputSize(img.size());
30
+ model->detect(img, faces);
31
+ }
33
32
 
34
33
  private:
35
- cv::dnn::Net net;
34
+
35
+ cv::Ptr<cv::FaceDetectorYN> model;
36
36
  };
37
37
 
38
38
  #endif
data/lib/circe/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Circe
2
- VERSION = '0.1.0'
2
+ VERSION = '0.2.1'
3
3
  end
data/lib/circe.rb CHANGED
@@ -3,9 +3,9 @@ class Circe
3
3
  private
4
4
 
5
5
  # Don't know how to do it inside the c extension
6
- DATA_DIR = File.join(__dir__, '..', 'data').freeze
7
- ONNX_YOLO = File.join(DATA_DIR, 'yolov5s.onnx')
8
- ONNX_YUNET = File.join(DATA_DIR, 'face_detection_yunet_2022mar.onnx')
6
+ DATA_DIR = File.join(__dir__, '..', 'data').freeze
7
+ ONNX_YOLO = [ File.join(DATA_DIR, 'yolov8s.onnx'), 480, 640 ]
8
+ ONNX_YUNET = [ File.join(DATA_DIR, 'face_detection_yunet_2023mar.onnx') ]
9
9
 
10
10
  end
11
11
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: circe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stéphane D'Alu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-03 00:00:00.000000000 Z
11
+ date: 2024-07-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |2+
14
14
 
@@ -22,14 +22,16 @@ extensions:
22
22
  extra_rdoc_files: []
23
23
  files:
24
24
  - circe.gemspec
25
- - data/face_detection_yunet_2022mar.onnx
26
- - data/yolov5s.onnx
25
+ - data/face_detection_yunet_2023mar.onnx
26
+ - data/yolov5su-sim.onnx
27
+ - data/yolov5su.onnx
28
+ - data/yolov8s-sim.onnx
29
+ - data/yolov8s.onnx
27
30
  - ext/camera_model.h
28
31
  - ext/circe.cpp
29
32
  - ext/extconf.rb
30
33
  - ext/yolo.cpp
31
34
  - ext/yolo.h
32
- - ext/yunet.cpp
33
35
  - ext/yunet.h
34
36
  - lib/circe.rb
35
37
  - lib/circe/version.rb
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
54
  - !ruby/object:Gem::Version
53
55
  version: '0'
54
56
  requirements: []
55
- rubygems_version: 3.4.2
57
+ rubygems_version: 3.5.9
56
58
  signing_key:
57
59
  specification_version: 4
58
60
  summary: Face and object recognition
Binary file
data/ext/yunet.cpp DELETED
@@ -1,132 +0,0 @@
1
- #include <cmath>
2
- #include <string>
3
- #include <vector>
4
- #include <numeric>
5
- #include <algorithm>
6
-
7
- #include <opencv2/dnn.hpp>
8
-
9
- #include "yunet.h"
10
-
11
-
12
- YuNet::YuNet(const std::string& model_filename)
13
- {
14
- net = cv::dnn::readNetFromONNX(model_filename);
15
- net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
16
- net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
17
- }
18
-
19
-
20
- void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
21
- {
22
- /* -- Preparing for image size -- */
23
- cv::Size model_size;
24
- model_size.width = MODEL_WIDTH;
25
- model_size.height = MODEL_WIDTH * img.rows / img.cols;
26
- model_size.height = (model_size.height / 32) * 32;
27
-
28
- std::pair<int32_t, int32_t> feature_map_2th = {
29
- (model_size.height + 1) / 2 / 2,
30
- (model_size.width + 1) / 2 / 2
31
- };
32
-
33
- std::vector<std::pair<int32_t, int32_t>> feature_map_list;
34
- feature_map_list.push_back({ (feature_map_2th.first + 1) / 2 ,
35
- (feature_map_2th.second + 1) / 2 });
36
-
37
- for (int32_t i = 0; i < 3; i++) {
38
- const auto& previous = feature_map_list.back();
39
- feature_map_list.push_back({ (previous.first + 1) / 2 ,
40
- (previous.second + 1) / 2 });
41
- }
42
-
43
- std::vector<std::vector<float>> prior_list;
44
- for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
45
- const auto& min_sizes = MIN_SIZES[i];
46
- const auto& feature_map = feature_map_list[i];
47
- for (int y = 0; y < feature_map.first; y++) {
48
- for (int x = 0; x < feature_map.second; x++) {
49
- for (const auto& min_size : min_sizes) {
50
- float s_kx = static_cast<float>(min_size) / model_size.width;
51
- float s_ky = static_cast<float>(min_size) / model_size.height;
52
- float cx = (x + 0.5f) * STEPS[i] / model_size.width;
53
- float cy = (y + 0.5f) * STEPS[i] / model_size.height;
54
- prior_list.push_back({ cx, cy, s_kx, s_ky });
55
- }
56
- }
57
- }
58
- }
59
-
60
-
61
- /* -- Pre-process -- */
62
- cv::Mat blob;
63
- cv::dnn::blobFromImage(img, blob, 1.0, model_size);
64
-
65
- /* -- Inference -- */
66
- std::vector<cv::Mat> outputs;
67
- net.setInput(blob);
68
- net.forward(outputs, { "conf", "iou", "loc" });
69
-
70
- /* -- Post Process -- */
71
- const cv::Mat& mat_conf = outputs[0];
72
- const cv::Mat& mat_iou = outputs[1];
73
- const cv::Mat& mat_loc = outputs[2];
74
- const cv::Size image_size = img.size();
75
-
76
- // Get score list
77
- std::vector<float> cls_score;
78
- for (int32_t row = 0; row < mat_conf.rows; row++) {
79
- float val = mat_conf.at<float>(cv::Point(1, row));
80
- cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
81
- }
82
-
83
- std::vector<float> iou_score;
84
- for (int32_t row = 0; row < mat_iou.rows; row++) {
85
- float val = mat_conf.at<float>(cv::Point(0, row));
86
- iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
87
- }
88
-
89
- std::vector<float> score;
90
- for (int32_t row = 0; row < mat_conf.rows; row++) {
91
- score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
92
- }
93
-
94
- // All bbox
95
- std::vector<cv::Rect> bbox_all;
96
- for (int row = 0; row < mat_loc.rows; row++) {
97
- float cx = mat_loc.at<float>(cv::Point(0, row));
98
- float cy = mat_loc.at<float>(cv::Point(1, row));
99
- float w = mat_loc.at<float>(cv::Point(2, row));
100
- float h = mat_loc.at<float>(cv::Point(3, row));
101
-
102
- cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
103
- cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
104
- w = prior_list[row][2] * std::exp(w * VARIANCES[0]);
105
- h = prior_list[row][3] * std::exp(h * VARIANCES[1]);
106
-
107
- bbox_all.push_back({
108
- static_cast<int32_t>((cx - w / 2) * image_size.width),
109
- static_cast<int32_t>((cy - h / 2) * image_size.height),
110
- static_cast<int32_t>(w * image_size.width),
111
- static_cast<int32_t>(h * image_size.height) });
112
- }
113
-
114
- // Non-Maximum Suppression
115
- std::vector<int> indices;
116
- cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);
117
-
118
- // Get valid bbox and landmark
119
- faces.clear();
120
- for (int idx : indices) {
121
- Landmark landmark; // (landmark is 5 points)
122
- for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
123
- cv::Point& p = landmark[i];
124
- float x = mat_loc.at<float>(cv::Point(4 + i * 2, idx));
125
- float y = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
126
- p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
127
- p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
128
- }
129
-
130
- faces.push_back({ bbox_all[idx], landmark });
131
- }
132
- }