circe 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 683391e9186e4a20233cb25ce28f37b7bf56c88586b1ac72f1312081e4e48c7d
4
- data.tar.gz: 680a92f2318df871cdd2f6f98732f85adab5098e067fe7c4fa986832e5f32caf
3
+ metadata.gz: 8d735b7de777b161f27ca0b0d50282eca7e18f1cd9e5aa6e298e089f6ccd2b89
4
+ data.tar.gz: 5ba4acf8e3c3ef5950c981de11a31f4bdcdb7a0fb1cc4c534b1e62f96a99fb3a
5
5
  SHA512:
6
- metadata.gz: 84422ff365869d1615c4cf17bbc035426e7a4cbfc5bd501104d5e1b2a5ea12535150de13e192b0a2849102fb0a49f7e174b2f9b804027614fc50f68858d139de
7
- data.tar.gz: d679d1b61f155a5170426d007e8d06e73b5e01c12fd4dc12ab70ceddb29fc1c80a35ab178b83986c32f0f05e1b508d9e6cf56ce8fd4840bacbf1d67f96dc6312
6
+ metadata.gz: 8ac69c1f5b7d3af53db342306629fc880ae8b6386ba0ee4ee02fcec391fcab29b9acc3aa972a01f2d99b31cf0267861d6553d2cdc54b839e2a36648801b83fd3
7
+ data.tar.gz: 76a3084d3b0fc496d39e9efc04733b17a64a31e5b4285ad450bf1dc681db132671febe821ae3ae717b68771a20901f28247ae62c89a2ec57a013ef4ca7bb6c4b
Binary file
Binary file
data/data/yolov8s.onnx ADDED
Binary file
data/ext/circe.cpp CHANGED
@@ -91,6 +91,7 @@ static VALUE eCirceError = Qundef;
91
91
  #include <chrono>
92
92
  #include <opencv2/highgui.hpp>
93
93
  #include <opencv2/imgproc.hpp>
94
+ #include <opencv2/core/mat.hpp>
94
95
 
95
96
  #include "yolo.h"
96
97
  #include "yunet.h"
@@ -119,6 +120,10 @@ static ID id_class;
119
120
  static ID id_png;
120
121
  static ID id_jpg;
121
122
 
123
+ static ID id_label;
124
+ static ID id_thickness;
125
+ static ID id_extra;
126
+ static ID id_color;
122
127
 
123
128
  static Yolo *yolo;
124
129
  static YuNet *yunet;
@@ -143,85 +148,157 @@ draw_label(cv::Mat& img, string label, Point& origin,
143
148
  }
144
149
 
145
150
  static void
146
- draw_labelbox(cv::Mat& img, string label, Rect& box,
147
- Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
148
- int thickness = 1) {
151
+ draw_box(cv::Mat& img, Rect& box,
152
+ Scalar& framecolor = BLUE, int thickness = 1) {
149
153
 
150
154
  Point a = { box.x, box.y };
151
155
  Point b = { box.x + box.width, box.y + box.height };
152
156
 
153
157
  cv::rectangle(img, a, b, framecolor, thickness);
154
- draw_label(img, label, a, textcolor, framecolor);
155
158
  }
156
159
 
160
+ static void
161
+ draw_labelbox(cv::Mat& img, string label, Rect& box,
162
+ Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
163
+ int thickness = 1) {
157
164
 
165
+ Point o = { box.x, box.y };
166
+
167
+ draw_box(img, box, framecolor, thickness);
168
+ draw_label(img, label, o, textcolor, framecolor);
169
+ }
170
+
171
+
172
+ VALUE
173
+ circe_annotate(Mat& img, Rect& box, VALUE v_annotation, int *state) {
174
+ if (img.empty() || NIL_P(v_annotation))
175
+ return Qnil;
176
+
177
+ VALUE v_label = Qnil;
178
+ VALUE v_color = ULONG2NUM(0x0000ff);
179
+ VALUE v_thickness = INT2NUM(1);
180
+ VALUE v_extra = Qtrue;
181
+
182
+ VALUE s_label = rb_id2sym(id_label);
183
+ VALUE s_color = rb_id2sym(id_color);
184
+ VALUE s_thickness = rb_id2sym(id_thickness);
185
+ VALUE s_extra = rb_id2sym(id_extra);
186
+
187
+ switch (TYPE(v_annotation)) {
188
+ case T_NIL:
189
+ break;
190
+ case T_HASH:
191
+ v_thickness = rb_hash_lookup2(v_annotation, s_thickness, v_thickness);
192
+ v_color = rb_hash_lookup2(v_annotation, s_color, v_color );
193
+ v_label = rb_hash_lookup2(v_annotation, s_label, v_label );
194
+ v_extra = rb_hash_lookup2(v_annotation, s_extra, v_extra );
195
+ break;
196
+ case T_ARRAY:
197
+ switch(RARRAY_LENINT(v_annotation)) {
198
+ default:
199
+ case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
200
+ case 2: v_color = RARRAY_AREF(v_annotation, 1);
201
+ case 1: v_label = RARRAY_AREF(v_annotation, 0);
202
+ case 0: break;
203
+ }
204
+ break;
205
+ case T_STRING:
206
+ v_label = v_annotation;
207
+ break;
208
+ }
209
+
210
+ // No color, no rendering
211
+ if (NIL_P(v_color))
212
+ return Qnil;
213
+
214
+ long rgb = NUM2ULONG(v_color);
215
+ Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
216
+ (rgb >> 8) & 0xFF,
217
+ (rgb >> 16) & 0xFF);
218
+
219
+ if (! NIL_P(v_thickness)) {
220
+ int thickness = NUM2INT(v_thickness);
221
+ draw_box(img, box, color, thickness);
222
+ }
223
+ if (! NIL_P(v_label)) {
224
+ string label = StringValueCStr(v_label);
225
+ Point o = { box.x, box.y };
226
+ draw_label(img, label, o, BLACK, color);
227
+ }
228
+
229
+ // Return normalized parameters
230
+ VALUE r = rb_hash_new();
231
+ rb_hash_aset(r, s_label, v_label );
232
+ rb_hash_aset(r, s_color, v_color );
233
+ rb_hash_aset(r, s_thickness, v_thickness);
234
+ rb_hash_aset(r, s_extra, v_extra );
235
+ return r;
236
+ }
158
237
 
159
238
 
160
239
 
161
240
  void
162
- yunet_process_features(vector<YuNet::Face>& faces,
163
- Mat& img, VALUE v_features, int *state)
241
+ yunet_process_features(cv::Mat& faces, Mat& img, VALUE v_features, int *state)
164
242
  {
165
- for (int i = 0; i < faces.size(); i++) {
166
- Rect box = faces[i].first;
167
- YuNet::Landmark lmark = faces[i].second;
243
+ for (int i = 0; i < faces.rows; i++) {
244
+ // Face
245
+ int x_f = static_cast<int>(faces.at<float>(i, 0));
246
+ int y_f = static_cast<int>(faces.at<float>(i, 1));
247
+ int w_f = static_cast<int>(faces.at<float>(i, 2));
248
+ int h_f = static_cast<int>(faces.at<float>(i, 3));
249
+ // Right eye
250
+ int x_re = static_cast<int>(faces.at<float>(i, 4));
251
+ int y_re = static_cast<int>(faces.at<float>(i, 5));
252
+ // Left eye
253
+ int x_le = static_cast<int>(faces.at<float>(i, 6));
254
+ int y_le = static_cast<int>(faces.at<float>(i, 7));
255
+ // Nose tip
256
+ int x_nt = static_cast<int>(faces.at<float>(i, 8));
257
+ int y_nt = static_cast<int>(faces.at<float>(i, 9));
258
+ // Right corner mouth
259
+ int x_rcm = static_cast<int>(faces.at<float>(i, 10));
260
+ int y_rcm = static_cast<int>(faces.at<float>(i, 11));
261
+ // Left corner mouth
262
+ int x_lcm = static_cast<int>(faces.at<float>(i, 12));
263
+ int y_lcm = static_cast<int>(faces.at<float>(i, 13));
264
+ // Confidence
265
+ float confidence = faces.at<float>(i, 14);
168
266
 
169
- VALUE v_type = ID2SYM(id_face);
267
+ VALUE v_type = ID2SYM(id_face);
170
268
  VALUE v_box = rb_ary_new_from_args(4,
171
- INT2NUM(box.x ), INT2NUM(box.y ),
172
- INT2NUM(box.width), INT2NUM(box.height));
269
+ INT2NUM(x_f), INT2NUM(y_f),
270
+ INT2NUM(w_f), INT2NUM(h_f));
173
271
  VALUE v_landmark = rb_ary_new_from_args(5,
174
- rb_ary_new_from_args(2, INT2NUM(lmark[0].x),
175
- INT2NUM(lmark[0].y)),
176
- rb_ary_new_from_args(2, INT2NUM(lmark[1].x),
177
- INT2NUM(lmark[1].y)),
178
- rb_ary_new_from_args(2, INT2NUM(lmark[2].x),
179
- INT2NUM(lmark[2].y)),
180
- rb_ary_new_from_args(2, INT2NUM(lmark[3].x),
181
- INT2NUM(lmark[3].y)),
182
- rb_ary_new_from_args(2, INT2NUM(lmark[4].x),
183
- INT2NUM(lmark[4].y)));
184
- VALUE v_feature = rb_ary_new_from_args(3, v_type, v_box,
185
- v_landmark);
272
+ rb_ary_new_from_args(2, INT2NUM(x_re),
273
+ INT2NUM(y_re)),
274
+ rb_ary_new_from_args(2, INT2NUM(x_le),
275
+ INT2NUM(y_le)),
276
+ rb_ary_new_from_args(2, INT2NUM(x_nt),
277
+ INT2NUM(y_nt)),
278
+ rb_ary_new_from_args(2, INT2NUM(x_rcm),
279
+ INT2NUM(y_rcm)),
280
+ rb_ary_new_from_args(2, INT2NUM(x_lcm),
281
+ INT2NUM(y_lcm)));
282
+ VALUE v_confidence = DBL2NUM(confidence);
283
+ VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box, v_landmark,
284
+ v_confidence);
285
+
186
286
  rb_ary_push(v_features, v_feature);
287
+
187
288
 
188
289
  if (!img.empty() && rb_block_given_p()) {
189
- VALUE v_annotation= rb_yield_splat(v_feature);
190
- VALUE v_label = Qnil;
191
- VALUE v_color = ULONG2NUM(0x0000ff);
192
- VALUE v_thickness = INT2NUM(1);
193
-
194
- switch (TYPE(v_annotation)) {
195
- case T_NIL:
196
- break;
197
- case T_HASH:
198
- break;
199
- case T_ARRAY:
200
- switch(RARRAY_LENINT(v_annotation)) {
201
- default:
202
- case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
203
- case 2: v_color = RARRAY_AREF(v_annotation, 1);
204
- case 1: v_label = RARRAY_AREF(v_annotation, 0);
205
- case 0: break;
206
- }
207
- break;
208
- case T_STRING:
209
- v_label = v_annotation;
210
- break;
211
- }
212
-
213
- if (! NIL_P(v_label)) {
214
- string label = StringValueCStr(v_label);
215
- long rgb = NUM2ULONG(v_color);
216
- int thickness = NUM2INT(v_thickness);
217
- Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
218
- (rgb >> 8) & 0xFF,
219
- (rgb >> 16) & 0xFF);
220
- draw_labelbox(img, label, box, color, BLACK, thickness);
221
-
222
- for (const auto& p : lmark) {
223
- cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
224
- }
290
+ cv::Rect box = cv::Rect(x_f, y_f, w_f, h_f);
291
+ VALUE v_annotation = rb_yield_splat(v_feature);
292
+ VALUE cfg = circe_annotate(img, box, v_annotation, state);
293
+ VALUE s_extra = rb_id2sym(id_extra);
294
+
295
+ if (!NIL_P(cfg) && RTEST(rb_hash_aref(cfg, s_extra))) {
296
+ cv::Scalar color = cv::Scalar(255, 0, 0);
297
+ cv::circle(img, cv::Point(x_le, y_le ), 3, color, 2);
298
+ cv::circle(img, cv::Point(x_re, y_re ), 3, color, 2);
299
+ cv::circle(img, cv::Point(x_nt, y_nt ), 3, color, 2);
300
+ cv::circle(img, cv::Point(x_rcm, y_rcm), 3, color, 2);
301
+ cv::circle(img, cv::Point(x_lcm, y_lcm), 3, color, 2);
225
302
  }
226
303
  }
227
304
  }
@@ -239,50 +316,18 @@ yolo_process_features(vector<Yolo::Item>& items,
239
316
  Rect box = std::get<2>(items[i]);
240
317
 
241
318
  VALUE v_type = ID2SYM(id_class);
242
- VALUE v_name = rb_str_new(name.c_str(), name.size());
243
- VALUE v_confidence = DBL2NUM(confidence);
244
319
  VALUE v_box = rb_ary_new_from_args(4,
245
320
  INT2NUM(box.x ), INT2NUM(box.y ),
246
321
  INT2NUM(box.width), INT2NUM(box.height));
322
+ VALUE v_name = rb_str_new(name.c_str(), name.size());
323
+ VALUE v_confidence = DBL2NUM(confidence);
247
324
  VALUE v_feature = rb_ary_new_from_args(4, v_type, v_box,
248
325
  v_name, v_confidence);
249
326
  rb_ary_push(v_features, v_feature);
250
327
 
251
- if (rb_block_given_p()) {
328
+ if (!img.empty() && rb_block_given_p()) {
252
329
  VALUE v_annotation = rb_yield_splat(v_feature);
253
-
254
- VALUE v_label = Qnil;
255
- VALUE v_color = ULONG2NUM(0x0000ff);
256
- VALUE v_thickness = INT2NUM(1);
257
-
258
- switch (TYPE(v_annotation)) {
259
- case T_NIL:
260
- break;
261
- case T_HASH:
262
- break;
263
- case T_ARRAY:
264
- switch(RARRAY_LENINT(v_annotation)) {
265
- default:
266
- case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
267
- case 2: v_color = RARRAY_AREF(v_annotation, 1);
268
- case 1: v_label = RARRAY_AREF(v_annotation, 0);
269
- case 0: break;
270
- }
271
- break;
272
- case T_STRING:
273
- v_label = v_annotation;
274
- break;
275
- }
276
-
277
- if (! NIL_P(v_label)) {
278
- string label = StringValueCStr(v_label);
279
- long rgb = NUM2ULONG(v_color);
280
- int thickness = NUM2INT(v_thickness);
281
- Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
282
- (rgb >> 8) & 0xFF,
283
- (rgb >> 16) & 0xFF);
284
- draw_labelbox(img, label, box, color, BLACK, thickness);
285
- }
330
+ circe_annotate(img, box, v_annotation, state);
286
331
  }
287
332
  }
288
333
  }
@@ -337,9 +382,10 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
337
382
  }
338
383
 
339
384
  if (RTEST(v_face)) {
340
- vector<YuNet::Face> faces;
385
+ cv::Mat faces;
341
386
  yunet->process(i_img, faces);
342
387
  yunet_process_features(faces, o_img, v_features, &state);
388
+ faces.release();
343
389
  if (state) goto exception;
344
390
  }
345
391
 
@@ -363,7 +409,12 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
363
409
  std::vector<uchar> buf;
364
410
  cv::imencode(format, o_img, buf);
365
411
  v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
412
+ buf.clear();
366
413
  }
414
+
415
+ i_img.release();
416
+ o_img.release();
417
+
367
418
  return rb_ary_new_from_args(2, v_features, v_image);
368
419
 
369
420
  exception:
@@ -382,22 +433,35 @@ void Init_core(void) {
382
433
  eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
383
434
  // myclass = rb_const_get(mymodule, sym_myclass);
384
435
 
385
- VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
386
- VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
436
+ VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
437
+ VALUE v_yolo_path = RARRAY_AREF(v_onnx_yolo, 0);
438
+ VALUE v_yolo_height = RARRAY_AREF(v_onnx_yolo, 1);
439
+ VALUE v_yolo_width = RARRAY_AREF(v_onnx_yolo, 2);
440
+
441
+ VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
442
+ VALUE v_yunet_path = RARRAY_AREF(v_onnx_yunet, 0);
387
443
 
388
- static Yolo _yolo = { StringValueCStr(v_onnx_yolo ) };
389
- static YuNet _yunet = { StringValueCStr(v_onnx_yunet) };
444
+
445
+
446
+ static Yolo _yolo = { StringValueCStr(v_yolo_path ),
447
+ { NUM2INT(v_yolo_width),
448
+ NUM2INT(v_yolo_height) }};
449
+ static YuNet _yunet = { StringValueCStr(v_yunet_path) };
390
450
 
391
451
  yolo = &_yolo;
392
452
  yunet = &_yunet;
393
453
 
394
454
 
395
- id_debug = rb_intern_const("debug" );
396
- id_face = rb_intern_const("face" );
397
- id_classify = rb_intern_const("classify");
398
- id_class = rb_intern_const("class" );
399
- id_png = rb_intern_const("png" );
400
- id_jpg = rb_intern_const("jpg" );
455
+ id_debug = rb_intern_const("debug" );
456
+ id_face = rb_intern_const("face" );
457
+ id_classify = rb_intern_const("classify" );
458
+ id_class = rb_intern_const("class" );
459
+ id_png = rb_intern_const("png" );
460
+ id_jpg = rb_intern_const("jpg" );
461
+ id_label = rb_intern_const("label" );
462
+ id_thickness = rb_intern_const("thickness");
463
+ id_extra = rb_intern_const("extra" );
464
+ id_color = rb_intern_const("color" );
401
465
 
402
466
 
403
467
  rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
data/ext/extconf.rb CHANGED
@@ -10,6 +10,6 @@ cflags, ldflags, libs = pkg_config('opencv4')
10
10
 
11
11
  $LDFLAGS += " #{ldflags} #{libs}"
12
12
  $INCFLAGS += " #{cflags}"
13
- $CXXFLAGS += "-std=c++17"
13
+ $CXXFLAGS += " -std=c++17"
14
14
 
15
15
  create_makefile("circe/core")
data/ext/yolo.cpp CHANGED
@@ -1,3 +1,14 @@
1
+ /*
2
+ * https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-CPP-Inference/inference.cpp
3
+ * https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp
4
+ *
5
+ * yolov5 has an output of shape:
6
+ * (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
7
+ * yolov8 has an output of shape:
8
+ * (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
9
+ *
10
+ * yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
11
+ */
1
12
  #include <tuple>
2
13
  #include <string>
3
14
  #include <opencv2/imgproc.hpp>
@@ -6,89 +17,144 @@
6
17
  #include "yolo.h"
7
18
 
8
19
 
9
- Yolo::Yolo(const std::string& model) {
10
- net = cv::dnn::readNetFromONNX(model);
20
+ Yolo::Yolo(const std::string& model, cv::Size size) {
21
+ this->net = cv::dnn::readNetFromONNX(model);
22
+ this->size = size;
11
23
  }
12
24
 
13
25
 
14
26
  void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {
27
+ int version = 5;
15
28
 
29
+ cv::Mat input = img;
30
+ if (letterBoxForSquare && size.width == size.height)
31
+ input = formatToSquare(input);
32
+
16
33
  // Pre-process
17
34
  cv::Mat blob;
18
- std::vector<cv::Mat> outputs;
19
-
20
- cv::dnn::blobFromImage(img, blob, 1./255.,
21
- cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
22
- cv::Scalar(), true, false);
35
+ cv::dnn::blobFromImage(input, blob, 1./255.,
36
+ this->size, cv::Scalar(), true, false);
23
37
 
24
38
  // Process
39
+ std::vector<cv::Mat> outputs;
25
40
  net.setInput(blob);
26
41
  net.forward(outputs, net.getUnconnectedOutLayersNames());
27
42
 
28
- // Post-process
29
- std::vector<int> class_ids;
30
- std::vector<float> confidences;
31
- std::vector<cv::Rect> boxes;
43
+ // Output
44
+ int rows = outputs[0].size[1];
45
+ int dimensions = outputs[0].size[2];
46
+
47
+ // Infer yolo version
48
+ // -> Check if the shape[2] is more than shape[1] (yolov8)
49
+ if (dimensions > rows) {
50
+ version = 8;
51
+ }
52
+
53
+ // Adjust according to version
54
+ if (version == 8) {
55
+ // cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
56
+ rows = outputs[0].size[2];
57
+ dimensions = outputs[0].size[1];
58
+ outputs[0] = outputs[0].reshape(1, dimensions);
59
+ cv::transpose(outputs[0], outputs[0]);
60
+ }
61
+
62
+ // Output
63
+ float *data = (float *)outputs[0].data;
32
64
 
33
65
  // Resizing factor.
34
- float x_factor = img.cols / INPUT_WIDTH;
35
- float y_factor = img.rows / INPUT_HEIGHT;
66
+ float x_factor = input.cols / size.width;
67
+ float y_factor = input.rows / size.height;
36
68
 
37
- float *data = (float *)outputs[0].data;
38
- const int dimensions = 85;
69
+ // Post-process
70
+ std::vector<int> class_ids;
71
+ std::vector<float> confidences;
72
+ std::vector<cv::Rect> boxes;
39
73
 
40
- // 25200 for default size 640.
41
- const int rows = 25200;
42
- // Iterate through 25200 detections.
43
- for (int i = 0; i < rows; ++i) {
44
- float confidence = data[4];
45
-
46
- // Discard bad detections and continue.
47
- if (confidence >= CONFIDENCE_THRESHOLD) {
48
- float *classes_scores = data + 5;
49
-
50
- // Create a 1x85 Mat and store class scores of 80 classes.
51
- cv::Mat scores(1, names.size(), CV_32FC1, classes_scores);
52
-
53
- // Perform minMaxLoc and acquire the index of best class score.
54
- cv::Point class_id;
55
- double max_class_score;
56
- minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
57
-
58
- // Continue if the class score is above the threshold.
59
- if (max_class_score > SCORE_THRESHOLD) {
60
- // Store class ID and confidence in the pre-defined
61
- // respective vectors.
62
- float cx = data[0]; // Center
63
- float cy = data[1];
64
- float w = data[2]; // Box dimension
65
- float h = data[3];
66
-
67
- // Bounding box coordinates.
68
- int left = int((cx - 0.5 * w) * x_factor);
69
- int top = int((cy - 0.5 * h) * y_factor);
70
- int width = int(w * x_factor);
71
- int height = int(h * y_factor);
72
-
73
- // Store good detections in the boxes vector.
74
- confidences.push_back(confidence);
74
+
75
+ if (version == 5) {
76
+ for (int i = 0; i < rows; ++i) {
77
+ float confidence = data[4];
78
+
79
+ if (confidence >= CONFIDENCE_THRESHOLD) {
80
+ float *classes_scores = data + 5;
81
+
82
+ cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
83
+ cv::Point class_id;
84
+ double max_class_score;
85
+
86
+ minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
87
+
88
+ if (max_class_score > SCORE_THRESHOLD) {
89
+ confidences.push_back(confidence);
90
+ class_ids.push_back(class_id.x);
91
+
92
+ float x = data[0];
93
+ float y = data[1];
94
+ float w = data[2];
95
+ float h = data[3];
96
+
97
+ int left = int((x - 0.5 * w) * x_factor);
98
+ int top = int((y - 0.5 * h) * y_factor);
99
+ int width = int(w * x_factor);
100
+ int height = int(h * y_factor);
101
+
102
+ boxes.push_back(cv::Rect(left, top, width, height));
103
+ }
104
+ }
105
+ data += dimensions;
106
+ }
107
+ } else if (version == 8) {
108
+ for (int i = 0; i < rows; ++i) {
109
+ float *classes_scores = data + 4;
110
+
111
+ cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
112
+ cv::Point class_id;
113
+ double maxClassScore;
114
+
115
+ minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
116
+
117
+ if (maxClassScore > SCORE_THRESHOLD) {
118
+ confidences.push_back(maxClassScore);
75
119
  class_ids.push_back(class_id.x);
120
+
121
+ float x = data[0];
122
+ float y = data[1];
123
+ float w = data[2];
124
+ float h = data[3];
125
+
126
+ int left = int((x - 0.5 * w) * x_factor);
127
+ int top = int((y - 0.5 * h) * y_factor);
128
+ int width = int(w * x_factor);
129
+ int height = int(h * y_factor);
130
+
76
131
  boxes.push_back(cv::Rect(left, top, width, height));
77
132
  }
78
- }
79
- // Jump to the next row.
80
- data += 85;
133
+ data += dimensions;
134
+ }
81
135
  }
136
+
82
137
 
83
138
  // Perform Non-Maximum Suppression and draw predictions.
84
- std::vector<int> indices;
139
+ std::vector<int> nms_result;
85
140
  cv::dnn::NMSBoxes(boxes, confidences,
86
- SCORE_THRESHOLD, NMS_THRESHOLD, indices);
141
+ SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
87
142
 
88
143
  items.clear();
89
144
 
90
- for (int i = 0; i < indices.size(); i++) {
91
- int idx = indices[i];
92
- items.push_back({ names[class_ids[idx]],confidences[idx],boxes[idx] });
145
+ for (int i = 0; i < nms_result.size(); i++) {
146
+ int idx = nms_result[i];
147
+ items.push_back({ classes[class_ids[idx]],confidences[idx],boxes[idx] });
93
148
  }
94
149
  }
150
+
151
+
152
+ cv::Mat Yolo::formatToSquare(const cv::Mat &source)
153
+ {
154
+ int col = source.cols;
155
+ int row = source.rows;
156
+ int _max = MAX(col, row);
157
+ cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
158
+ source.copyTo(result(cv::Rect(0, 0, col, row)));
159
+ return result;
160
+ }
data/ext/yolo.h CHANGED
@@ -16,12 +16,14 @@ public:
16
16
  private:
17
17
  static constexpr float INPUT_WIDTH = 640.0;
18
18
  static constexpr float INPUT_HEIGHT = 640.0;
19
- static constexpr float SCORE_THRESHOLD = 0.5;
20
- static constexpr float NMS_THRESHOLD = 0.45;
21
- static constexpr float CONFIDENCE_THRESHOLD = 0.45;
19
+ static constexpr float CONFIDENCE_THRESHOLD = 0.25;
20
+ static constexpr float SCORE_THRESHOLD = 0.50;
21
+ static constexpr float NMS_THRESHOLD = 0.50;
22
22
 
23
+ bool letterBoxForSquare = true;
24
+
23
25
  public:
24
- const std::vector<std::string> names = {
26
+ const std::vector<std::string> classes = {
25
27
  "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
26
28
  "truck", "boat", "traffic light", "fire hydrant", "stop sign",
27
29
  "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
@@ -38,11 +40,13 @@ private:
38
40
  };
39
41
 
40
42
  public:
41
- Yolo(const std::string& model);
43
+ Yolo(const std::string& model, cv::Size size);
42
44
  void process(cv::Mat &img, std::vector<Item> &items);
43
45
 
44
46
  private:
45
47
  cv::dnn::Net net;
48
+ cv::Size size;
49
+ cv::Mat formatToSquare(const cv::Mat &source);
46
50
  };
47
51
 
48
52
  #endif
data/ext/yunet.h CHANGED
@@ -2,37 +2,37 @@
2
2
  #define __YUNET__
3
3
 
4
4
  #include <string>
5
- #include <vector>
6
- #include <array>
7
- #include <utility>
8
-
9
- #include <opencv2/dnn.hpp>
10
-
5
+ #include <opencv2/objdetect/face.hpp>
6
+ #include <opencv2/core.hpp>
11
7
 
12
8
  class YuNet
13
9
  {
14
10
 
15
11
  public:
16
- typedef std::array<cv::Point, 5> Landmark;
17
- typedef std::pair<cv::Rect, Landmark> Face;
18
-
19
- private:
20
- static constexpr int MODEL_WIDTH = 512;
21
- static constexpr float CONF_THRESHOLD = 0.4f;
22
- static constexpr float NMS_THRESHOLD = 0.3f;
23
-
24
- const std::vector<float> VARIANCES = { 0.1f, 0.2f };
25
- const std::vector<int> STEPS = { 8, 16, 32, 64 };
26
- const std::vector<std::vector<int>> MIN_SIZES = {
27
- { 10, 16, 24 }, { 32, 48 }, { 64, 96 }, { 128, 192, 256 } };
12
+ YuNet(const std::string& model_path,
13
+ const cv::Size& input_size = cv::Size(320, 320),
14
+ float conf_threshold = 0.6f,
15
+ float nms_threshold = 0.3f,
16
+ int top_k = 5000,
17
+ int backend_id = cv::dnn::DNN_BACKEND_OPENCV,
18
+ int target_id = cv::dnn::DNN_TARGET_CPU)
19
+ {
20
+ model = cv::FaceDetectorYN::create(model_path, "", input_size,
21
+ conf_threshold,
22
+ nms_threshold, top_k,
23
+ backend_id, target_id);
24
+ }
28
25
 
29
- public:
30
- YuNet(const std::string& model);
31
26
  ~YuNet() {};
32
- void process(const cv::Mat& img, std::vector<Face>& faces);
27
+
28
+ void process(const cv::Mat& img, cv::Mat& faces) {
29
+ model->setInputSize(img.size());
30
+ model->detect(img, faces);
31
+ }
33
32
 
34
33
  private:
35
- cv::dnn::Net net;
34
+
35
+ cv::Ptr<cv::FaceDetectorYN> model;
36
36
  };
37
37
 
38
38
  #endif
data/lib/circe/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Circe
2
- VERSION = '0.1.0'
2
+ VERSION = '0.2.1'
3
3
  end
data/lib/circe.rb CHANGED
@@ -3,9 +3,9 @@ class Circe
3
3
  private
4
4
 
5
5
  # Don't know how to do it inside the c extension
6
- DATA_DIR = File.join(__dir__, '..', 'data').freeze
7
- ONNX_YOLO = File.join(DATA_DIR, 'yolov5s.onnx')
8
- ONNX_YUNET = File.join(DATA_DIR, 'face_detection_yunet_2022mar.onnx')
6
+ DATA_DIR = File.join(__dir__, '..', 'data').freeze
7
+ ONNX_YOLO = [ File.join(DATA_DIR, 'yolov8s.onnx'), 480, 640 ]
8
+ ONNX_YUNET = [ File.join(DATA_DIR, 'face_detection_yunet_2023mar.onnx') ]
9
9
 
10
10
  end
11
11
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: circe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stéphane D'Alu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-03 00:00:00.000000000 Z
11
+ date: 2024-07-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |2+
14
14
 
@@ -22,14 +22,16 @@ extensions:
22
22
  extra_rdoc_files: []
23
23
  files:
24
24
  - circe.gemspec
25
- - data/face_detection_yunet_2022mar.onnx
26
- - data/yolov5s.onnx
25
+ - data/face_detection_yunet_2023mar.onnx
26
+ - data/yolov5su-sim.onnx
27
+ - data/yolov5su.onnx
28
+ - data/yolov8s-sim.onnx
29
+ - data/yolov8s.onnx
27
30
  - ext/camera_model.h
28
31
  - ext/circe.cpp
29
32
  - ext/extconf.rb
30
33
  - ext/yolo.cpp
31
34
  - ext/yolo.h
32
- - ext/yunet.cpp
33
35
  - ext/yunet.h
34
36
  - lib/circe.rb
35
37
  - lib/circe/version.rb
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
54
  - !ruby/object:Gem::Version
53
55
  version: '0'
54
56
  requirements: []
55
- rubygems_version: 3.4.2
57
+ rubygems_version: 3.5.9
56
58
  signing_key:
57
59
  specification_version: 4
58
60
  summary: Face and object recognition
Binary file
data/ext/yunet.cpp DELETED
@@ -1,132 +0,0 @@
1
- #include <cmath>
2
- #include <string>
3
- #include <vector>
4
- #include <numeric>
5
- #include <algorithm>
6
-
7
- #include <opencv2/dnn.hpp>
8
-
9
- #include "yunet.h"
10
-
11
-
12
- YuNet::YuNet(const std::string& model_filename)
13
- {
14
- net = cv::dnn::readNetFromONNX(model_filename);
15
- net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
16
- net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
17
- }
18
-
19
-
20
- void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
21
- {
22
- /* -- Preparing for image size -- */
23
- cv::Size model_size;
24
- model_size.width = MODEL_WIDTH;
25
- model_size.height = MODEL_WIDTH * img.rows / img.cols;
26
- model_size.height = (model_size.height / 32) * 32;
27
-
28
- std::pair<int32_t, int32_t> feature_map_2th = {
29
- (model_size.height + 1) / 2 / 2,
30
- (model_size.width + 1) / 2 / 2
31
- };
32
-
33
- std::vector<std::pair<int32_t, int32_t>> feature_map_list;
34
- feature_map_list.push_back({ (feature_map_2th.first + 1) / 2 ,
35
- (feature_map_2th.second + 1) / 2 });
36
-
37
- for (int32_t i = 0; i < 3; i++) {
38
- const auto& previous = feature_map_list.back();
39
- feature_map_list.push_back({ (previous.first + 1) / 2 ,
40
- (previous.second + 1) / 2 });
41
- }
42
-
43
- std::vector<std::vector<float>> prior_list;
44
- for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
45
- const auto& min_sizes = MIN_SIZES[i];
46
- const auto& feature_map = feature_map_list[i];
47
- for (int y = 0; y < feature_map.first; y++) {
48
- for (int x = 0; x < feature_map.second; x++) {
49
- for (const auto& min_size : min_sizes) {
50
- float s_kx = static_cast<float>(min_size) / model_size.width;
51
- float s_ky = static_cast<float>(min_size) / model_size.height;
52
- float cx = (x + 0.5f) * STEPS[i] / model_size.width;
53
- float cy = (y + 0.5f) * STEPS[i] / model_size.height;
54
- prior_list.push_back({ cx, cy, s_kx, s_ky });
55
- }
56
- }
57
- }
58
- }
59
-
60
-
61
- /* -- Pre-process -- */
62
- cv::Mat blob;
63
- cv::dnn::blobFromImage(img, blob, 1.0, model_size);
64
-
65
- /* -- Inference -- */
66
- std::vector<cv::Mat> outputs;
67
- net.setInput(blob);
68
- net.forward(outputs, { "conf", "iou", "loc" });
69
-
70
- /* -- Post Process -- */
71
- const cv::Mat& mat_conf = outputs[0];
72
- const cv::Mat& mat_iou = outputs[1];
73
- const cv::Mat& mat_loc = outputs[2];
74
- const cv::Size image_size = img.size();
75
-
76
- // Get score list
77
- std::vector<float> cls_score;
78
- for (int32_t row = 0; row < mat_conf.rows; row++) {
79
- float val = mat_conf.at<float>(cv::Point(1, row));
80
- cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
81
- }
82
-
83
- std::vector<float> iou_score;
84
- for (int32_t row = 0; row < mat_iou.rows; row++) {
85
- float val = mat_conf.at<float>(cv::Point(0, row));
86
- iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
87
- }
88
-
89
- std::vector<float> score;
90
- for (int32_t row = 0; row < mat_conf.rows; row++) {
91
- score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
92
- }
93
-
94
- // All bbox
95
- std::vector<cv::Rect> bbox_all;
96
- for (int row = 0; row < mat_loc.rows; row++) {
97
- float cx = mat_loc.at<float>(cv::Point(0, row));
98
- float cy = mat_loc.at<float>(cv::Point(1, row));
99
- float w = mat_loc.at<float>(cv::Point(2, row));
100
- float h = mat_loc.at<float>(cv::Point(3, row));
101
-
102
- cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
103
- cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
104
- w = prior_list[row][2] * std::exp(w * VARIANCES[0]);
105
- h = prior_list[row][3] * std::exp(h * VARIANCES[1]);
106
-
107
- bbox_all.push_back({
108
- static_cast<int32_t>((cx - w / 2) * image_size.width),
109
- static_cast<int32_t>((cy - h / 2) * image_size.height),
110
- static_cast<int32_t>(w * image_size.width),
111
- static_cast<int32_t>(h * image_size.height) });
112
- }
113
-
114
- // Non-Maximum Suppression
115
- std::vector<int> indices;
116
- cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);
117
-
118
- // Get valid bbox and landmark
119
- faces.clear();
120
- for (int idx : indices) {
121
- Landmark landmark; // (landmark is 5 points)
122
- for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
123
- cv::Point& p = landmark[i];
124
- float x = mat_loc.at<float>(cv::Point(4 + i * 2, idx));
125
- float y = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
126
- p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
127
- p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
128
- }
129
-
130
- faces.push_back({ bbox_all[idx], landmark });
131
- }
132
- }