RubyGems - circe - Versions diffs - 0.1.0 → 0.2.1 - Mend

circe 0.1.0 → 0.2.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +4 -4
data/data/face_detection_yunet_2023mar.onnx +0 -0
data/data/{yolov5s.onnx → yolov5su-sim.onnx} +0 -0
data/data/yolov5su.onnx +0 -0
data/data/yolov8s-sim.onnx +0 -0
data/data/yolov8s.onnx +0 -0
data/ext/circe.cpp +171 -107
data/ext/extconf.rb +1 -1
data/ext/yolo.cpp +124 -58
data/ext/yolo.h +9 -5
data/ext/yunet.h +22 -22
data/lib/circe/version.rb +1 -1
data/lib/circe.rb +3 -3
metadata +8 -6
data/data/face_detection_yunet_2022mar.onnx +0 -0
data/ext/yunet.cpp +0 -132

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 683391e9186e4a20233cb25ce28f37b7bf56c88586b1ac72f1312081e4e48c7d
-  data.tar.gz: 680a92f2318df871cdd2f6f98732f85adab5098e067fe7c4fa986832e5f32caf
+  metadata.gz: 8d735b7de777b161f27ca0b0d50282eca7e18f1cd9e5aa6e298e089f6ccd2b89
+  data.tar.gz: 5ba4acf8e3c3ef5950c981de11a31f4bdcdb7a0fb1cc4c534b1e62f96a99fb3a
 SHA512:
-  metadata.gz: 84422ff365869d1615c4cf17bbc035426e7a4cbfc5bd501104d5e1b2a5ea12535150de13e192b0a2849102fb0a49f7e174b2f9b804027614fc50f68858d139de
-  data.tar.gz: d679d1b61f155a5170426d007e8d06e73b5e01c12fd4dc12ab70ceddb29fc1c80a35ab178b83986c32f0f05e1b508d9e6cf56ce8fd4840bacbf1d67f96dc6312
+  metadata.gz: 8ac69c1f5b7d3af53db342306629fc880ae8b6386ba0ee4ee02fcec391fcab29b9acc3aa972a01f2d99b31cf0267861d6553d2cdc54b839e2a36648801b83fd3
+  data.tar.gz: 76a3084d3b0fc496d39e9efc04733b17a64a31e5b4285ad450bf1dc681db132671febe821ae3ae717b68771a20901f28247ae62c89a2ec57a013ef4ca7bb6c4b

data/data/face_detection_yunet_2023mar.onnx ADDED Viewed

Binary file

data/data/{yolov5s.onnx → yolov5su-sim.onnx} RENAMED Viewed

Binary file

data/data/yolov5su.onnx ADDED Viewed

Binary file

data/data/yolov8s-sim.onnx ADDED Viewed

Binary file

data/data/yolov8s.onnx ADDED Viewed

Binary file

data/ext/circe.cpp CHANGED Viewed

@@ -91,6 +91,7 @@ static VALUE eCirceError  = Qundef;
 #include <chrono>
 #include <opencv2/highgui.hpp>
 #include <opencv2/imgproc.hpp>
+#include <opencv2/core/mat.hpp>
 #include "yolo.h"
 #include "yunet.h"
@@ -119,6 +120,10 @@ static ID id_class;
 static ID id_png;
 static ID id_jpg;
+static ID id_label;
+static ID id_thickness;
+static ID id_extra;
+static ID id_color;
 static Yolo  *yolo;
 static YuNet *yunet;
@@ -143,85 +148,157 @@ draw_label(cv::Mat& img, string label, Point& origin,
 }
 static void
-draw_labelbox(cv::Mat& img, string label, Rect& box,
-	      Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
-	      int thickness = 1) {
+draw_box(cv::Mat& img, Rect& box,
+	 Scalar& framecolor = BLUE, int thickness = 1) {
     Point a = { box.x,             box.y              };
     Point b = { box.x + box.width, box.y + box.height };
     cv::rectangle(img, a, b, framecolor, thickness);
-    draw_label(img, label, a, textcolor, framecolor);
 }
+static void
+draw_labelbox(cv::Mat& img, string label, Rect& box,
+	      Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
+	      int thickness = 1) {
+    Point o = { box.x, box.y };
+    draw_box(img, box, framecolor, thickness);
+    draw_label(img, label, o, textcolor, framecolor);
+}
+VALUE
+circe_annotate(Mat& img, Rect& box, VALUE v_annotation, int *state) {
+    if (img.empty() || NIL_P(v_annotation))
+        return Qnil;
+    VALUE v_label      = Qnil;
+    VALUE v_color      = ULONG2NUM(0x0000ff);
+    VALUE v_thickness  = INT2NUM(1);
+    VALUE v_extra      = Qtrue;
+    VALUE s_label      = rb_id2sym(id_label);
+    VALUE s_color      = rb_id2sym(id_color);
+    VALUE s_thickness  = rb_id2sym(id_thickness);
+    VALUE s_extra      = rb_id2sym(id_extra);
+    switch (TYPE(v_annotation)) {
+    case T_NIL:
+        break;
+    case T_HASH:
+        v_thickness = rb_hash_lookup2(v_annotation, s_thickness, v_thickness);
+	v_color     = rb_hash_lookup2(v_annotation, s_color,     v_color    );
+	v_label     = rb_hash_lookup2(v_annotation, s_label,     v_label    );
+	v_extra     = rb_hash_lookup2(v_annotation, s_extra,     v_extra    );
+	break;
+    case T_ARRAY:
+        switch(RARRAY_LENINT(v_annotation)) {
+	default:
+	case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
+	case 2: v_color     = RARRAY_AREF(v_annotation, 1);
+	case 1: v_label     = RARRAY_AREF(v_annotation, 0);
+	case 0: break;
+	}
+	break;
+    case T_STRING:
+        v_label = v_annotation;
+	break;
+    }
+    // No color, no rendering
+    if (NIL_P(v_color))
+        return Qnil;
+    long   rgb   = NUM2ULONG(v_color);
+    Scalar color = cv::Scalar((rgb >>  0) & 0xFF,
+			      (rgb >>  8) & 0xFF,
+			      (rgb >> 16) & 0xFF);
+    if (! NIL_P(v_thickness)) {
+        int thickness = NUM2INT(v_thickness);
+	draw_box(img, box, color, thickness);
+    }
+    if (! NIL_P(v_label)) {
+        string label  = StringValueCStr(v_label);
+	Point  o      = { box.x, box.y };
+	draw_label(img, label, o, BLACK, color);
+    }
+    // Return normalized parameters
+    VALUE r = rb_hash_new();
+    rb_hash_aset(r, s_label,     v_label    );
+    rb_hash_aset(r, s_color,     v_color    );
+    rb_hash_aset(r, s_thickness, v_thickness);
+    rb_hash_aset(r, s_extra,     v_extra    );
+    return r;
+}
 void
-yunet_process_features(vector<YuNet::Face>& faces,
-		       Mat& img, VALUE v_features, int *state)
+yunet_process_features(cv::Mat& faces, Mat& img, VALUE v_features, int *state)
 {
-    for (int i = 0; i < faces.size(); i++) {
-	Rect box              = faces[i].first;
-	YuNet::Landmark lmark = faces[i].second;
+    for (int i = 0; i < faces.rows; i++) {
+	// Face
+	int x_f   = static_cast<int>(faces.at<float>(i,  0));
+        int y_f   = static_cast<int>(faces.at<float>(i,  1));
+        int w_f   = static_cast<int>(faces.at<float>(i,  2));
+        int h_f   = static_cast<int>(faces.at<float>(i,  3));
+	// Right eye
+	int x_re  = static_cast<int>(faces.at<float>(i,  4));
+	int y_re  = static_cast<int>(faces.at<float>(i,  5));
+	// Left eye
+        int x_le  = static_cast<int>(faces.at<float>(i,  6));
+	int y_le  = static_cast<int>(faces.at<float>(i,  7));
+	// Nose tip
+        int x_nt  = static_cast<int>(faces.at<float>(i,  8));
+	int y_nt  = static_cast<int>(faces.at<float>(i,  9));
+        // Right corner mouth
+        int x_rcm = static_cast<int>(faces.at<float>(i, 10));
+	int y_rcm = static_cast<int>(faces.at<float>(i, 11));
+        // Left corner mouth
+	int x_lcm = static_cast<int>(faces.at<float>(i, 12));
+	int y_lcm = static_cast<int>(faces.at<float>(i, 13));
+	// Confidence
+	float confidence = faces.at<float>(i, 14);
-	VALUE v_type       = ID2SYM(id_face);
+        VALUE v_type       = ID2SYM(id_face);
 	VALUE v_box        = rb_ary_new_from_args(4,
-				 INT2NUM(box.x    ), INT2NUM(box.y     ),
-				 INT2NUM(box.width), INT2NUM(box.height));
+				INT2NUM(x_f), INT2NUM(y_f),
+				INT2NUM(w_f), INT2NUM(h_f));
 	VALUE v_landmark   = rb_ary_new_from_args(5,
-			        rb_ary_new_from_args(2, INT2NUM(lmark[0].x),
-						        INT2NUM(lmark[0].y)),
-				rb_ary_new_from_args(2, INT2NUM(lmark[1].x),
-						        INT2NUM(lmark[1].y)),
-				rb_ary_new_from_args(2, INT2NUM(lmark[2].x),
-						        INT2NUM(lmark[2].y)),
-				rb_ary_new_from_args(2, INT2NUM(lmark[3].x),
-						        INT2NUM(lmark[3].y)),
-				rb_ary_new_from_args(2, INT2NUM(lmark[4].x),
-						        INT2NUM(lmark[4].y)));
-	VALUE v_feature    = rb_ary_new_from_args(3, v_type, v_box,
-						     v_landmark);
+			        rb_ary_new_from_args(2, INT2NUM(x_re),
+						        INT2NUM(y_re)),
+			        rb_ary_new_from_args(2, INT2NUM(x_le),
+						        INT2NUM(y_le)),
+			        rb_ary_new_from_args(2, INT2NUM(x_nt),
+						        INT2NUM(y_nt)),
+			        rb_ary_new_from_args(2, INT2NUM(x_rcm),
+						        INT2NUM(y_rcm)),
+				rb_ary_new_from_args(2, INT2NUM(x_lcm),
+							INT2NUM(y_lcm)));
+	VALUE v_confidence = DBL2NUM(confidence);
+	VALUE v_feature    = rb_ary_new_from_args(4, v_type, v_box, v_landmark,
+						  v_confidence);
 	rb_ary_push(v_features, v_feature);
 	if (!img.empty() && rb_block_given_p()) {
-	    VALUE v_annotation= rb_yield_splat(v_feature);
-	    VALUE v_label     = Qnil;
-	    VALUE v_color     = ULONG2NUM(0x0000ff);
-	    VALUE v_thickness = INT2NUM(1);
-	    switch (TYPE(v_annotation)) {
-	    case T_NIL:
-		break;
-	    case T_HASH:
-		break;
-	    case T_ARRAY:
-		switch(RARRAY_LENINT(v_annotation)) {
-		default:
-		case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
-		case 2: v_color     = RARRAY_AREF(v_annotation, 1);
-		case 1: v_label     = RARRAY_AREF(v_annotation, 0);
-		case 0: break;
-		}
-		break;
-	    case T_STRING:
-		v_label = v_annotation;
-		break;
-	    }
-	    if (! NIL_P(v_label)) {
-		string label     = StringValueCStr(v_label);
-		long   rgb       = NUM2ULONG(v_color);
-		int    thickness = NUM2INT(v_thickness);
-		Scalar color     = cv::Scalar((rgb >>  0) & 0xFF,
-					      (rgb >>  8) & 0xFF,
-					      (rgb >> 16) & 0xFF);
-		draw_labelbox(img, label, box, color, BLACK, thickness);
-		for (const auto& p : lmark) {
-		    cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
-		}
+	    cv::Rect box       = cv::Rect(x_f, y_f, w_f, h_f);
+	    VALUE v_annotation = rb_yield_splat(v_feature);
+	    VALUE cfg          = circe_annotate(img, box, v_annotation, state);
+	    VALUE s_extra      = rb_id2sym(id_extra);
+	    if (!NIL_P(cfg) && RTEST(rb_hash_aref(cfg, s_extra))) {
+		cv::Scalar color = cv::Scalar(255, 0, 0);
+		cv::circle(img, cv::Point(x_le,  y_le ), 3, color, 2);
+		cv::circle(img, cv::Point(x_re,  y_re ), 3, color, 2);
+		cv::circle(img, cv::Point(x_nt,  y_nt ), 3, color, 2);
+		cv::circle(img, cv::Point(x_rcm, y_rcm), 3, color, 2);
+		cv::circle(img, cv::Point(x_lcm, y_lcm), 3, color, 2);
 	    }
 	}
     }
@@ -239,50 +316,18 @@ yolo_process_features(vector<Yolo::Item>& items,
 	Rect   box         = std::get<2>(items[i]);
 	VALUE v_type       = ID2SYM(id_class);
-	VALUE v_name       = rb_str_new(name.c_str(), name.size());
-	VALUE v_confidence = DBL2NUM(confidence);
 	VALUE v_box        = rb_ary_new_from_args(4,
 				  INT2NUM(box.x    ), INT2NUM(box.y     ),
 				  INT2NUM(box.width), INT2NUM(box.height));
+	VALUE v_name       = rb_str_new(name.c_str(), name.size());
+	VALUE v_confidence = DBL2NUM(confidence);
 	VALUE v_feature    = rb_ary_new_from_args(4, v_type, v_box,
 						     v_name, v_confidence);
 	rb_ary_push(v_features, v_feature);
-	if (rb_block_given_p()) {
+	if (!img.empty() && rb_block_given_p()) {
 	    VALUE v_annotation = rb_yield_splat(v_feature);
-	    VALUE v_label      = Qnil;
-	    VALUE v_color      = ULONG2NUM(0x0000ff);
-	    VALUE v_thickness  = INT2NUM(1);
-	    switch (TYPE(v_annotation)) {
-	    case T_NIL:
-		break;
-	    case T_HASH:
-		break;
-	    case T_ARRAY:
-		switch(RARRAY_LENINT(v_annotation)) {
-		default:
-		case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
-		case 2: v_color     = RARRAY_AREF(v_annotation, 1);
-		case 1: v_label     = RARRAY_AREF(v_annotation, 0);
-		case 0: break;
-		}
-		break;
-	    case T_STRING:
-		v_label = v_annotation;
-		break;
-	    }
-	    if (! NIL_P(v_label)) {
-		string label     = StringValueCStr(v_label);
-		long   rgb       = NUM2ULONG(v_color);
-		int    thickness = NUM2INT(v_thickness);
-		Scalar color     = cv::Scalar((rgb >>  0) & 0xFF,
-					      (rgb >>  8) & 0xFF,
-					      (rgb >> 16) & 0xFF);
-		draw_labelbox(img, label, box, color, BLACK, thickness);
-	    }
+	    circe_annotate(img, box, v_annotation, state);
 	}
     }
 }
@@ -337,9 +382,10 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
     }
     if (RTEST(v_face)) {
-	vector<YuNet::Face> faces;
+	cv::Mat faces;
 	yunet->process(i_img, faces);
 	yunet_process_features(faces, o_img, v_features, &state);
+	faces.release();
 	if (state) goto exception;
     }
@@ -363,7 +409,12 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
 	std::vector<uchar> buf;
 	cv::imencode(format, o_img, buf);
 	v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
+	buf.clear();
     }
+    i_img.release();
+    o_img.release();
     return rb_ary_new_from_args(2, v_features, v_image);
  exception:
@@ -382,22 +433,35 @@ void Init_core(void) {
     eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
     // myclass = rb_const_get(mymodule, sym_myclass);
-    VALUE v_onnx_yolo  = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
-    VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
+    VALUE v_onnx_yolo   = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
+    VALUE v_yolo_path   = RARRAY_AREF(v_onnx_yolo, 0);
+    VALUE v_yolo_height = RARRAY_AREF(v_onnx_yolo, 1);
+    VALUE v_yolo_width  = RARRAY_AREF(v_onnx_yolo, 2);
+    VALUE v_onnx_yunet  = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
+    VALUE v_yunet_path  = RARRAY_AREF(v_onnx_yunet, 0);
-    static Yolo  _yolo  = { StringValueCStr(v_onnx_yolo ) };
-    static YuNet _yunet = { StringValueCStr(v_onnx_yunet) };
+    static Yolo  _yolo  = { StringValueCStr(v_yolo_path ),
+			    { NUM2INT(v_yolo_width),
+			      NUM2INT(v_yolo_height) }};
+    static YuNet _yunet = { StringValueCStr(v_yunet_path) };
     yolo  = &_yolo;
     yunet = &_yunet;
-    id_debug       = rb_intern_const("debug"   );
-    id_face        = rb_intern_const("face"    );
-    id_classify    = rb_intern_const("classify");
-    id_class       = rb_intern_const("class"   );
-    id_png         = rb_intern_const("png"     );
-    id_jpg         = rb_intern_const("jpg"     );
+    id_debug       = rb_intern_const("debug"    );
+    id_face        = rb_intern_const("face"     );
+    id_classify    = rb_intern_const("classify" );
+    id_class       = rb_intern_const("class"    );
+    id_png         = rb_intern_const("png"      );
+    id_jpg         = rb_intern_const("jpg"      );
+    id_label       = rb_intern_const("label"    );
+    id_thickness   = rb_intern_const("thickness");
+    id_extra       = rb_intern_const("extra"    );
+    id_color       = rb_intern_const("color"    );
     rb_define_method(cCirce, "analyze", circe_m_analyze, -1);

data/ext/extconf.rb CHANGED Viewed

@@ -10,6 +10,6 @@ cflags, ldflags, libs = pkg_config('opencv4')
 $LDFLAGS  += " #{ldflags} #{libs}"
 $INCFLAGS += " #{cflags}"
-$CXXFLAGS += "-std=c++17"
+$CXXFLAGS += " -std=c++17"
 create_makefile("circe/core")

data/ext/yolo.cpp CHANGED Viewed

@@ -1,3 +1,14 @@
+/*
+ * https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-CPP-Inference/inference.cpp
+ * https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp
+ *
+ * yolov5 has an output of shape:
+ *  (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+ * yolov8 has an output of shape:
+ *  (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
+ *
+ * yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
+ */
 #include <tuple>
 #include <string>
 #include <opencv2/imgproc.hpp>
@@ -6,89 +17,144 @@
 #include "yolo.h"
-Yolo::Yolo(const std::string& model) {
-    net = cv::dnn::readNetFromONNX(model);
+Yolo::Yolo(const std::string& model, cv::Size size) {
+    this->net  = cv::dnn::readNetFromONNX(model);
+    this->size = size;
 }
 void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {
+    int version = 5;
+    cv::Mat input = img;
+    if (letterBoxForSquare && size.width == size.height)
+        input = formatToSquare(input);
     // Pre-process
     cv::Mat blob;
-    std::vector<cv::Mat> outputs;
-    cv::dnn::blobFromImage(img, blob, 1./255.,
-			   cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
-			   cv::Scalar(), true, false);
+    cv::dnn::blobFromImage(input, blob, 1./255.,
+			   this->size, cv::Scalar(), true, false);
     // Process
+    std::vector<cv::Mat> outputs;
     net.setInput(blob);
     net.forward(outputs, net.getUnconnectedOutLayersNames());
-    // Post-process
-    std::vector<int>   class_ids;
-    std::vector<float> confidences;
-    std::vector<cv::Rect>  boxes;
+    // Output
+    int    rows       = outputs[0].size[1];
+    int    dimensions = outputs[0].size[2];
+    // Infer yolo version
+    //  -> Check if the shape[2] is more than shape[1] (yolov8)
+    if (dimensions > rows) {
+        version = 8;
+    }
+    // Adjust according to version
+    if (version == 8) {
+	//  cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
+        rows       = outputs[0].size[2];
+        dimensions = outputs[0].size[1];
+        outputs[0] = outputs[0].reshape(1, dimensions);
+        cv::transpose(outputs[0], outputs[0]);
+    }
+    // Output
+    float *data    = (float *)outputs[0].data;
     // Resizing factor.
-    float x_factor = img.cols / INPUT_WIDTH;
-    float y_factor = img.rows / INPUT_HEIGHT;
+    float x_factor = input.cols / size.width;
+    float y_factor = input.rows / size.height;
-    float *data = (float *)outputs[0].data;
-    const int dimensions = 85;
+    // Post-process
+    std::vector<int>       class_ids;
+    std::vector<float>     confidences;
+    std::vector<cv::Rect>  boxes;
-    // 25200 for default size 640.
-    const int rows = 25200;
-    // Iterate through 25200 detections.
-    for (int i = 0; i < rows; ++i) {
-	float confidence = data[4];
-	// Discard bad detections and continue.
-	if (confidence >= CONFIDENCE_THRESHOLD) {
-            float *classes_scores = data + 5;
-            // Create a 1x85 Mat and store class scores of 80 classes.
-	    cv::Mat scores(1, names.size(), CV_32FC1, classes_scores);
-            // Perform minMaxLoc and acquire the index of best class score.
-	    cv::Point class_id;
-            double max_class_score;
-            minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
-            // Continue if the class score is above the threshold.
-            if (max_class_score > SCORE_THRESHOLD) {
-                // Store class ID and confidence in the pre-defined
-		// respective vectors.
-                float cx = data[0];  // Center
-                float cy = data[1];
-                float w  = data[2];  // Box dimension
-                float h  = data[3];
-                // Bounding box coordinates.
-                int left   = int((cx - 0.5 * w) * x_factor);
-                int top    = int((cy - 0.5 * h) * y_factor);
-                int width  = int(w              * x_factor);
-                int height = int(h              * y_factor);
-                // Store good detections in the boxes vector.
-                confidences.push_back(confidence);
+    if        (version == 5) {
+	for (int i = 0; i < rows; ++i) {
+            float confidence = data[4];
+            if (confidence >= CONFIDENCE_THRESHOLD) {
+                float *classes_scores = data + 5;
+                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
+                cv::Point class_id;
+                double max_class_score;
+                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
+                if (max_class_score > SCORE_THRESHOLD) {
+                    confidences.push_back(confidence);
+                    class_ids.push_back(class_id.x);
+                    float x = data[0];
+                    float y = data[1];
+                    float w = data[2];
+                    float h = data[3];
+                    int left   = int((x - 0.5 * w) * x_factor);
+                    int top    = int((y - 0.5 * h) * y_factor);
+                    int width  = int(w * x_factor);
+                    int height = int(h * y_factor);
+                    boxes.push_back(cv::Rect(left, top, width, height));
+                }
+            }
+	    data += dimensions;
+	}
+    } else if (version == 8) {
+	for (int i = 0; i < rows; ++i) {
+	    float *classes_scores = data + 4;
+            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
+            cv::Point class_id;
+            double maxClassScore;
+            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
+            if (maxClassScore > SCORE_THRESHOLD) {
+                confidences.push_back(maxClassScore);
                 class_ids.push_back(class_id.x);
+                float x = data[0];
+                float y = data[1];
+                float w = data[2];
+                float h = data[3];
+                int left   = int((x - 0.5 * w) * x_factor);
+                int top    = int((y - 0.5 * h) * y_factor);
+                int width  = int(w * x_factor);
+                int height = int(h * y_factor);
                 boxes.push_back(cv::Rect(left, top, width, height));
             }
-        }
-        // Jump to the next row.
-        data += 85;
+	    data += dimensions;
+	}
     }
     // Perform Non-Maximum Suppression and draw predictions.
-    std::vector<int> indices;
+    std::vector<int> nms_result;
     cv::dnn::NMSBoxes(boxes, confidences,
-		      SCORE_THRESHOLD, NMS_THRESHOLD, indices);
+		      SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
     items.clear();
-    for (int i = 0; i < indices.size(); i++) {
-        int  idx = indices[i];
-	items.push_back({ names[class_ids[idx]],confidences[idx],boxes[idx] });
+    for (int i = 0; i < nms_result.size(); i++) {
+        int  idx = nms_result[i];
+	items.push_back({ classes[class_ids[idx]],confidences[idx],boxes[idx] });
     }
 }
+cv::Mat Yolo::formatToSquare(const cv::Mat &source)
+{
+    int col = source.cols;
+    int row = source.rows;
+    int _max = MAX(col, row);
+    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
+    source.copyTo(result(cv::Rect(0, 0, col, row)));
+    return result;
+}

data/ext/yolo.h CHANGED Viewed

@@ -16,12 +16,14 @@ public:
 private:
     static constexpr float INPUT_WIDTH          = 640.0;
     static constexpr float INPUT_HEIGHT         = 640.0;
-    static constexpr float SCORE_THRESHOLD      =   0.5;
-    static constexpr float NMS_THRESHOLD        =   0.45;
-    static constexpr float CONFIDENCE_THRESHOLD =   0.45;
+    static constexpr float CONFIDENCE_THRESHOLD =   0.25;
+    static constexpr float SCORE_THRESHOLD      =   0.50;
+    static constexpr float NMS_THRESHOLD        =   0.50;
+    bool letterBoxForSquare = true;
  public:
-    const std::vector<std::string> names = {
+    const std::vector<std::string> classes = {
 	"person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
 	"truck", "boat", "traffic light", "fire hydrant", "stop sign",
 	"parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
@@ -38,11 +40,13 @@ private:
     };
 public:
-    Yolo(const std::string& model);
+    Yolo(const std::string& model, cv::Size size);
     void process(cv::Mat &img, std::vector<Item> &items);
 private:
     cv::dnn::Net net;
+    cv::Size     size;
+    cv::Mat formatToSquare(const cv::Mat &source);
 };
 #endif

data/ext/yunet.h CHANGED Viewed

@@ -2,37 +2,37 @@
 #define __YUNET__
 #include <string>
-#include <vector>
-#include <array>
-#include <utility>
-#include <opencv2/dnn.hpp>
+#include <opencv2/objdetect/face.hpp>
+#include <opencv2/core.hpp>
 class YuNet
 {
 public:
-    typedef std::array<cv::Point, 5> Landmark;
-    typedef std::pair<cv::Rect, Landmark> Face;
-private:
-    static constexpr int                MODEL_WIDTH    = 512;
-    static constexpr float              CONF_THRESHOLD = 0.4f;
-    static constexpr float              NMS_THRESHOLD  = 0.3f;
-    const std::vector<float>            VARIANCES      = { 0.1f, 0.2f };
-    const std::vector<int>              STEPS          = { 8, 16, 32, 64 };
-    const std::vector<std::vector<int>> MIN_SIZES      = {
-	{ 10, 16, 24 }, { 32, 48 }, { 64, 96 }, { 128, 192, 256 } };
+    YuNet(const std::string& model_path,
+          const cv::Size&    input_size     = cv::Size(320, 320),
+          float              conf_threshold = 0.6f,
+          float              nms_threshold  = 0.3f,
+          int                top_k          = 5000,
+          int                backend_id     = cv::dnn::DNN_BACKEND_OPENCV,
+          int                target_id      = cv::dnn::DNN_TARGET_CPU)
+    {
+        model = cv::FaceDetectorYN::create(model_path, "", input_size,
+					   conf_threshold,
+					   nms_threshold, top_k,
+					   backend_id, target_id);
+    }
-public:
-    YuNet(const std::string& model);
     ~YuNet() {};
-    void process(const cv::Mat& img, std::vector<Face>& faces);
+    void process(const cv::Mat& img, cv::Mat& faces) {
+	model->setInputSize(img.size());
+	model->detect(img, faces);
+    }
 private:
-    cv::dnn::Net net;
+    cv::Ptr<cv::FaceDetectorYN> model;
 };
 #endif

data/lib/circe/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class Circe
-    VERSION = '0.1.0'
+    VERSION = '0.2.1'
 end

data/lib/circe.rb CHANGED Viewed

@@ -3,9 +3,9 @@ class Circe
     private
     # Don't know how to do it inside the c extension
-    DATA_DIR   = File.join(__dir__, '..', 'data').freeze
-    ONNX_YOLO  = File.join(DATA_DIR, 'yolov5s.onnx')
-    ONNX_YUNET = File.join(DATA_DIR, 'face_detection_yunet_2022mar.onnx')
+    DATA_DIR     = File.join(__dir__, '..', 'data').freeze
+    ONNX_YOLO     = [ File.join(DATA_DIR, 'yolov8s.onnx'), 480, 640 ]
+    ONNX_YUNET    = [ File.join(DATA_DIR, 'face_detection_yunet_2023mar.onnx') ]
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: circe
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Stéphane D'Alu
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-02-03 00:00:00.000000000 Z
+date: 2024-07-05 00:00:00.000000000 Z
 dependencies: []
 description: |2+
@@ -22,14 +22,16 @@ extensions:
 extra_rdoc_files: []
 files:
 - circe.gemspec
-- data/face_detection_yunet_2022mar.onnx
-- data/yolov5s.onnx
+- data/face_detection_yunet_2023mar.onnx
+- data/yolov5su-sim.onnx
+- data/yolov5su.onnx
+- data/yolov8s-sim.onnx
+- data/yolov8s.onnx
 - ext/camera_model.h
 - ext/circe.cpp
 - ext/extconf.rb
 - ext/yolo.cpp
 - ext/yolo.h
-- ext/yunet.cpp
 - ext/yunet.h
 - lib/circe.rb
 - lib/circe/version.rb
@@ -52,7 +54,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.4.2
+rubygems_version: 3.5.9
 signing_key:
 specification_version: 4
 summary: Face and object recognition

data/data/face_detection_yunet_2022mar.onnx DELETED Viewed

Binary file

data/ext/yunet.cpp DELETED Viewed

@@ -1,132 +0,0 @@
-#include <cmath>
-#include <string>
-#include <vector>
-#include <numeric>
-#include <algorithm>
-#include <opencv2/dnn.hpp>
-#include "yunet.h"
-YuNet::YuNet(const std::string& model_filename)
-{
-    net = cv::dnn::readNetFromONNX(model_filename);
-    net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
-    net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
-}
-void YuNet::process(const cv::Mat& img, std::vector<YuNet::Face>& faces)
-{
-    /* -- Preparing for image size -- */
-    cv::Size model_size;
-    model_size.width  = MODEL_WIDTH;
-    model_size.height = MODEL_WIDTH * img.rows / img.cols;
-    model_size.height = (model_size.height / 32) * 32;
-    std::pair<int32_t, int32_t> feature_map_2th = {
-	(model_size.height + 1) / 2 / 2,
-	(model_size.width  + 1) / 2 / 2
-    };
-    std::vector<std::pair<int32_t, int32_t>> feature_map_list;
-    feature_map_list.push_back({ (feature_map_2th.first  + 1) / 2 ,
-	                         (feature_map_2th.second + 1) / 2 });
-    for (int32_t i = 0; i < 3; i++) {
-        const auto& previous = feature_map_list.back();
-        feature_map_list.push_back({ (previous.first  + 1) / 2 ,
-		                     (previous.second + 1) / 2 });
-    }
-    std::vector<std::vector<float>> prior_list;
-    for (int i = 0; i < static_cast<int32_t>(feature_map_list.size()); i++) {
-        const auto& min_sizes   = MIN_SIZES[i];
-        const auto& feature_map = feature_map_list[i];
-        for (int y = 0; y < feature_map.first; y++) {
-            for (int x = 0; x < feature_map.second; x++) {
-                for (const auto& min_size : min_sizes) {
-                    float s_kx = static_cast<float>(min_size) / model_size.width;
-                    float s_ky = static_cast<float>(min_size) / model_size.height;
-                    float cx = (x + 0.5f) * STEPS[i] / model_size.width;
-                    float cy = (y + 0.5f) * STEPS[i] / model_size.height;
-                    prior_list.push_back({ cx, cy, s_kx, s_ky });
-                }
-            }
-        }
-    }
-    /* -- Pre-process -- */
-    cv::Mat blob;
-    cv::dnn::blobFromImage(img, blob, 1.0, model_size);
-    /* -- Inference -- */
-    std::vector<cv::Mat> outputs;
-    net.setInput(blob);
-    net.forward(outputs, { "conf", "iou",  "loc" });
-    /* -- Post Process -- */
-    const cv::Mat& mat_conf   = outputs[0];
-    const cv::Mat& mat_iou    = outputs[1];
-    const cv::Mat& mat_loc    = outputs[2];
-    const cv::Size image_size = img.size();
-    // Get score list
-    std::vector<float> cls_score;
-    for (int32_t row = 0; row < mat_conf.rows; row++) {
-        float val = mat_conf.at<float>(cv::Point(1, row));
-        cls_score.push_back(std::clamp(val, 0.0f, 1.0f));
-    }
-    std::vector<float> iou_score;
-    for (int32_t row = 0; row < mat_iou.rows; row++) {
-        float val = mat_conf.at<float>(cv::Point(0, row));
-        iou_score.push_back(std::clamp(val, 0.0f, 1.0f));
-    }
-    std::vector<float> score;
-    for (int32_t row = 0; row < mat_conf.rows; row++) {
-        score.push_back(std::sqrt(cls_score[row] * iou_score[row]));
-    }
-    // All bbox
-    std::vector<cv::Rect> bbox_all;
-    for (int row = 0; row < mat_loc.rows; row++) {
-        float cx = mat_loc.at<float>(cv::Point(0, row));
-        float cy = mat_loc.at<float>(cv::Point(1, row));
-        float w  = mat_loc.at<float>(cv::Point(2, row));
-        float h  = mat_loc.at<float>(cv::Point(3, row));
-        cx = prior_list[row][0] + cx * VARIANCES[0] * prior_list[row][2];
-        cy = prior_list[row][1] + cy * VARIANCES[0] * prior_list[row][3];
-        w  = prior_list[row][2] * std::exp(w * VARIANCES[0]);
-        h  = prior_list[row][3] * std::exp(h * VARIANCES[1]);
-        bbox_all.push_back({
-		static_cast<int32_t>((cx - w / 2) * image_size.width),
-		static_cast<int32_t>((cy - h / 2) * image_size.height),
-		static_cast<int32_t>(w * image_size.width),
-		static_cast<int32_t>(h * image_size.height) });
-    }
-    // Non-Maximum Suppression
-    std::vector<int> indices;
-    cv::dnn::NMSBoxes(bbox_all, score, CONF_THRESHOLD, NMS_THRESHOLD, indices);
-    // Get valid bbox and landmark
-    faces.clear();
-    for (int idx : indices) {
-        Landmark landmark; // (landmark is 5 points)
-        for (int i = 0; i < static_cast<int>(landmark.size()); i++) {
-	    cv::Point& p = landmark[i];
-            float x      = mat_loc.at<float>(cv::Point(4 + i * 2,     idx));
-            float y      = mat_loc.at<float>(cv::Point(4 + i * 2 + 1, idx));
-            p.x = static_cast<int32_t>((prior_list[idx][0] + x * VARIANCES[0] * prior_list[idx][2]) * image_size.width);
-            p.y = static_cast<int32_t>((prior_list[idx][1] + y * VARIANCES[0] * prior_list[idx][3]) * image_size.height);
-        }
-	faces.push_back({ bbox_all[idx], landmark });
-    }
-}