circe 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 683391e9186e4a20233cb25ce28f37b7bf56c88586b1ac72f1312081e4e48c7d
4
- data.tar.gz: 680a92f2318df871cdd2f6f98732f85adab5098e067fe7c4fa986832e5f32caf
3
+ metadata.gz: 046e41e59ccb0202ad32729da191ba2f003a9e3a0370f4454996fdd544f47103
4
+ data.tar.gz: 1d3d18f2bcacfc6ae6e8ff816d166b43f53686841e510fbf722a5a2c85f80080
5
5
  SHA512:
6
- metadata.gz: 84422ff365869d1615c4cf17bbc035426e7a4cbfc5bd501104d5e1b2a5ea12535150de13e192b0a2849102fb0a49f7e174b2f9b804027614fc50f68858d139de
7
- data.tar.gz: d679d1b61f155a5170426d007e8d06e73b5e01c12fd4dc12ab70ceddb29fc1c80a35ab178b83986c32f0f05e1b508d9e6cf56ce8fd4840bacbf1d67f96dc6312
6
+ metadata.gz: 17a645694b3ac252fe68048a58e63cbfe4d580a45ed8b3bdfda6cc0ff94e0ab821536a4bed63aff1a42b7ceba3637c889b0a767c9e51ab3e2f5bf4d4c193db53
7
+ data.tar.gz: 5fcffb7041bcd9f53f379576847748a5579d77b681b71e0eae810f3ce090aca5d83644c6bba442bfc06f3fd4e8bfb6c5550dd6ae172f9636bdf263822a94446e
data/ext/circe.cpp CHANGED
@@ -119,6 +119,9 @@ static ID id_class;
119
119
  static ID id_png;
120
120
  static ID id_jpg;
121
121
 
122
+ static ID id_label;
123
+ static ID id_thickness;
124
+ static ID id_color;
122
125
 
123
126
  static Yolo *yolo;
124
127
  static YuNet *yunet;
@@ -143,18 +146,88 @@ draw_label(cv::Mat& img, string label, Point& origin,
143
146
  }
144
147
 
145
148
  static void
146
- draw_labelbox(cv::Mat& img, string label, Rect& box,
147
- Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
148
- int thickness = 1) {
149
+ draw_box(cv::Mat& img, Rect& box,
150
+ Scalar& framecolor = BLUE, int thickness = 1) {
149
151
 
150
152
  Point a = { box.x, box.y };
151
153
  Point b = { box.x + box.width, box.y + box.height };
152
154
 
153
155
  cv::rectangle(img, a, b, framecolor, thickness);
154
- draw_label(img, label, a, textcolor, framecolor);
155
156
  }
156
157
 
158
+ static void
159
+ draw_labelbox(cv::Mat& img, string label, Rect& box,
160
+ Scalar& framecolor = BLUE, Scalar& textcolor = BLACK,
161
+ int thickness = 1) {
162
+
163
+ Point o = { box.x, box.y };
164
+
165
+ draw_box(img, box, framecolor, thickness);
166
+ draw_label(img, label, o, textcolor, framecolor);
167
+ }
168
+
169
+
170
+ VALUE
171
+ circe_annotate(Mat& img, Rect& box, VALUE v_annotation, int *state) {
172
+ if (img.empty() || NIL_P(v_annotation))
173
+ return Qnil;
174
+
175
+ VALUE v_label = Qnil;
176
+ VALUE v_color = ULONG2NUM(0x0000ff);
177
+ VALUE v_thickness = INT2NUM(1);
178
+
179
+ VALUE s_label = rb_id2sym(id_label);
180
+ VALUE s_color = rb_id2sym(id_color);
181
+ VALUE s_thickness = rb_id2sym(id_thickness);
182
+
183
+ switch (TYPE(v_annotation)) {
184
+ case T_NIL:
185
+ break;
186
+ case T_HASH:
187
+ v_thickness = rb_hash_aref(v_annotation, s_thickness);
188
+ v_color = rb_hash_aref(v_annotation, s_color);
189
+ v_label = rb_hash_aref(v_annotation, s_label);
190
+ break;
191
+ case T_ARRAY:
192
+ switch(RARRAY_LENINT(v_annotation)) {
193
+ default:
194
+ case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
195
+ case 2: v_color = RARRAY_AREF(v_annotation, 1);
196
+ case 1: v_label = RARRAY_AREF(v_annotation, 0);
197
+ case 0: break;
198
+ }
199
+ break;
200
+ case T_STRING:
201
+ v_label = v_annotation;
202
+ break;
203
+ }
204
+
205
+ // No color, no rendering
206
+ if (NIL_P(v_color))
207
+ return Qnil;
208
+
209
+ long rgb = NUM2ULONG(v_color);
210
+ Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
211
+ (rgb >> 8) & 0xFF,
212
+ (rgb >> 16) & 0xFF);
213
+
214
+ if (! NIL_P(v_thickness)) {
215
+ int thickness = NUM2INT(v_thickness);
216
+ draw_box(img, box, color, thickness);
217
+ }
218
+ if (! NIL_P(v_label)) {
219
+ string label = StringValueCStr(v_label);
220
+ Point o = { box.x, box.y };
221
+ draw_label(img, label, o, BLACK, color);
222
+ }
157
223
 
224
+ // Return normalized parameters
225
+ VALUE r = rb_hash_new();
226
+ rb_hash_aset(r, s_label, v_label );
227
+ rb_hash_aset(r, s_color, v_color );
228
+ rb_hash_aset(r, s_thickness, v_thickness);
229
+ return r;
230
+ }
158
231
 
159
232
 
160
233
 
@@ -186,40 +259,11 @@ yunet_process_features(vector<YuNet::Face>& faces,
186
259
  rb_ary_push(v_features, v_feature);
187
260
 
188
261
  if (!img.empty() && rb_block_given_p()) {
189
- VALUE v_annotation= rb_yield_splat(v_feature);
190
- VALUE v_label = Qnil;
191
- VALUE v_color = ULONG2NUM(0x0000ff);
192
- VALUE v_thickness = INT2NUM(1);
193
-
194
- switch (TYPE(v_annotation)) {
195
- case T_NIL:
196
- break;
197
- case T_HASH:
198
- break;
199
- case T_ARRAY:
200
- switch(RARRAY_LENINT(v_annotation)) {
201
- default:
202
- case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
203
- case 2: v_color = RARRAY_AREF(v_annotation, 1);
204
- case 1: v_label = RARRAY_AREF(v_annotation, 0);
205
- case 0: break;
206
- }
207
- break;
208
- case T_STRING:
209
- v_label = v_annotation;
210
- break;
211
- }
212
-
213
- if (! NIL_P(v_label)) {
214
- string label = StringValueCStr(v_label);
215
- long rgb = NUM2ULONG(v_color);
216
- int thickness = NUM2INT(v_thickness);
217
- Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
218
- (rgb >> 8) & 0xFF,
219
- (rgb >> 16) & 0xFF);
220
- draw_labelbox(img, label, box, color, BLACK, thickness);
221
-
222
- for (const auto& p : lmark) {
262
+ VALUE v_annotation = rb_yield_splat(v_feature);
263
+ VALUE cfg = circe_annotate(img, box, v_annotation, state);
264
+
265
+ if (! NIL_P(cfg)) {
266
+ for (const auto& p : lmark) {
223
267
  cv::circle(img, p, 3, cv::Scalar(255, 0, 0), 2);
224
268
  }
225
269
  }
@@ -248,41 +292,9 @@ yolo_process_features(vector<Yolo::Item>& items,
248
292
  v_name, v_confidence);
249
293
  rb_ary_push(v_features, v_feature);
250
294
 
251
- if (rb_block_given_p()) {
295
+ if (!img.empty() && rb_block_given_p()) {
252
296
  VALUE v_annotation = rb_yield_splat(v_feature);
253
-
254
- VALUE v_label = Qnil;
255
- VALUE v_color = ULONG2NUM(0x0000ff);
256
- VALUE v_thickness = INT2NUM(1);
257
-
258
- switch (TYPE(v_annotation)) {
259
- case T_NIL:
260
- break;
261
- case T_HASH:
262
- break;
263
- case T_ARRAY:
264
- switch(RARRAY_LENINT(v_annotation)) {
265
- default:
266
- case 3: v_thickness = RARRAY_AREF(v_annotation, 2);
267
- case 2: v_color = RARRAY_AREF(v_annotation, 1);
268
- case 1: v_label = RARRAY_AREF(v_annotation, 0);
269
- case 0: break;
270
- }
271
- break;
272
- case T_STRING:
273
- v_label = v_annotation;
274
- break;
275
- }
276
-
277
- if (! NIL_P(v_label)) {
278
- string label = StringValueCStr(v_label);
279
- long rgb = NUM2ULONG(v_color);
280
- int thickness = NUM2INT(v_thickness);
281
- Scalar color = cv::Scalar((rgb >> 0) & 0xFF,
282
- (rgb >> 8) & 0xFF,
283
- (rgb >> 16) & 0xFF);
284
- draw_labelbox(img, label, box, color, BLACK, thickness);
285
- }
297
+ circe_annotate(img, box, v_annotation, state);
286
298
  }
287
299
  }
288
300
  }
@@ -364,6 +376,10 @@ circe_m_analyze(int argc, VALUE* argv, VALUE self) {
364
376
  cv::imencode(format, o_img, buf);
365
377
  v_image = rb_str_new(reinterpret_cast<char*>(buf.data()), buf.size());
366
378
  }
379
+
380
+ i_img.release();
381
+ o_img.release();
382
+
367
383
  return rb_ary_new_from_args(2, v_features, v_image);
368
384
 
369
385
  exception:
@@ -382,22 +398,34 @@ void Init_core(void) {
382
398
  eCirceError = rb_define_class_under(cCirce, "Error", rb_eStandardError);
383
399
  // myclass = rb_const_get(mymodule, sym_myclass);
384
400
 
385
- VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
386
- VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
401
+ VALUE v_onnx_yolo = rb_const_get(cCirce, rb_intern("ONNX_YOLO"));
402
+ VALUE v_yolo_path = RARRAY_AREF(v_onnx_yolo, 0);
403
+ VALUE v_yolo_height = RARRAY_AREF(v_onnx_yolo, 1);
404
+ VALUE v_yolo_width = RARRAY_AREF(v_onnx_yolo, 2);
405
+
406
+ VALUE v_onnx_yunet = rb_const_get(cCirce, rb_intern("ONNX_YUNET"));
407
+ VALUE v_yunet_path = RARRAY_AREF(v_onnx_yunet, 0);
387
408
 
388
- static Yolo _yolo = { StringValueCStr(v_onnx_yolo ) };
389
- static YuNet _yunet = { StringValueCStr(v_onnx_yunet) };
409
+
410
+
411
+ static Yolo _yolo = { StringValueCStr(v_yolo_path ),
412
+ { NUM2INT(v_yolo_width),
413
+ NUM2INT(v_yolo_height) }};
414
+ static YuNet _yunet = { StringValueCStr(v_yunet_path) };
390
415
 
391
416
  yolo = &_yolo;
392
417
  yunet = &_yunet;
393
418
 
394
419
 
395
- id_debug = rb_intern_const("debug" );
396
- id_face = rb_intern_const("face" );
397
- id_classify = rb_intern_const("classify");
398
- id_class = rb_intern_const("class" );
399
- id_png = rb_intern_const("png" );
400
- id_jpg = rb_intern_const("jpg" );
420
+ id_debug = rb_intern_const("debug" );
421
+ id_face = rb_intern_const("face" );
422
+ id_classify = rb_intern_const("classify" );
423
+ id_class = rb_intern_const("class" );
424
+ id_png = rb_intern_const("png" );
425
+ id_jpg = rb_intern_const("jpg" );
426
+ id_label = rb_intern_const("label" );
427
+ id_thickness = rb_intern_const("thickness");
428
+ id_color = rb_intern_const("color" );
401
429
 
402
430
 
403
431
  rb_define_method(cCirce, "analyze", circe_m_analyze, -1);
data/ext/yolo.cpp CHANGED
@@ -1,3 +1,14 @@
1
+ /*
2
+ * https://github.com/ultralytics/ultralytics/blob/main/examples/YOLOv8-CPP-Inference/inference.cpp
3
+ * https://github.com/opencv/opencv/blob/4.x/samples/dnn/yolo_detector.cpp
4
+ *
5
+ * yolov5 has an output of shape:
6
+ * (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
7
+ * yolov8 has an output of shape:
8
+ * (batchSize, 84, 8400) (Num classes + box[x,y,w,h])
9
+ *
10
+ * yolo export model=yolov8s.pt imgsz=480,640 format=onnx opset=12
11
+ */
1
12
  #include <tuple>
2
13
  #include <string>
3
14
  #include <opencv2/imgproc.hpp>
@@ -6,89 +17,144 @@
6
17
  #include "yolo.h"
7
18
 
8
19
 
9
- Yolo::Yolo(const std::string& model) {
10
- net = cv::dnn::readNetFromONNX(model);
20
+ Yolo::Yolo(const std::string& model, cv::Size size) {
21
+ this->net = cv::dnn::readNetFromONNX(model);
22
+ this->size = size;
11
23
  }
12
24
 
13
25
 
14
26
  void Yolo::process(cv::Mat &img, std::vector<Yolo::Item> &items) {
27
+ int version = 5;
15
28
 
29
+ cv::Mat input = img;
30
+ if (letterBoxForSquare && size.width == size.height)
31
+ input = formatToSquare(input);
32
+
16
33
  // Pre-process
17
34
  cv::Mat blob;
18
- std::vector<cv::Mat> outputs;
19
-
20
- cv::dnn::blobFromImage(img, blob, 1./255.,
21
- cv::Size(INPUT_WIDTH, INPUT_HEIGHT),
22
- cv::Scalar(), true, false);
35
+ cv::dnn::blobFromImage(input, blob, 1./255.,
36
+ this->size, cv::Scalar(), true, false);
23
37
 
24
38
  // Process
39
+ std::vector<cv::Mat> outputs;
25
40
  net.setInput(blob);
26
41
  net.forward(outputs, net.getUnconnectedOutLayersNames());
27
42
 
28
- // Post-process
29
- std::vector<int> class_ids;
30
- std::vector<float> confidences;
31
- std::vector<cv::Rect> boxes;
43
+ // Output
44
+ int rows = outputs[0].size[1];
45
+ int dimensions = outputs[0].size[2];
46
+
47
+ // Infer yolo version
48
+ // -> Check if the shape[2] is more than shape[1] (yolov8)
49
+ if (dimensions > rows) {
50
+ version = 8;
51
+ }
52
+
53
+ // Adjust according to version
54
+ if (version == 8) {
55
+ // cv::transposeND(outs[0], {0, 2, 1}, outs[0]);
56
+ rows = outputs[0].size[2];
57
+ dimensions = outputs[0].size[1];
58
+ outputs[0] = outputs[0].reshape(1, dimensions);
59
+ cv::transpose(outputs[0], outputs[0]);
60
+ }
61
+
62
+ // Output
63
+ float *data = (float *)outputs[0].data;
32
64
 
33
65
  // Resizing factor.
34
- float x_factor = img.cols / INPUT_WIDTH;
35
- float y_factor = img.rows / INPUT_HEIGHT;
66
+ float x_factor = input.cols / size.width;
67
+ float y_factor = input.rows / size.height;
36
68
 
37
- float *data = (float *)outputs[0].data;
38
- const int dimensions = 85;
69
+ // Post-process
70
+ std::vector<int> class_ids;
71
+ std::vector<float> confidences;
72
+ std::vector<cv::Rect> boxes;
39
73
 
40
- // 25200 for default size 640.
41
- const int rows = 25200;
42
- // Iterate through 25200 detections.
43
- for (int i = 0; i < rows; ++i) {
44
- float confidence = data[4];
45
-
46
- // Discard bad detections and continue.
47
- if (confidence >= CONFIDENCE_THRESHOLD) {
48
- float *classes_scores = data + 5;
49
-
50
- // Create a 1x85 Mat and store class scores of 80 classes.
51
- cv::Mat scores(1, names.size(), CV_32FC1, classes_scores);
52
-
53
- // Perform minMaxLoc and acquire the index of best class score.
54
- cv::Point class_id;
55
- double max_class_score;
56
- minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
57
-
58
- // Continue if the class score is above the threshold.
59
- if (max_class_score > SCORE_THRESHOLD) {
60
- // Store class ID and confidence in the pre-defined
61
- // respective vectors.
62
- float cx = data[0]; // Center
63
- float cy = data[1];
64
- float w = data[2]; // Box dimension
65
- float h = data[3];
66
-
67
- // Bounding box coordinates.
68
- int left = int((cx - 0.5 * w) * x_factor);
69
- int top = int((cy - 0.5 * h) * y_factor);
70
- int width = int(w * x_factor);
71
- int height = int(h * y_factor);
72
-
73
- // Store good detections in the boxes vector.
74
- confidences.push_back(confidence);
74
+
75
+ if (version == 5) {
76
+ for (int i = 0; i < rows; ++i) {
77
+ float confidence = data[4];
78
+
79
+ if (confidence >= CONFIDENCE_THRESHOLD) {
80
+ float *classes_scores = data + 5;
81
+
82
+ cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
83
+ cv::Point class_id;
84
+ double max_class_score;
85
+
86
+ minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
87
+
88
+ if (max_class_score > SCORE_THRESHOLD) {
89
+ confidences.push_back(confidence);
90
+ class_ids.push_back(class_id.x);
91
+
92
+ float x = data[0];
93
+ float y = data[1];
94
+ float w = data[2];
95
+ float h = data[3];
96
+
97
+ int left = int((x - 0.5 * w) * x_factor);
98
+ int top = int((y - 0.5 * h) * y_factor);
99
+ int width = int(w * x_factor);
100
+ int height = int(h * y_factor);
101
+
102
+ boxes.push_back(cv::Rect(left, top, width, height));
103
+ }
104
+ }
105
+ data += dimensions;
106
+ }
107
+ } else if (version == 8) {
108
+ for (int i = 0; i < rows; ++i) {
109
+ float *classes_scores = data + 4;
110
+
111
+ cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
112
+ cv::Point class_id;
113
+ double maxClassScore;
114
+
115
+ minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
116
+
117
+ if (maxClassScore > SCORE_THRESHOLD) {
118
+ confidences.push_back(maxClassScore);
75
119
  class_ids.push_back(class_id.x);
120
+
121
+ float x = data[0];
122
+ float y = data[1];
123
+ float w = data[2];
124
+ float h = data[3];
125
+
126
+ int left = int((x - 0.5 * w) * x_factor);
127
+ int top = int((y - 0.5 * h) * y_factor);
128
+ int width = int(w * x_factor);
129
+ int height = int(h * y_factor);
130
+
76
131
  boxes.push_back(cv::Rect(left, top, width, height));
77
132
  }
78
- }
79
- // Jump to the next row.
80
- data += 85;
133
+ data += dimensions;
134
+ }
81
135
  }
136
+
82
137
 
83
138
  // Perform Non-Maximum Suppression and draw predictions.
84
- std::vector<int> indices;
139
+ std::vector<int> nms_result;
85
140
  cv::dnn::NMSBoxes(boxes, confidences,
86
- SCORE_THRESHOLD, NMS_THRESHOLD, indices);
141
+ SCORE_THRESHOLD, NMS_THRESHOLD, nms_result);
87
142
 
88
143
  items.clear();
89
144
 
90
- for (int i = 0; i < indices.size(); i++) {
91
- int idx = indices[i];
92
- items.push_back({ names[class_ids[idx]],confidences[idx],boxes[idx] });
145
+ for (int i = 0; i < nms_result.size(); i++) {
146
+ int idx = nms_result[i];
147
+ items.push_back({ classes[class_ids[idx]],confidences[idx],boxes[idx] });
93
148
  }
94
149
  }
150
+
151
+
152
+ cv::Mat Yolo::formatToSquare(const cv::Mat &source)
153
+ {
154
+ int col = source.cols;
155
+ int row = source.rows;
156
+ int _max = MAX(col, row);
157
+ cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
158
+ source.copyTo(result(cv::Rect(0, 0, col, row)));
159
+ return result;
160
+ }
data/ext/yolo.h CHANGED
@@ -16,12 +16,14 @@ public:
16
16
  private:
17
17
  static constexpr float INPUT_WIDTH = 640.0;
18
18
  static constexpr float INPUT_HEIGHT = 640.0;
19
- static constexpr float SCORE_THRESHOLD = 0.5;
20
- static constexpr float NMS_THRESHOLD = 0.45;
21
- static constexpr float CONFIDENCE_THRESHOLD = 0.45;
19
+ static constexpr float CONFIDENCE_THRESHOLD = 0.25;
20
+ static constexpr float SCORE_THRESHOLD = 0.45;
21
+ static constexpr float NMS_THRESHOLD = 0.50;
22
22
 
23
+ bool letterBoxForSquare = true;
24
+
23
25
  public:
24
- const std::vector<std::string> names = {
26
+ const std::vector<std::string> classes = {
25
27
  "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train",
26
28
  "truck", "boat", "traffic light", "fire hydrant", "stop sign",
27
29
  "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep",
@@ -38,11 +40,13 @@ private:
38
40
  };
39
41
 
40
42
  public:
41
- Yolo(const std::string& model);
43
+ Yolo(const std::string& model, cv::Size size);
42
44
  void process(cv::Mat &img, std::vector<Item> &items);
43
45
 
44
46
  private:
45
47
  cv::dnn::Net net;
48
+ cv::Size size;
49
+ cv::Mat formatToSquare(const cv::Mat &source);
46
50
  };
47
51
 
48
52
  #endif
data/lib/circe/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Circe
2
- VERSION = '0.1.0'
2
+ VERSION = '0.1.1'
3
3
  end
data/lib/circe.rb CHANGED
@@ -4,8 +4,8 @@ class Circe
4
4
 
5
5
  # Don't know how to do it inside the c extension
6
6
  DATA_DIR = File.join(__dir__, '..', 'data').freeze
7
- ONNX_YOLO = File.join(DATA_DIR, 'yolov5s.onnx')
8
- ONNX_YUNET = File.join(DATA_DIR, 'face_detection_yunet_2022mar.onnx')
7
+ ONNX_YOLO = [ File.join(DATA_DIR, 'yolov5s.onnx'), 640, 640 ]
8
+ ONNX_YUNET = [ File.join(DATA_DIR, 'face_detection_yunet_2022mar.onnx') ]
9
9
 
10
10
  end
11
11
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: circe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stéphane D'Alu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-02-03 00:00:00.000000000 Z
11
+ date: 2024-04-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |2+
14
14
 
@@ -52,7 +52,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
52
52
  - !ruby/object:Gem::Version
53
53
  version: '0'
54
54
  requirements: []
55
- rubygems_version: 3.4.2
55
+ rubygems_version: 3.4.20
56
56
  signing_key:
57
57
  specification_version: 4
58
58
  summary: Face and object recognition