contrek 1.2.7 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae46fa52ddce3383ef534142814ed28ddc5cae45042e677c8f445140bee67e82
4
- data.tar.gz: 57cbaf53a0d6e8c7d31bdfcef0718c94654ceaf0fd7b2be84325751dc29f6224
3
+ metadata.gz: d6702682d1ddd30acbdc45a4344b1d761473afc336b7c07734f169739228f855
4
+ data.tar.gz: 8fde7e85a08f0e8037b7711b303fefb0d4edcff1f7c9725a767ff09f815445a6
5
5
  SHA512:
6
- metadata.gz: ee922b6b787ac0f91b98a2e7d608c351499ea5405e325a0eb6d23f751a29075b390c08d757824132c7fbef7d64cef9f3af7dbb11709506e84777ff652f9f15fc
7
- data.tar.gz: 848102a6980857b1f4d3914e60de04408d0f7a4cc4c950a20b3fa8ef7231e31db029220eb6e3fb71847b6986a09c0d7b1fedae70f8e30e8ef758a3adeabeddcd
6
+ metadata.gz: f498831ee81c1ff533ce412b39efdfe9c9dc42ff40679749600a2e2cf833cd8193b1f3ca8bf00e8ff98ae3a3798f9670380274d3cf63e485bad4428200256af4
7
+ data.tar.gz: 4fd128cf5cd14d929f2baaf9c611708cd9bd8ce01550e267f1d93252f5f23a9012b6f66bb59a4ab4390cb02c38b31a37f3fc6fd56d22478cf046359834a3bf0d
@@ -11,8 +11,10 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
11
11
  list(FILTER CMAKE_CXX_FLAGS EXCLUDE REGEX "-DNDEBUG")
12
12
  list(FILTER CMAKE_C_FLAGS EXCLUDE REGEX "-DNDEBUG")
13
13
  else()
14
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pthread -march=native -DNDEBUG -Ofast -flto")
15
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -march=native -fPIC -DNDEBUG")
14
+ #set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pthread -march=native -DNDEBUG -Ofast -flto")
15
+ #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -march=native -fPIC -DNDEBUG")
16
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pthread -march=native -DNDEBUG -Ofast -flto -fopenmp-simd -ftree-vectorize")
17
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -pthread -march=native -DNDEBUG -Ofast -flto -fopenmp-simd -ftree-vectorize")
16
18
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread -flto=auto -Wl,--no-as-needed")
17
19
  find_library(TCMALLOC_LIB tcmalloc)
18
20
  if(TCMALLOC_LIB)
@@ -46,6 +46,7 @@ PolygonFinder::PolygonFinder(Bitmap *bitmap,
46
46
  cpu_timer.start();
47
47
  scan();
48
48
  reports["scan"] = cpu_timer.stop();
49
+ //std::cout << "scan " << reports["scan"] << std::endl;
49
50
  //=====================//
50
51
 
51
52
  //= BUILD_TANGS_SEQUENCE ===//
@@ -20,6 +20,8 @@
20
20
  #include <sstream>
21
21
  #include <stdexcept>
22
22
  #include <limits>
23
+ #include <cstdint>
24
+
23
25
  #include "../bitmaps/Bitmap.h"
24
26
  #include "NodeCluster.h"
25
27
  #include "Node.h"
@@ -205,7 +207,7 @@ class PolygonFinder {
205
207
  std::map<std::string, double> reports;
206
208
  void scan();
207
209
  CpuTimer cpu_timer;
208
-
210
+ /*
209
211
  template <typename M, typename F>
210
212
  void run_loop(M* specific_matcher, F&& fetch_color, int offset) {
211
213
  int img_h = this->source_bitmap->h();
@@ -236,7 +238,299 @@ class PolygonFinder {
236
238
  }
237
239
  }
238
240
  }
239
- }
241
+ }*/
242
+
243
+ template <typename M, typename F>
244
+ void run_loop(M* specific_matcher, F&& fetch_color, int offset) {
245
+ int img_h = this->source_bitmap->h();
246
+ int bpp = this->source_bitmap->get_bytes_per_pixel();
247
+
248
+ for (int y = 0; y < img_h; y++) {
249
+ const unsigned char* row_ptr = this->source_bitmap->get_row_ptr(y);
250
+ const unsigned char* p = row_ptr + (this->start_x * bpp);
251
+
252
+ int min_x = 0;
253
+ bool matching = false;
254
+ unsigned char last_red_value = 0;
255
+
256
+ int x = this->start_x;
257
+
258
+ if (bpp == 4) {
259
+ for (; x <= this->end_x - 4; x += 4) {
260
+ // 1. Fetch each of the 4 pixels once via fetch_color (intended as the only memory read per pixel)
261
+ unsigned int c0 = fetch_color(p);
262
+ unsigned int c1 = fetch_color(p + 4);
263
+ unsigned int c2 = fetch_color(p + 8);
264
+ unsigned int c3 = fetch_color(p + 12);
265
+
266
+ // 2. Derive the per-pixel byte value from the already-fetched color (cast only, no second read of p)
267
+ // NOTE(review): assumes the low byte of fetch_color's result is the byte at p[0] - confirm for every fetch_color passed in
268
+ unsigned char v0 = static_cast<unsigned char>(c0);
269
+ unsigned char v1 = static_cast<unsigned char>(c1);
270
+ unsigned char v2 = static_cast<unsigned char>(c2);
271
+ unsigned char v3 = static_cast<unsigned char>(c3);
272
+
273
+ p += 16;
274
+
275
+ bool m0 = specific_matcher->match(c0);
276
+ bool m1 = specific_matcher->match(c1);
277
+ bool m2 = specific_matcher->match(c2);
278
+ bool m3 = specific_matcher->match(c3);
279
+
280
+ // Run tracking for pixels 0..3: open a run on the first match, emit a node when a run ends
281
+ if (m0) {
282
+ if (!matching) { min_x = x; last_red_value = v0; matching = true; }
283
+ } else if (matching) {
284
+ this->node_cluster->add_node(min_x, x - 1, y, last_red_value, offset);
285
+ matching = false;
286
+ }
287
+
288
+ if (m1) {
289
+ if (!matching) { min_x = x + 1; last_red_value = v1; matching = true; }
290
+ } else if (matching) {
291
+ this->node_cluster->add_node(min_x, x, y, last_red_value, offset);
292
+ matching = false;
293
+ }
294
+
295
+ if (m2) {
296
+ if (!matching) { min_x = x + 2; last_red_value = v2; matching = true; }
297
+ } else if (matching) {
298
+ this->node_cluster->add_node(min_x, x + 1, y, last_red_value, offset);
299
+ matching = false;
300
+ }
301
+
302
+ if (m3) {
303
+ if (!matching) { min_x = x + 3; last_red_value = v3; matching = true; }
304
+ } else if (matching) {
305
+ this->node_cluster->add_node(min_x, x + 2, y, last_red_value, offset);
306
+ matching = false;
307
+ }
308
+ }
309
+ }
310
+
311
+ // --- SCALAR CLEANUP: remaining pixels (and every pixel when bpp != 4) ---
312
+ for (; x < this->end_x; x++) {
313
+ unsigned int color = fetch_color(p);
314
+ // Same approach here: take the byte value from the fetched color rather than re-reading p
315
+ unsigned char current_val = static_cast<unsigned char>(color);
316
+ p += bpp;
317
+
318
+ if (specific_matcher->match(color)) {
319
+ if (!matching) {
320
+ min_x = x;
321
+ last_red_value = current_val;
322
+ matching = true;
323
+ }
324
+ } else if (matching) {
325
+ this->node_cluster->add_node(min_x, x - 1, y, last_red_value, offset);
326
+ matching = false;
327
+ }
328
+ }
329
+
330
+ if (matching) {
331
+ this->node_cluster->add_node(min_x, this->end_x - 1, y, last_red_value, offset);
332
+ }
333
+ }
334
+ }
335
+
336
+ /*template <typename M, typename F>
337
+ void run_loop(M* specific_matcher, F&& fetch_color, int offset) {
338
+ int img_h = this->source_bitmap->h();
339
+ int bpp = this->source_bitmap->get_bytes_per_pixel();
340
+
341
+ for (int y = 0; y < img_h; y++) {
342
+ const unsigned char* row_ptr = this->source_bitmap->get_row_ptr(y);
343
+ const unsigned char* p = row_ptr + (this->start_x * bpp);
344
+
345
+ int min_x = 0;
346
+ bool matching = false;
347
+ unsigned char last_red_value = 0;
348
+
349
+ int x = this->start_x;
350
+
351
+ // --- 4-WAY UNROLLED LOOP (REFINED) ---
352
+ if (bpp == 4) {
353
+ for (; x <= this->end_x - 4; x += 4) {
354
+ // Read the four pixels back to back (intended to help register and L1 cache use)
355
+ unsigned int c0 = fetch_color(p);
356
+ unsigned char v0 = p[0];
357
+
358
+ unsigned int c1 = fetch_color(p + 4);
359
+ unsigned char v1 = p[4];
360
+
361
+ unsigned int c2 = fetch_color(p + 8);
362
+ unsigned char v2 = p[8];
363
+
364
+ unsigned int c3 = fetch_color(p + 12);
365
+ unsigned char v3 = p[12];
366
+
367
+ p += 16; // Moved here: advances by exactly 4 RGBA pixels
368
+
369
+ bool m0 = specific_matcher->match(c0);
370
+ bool m1 = specific_matcher->match(c1);
371
+ bool m2 = specific_matcher->match(c2);
372
+ bool m3 = specific_matcher->match(c3);
373
+
374
+ // Pixel 0
375
+ if (m0) {
376
+ if (!matching) { min_x = x; last_red_value = v0; matching = true; }
377
+ } else if (matching) {
378
+ this->node_cluster->add_node(min_x, x - 1, y, last_red_value, offset);
379
+ matching = false;
380
+ }
381
+
382
+ // Pixel 1
383
+ if (m1) {
384
+ if (!matching) { min_x = x + 1; last_red_value = v1; matching = true; }
385
+ } else if (matching) {
386
+ this->node_cluster->add_node(min_x, x, y, last_red_value, offset);
387
+ matching = false;
388
+ }
389
+
390
+ // Pixel 2
391
+ if (m2) {
392
+ if (!matching) { min_x = x + 2; last_red_value = v2; matching = true; }
393
+ } else if (matching) {
394
+ this->node_cluster->add_node(min_x, x + 1, y, last_red_value, offset);
395
+ matching = false;
396
+ }
397
+
398
+ // Pixel 3
399
+ if (m3) {
400
+ if (!matching) { min_x = x + 3; last_red_value = v3; matching = true; }
401
+ } else if (matching) {
402
+ this->node_cluster->add_node(min_x, x + 2, y, last_red_value, offset);
403
+ matching = false;
404
+ }
405
+ }
406
+ }
407
+
408
+ // --- SCALAR CLEANUP FOR THE REMAINING PIXELS ---
409
+ for (; x < this->end_x; x++) {
410
+ unsigned int color = fetch_color(p);
411
+ unsigned char current_val = p[0];
412
+ p += bpp;
413
+
414
+ if (specific_matcher->match(color)) {
415
+ if (!matching) {
416
+ min_x = x;
417
+ last_red_value = current_val;
418
+ matching = true;
419
+ }
420
+ } else if (matching) {
421
+ this->node_cluster->add_node(min_x, x - 1, y, last_red_value, offset);
422
+ matching = false;
423
+ }
424
+ }
425
+
426
+ // --- END OF ROW: CLOSE ANY RUN STILL OPEN ---
427
+ if (matching) {
428
+ this->node_cluster->add_node(min_x, this->end_x - 1, y, last_red_value, offset);
429
+ }
430
+ }
431
+ }*/
432
+
433
+
434
+
435
+ /*
436
+
437
+ #include <cstdint>
438
+
439
+ template <typename M, typename F>
440
+ void run_loop(M* specific_matcher, F&& fetch_color, int offset) {
441
+ int img_h = this->source_bitmap->h();
442
+ int bpp = this->source_bitmap->get_bytes_per_pixel();
443
+
444
+ for (int y = 0; y < img_h; y++) {
445
+ const unsigned char* row_ptr = this->source_bitmap->get_row_ptr(y);
446
+ const unsigned char* p = row_ptr + (this->start_x * bpp);
447
+
448
+ int min_x = 0;
449
+ bool matching = false;
450
+ unsigned char last_red_value = 0;
451
+
452
+ int x = this->start_x;
453
+
454
+ // --- PART 1: 4-WAY UNROLLING (SIMD FRIENDLY, NO DEPENDENCY) ---
455
+ if (bpp == 4) {
456
+ for (; x <= this->end_x - 4; x += 4) {
457
+ // Load 4 pixels into separate variables (no dependencies between them)
458
+ unsigned int c0 = fetch_color(p);
459
+ unsigned int c1 = fetch_color(p + 4);
460
+ unsigned int c2 = fetch_color(p + 8);
461
+ unsigned int c3 = fetch_color(p + 12);
462
+
463
+ unsigned char v0 = p[0];
464
+ unsigned char v1 = p[4];
465
+ unsigned char v2 = p[8];
466
+ unsigned char v3 = p[12];
467
+
468
+ p += 16; // Advance by 4 pixels (4 * 4 bytes)
469
+
470
+ // Run the four matches independently; the intent is that the compiler can turn this into vector instructions
471
+ bool m0 = specific_matcher->match(c0);
472
+ bool m1 = specific_matcher->match(c1);
473
+ bool m2 = specific_matcher->match(c2);
474
+ bool m3 = specific_matcher->match(c3);
475
+
476
+ // Pixel 0
477
+ if (m0) {
478
+ if (!matching) { min_x = x; last_red_value = v0; matching = true; }
479
+ } else if (matching) {
480
+ this->node_cluster->add_node(min_x, x - 1, y, last_red_value, offset);
481
+ matching = false;
482
+ }
483
+
484
+ // Pixel 1
485
+ if (m1) {
486
+ if (!matching) { min_x = x + 1; last_red_value = v1; matching = true; }
487
+ } else if (matching) {
488
+ this->node_cluster->add_node(min_x, x, y, last_red_value, offset);
489
+ matching = false;
490
+ }
491
+
492
+ // Pixel 2
493
+ if (m2) {
494
+ if (!matching) { min_x = x + 2; last_red_value = v2; matching = true; }
495
+ } else if (matching) {
496
+ this->node_cluster->add_node(min_x, x + 1, y, last_red_value, offset);
497
+ matching = false;
498
+ }
499
+
500
+ // Pixel 3
501
+ if (m3) {
502
+ if (!matching) { min_x = x + 3; last_red_value = v3; matching = true; }
503
+ } else if (matching) {
504
+ this->node_cluster->add_node(min_x, x + 2, y, last_red_value, offset);
505
+ matching = false;
506
+ }
507
+ }
508
+ }
509
+
510
+ // --- PART 2: SCALAR CLEANUP ---
511
+ for (; x < this->end_x; x++) {
512
+ unsigned int color = fetch_color(p);
513
+ unsigned char current_val = p[0];
514
+ p += bpp;
515
+
516
+ if (specific_matcher->match(color)) {
517
+ if (!matching) {
518
+ min_x = x;
519
+ last_red_value = current_val;
520
+ matching = true;
521
+ }
522
+ } else if (matching) {
523
+ this->node_cluster->add_node(min_x, x - 1, y, last_red_value, offset);
524
+ matching = false;
525
+ }
526
+ }
527
+
528
+ // --- END OF ROW: CLOSE ANY RUN STILL OPEN ---
529
+ if (matching) {
530
+ this->node_cluster->add_node(min_x, this->end_x - 1, y, last_red_value, offset);
531
+ }
532
+ }
533
+ }*/
240
534
 
241
535
  public:
242
536
  PolygonFinder(Bitmap *bitmap,
@@ -1,3 +1,3 @@
1
1
  module Contrek
2
- VERSION = "1.2.7"
2
+ VERSION = "1.2.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: contrek
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.7
4
+ version: 1.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emanuele Cesaroni
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-06-02 00:00:00.000000000 Z
11
+ date: 2026-06-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -301,7 +301,7 @@ metadata:
301
301
  homepage_uri: https://github.com/runout77/contrek
302
302
  documentation_uri: https://github.com/runout77/contrek#readme
303
303
  changelog_uri: https://github.com/runout77/contrek/blob/main/CHANGELOG.md
304
- post_install_message:
304
+ post_install_message:
305
305
  rdoc_options: []
306
306
  require_paths:
307
307
  - lib
@@ -317,7 +317,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
317
317
  version: '0'
318
318
  requirements: []
319
319
  rubygems_version: 3.5.22
320
- signing_key:
320
+ signing_key:
321
321
  specification_version: 4
322
322
  summary: Fast PNG contour tracing and shape detection for Ruby
323
323
  test_files: []