contrek 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -1
  3. data/Gemfile.lock +1 -1
  4. data/PERFORMANCE.md +177 -0
  5. data/README.md +22 -3
  6. data/Rakefile +1 -1
  7. data/contrek.gemspec +5 -1
  8. data/ext/cpp_polygon_finder/PolygonFinder/CMakeLists.txt +10 -12
  9. data/ext/cpp_polygon_finder/PolygonFinder/src/Tests.cpp +1 -1
  10. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/Node.cpp +2 -0
  11. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/Node.h +36 -2
  12. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/NodeCluster.cpp +4 -4
  13. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Cluster.cpp +33 -42
  14. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Cluster.h +2 -1
  15. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Cursor.cpp +33 -5
  16. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Cursor.h +3 -2
  17. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Hub.cpp +2 -2
  18. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Hub.h +1 -5
  19. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/InnerPolyline.cpp +1 -2
  20. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/InnerPolyline.h +1 -3
  21. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Polyline.cpp +16 -20
  22. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Polyline.h +8 -3
  23. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Shape.cpp +5 -0
  24. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Shape.h +3 -1
  25. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/ShapePool.cpp +2 -2
  26. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/ShapePool.h +1 -1
  27. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Tile.cpp +5 -3
  28. data/ext/cpp_polygon_finder/PolygonFinder/src/polygon/finder/concurrent/Tile.h +4 -4
  29. data/ext/cpp_polygon_finder/cpp_polygon_finder.cpp +9 -0
  30. data/lib/contrek/finder/concurrent/cluster.rb +30 -29
  31. data/lib/contrek/finder/concurrent/cursor.rb +35 -12
  32. data/lib/contrek/finder/concurrent/hub.rb +1 -1
  33. data/lib/contrek/finder/concurrent/inner_polyline.rb +2 -3
  34. data/lib/contrek/finder/concurrent/polyline.rb +5 -7
  35. data/lib/contrek/finder/concurrent/shape.rb +7 -3
  36. data/lib/contrek/finder/concurrent/tile.rb +5 -4
  37. data/lib/contrek/version.rb +1 -1
  38. metadata +3 -6
  39. data/ext/cpp_polygon_finder/PolygonFinder/examples/example.cpp +0 -50
  40. data/ext/cpp_polygon_finder/PolygonFinder/images/graphs_1024x1024.png +0 -0
  41. data/ext/cpp_polygon_finder/PolygonFinder/images/labyrinth.png +0 -0
  42. data/ext/cpp_polygon_finder/PolygonFinder/images/sample_10240x10240.png +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b46d3ae57168cfeb52a0788b6e11af74a164c8e19b4414783f95ac96d1507ed2
4
- data.tar.gz: e0637093c914f2426a74b4a47ebeac34152ae473de619fda2e164f22c5d03bc8
3
+ metadata.gz: 0d4e8a9a3c94ae345edb0ecbb6087020141ad20cd2661e2b58578323a721f66e
4
+ data.tar.gz: ec99c9629d41d589e90ff39d8305c7e0355743b3f51caeb1e0a9da4702007fde
5
5
  SHA512:
6
- metadata.gz: 472da77db1202e4cf38416e7c5dcfbd483a1f9b9fdb414afb3583a441c3ea180107741d1bc939270fb2aa46e54541712a0efc56f542abe9afd5f39d66e7c847a
7
- data.tar.gz: 66ee95022392360b2cb9dbd9757d669a7f01cc95512ee4a74a288e7dc5c94657ebfc0ebe1107090b3e24df490462073f9ea3f2fd6cb57f7432214a809e486b0b
6
+ metadata.gz: 9180029576fc846f3cbc8adcd68e5f68374b49fb734db8352e9ced637a447cfad93beb37cb5206084b411f4cb5cc26de9a1ac075b09f8ce1b39bb6e33fadd018
7
+ data.tar.gz: 163a00611440eb83d538b4dcd3f3c36745350c1d34e657fcdae069dd8c8020f36eec005264bd581e2024fca772e7f53dc0f856d35eca4717b741b0bcbaa91e5a
data/CHANGELOG.md CHANGED
@@ -83,4 +83,12 @@ All notable changes to this project will be documented in this file.
83
83
 
84
84
  ## [1.2.0] - 2026-05-02
85
85
  ### Changed
86
- - Further improvements have been applied to the internal parts joining algorithm using a new structural approach. This update is faster and resolves edge cases where inner parts were mistakenly classified as outer perimeters, ensuring precise contour hierarchy. The simplified logic has led to a significant reduction in codebase complexity and the removal of substantial redundant code.
86
+ - Further improvements have been applied to the internal parts joining algorithm using a new structural approach. This update is faster and resolves edge cases where inner parts were mistakenly classified as outer perimeters, ensuring precise contour hierarchy. The simplified logic has led to a significant reduction in codebase complexity and the removal of substantial redundant code.
87
+
88
+ ## [1.2.1] - 2026-05-09
89
+ ### Changed
90
+ - Some C++ optimizations.
91
+
92
+ ## [1.2.2] - 2026-05-20
93
+ ### Changed
94
+ - The treemap determination algorithm has been heavily optimized. Calls to the geometric routine that checks whether a newly generated inner polyline encloses other already-existing ones have been reduced to the minimum. Polylines adjacent to the shared overlap stripe are now excluded from these checks, as they are already identified during the initial polygon detection phase. The geometric approach remains unavoidable in this context and is still a performance bottleneck. It will certainly be the subject of future optimizations.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- contrek (1.2.0)
4
+ contrek (1.2.2)
5
5
  chunky_png (~> 1.4)
6
6
  concurrent-ruby (~> 1.3.5)
7
7
  rice (= 4.5.0)
data/PERFORMANCE.md ADDED
@@ -0,0 +1,177 @@
1
+ # ⚡ Contrek Performance Tuning
2
+
3
+ This document describes optional dependencies and configuration tips to get the best performance out of Contrek on large images.
4
+
5
+ All optimizations are **optional** — Contrek works correctly without any of them. However, on high-resolution images (10k×10k and above), the combined effect is significant.
6
+
7
+ ---
8
+
9
+ ## Benchmark Reference
10
+
11
+ > System: AMD Ryzen 7 3700X 8-Core Processor (BogoMIPS: 7199,99) on Ubuntu distro
12
+ > Image: 20480×20480 pixels — 8 threads / 8 tiles
13
+ >
14
+ > **Note:** Benchmarks were measured inside a VMware virtual machine.
15
+
16
+ | Configuration | Time |
17
+ |---|---|
18
+ | Baseline (no tuning) | 5316 ms |
19
+ | **Fully tuned** | **2938.05 ms** |
20
+
21
+ ---
22
+
23
+ ## 1. zlib-ng — Faster PNG Decoding
24
+
25
+ **Impact: ~57% reduction in PNG decode time**
26
+
27
+ Contrek uses [libspng](https://libspng.org/) for PNG decoding, which internally relies on zlib for decompression. [zlib-ng](https://github.com/zlib-ng/zlib-ng) is a high-performance, drop-in replacement for zlib that uses modern CPU instructions (AVX2, SSE4) to significantly accelerate deflate decompression.
28
+
29
+ If zlib-ng is not installed, standard zlib is used automatically — no errors, just slower PNG decoding.
30
+
31
+ ### Installation
32
+
33
+ **Ubuntu / Debian** — not available in standard repos, build from source:
34
+
35
+ ```bash
36
+ git clone https://github.com/zlib-ng/zlib-ng.git
37
+ cd zlib-ng && mkdir build && cd build
38
+ cmake .. -DZLIB_COMPAT=ON -DCMAKE_BUILD_TYPE=Release
39
+ make -j$(nproc)
40
+ sudo make install
41
+ sudo ldconfig
42
+ ```
43
+
44
+ > ⚠️ The `-DZLIB_COMPAT=ON` flag is mandatory. Without it, zlib-ng uses a different ABI and CMake's `find_package(ZLIB)` won't detect it.
45
+
46
+ **macOS:**
47
+ ```bash
48
+ brew install zlib-ng
49
+ ```
50
+
51
+ **Arch Linux:**
52
+ ```bash
53
+ sudo pacman -S zlib-ng
54
+ ```
55
+
56
+ After installation, rebuild Contrek — CMake will automatically detect zlib-ng in `/usr/local` and use it instead of system zlib.
57
+
58
+ ---
59
+
60
+ ## 2. tcmalloc — Faster Memory Allocation
61
+
62
+ **Impact: significant reduction in allocator contention under multithreaded load**
63
+
64
+ Contrek creates and destroys large numbers of small objects during processing. Under multithreaded workloads, the standard glibc allocator serializes many of these operations, causing thread contention. [tcmalloc](https://github.com/google/tcmalloc) (Thread-Caching Malloc) is Google's high-performance allocator that maintains per-thread caches, dramatically reducing lock contention.
65
+
66
+ ### Installation
67
+
68
+ **Ubuntu / Debian:**
69
+ ```bash
70
+ sudo apt-get install libgoogle-perftools-dev
71
+ ```
72
+
73
+ **macOS:**
74
+ ```bash
75
+ brew install gperftools
76
+ ```
77
+
78
+ CMake will detect tcmalloc automatically. You will see this confirmation during the build:
79
+ ```
80
+ -- Contrek: tcmalloc found in /usr/lib/x86_64-linux-gnu/libtcmalloc.so
81
+ ```
82
+
83
+ ### Tuning tcmalloc cache size
84
+
85
+ For large images with many threads, increasing the per-thread cache size reduces requests to the central allocator. Add this at the very beginning of your `main()`:
86
+
87
+ ```cpp
88
+ #include <gperftools/malloc_extension.h>
89
+
90
+ int main() {
91
+ MallocExtension::instance()->SetNumericProperty(
92
+ "tcmalloc.max_total_thread_cache_bytes",
93
+ 1024 * 1024 * 1024 // 1GB total thread cache
94
+ );
95
+ // ...
96
+ }
97
+ ```
98
+
99
+ The default is 32MB total. On systems with 16GB+ RAM, 1GB is a safe value that virtually eliminates allocator contention.
100
+
101
+ ---
102
+
103
+ ## 3. Thread and Tile Configuration
104
+
105
+ **Impact: up to ~35% reduction in processing time on multi-core systems**
106
+
107
+ Contrek splits the image into vertical tiles processed in parallel. The optimal configuration depends on your hardware.
108
+
109
+ ### General rule
110
+
111
+ Set both `threads` and `tiles` to the number of **physical cores** on your machine.
112
+
113
+ ```cpp
114
+ Contrek::Config cfg;
115
+ cfg.threads = 8; // match your physical core count
116
+ cfg.tiles = 8; // same as threads for best results
117
+ ```
118
+
119
+ ```ruby
120
+ result = Contrek.contour!(
121
+ png_file_path: "image.png",
122
+ options: {
123
+ number_of_threads: 8,
124
+ finder: { number_of_tiles: 8 }
125
+ }
126
+ )
127
+ ```
128
+
129
+ ### Why threads == tiles?
130
+
131
+ - **Fewer tiles than threads**: some cores sit idle waiting for others to finish
132
+ - **More tiles than threads**: merge overhead increases without adding parallelism
133
+ - **threads == tiles**: optimal balance between parallel scan and merge cost
134
+
135
+ Note that the best setting depends on your system. It is usually better not to saturate all cores: leave one or two to the system and give the others to Contrek. For example, on an 8-core CPU use at most 6 threads/tiles.
136
+
137
+ ---
138
+
139
+ ## 4. Combining All Optimizations
140
+
141
+ Install zlib-ng and tcmalloc, then configure:
142
+
143
+ ```ruby
144
+ # Ruby
145
+ result = Contrek.contour!(
146
+ png_file_path: "large_image.png",
147
+ options: {
148
+ number_of_threads: 8, # match your core count (or 1-2 less)
149
+ class: "value_not_matcher",
150
+ color: { r: 255, g: 255, b: 255, a: 255 },
151
+ finder: {
152
+ number_of_tiles: 8, # same as threads
153
+ compress: { uniq: true }
154
+ }
155
+ }
156
+ )
157
+ ```
158
+
159
+ ```cpp
160
+ // C++ standalone
161
+ #include <gperftools/malloc_extension.h>
162
+ #include "ContrekApi.h"
163
+
164
+ int main() {
165
+ MallocExtension::instance()->SetNumericProperty(
166
+ "tcmalloc.max_total_thread_cache_bytes",
167
+ 1024 * 1024 * 1024
168
+ );
169
+
170
+ Contrek::Config cfg;
171
+ cfg.threads = 8;
172
+ cfg.tiles = 8;
173
+
174
+ auto result = Contrek::trace("large_image.png", cfg);
175
+ std::cout << "Time: " << result->total_time << " ms" << std::endl;
176
+ }
177
+ ```
data/README.md CHANGED
@@ -49,6 +49,17 @@ The core strength of Contrek is its **Topologically Consistent Merging** algorit
49
49
  </tr>
50
50
  </table>
51
51
 
52
+ ## 📊 Benchmarking & Performance
53
+ The **Stripe-Merging** algorithm has been validated through a dedicated testing suite comparing **Contrek** against **OpenCV** (industry-standard contour extraction).
54
+
55
+ ### Key Metrics:
56
+ * **Execution Latency:** Single-threaded OpenCV vs. Contrek's parallel thread management.
57
+ * **Memory Footprint:** RAM consumption during ultra-high-resolution processing.
58
+ * **Extraction Fidelity:** Verifying polygon precision across both engines.
59
+
60
+ The complete testing suite, source code, and raw benchmarks are available here:
61
+ 👉 **[test_opencv_contrek](https://github.com/runout77/test_opencv_contrek)**
62
+
52
63
  ## Prerequisites
53
64
 
54
65
  For optimal performance and efficient memory management with large images (20k+), it is highly recommended to install **tcmalloc**.
@@ -57,7 +68,11 @@ For optimal performance and efficient memory management with large images (20k+)
57
68
  ```bash
58
69
  sudo apt-get install libgoogle-perftools-dev
59
70
  ```
71
+ > For advanced performance tuning (zlib-ng, tcmalloc, thread configuration) see [PERFORMANCE.md](PERFORMANCE.md).
60
72
 
73
+ > ⚠️ **Platform support:** Contrek native extensions are supported on **Linux** and **macOS** only.
74
+ > Windows is not supported due to the use of POSIX threading primitives and platform-specific
75
+ > memory management. On Windows, consider using WSL2 (Windows Subsystem for Linux).
61
76
 
62
77
  ## Install
63
78
 
@@ -143,7 +158,8 @@ You can process from a raw stream
143
158
  [{:outer=>[{:x=>5, :y=>4}, {:x=>5, :y=>5}, {:x=>8, :y=>5}, {:x=>8, :y=>4}], :inner=>[]}]
144
159
  ```
145
160
 
146
- Multithreaded contour processing is supported. However, on Ruby MRI (the standard Ruby implementation, at least up to 3.x), the Global Interpreter Lock (GIL) prevents more than one thread from executing Ruby code simultaneously. As a consequence, execution remains effectively serialized even on multicore systems, unless the gem is used under JRuby or TruffleRuby (not tested).
161
+ Multithreaded contour processing is supported by both the native C++ and pure Ruby implementations. When using the C++ engine (default), multithreading works as expected and fully utilizes all available cores.
162
+ When running the pure Ruby implementation, however, the Global Interpreter Lock (GIL) in Ruby MRI (the standard Ruby interpreter, up to at least version 3.x) prevents true parallel execution — threads are serialized even on multicore systems. Switching to JRuby or TruffleRuby would bypass this limitation, though these runtimes have not been tested with Contrek.
147
163
 
148
164
  ```ruby
149
165
  result = Contrek.contour!(
@@ -167,7 +183,7 @@ Regarding multithreading:
167
183
 
168
184
  - The algorithm splits the contour-detection workflow into multiple phases that can be executed in parallel. The initial contour extraction on each band and the subsequent merging of coordinates between adjacent bands—performed pairwise, recursively, and in a non-deterministic order—results in a final output that is not idempotent. Idempotence is guaranteed only when the exact same merging sequence is repeated.
169
185
 
170
- By not declaring native option CPP Multithreading optimized code is used. In the above example a [105 MP image](spec/files/images/sample_10240x10240.png) is examined by 4 threads working on 4 tiles (total compute time about 1.1 secs with image load).
186
+ When the native option is not declared, the multithreading-optimized C++ code is used. In the above example a [105 MP image](spec/files/images/sample_10240x10240.png) is examined by 4 threads working on 4 tiles (total compute time about 0.816 secs, including 0.37 secs of image load).
171
187
 
172
188
  ```ruby
173
189
  result = Contrek.contour!(
@@ -232,9 +248,10 @@ Engineered for **Pixel-Perfect** precision.
232
248
  * **Result:** 100% topologically faithful geometry with no micro-gaps between adjacent polygons.
233
249
 
234
250
  Below are two images illustrating the difference in tracing modes. In the first case, with **strict_bounds ON**, the anti-clockwise sequence includes two additional points, **H** and **I**, which trace the shape more accurately. In the second case, the transition between **G** and **H** is approximated, omitting the indentation.
251
+
235
252
  | Strict Bounds ON | Strict Bounds OFF |
236
253
  |:---:|:---:|
237
- | ![Originale](./docs/images/strict_bounds_on.png) | ![Poligoni](./docs/images/strict_bounds_off.png) |
254
+ | <img src="./docs/images/strict_bounds_on.png" alt="Originale" width="60%"/> | <img src="./docs/images/strict_bounds_off.png" alt="Poligoni" width="60%"/> |
238
255
 
239
256
 
240
257
  ## Result
@@ -411,6 +428,8 @@ This the one for the native C++
411
428
 
412
429
  About 130x faster. Times are in microseconds; system: AMD Ryzen 7 3700X 8-Core Processor (BogoMIPS: 7199,99) on Ubuntu distro.
413
430
 
431
+ **Note:** Benchmarks were measured inside a VMware virtual machine.
432
+
414
433
  ## 🛠 C++ Standalone Library Usage
415
434
 
416
435
  The core of **Contrek** is a high-performance `C++17` library. It is designed to be **standalone**, meaning it has zero dependencies on Ruby and can be integrated into any `C++` project.
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ task :compile do |t|
6
6
  Dir.glob("**/*.o").each { |f| File.delete(f) }
7
7
  File.delete("Makefile") if File.exist?("Makefile")
8
8
  system "ruby", "extconf.rb"
9
- system "make", "-B"
9
+ system "make", "-j#{`nproc`.strip}", "-B"
10
10
  Dir.glob("**/*.o").each { |f| File.delete(f) }
11
11
  system "cp cpp_polygon_finder.so ./../../lib"
12
12
  end
data/contrek.gemspec CHANGED
@@ -11,7 +11,11 @@ Gem::Specification.new do |s|
11
11
  s.homepage = "https://github.com/runout77/contrek"
12
12
  s.licenses = ["MIT", "AGPL-3.0-only"]
13
13
  s.files = Dir.chdir(File.expand_path("..", __FILE__)) do
14
- `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(docs|pkg|spec)/}) }
14
+ `git ls-files -z`.split("\x0").reject do |f|
15
+ f.match(%r{^(docs|pkg|spec)/}) ||
16
+ f.include?("PolygonFinder/images/") ||
17
+ f.include?("PolygonFinder/examples/")
18
+ end
15
19
  end
16
20
  s.metadata = {
17
21
  "homepage_uri" => "https://github.com/runout77/contrek",
@@ -1,6 +1,5 @@
1
1
  cmake_minimum_required(VERSION 3.10)
2
2
  project(ContrekCore C CXX)
3
-
4
3
  set(CMAKE_CXX_STANDARD 17)
5
4
  set(CMAKE_C_STANDARD 11)
6
5
  if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -22,28 +21,27 @@ else()
22
21
  message(WARNING "Contrek: tcmalloc not found; standard one will be used.")
23
22
  endif()
24
23
  endif()
25
-
26
24
  find_package(ZLIB REQUIRED)
27
-
25
+ message(STATUS "Contrek: ZLIB path found at ${ZLIB_LIBRARIES}")
28
26
  file(GLOB_RECURSE CPP_SOURCES "*.cpp")
29
27
  file(GLOB_RECURSE C_SOURCES "*.c")
30
-
31
28
  list(FILTER CPP_SOURCES EXCLUDE REGEX "examples/.*\\.cpp")
32
-
33
29
  add_library(ContrekLib STATIC ${CPP_SOURCES} ${C_SOURCES})
34
-
35
30
  file(GLOB_RECURSE ALL_HEADERS "*.h")
36
31
  foreach(header_file ${ALL_HEADERS})
37
32
  get_filename_component(header_dir ${header_file} DIRECTORY)
38
33
  list(APPEND ALL_INCLUDE_DIRS ${header_dir})
39
34
  endforeach()
40
35
  list(REMOVE_DUPLICATES ALL_INCLUDE_DIRS)
41
-
42
- target_include_directories(ContrekLib PUBLIC ${ALL_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS})
43
- target_link_libraries(ContrekLib PRIVATE ${ZLIB_LIBRARIES} pthread)
44
-
36
+ target_include_directories(ContrekLib PUBLIC
37
+ ${ALL_INCLUDE_DIRS}
38
+ ${ZLIB_INCLUDE_DIRS}
39
+ )
40
+ target_link_libraries(ContrekLib PRIVATE
41
+ ${ZLIB_LIBRARIES}
42
+ pthread
43
+ )
45
44
  option(BUILD_EXAMPLES "Build the example application" OFF)
46
-
47
45
  if(BUILD_EXAMPLES)
48
46
  if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/examples/example.cpp")
49
47
  message(STATUS "Contrek: Compiling example option ON")
@@ -56,4 +54,4 @@ if(BUILD_EXAMPLES)
56
54
  else()
57
55
  message(WARNING "Contrek: examples/example.cpp not found!")
58
56
  endif()
59
- endif()
57
+ endif()
@@ -98,7 +98,7 @@ void Tests::test_c()
98
98
  Point* p2 = new Point({2, 2});
99
99
  Point* p3 = new Point({3, 3});
100
100
 
101
- Hub* hub = new Hub(4, 0, 3);
101
+ Hub* hub = new Hub(4);
102
102
 
103
103
  Position* pos1 = new Position(hub, p1);
104
104
  Position* pos2 = new Position(hub, p2);
@@ -17,6 +17,8 @@
17
17
  #include "Node.h"
18
18
  #include "NodeCluster.h"
19
19
 
20
+ static const int TURNER[2][2] = {{Node::OMAX, Node::OMIN}, {Node::TURN_MAX, Node::TURN_MIN}};
21
+
20
22
  Node::Node(int min_x, int max_x, int y, NodeCluster* cluster, char name)
21
23
  : start_point(min_x, y),
22
24
  end_point(max_x, y),
@@ -14,8 +14,43 @@
14
14
  #include <limits>
15
15
  #include <algorithm>
16
16
  #include <map>
17
+ #include <cstring>
18
+ #include <cstddef>
17
19
  #include "List.h"
18
20
 
21
+ struct SmallVec {
22
+ static constexpr int INLINE_CAP = 6;
23
+ int buf[INLINE_CAP];
24
+ int* ptr = buf;
25
+ int sz = 0, cap = INLINE_CAP;
26
+ int front() const { return ptr[0]; }
27
+ int back() const { return ptr[sz - 1]; }
28
+ void push_back(int v) {
29
+ if (sz == cap) {
30
+ cap *= 2;
31
+ int* np = new int[cap];
32
+ std::memcpy(np, ptr, sz * sizeof(int));
33
+ if (ptr != buf) delete[] ptr;
34
+ ptr = np;
35
+ }
36
+ ptr[sz++] = v;
37
+ }
38
+ void reserve(int n) {
39
+ if (n <= cap) return;
40
+ int* np = new int[n];
41
+ std::memcpy(np, ptr, sz * sizeof(int));
42
+ if (ptr != buf) delete[] ptr;
43
+ ptr = np; cap = n;
44
+ }
45
+ void clear() { sz = 0; ptr = buf; cap = INLINE_CAP; }
46
+ int size() const { return sz; }
47
+ int& operator[](int i) { return ptr[i]; }
48
+ int operator[](int i) const { return ptr[i]; }
49
+ int* begin() { return ptr; }
50
+ int* end() { return ptr + sz; }
51
+ ~SmallVec() { if (ptr != buf) delete[] ptr; }
52
+ };
53
+
19
54
  class NodeCluster;
20
55
  struct Point {
21
56
  int x;
@@ -42,7 +77,6 @@ class Node : public Listable {
42
77
  static const int OCOMPLETE = OMIN | OMAX;
43
78
  static const int TURN_MAX = IMAX | OMAX;
44
79
  static const int TURN_MIN = IMIN | OMIN;
45
- const int TURNER[2][2] = {{OMAX, OMIN}, {TURN_MAX, TURN_MIN}};
46
80
  static const int OUTER = 0;
47
81
  static const int INNER = 1;
48
82
 
@@ -60,7 +94,7 @@ class Node : public Listable {
60
94
  Point start_point, end_point;
61
95
  NodeCluster* cluster;
62
96
  void add_intersection(Node& other_node, int other_node_index);
63
- std::vector<int> tangs_sequence;
97
+ SmallVec tangs_sequence;
64
98
  Point* coords_entering_to(Node *enter_to, int mode, int tracking);
65
99
  Node* my_next_outer(Node *last, int versus);
66
100
  Node* my_next_inner(Node *last, int versus);
@@ -35,6 +35,7 @@ NodeCluster::NodeCluster(int h, int w, pf_Options *options) {
35
35
  this->root_nodes = this->lists.add_list();
36
36
  this->inner_plot = this->lists.add_list();
37
37
  this->inner_new = this->lists.add_list();
38
+ this->plot_sequence.reserve(1024);
38
39
  }
39
40
 
40
41
  NodeCluster::~NodeCluster() {
@@ -68,8 +69,8 @@ void NodeCluster::compress_coords(std::list<Polygon>& polygons, pf_Options optio
68
69
  }
69
70
 
70
71
  void NodeCluster::build_tangs_sequence() {
71
- for (auto& line : vert_nodes) {
72
- for (Node& node : line) {
72
+ for (int y = 0; y < (int)vert_nodes.size(); y++) {
73
+ for (Node& node : vert_nodes[y]) {
73
74
  node.precalc_tangs_sequences(*this);
74
75
  }
75
76
  }
@@ -94,8 +95,7 @@ Node* NodeCluster::add_node(int min_x, int max_x, int y, char name, int offset)
94
95
 
95
96
  while (it != up_nodes.end()) {
96
97
  if ((it->min_x - offset) > node.max_x) break;
97
- int current_index = std::distance(up_nodes.begin(), it);
98
- node.add_intersection(*it, current_index);
98
+ node.add_intersection(*it, it->abs_x_index);
99
99
  it->add_intersection(node, node.abs_x_index);
100
100
  ++it;
101
101
  }
@@ -20,7 +20,7 @@
20
20
  Cluster::Cluster(Finder *finder, int height, int start_x, int end_x)
21
21
  : finder(finder)
22
22
  { tiles_.reserve(2); // only two (left|right)
23
- this->hub_ = new Hub(height, start_x, end_x);
23
+ this->hub_ = new Hub(height);
24
24
  }
25
25
 
26
26
  Cluster::~Cluster() {
@@ -54,7 +54,7 @@ Tile* Cluster::merge_tiles() {
54
54
  double tot_outer = 0;
55
55
  CpuTimer timer;
56
56
 
57
- std::list<Shape*> new_shapes;
57
+ std::vector<Shape*> new_shapes;
58
58
  std::vector<InnerPolyline*> all_new_inner_polylines;
59
59
 
60
60
  timer.start();
@@ -69,7 +69,7 @@ Tile* Cluster::merge_tiles() {
69
69
  tot_outer += timer.stop();
70
70
 
71
71
  for (Tile* tile : tiles_) {
72
- std::list<Shape*>& src = tile->shapes();
72
+ std::vector<Shape*>& src = tile->shapes();
73
73
 
74
74
  for (Shape* shape : src) {
75
75
  if (shape->outer_polyline->is_on(Polyline::TRACKED_OUTER) || shape->outer_polyline->width() == 0) {
@@ -86,7 +86,7 @@ Tile* Cluster::merge_tiles() {
86
86
 
87
87
  timer.start();
88
88
  std::vector<InnerPolyline*> new_inners = shape->inner_polylines;
89
- std::vector<InnerPolyline*> new_inner_polylines = cursor.join_inners(new_outer);
89
+ std::vector<InnerPolyline*> new_inner_polylines = cursor.join_inners(new_outer, treemap);
90
90
  tot_inner += timer.stop();
91
91
 
92
92
  for (InnerPolyline* inner_polyline : new_inner_polylines) {
@@ -94,59 +94,44 @@ Tile* Cluster::merge_tiles() {
94
94
  if (treemap) {
95
95
  inner_polyline->sequence()->compute_vertical_bounds();
96
96
  all_new_inner_polylines.push_back(inner_polyline);
97
- for (const auto orphan_inner : cursor.orphan_inners()) {
98
- if (orphan_inner->recombined()) {
99
- all_new_inner_polylines.push_back(orphan_inner);
100
- }
101
- }
102
97
  }
103
98
  }
104
-
105
99
  for (auto s : cursor.orphan_inners()) {
106
100
  new_inners.push_back(s);
107
101
  }
108
-
109
102
  Polyline* polyline = tile->shapes_pool->acquire_polyline(tile, new_outer->to_vector(), std::nullopt);
110
103
  Shape* inserting_new_shape = tile->shapes_pool->acquire_shape(polyline, new_inners);
111
-
112
104
  new_shapes.push_back(inserting_new_shape);
113
105
  polyline->shape = inserting_new_shape;
114
- inserting_new_shape->set_parent_shape(shape->parent_shape());
115
106
 
116
107
  for (InnerPolyline* inner_polyline : new_inner_polylines) {
117
108
  inner_polyline->sequence()->shape = inserting_new_shape;
118
109
  }
119
-
120
110
  if (treemap) {
121
111
  for (const auto merged_shape : cursor.shapes_sequence()) {
122
112
  merged_shape->merged_to_shape = inserting_new_shape;
123
113
  }
124
- this->assign_ancestry(inserting_new_shape, all_new_inner_polylines);
114
+ InnerPolyline* inside_inner_polyline = shape->outer_polyline->inside_inner_polyline;
115
+ if (inside_inner_polyline) {
116
+ assign_ancestry(inserting_new_shape, inside_inner_polyline);
117
+ }
125
118
  }
126
-
127
119
  } else {
128
- if (treemap && !shape->reassociation_skip && shape->parent_shape() == nullptr) {
129
- this->assign_ancestry(shape, all_new_inner_polylines);
120
+ if (treemap) {
121
+ if (shape->fixed) {
122
+ Shape* ms = shape->parent_shape()->merged_to_shape;
123
+ if (ms) {
124
+ shape->set_parent_shape(ms);
125
+ }
126
+ } else {
127
+ is_children(shape, all_new_inner_polylines);
128
+ }
130
129
  }
131
130
  new_shapes.push_back(shape);
132
131
  }
133
132
  }
134
133
  }
135
134
 
136
- if (treemap) {
137
- for (Tile* tile : tiles_) {
138
- for (Shape* shape : tile->shapes()) {
139
- Shape* parent = shape->parent_shape();
140
- while (parent && parent->merged_to_shape != nullptr) {
141
- parent = parent->merged_to_shape;
142
- }
143
- if (parent != shape->parent_shape()) {
144
- shape->set_parent_shape(parent);
145
- }
146
- }
147
- }
148
- }
149
-
150
135
  double past_tot_outer = tiles_.front()->benchmarks.outer + tiles_.back()->benchmarks.outer;
151
136
  double past_tot_inner = tiles_.front()->benchmarks.inner + tiles_.back()->benchmarks.inner;
152
137
 
@@ -165,16 +150,22 @@ Tile* Cluster::merge_tiles() {
165
150
  return tile;
166
151
  }
167
152
 
168
- void Cluster::assign_ancestry(Shape *shape, std::vector<InnerPolyline*>& inner_polylines)
169
- { for (auto* inner_polyline : inner_polylines) {
170
- if (shape->outer_polyline->vert_bounds_intersect(inner_polyline->vertical_bounds())) {
171
- if (shape->outer_polyline->within(inner_polyline->raw())) {
172
- shape->set_parent_shape(inner_polyline->shape());
173
- shape->parent_inner_polyline = inner_polyline;
174
- for (auto* children_shape : shape->children_shapes) {
175
- children_shape->reassociation_skip = true;
176
- }
177
- }
153
+ void Cluster::assign_ancestry(Shape *shape, InnerPolyline* inner_polyline)
154
+ { shape->set_parent_shape(inner_polyline->sequence()->shape);
155
+ shape->parent_inner_polyline = inner_polyline;
156
+ shape->fixed = true;
157
+ }
158
+
159
+ void Cluster::is_children(Shape* shape, std::vector<InnerPolyline*> inner_polylines) {
160
+ int shape_max_y = shape->outer_polyline->max_y();
161
+ int shape_min_y = shape->outer_polyline->min_y();
162
+ for (InnerPolyline* inner_polyline : inner_polylines) {
163
+ Bounds bounds = inner_polyline->vertical_bounds();
164
+ int min_y = bounds.min;
165
+ int max_y = bounds.max;
166
+ if (shape_max_y < min_y || shape_min_y > max_y ) continue;
167
+ if (shape->outer_polyline->within(inner_polyline->raw())) {
168
+ assign_ancestry(shape, inner_polyline);
178
169
  }
179
170
  }
180
171
  }
@@ -21,7 +21,8 @@ class Cluster {
21
21
  Finder *finder;
22
22
  std::vector<Tile*> tiles_;
23
23
  Hub *hub_ = nullptr;
24
- void assign_ancestry(Shape *shape, std::vector<InnerPolyline*>& inner_polylines);
24
+ void assign_ancestry(Shape *shape, InnerPolyline* inner_polyline);
25
+ void is_children(Shape* shape, std::vector<InnerPolyline*> inner_polylines);
25
26
 
26
27
  public:
27
28
  Cluster(Finder *finder, int height, int start_x, int end_x);