tg_geometry 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +103 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +385 -0
  6. data/Rakefile +129 -0
  7. data/benchmark/_support.rb +115 -0
  8. data/benchmark/batch_packed_vs_loop.rb +27 -0
  9. data/benchmark/falcon_concurrency.rb +25 -0
  10. data/benchmark/flat_vs_rtree.rb +27 -0
  11. data/benchmark/gvl_threshold.rb +41 -0
  12. data/benchmark/objectspace_memsize.rb +17 -0
  13. data/benchmark/parse_throughput.rb +38 -0
  14. data/benchmark/rss_stability.rb +70 -0
  15. data/docs/ACTIVE_RECORD.md +26 -0
  16. data/docs/ARCHITECTURE.md +130 -0
  17. data/docs/AUTO_STRATEGY.md +15 -0
  18. data/docs/BENCHMARKING.md +75 -0
  19. data/docs/CASUAL_EXAMPLE.md +618 -0
  20. data/docs/CONCURRENCY.md +65 -0
  21. data/docs/ERROR_HANDLING.md +55 -0
  22. data/docs/EXPANSION_E_TO_H_STATUS.md +51 -0
  23. data/docs/FORMAT_COVERAGE.md +23 -0
  24. data/docs/FULL_TG_API_COVERAGE.md +109 -0
  25. data/docs/LIMITATIONS.md +61 -0
  26. data/docs/LOW_LEVEL_GEOMETRY.md +121 -0
  27. data/docs/MEMORY_OWNERSHIP.md +94 -0
  28. data/docs/RACTOR.md +40 -0
  29. data/docs/REGISTRY.md +37 -0
  30. data/docs/RELEASE_CHECKLIST.md +39 -0
  31. data/ext/tg_geometry/extconf.rb +91 -0
  32. data/ext/tg_geometry/tg_geometry_ext.c +3054 -0
  33. data/ext/tg_geometry/tg_geometry_vendor_rtree.c +1 -0
  34. data/ext/tg_geometry/tg_geometry_vendor_tg.c +24 -0
  35. data/ext/tg_geometry/vendor/.vendored +16 -0
  36. data/ext/tg_geometry/vendor/rtree/LICENSE +20 -0
  37. data/ext/tg_geometry/vendor/rtree/README.md +202 -0
  38. data/ext/tg_geometry/vendor/rtree/VERSION +3 -0
  39. data/ext/tg_geometry/vendor/rtree/rtree.c +840 -0
  40. data/ext/tg_geometry/vendor/rtree/rtree.h +105 -0
  41. data/ext/tg_geometry/vendor/tg/LICENSE +19 -0
  42. data/ext/tg_geometry/vendor/tg/README.md +197 -0
  43. data/ext/tg_geometry/vendor/tg/VERSION +3 -0
  44. data/ext/tg_geometry/vendor/tg/tg.c +16010 -0
  45. data/ext/tg_geometry/vendor/tg/tg.h +359 -0
  46. data/lib/tg/geometry/active_record_source.rb +57 -0
  47. data/lib/tg/geometry/registry.rb +119 -0
  48. data/lib/tg/geometry/version.rb +7 -0
  49. data/lib/tg/geometry.rb +6 -0
  50. data/lib/tg_geometry.rb +3 -0
  51. data/script/vendor_libs.rb +264 -0
  52. data/spec/block_10_rtree_strategy_spec.rb +82 -0
  53. data/spec/block_11_rtree_order_spec.rb +53 -0
  54. data/spec/block_12_batch_packed_spec.rb +55 -0
  55. data/spec/block_13_error_hardening_spec.rb +65 -0
  56. data/spec/block_14_memory_gc_hardening_spec.rb +116 -0
  57. data/spec/block_1_skeleton_spec.rb +45 -0
  58. data/spec/block_20_concurrency_spec.rb +157 -0
  59. data/spec/block_20_fuzz_spec.rb +145 -0
  60. data/spec/block_2_vendor_spec.rb +79 -0
  61. data/spec/block_3_geom_parse_spec.rb +89 -0
  62. data/spec/block_4_geom_api_spec.rb +90 -0
  63. data/spec/block_5_rect_api_spec.rb +96 -0
  64. data/spec/block_6_index_build_spec.rb +111 -0
  65. data/spec/block_7_index_owned_geometry_spec.rb +143 -0
  66. data/spec/block_8_index_borrowed_geometry_spec.rb +106 -0
  67. data/spec/block_9_flat_query_spec.rb +65 -0
  68. data/spec/expansion_a_auto_strategy_spec.rb +14 -0
  69. data/spec/expansion_b_registry_spec.rb +47 -0
  70. data/spec/expansion_c_active_record_source_spec.rb +42 -0
  71. data/spec/expansion_d_format_coverage_spec.rb +30 -0
  72. data/spec/expansion_e_low_level_geometry_spec.rb +82 -0
  73. data/spec/expansion_i_ractor_spec.rb +25 -0
  74. data/spec/expansion_j_full_tg_api_coverage_spec.rb +114 -0
  75. data/spec/spec_helper.rb +15 -0
  76. metadata +157 -0
@@ -0,0 +1 @@
1
+ #include "vendor/rtree/rtree.c"
@@ -0,0 +1,24 @@
1
+ #if defined(__clang__)
2
+ #pragma clang diagnostic push
3
+ #pragma clang diagnostic ignored "-Wshorten-64-to-32"
4
+ #pragma clang diagnostic ignored "-Wincompatible-pointer-types-discards-qualifiers"
5
+ #elif defined(__GNUC__)
6
+ #pragma GCC diagnostic push
7
+ #pragma GCC diagnostic ignored "-Wconversion"
8
+ #endif
9
+
10
+ #ifndef asm
11
+ #define asm __asm__
12
+ #endif
13
+
14
+ #ifndef M_PI
15
+ #define M_PI 3.14159265358979323846264338327950288
16
+ #endif
17
+
18
+ #include "vendor/tg/tg.c"
19
+
20
+ #if defined(__clang__)
21
+ #pragma clang diagnostic pop
22
+ #elif defined(__GNUC__)
23
+ #pragma GCC diagnostic pop
24
+ #endif
@@ -0,0 +1,16 @@
1
+ # tg_geometry vendor manifest. Do not edit by hand. Regenerate with: ruby script/vendor_libs.rb --sync
2
+ gem=tg_geometry
3
+ libraries=tg,rtree
4
+ tg_repo=https://github.com/tidwall/tg.git
5
+ tg_ref=main
6
+ tg_commit=caf840504eaab4563280cf4ab16d618f69a23720
7
+ tg_target=tg
8
+ tg_files=tg.c,tg.h,LICENSE,README.md,VERSION
9
+ tg_tree_sha256=f8e0d904055c209b2a23a2200456f9374ab95cadb69e61b5721d7f8e2500e705
10
+ rtree_repo=https://github.com/tidwall/rtree.c.git
11
+ rtree_ref=v0.5.3
12
+ rtree_commit=5717a8a1eb373428ebaae8c1c623f186ec46461f
13
+ rtree_target=rtree
14
+ rtree_files=rtree.c,rtree.h,LICENSE,README.md,VERSION
15
+ rtree_tree_sha256=4afc86cbd3abe03730206031a5aff5b8b29d37b055fc356052f6f06e1d1f9a61
16
+ manifest_sha256=b7e0c21dc3bc29918b1336f30d1d08ee2a32083b77ece8a5e56d77651a1d3a98
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Joshua J Baker
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,202 @@
1
+ # rtree.c
2
+
3
+ An [R-tree](https://en.wikipedia.org/wiki/R-tree) implementation in C.
4
+
5
+ <img src="cities.png" border="0" alt="Cities">
6
+
7
+ ## Features
8
+
9
+ - [Generic interface](#generic-interface) for multiple dimensions and data types
10
+ - Supports custom allocators
11
+ - Copy-on-write support
12
+ - Includes [test suite](#testing-and-benchmarks) with 100% coverage.
13
+ - [Very fast](#testing-and-benchmarks) 🚀
14
+
15
+ ## Example
16
+
17
+ ```c
18
+ #include <stdio.h>
19
+ #include <string.h>
20
+ #include <math.h>
21
+ #include "rtree.h"
22
+
23
+ struct city {
24
+ char *name;
25
+ double lat;
26
+ double lon;
27
+ };
28
+
29
+ struct city phx = { .name = "Phoenix", .lat = 33.448, .lon = -112.073 };
30
+ struct city enn = { .name = "Ennis", .lat = 52.843, .lon = -8.986 };
31
+ struct city pra = { .name = "Prague", .lat = 50.088, .lon = 14.420 };
32
+ struct city tai = { .name = "Taipei", .lat = 25.033, .lon = 121.565 };
33
+ struct city her = { .name = "Hermosillo", .lat = 29.102, .lon = -110.977 };
34
+ struct city him = { .name = "Himeji", .lat = 34.816, .lon = 134.700 };
35
+
36
+ bool city_iter(const double *min, const double *max, const void *item, void *udata) {
37
+ const struct city *city = item;
38
+ printf("%s\n", city->name);
39
+ return true;
40
+ }
41
+
42
+ int main() {
43
+ // Create a new rtree where each item is a `struct city*`.
44
+ struct rtree *tr = rtree_new();
45
+
46
+ // Load some cities into the rtree. Each insert operation performs a copy
47
+ // of the data that is pointed to in the second and third arguments.
48
+ // The R-tree expects a rectangle, which is two arrays of doubles.
49
+ // The first N values as the minimum corner of the rect, and the next
50
+ // N values as the maximum corner of the rect, where N is the number of
51
+ // dimensions. The default R-tree has 2 dimensions.
52
+ // When inserting points, the max coordinates are optional.
53
+ rtree_insert(tr, (double[2]){phx.lon, phx.lat}, NULL, &phx);
54
+ rtree_insert(tr, (double[2]){enn.lon, enn.lat}, NULL, &enn);
55
+ rtree_insert(tr, (double[2]){pra.lon, pra.lat}, NULL, &pra);
56
+ rtree_insert(tr, (double[2]){tai.lon, tai.lat}, NULL, &tai);
57
+ rtree_insert(tr, (double[2]){her.lon, her.lat}, NULL, &her);
58
+ rtree_insert(tr, (double[2]){him.lon, him.lat}, NULL, &him);
59
+
60
+ printf("\n-- Northwestern cities --\n");
61
+ rtree_search(tr, (double[2]){-180, 0}, (double[2]){0, 90}, city_iter, NULL);
62
+
63
+ printf("\n-- Northeastern cities --\n");
64
+ rtree_search(tr, (double[2]){0, 0}, (double[2]){180, 90}, city_iter, NULL);
65
+
66
+ // Deleting an item is the same as inserting
67
+ rtree_delete(tr, (double[2]){phx.lon, phx.lat}, NULL, &phx);
68
+
69
+ printf("\n-- Northwestern cities --\n");
70
+ rtree_search(tr, (double[2]){-180, 0}, (double[2]){0, 90}, city_iter, NULL);
71
+
72
+ rtree_free(tr);
73
+ }
74
+ // output:
75
+ // -- Northwestern cities --
76
+ // Phoenix
77
+ // Hermosillo
78
+ // Ennis
79
+ //
80
+ // -- Northeastern cities --
81
+ // Prague
82
+ // Taipei
83
+ // Himeji
84
+ //
85
+ // -- Northwestern cities --
86
+ // Hermosillo
87
+ // Ennis
88
+ ```
89
+
90
+ ## Functions
91
+
92
+ ```sh
93
+ rtree_new # allocate a new rtree
94
+ rtree_free # free the rtree
95
+ rtree_count # return number of items in rtree
96
+ rtree_insert # insert an item
97
+ rtree_delete # delete an item
98
+ rtree_search # search the rtree for items with intersecting rectangles
99
+ rtree_clone # make a clone of the rtree using a copy-on-write technique
100
+ ```
101
+
102
+ ## Generic interface
103
+
104
+ This implementation is set to 2 dimensions, using doubles as the
105
+ numeric coordinate type, and `void *` as the data type.
106
+
107
+ The `rtree.c` and `rtree.h` files can be easily customized to change these
108
+ settings.
109
+
110
+ Please find the type parameters at the top of the `rtree.c` file:
111
+
112
+ ```c
113
+ #define DATATYPE void *
114
+ #define NUMTYPE double
115
+ #define DIMS 2
116
+ #define MAXITEMS 64
117
+ ```
118
+
119
+ Change these to suit your needs, then modify the `rtree.h` file to match.
120
+
121
+ ## Testing and benchmarks
122
+
123
+ ```sh
124
+ $ tests/run.sh # run tests
125
+ $ tests/run.sh bench # run benchmarks
126
+ ```
127
+
128
+ The following benchmarks were run on Ubuntu 20.04 (3.4GHz 16-Core AMD Ryzen 9 5950X) using clang-17.
129
+ One million random (evenly distributed) points are inserted, searched, deleted, and replaced.
130
+
131
+ ```
132
+ clang-17 -O3 -DTEST_PRIVATE_FUNCTIONS -DTEST_DEBUG ../rtree.c bench.c -lm
133
+ seed=1694563565, count=1000000
134
+ -- RANDOM ORDER --
135
+ insert 1,000,000 ops in 0.151 secs 151.3 ns/op 6,610,346 op/sec
136
+ search-item 1,000,000 ops in 0.242 secs 242.4 ns/op 4,125,140 op/sec
137
+ search-1% 1,000 ops in 0.002 secs 1958.0 ns/op 510,725 op/sec
138
+ search-5% 1,000 ops in 0.017 secs 16585.0 ns/op 60,295 op/sec
139
+ search-10% 1,000 ops in 0.051 secs 50791.0 ns/op 19,688 op/sec
140
+ delete 1,000,000 ops in 0.233 secs 232.8 ns/op 4,296,197 op/sec
141
+ replace 1,000,000 ops in 0.302 secs 302.3 ns/op 3,308,355 op/sec
142
+ search-item 1,000,000 ops in 0.241 secs 241.4 ns/op 4,142,862 op/sec
143
+ search-1% 1,000 ops in 0.002 secs 1968.0 ns/op 508,130 op/sec
144
+ search-5% 1,000 ops in 0.017 secs 16680.0 ns/op 59,952 op/sec
145
+ search-10% 1,000 ops in 0.052 secs 52415.0 ns/op 19,078 op/sec
146
+ ```
147
+
148
+ The following benchmarks are the same as above but the points are ordered on a
149
+ [hilbert curve](https://en.wikipedia.org/wiki/Hilbert_curve).
150
+
151
+ ```
152
+ -- HILBERT ORDER --
153
+ insert 1,000,000 ops in 0.073 secs 73.1 ns/op 13,686,068 op/sec
154
+ search-item 1,000,000 ops in 0.083 secs 83.1 ns/op 12,039,199 op/sec
155
+ search-1% 1,000 ops in 0.002 secs 2015.0 ns/op 496,277 op/sec
156
+ search-5% 1,000 ops in 0.016 secs 16031.0 ns/op 62,379 op/sec
157
+ search-10% 1,000 ops in 0.046 secs 46241.0 ns/op 21,625 op/sec
158
+ delete 1,000,000 ops in 0.063 secs 62.7 ns/op 15,941,844 op/sec
159
+ replace 1,000,000 ops in 0.083 secs 83.2 ns/op 12,013,599 op/sec
160
+ search-item 1,000,000 ops in 0.084 secs 84.5 ns/op 11,840,344 op/sec
161
+ search-1% 1,000 ops in 0.002 secs 2110.0 ns/op 473,933 op/sec
162
+ search-5% 1,000 ops in 0.016 secs 16055.0 ns/op 62,285 op/sec
163
+ search-10% 1,000 ops in 0.046 secs 46134.0 ns/op 21,675 op/sec
164
+ ```
165
+
166
+ ## Algorithms
167
+
168
+ This implementation is a variant of the original paper:
169
+ [R-TREES. A DYNAMIC INDEX STRUCTURE FOR SPATIAL SEARCHING](https://www.cs.princeton.edu/courses/archive/fall08/cos597B/papers/rtrees.pdf)
170
+
171
+ ### Inserting
172
+
173
+ Similar to the original paper. From the root to the leaf, the rects which will incur the least enlargement are chosen. Ties go to rects with the smallest area.
174
+
175
+ Added to this implementation: when a rect does not incur any enlargement at all, it's chosen immediately and without further checks on other rects in the same node.
176
+ ### Deleting
177
+
178
+ A target rect is searched for from root to the leaf, and if found it's deleted. When there are no more child rects in a node, that node is immediately removed from the tree.
179
+
180
+ ### Searching
181
+
182
+ Same as the original algorithm.
183
+
184
+ ### Splitting
185
+
186
+ This is a custom algorithm. It attempts to minimize intensive operations such as pre-sorting the children and comparing overlaps & area sizes. The desire is to do simple single axis distance calculations on each child only once, with a target 50/50 chance that the child might be moved in-memory.
187
+
188
+ When a rect has reached its max number of entries, its largest axis is calculated and the rect is split into two smaller rects, named `left` and `right`.
189
+ Each child rect is then evaluated to determine which smaller rect it should be placed into.
190
+ Two values, `min-dist` and `max-dist`, are calculated for each child.
191
+
192
+ - `min-dist` is the distance from the parent's minimum value of its largest axis to the child's minimum value of the parent's largest axis.
193
+ - `max-dist` is the distance from the parent's maximum value of its largest axis to the child's maximum value of the parent's largest axis.
194
+
195
+ When the `min-dist` is less than `max-dist` then the child is placed into the `left` rect.
196
+ When the `max-dist` is less than `min-dist` then the child is placed into the `right` rect.
197
+ When the `min-dist` is equal to `max-dist` then the child is placed into an `equal` bucket until all of the children are evaluated.
198
+ Each `equal` rect is then one-by-one placed in either `left` or `right`, whichever has fewer children.
199
+
200
+ ## License
201
+
202
+ rtree.c source code is available under the MIT License.
@@ -0,0 +1,3 @@
1
+ repo=https://github.com/tidwall/rtree.c.git
2
+ ref=v0.5.3
3
+ commit=5717a8a1eb373428ebaae8c1c623f186ec46461f