tg_geometry 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -79
  3. data/README.md +82 -191
  4. data/Rakefile +3 -3
  5. data/benchmark/falcon_concurrency.rb +1 -1
  6. data/benchmark/feature_source.rb +92 -0
  7. data/docs/ARCHITECTURE.md +29 -107
  8. data/docs/BENCHMARKING.md +20 -1
  9. data/docs/CASUAL_EXAMPLE.md +71 -458
  10. data/docs/CONCURRENCY.md +13 -7
  11. data/docs/ERROR_HANDLING.md +30 -0
  12. data/docs/FEATURE_SOURCE.md +166 -0
  13. data/docs/LIMITATIONS.md +11 -50
  14. data/docs/MEMORY_OWNERSHIP.md +20 -2
  15. data/ext/tg_geometry/extconf.rb +46 -4
  16. data/ext/tg_geometry/tg_geometry_ext.c +2453 -150
  17. data/ext/tg_geometry/tg_geometry_vendor_json.c +17 -0
  18. data/ext/tg_geometry/tg_geometry_vendor_tg.c +3 -0
  19. data/ext/tg_geometry/vendor/.vendored +8 -2
  20. data/ext/tg_geometry/vendor/json/LICENSE +20 -0
  21. data/ext/tg_geometry/vendor/json/VERSION +3 -0
  22. data/ext/tg_geometry/vendor/json/json.c +1024 -0
  23. data/ext/tg_geometry/vendor/json/json.h +207 -0
  24. data/lib/tg/geometry/registry.rb +3 -3
  25. data/lib/tg/geometry/version.rb +1 -1
  26. data/script/vendor_libs.rb +22 -6
  27. data/spec/{expansion_a_auto_strategy_spec.rb → auto_strategy_spec.rb} +1 -1
  28. data/spec/{block_12_batch_packed_spec.rb → batch_packed_spec.rb} +1 -1
  29. data/spec/{block_20_concurrency_spec.rb → concurrency_spec.rb} +1 -1
  30. data/spec/{block_13_error_hardening_spec.rb → error_hardening_spec.rb} +1 -1
  31. data/spec/feature_source_nogvl_spec.rb +51 -0
  32. data/spec/feature_source_spec.rb +268 -0
  33. data/spec/{expansion_d_format_coverage_spec.rb → format_coverage_spec.rb} +1 -1
  34. data/spec/{block_20_fuzz_spec.rb → fuzz_spec.rb} +1 -1
  35. data/spec/{block_4_geom_api_spec.rb → geom_api_spec.rb} +1 -1
  36. data/spec/{block_3_geom_parse_spec.rb → geom_parse_spec.rb} +1 -1
  37. data/spec/{block_8_index_borrowed_geometry_spec.rb → index_borrowed_geometry_spec.rb} +1 -1
  38. data/spec/{block_6_index_build_spec.rb → index_build_spec.rb} +2 -2
  39. data/spec/{block_9_flat_query_spec.rb → index_flat_query_spec.rb} +1 -1
  40. data/spec/{block_7_index_owned_geometry_spec.rb → index_owned_geometry_spec.rb} +1 -1
  41. data/spec/{block_10_rtree_strategy_spec.rb → index_rtree_accounting_spec.rb} +1 -1
  42. data/spec/{block_11_rtree_order_spec.rb → index_rtree_order_spec.rb} +1 -1
  43. data/spec/{block_1_skeleton_spec.rb → load_and_errors_spec.rb} +1 -1
  44. data/spec/{expansion_e_low_level_geometry_spec.rb → low_level_geometry_spec.rb} +1 -1
  45. data/spec/{block_14_memory_gc_hardening_spec.rb → memory_gc_spec.rb} +1 -1
  46. data/spec/{expansion_i_ractor_spec.rb → ractor_spec.rb} +1 -1
  47. data/spec/{block_5_rect_api_spec.rb → rect_api_spec.rb} +1 -1
  48. data/spec/{expansion_b_registry_spec.rb → registry_spec.rb} +1 -1
  49. data/spec/{expansion_j_full_tg_api_coverage_spec.rb → tg_api_coverage_spec.rb} +1 -1
  50. data/spec/{block_2_vendor_spec.rb → vendor_sources_spec.rb} +4 -4
  51. metadata +39 -38
  52. data/docs/ACTIVE_RECORD.md +0 -26
  53. data/docs/AUTO_STRATEGY.md +0 -15
  54. data/docs/EXPANSION_E_TO_H_STATUS.md +0 -51
  55. data/docs/FORMAT_COVERAGE.md +0 -23
  56. data/docs/FULL_TG_API_COVERAGE.md +0 -109
  57. data/docs/LOW_LEVEL_GEOMETRY.md +0 -121
  58. data/docs/RACTOR.md +0 -40
  59. data/docs/REGISTRY.md +0 -37
  60. data/docs/RELEASE_CHECKLIST.md +0 -39
  61. data/spec/{expansion_c_active_record_source_spec.rb → active_record_source_spec.rb} +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 01f50a6c3b237bc983741eaf46a6440220c023dde0d91e8a1e64debb0dd8cf9e
4
- data.tar.gz: e07214e6f201842029f415e1dc4c39f77cd9c003187cc9068013f72300f5398a
3
+ metadata.gz: f3b2fd9ecb971425116e95aed147c0467eba4105b151ff15ba128ef35b10ad6a
4
+ data.tar.gz: e87484d1ab62a0c21b19c79603ed7da3f0017962543c32b5cd5095cc353a044e
5
5
  SHA512:
6
- metadata.gz: c80efd671c29339c6023a4b7b616eb0f4eba73513aecea5dea1f239797a8c8c3c4b9a9fd12a1bc69d359a599b3dfc3c4891a8a2073d29e5525083538379b3dea
7
- data.tar.gz: '0508a2a6f736791048212c214b75533131c83d0e38029a9b2a13d7860991f882868faeb6237f6305645fc0b4a4819ac0fd21b2c3968006e2a85d2b736425a7ff'
6
+ metadata.gz: e82e6a1729a0e076fdd16e6c378b3e363a744fddb0b41216d1371a4a7e320234772f829cb38e024b5b397bca630dcb787ae9d31e0cc114e84836ae7cb0bb904e
7
+ data.tar.gz: '0529e7db75d831d0fa39877ca0ddd4f43d9138c985471f2827bbd92abefc9fec07ad9bba881ec9ba1c157f14a8c5c99be229d2c0fa9c08db73904620e043a029'
data/CHANGELOG.md CHANGED
@@ -2,102 +2,41 @@
2
2
 
3
3
  ## 0.1.0 - unreleased
4
4
 
5
- Initial release-core implementation for `tg_geometry`.
5
+ Initial public release candidate for `tg_geometry`.
6
6
 
7
7
  ### Added
8
8
 
9
9
  - Canonical public require path: `require "tg/geometry"`.
10
10
  - Public namespace: `TG::Geometry`.
11
11
  - Native extension build through `ext/tg_geometry/extconf.rb`.
12
- - Vendored `tidwall/tg` and `tidwall/rtree.c` sources with pinned `VERSION` files and upstream license files.
12
+ - Vendored `tidwall/tg`, `tidwall/rtree.c`, and `tidwall/json.c` sources with pinned `VERSION` files and upstream license files.
13
13
  - Error classes:
14
14
  - `TG::Geometry::Error`
15
15
  - `TG::Geometry::ParseError`
16
16
  - `TG::Geometry::ArgumentError < ::ArgumentError`
17
17
  - `TG::Geometry::FrozenIndexError`
18
18
  - Immutable `TG::Geometry::Geom` parsing for GeoJSON, WKT, WKB, Hex, GeoBIN, and auto format detection.
19
- - `TG::Geometry::Geom` methods:
20
- - `#type`
21
- - `#bbox`
22
- - `#covers_xy?`
23
- - `#contains?`
24
- - `#intersects?`
25
- - `#to_geojson`
26
- - `#to_wkt`
27
- - `#to_wkb`
28
19
  - Immutable `TG::Geometry::Rect` API.
29
20
  - Immutable `TG::Geometry::Index.build` with strict `[[id, object], ...]` entry format.
30
- - Index ingestion modes:
31
- - `via: :geom` borrowed geometry with `geom_owner` lifetime protection;
32
- - `via: :geojson` owned geometry;
33
- - `via: :wkb` owned geometry.
34
- - Index strategies:
35
- - `:flat`
36
- - `:rtree`
21
+ - Index ingestion modes: `via: :geom`, `via: :geojson`, and `via: :wkb`.
22
+ - Index strategies: `:flat` and `:rtree`.
37
23
  - Deterministic insertion-order results for flat and rtree queries.
38
- - Exact rtree memory accounting through a custom malloc/free allocator with headers.
24
+ - Exact rtree memory accounting through a custom malloc/free allocator with allocation headers.
39
25
  - Native-endian packed point batch API: `TG::Geometry::Index#covering_ids_batch_packed`.
40
- - Debug-only test hooks under `TG_DEBUG_TEST=1` for allocation failure simulation and byte counter inspection.
41
- - Block 14 memory/GC/compaction hardening specs.
42
- - Benchmark harnesses:
43
- - `benchmark/parse_throughput.rb`
44
- - `benchmark/gvl_threshold.rb`
45
- - `benchmark/flat_vs_rtree.rb`
46
- - `benchmark/batch_packed_vs_loop.rb`
47
- - `benchmark/falcon_concurrency.rb`
48
- - `benchmark/objectspace_memsize.rb`
49
- - `benchmark/rss_stability.rb`
50
- - Documentation:
51
- - `docs/ARCHITECTURE.md`
52
- - `docs/MEMORY_OWNERSHIP.md`
53
- - `docs/CONCURRENCY.md`
54
- - `docs/ERROR_HANDLING.md`
55
- - `docs/BENCHMARKING.md`
56
- - `docs/LIMITATIONS.md`
57
- - `docs/RELEASE_CHECKLIST.md`
26
+ - GeoJSON FeatureSource APIs for reading FeatureCollection entries/features and building an Index directly from file, JSON string, or IO.
27
+ - FeatureSource bulk execution now runs file read, JSON traversal, and TG geometry parsing in a C-only no-GVL phase before Ruby materialization / Index ownership transfer. The implementation uses Ruby VM no-GVL APIs only: `RB_NOGVL_OFFLOAD_SAFE` when available, otherwise `rb_thread_call_without_gvl`. It does not use a manual Fiber scheduler block/unblock worker path.
28
+ - Ruby-level `TG::Geometry::Registry` reload helper.
29
+ - Optional `TG::Geometry::ActiveRecordSource` helper without a Rails runtime dependency.
30
+ - Benchmark scripts under `benchmark/`.
31
+ - Minimal documentation under `docs/`.
58
32
 
59
33
  ### Not included
60
34
 
61
- - `strategy: :auto` is not part of the release-core contract; it is tracked as Expansion Block A below.
62
- - No Ractor support claim.
63
- - No no-GVL execution.
64
- - No full GIS, routing, geocoding, projections, geodesic helpers, nearest POI index, or result-geometry overlay operations.
65
- - No public performance claims until benchmark results are produced by this gem.
35
+ - `strategy: :auto`.
36
+ - Ractor support claim.
37
+ - General no-GVL claim for short query/parse/write paths.
38
+ - Full GIS functionality such as routing, projections, geodesics, overlay result geometries, or nearest POI search.
39
+ - Public callback/search APIs.
40
+ - Universal performance claims.
66
41
 
67
- ### OPEN QUESTION
68
-
69
- - Final ASAN setup requires Roman approval before replacing the placeholder CI job.
70
- - Final Valgrind setup requires Roman approval before replacing the placeholder CI job.
71
-
72
- ## Unreleased
73
-
74
- ### Added
75
-
76
- - Expansion Block A status: `strategy: :auto` remains postponed for the first public release; explicit `:flat` / `:rtree` strategies are required.
77
- - Expansion Block B: `TG::Geometry::Registry` Ruby helper for immutable Index reload/swap workflows.
78
- - Expansion Block C: optional `TG::Geometry::ActiveRecordSource` helper that converts relation-like records into strict `[[id, object], ...]` entries without adding a Rails dependency.
79
- - Expansion Block D: `TG::Geometry.parse_hex`, `TG::Geometry.parse_geobin`, `TG::Geometry::Geom#to_hex`, `#to_geobin`, and `#extra_json`.
80
- - Expansion Block E: read-only borrowed low-level wrappers: `TG::Geometry::Line`, `TG::Geometry::Ring`, `TG::Geometry::Polygon`, plus `TG::Geometry::Geom#point`, `#line`, and `#polygon`.
81
- - Expansion Block I: Ractor unsupported-boundary investigation documented in `docs/RACTOR.md` with specs asserting native wrappers are not treated as shareable Ractor objects.
82
- - Expansion Block J grouped API coverage:
83
- - safe point and empty geometry constructors;
84
- - additional `TG::Geometry::Geom` predicates;
85
- - geometry metadata and Z/M read accessors;
86
- - MultiPoint/MultiLineString/MultiPolygon and GeometryCollection accessors;
87
- - borrowed child `TG::Geometry::Geom` wrappers with `geom_owner`;
88
- - value `TG::Geometry::Segment` wrappers from Line/Ring segment accessors.
89
-
90
- ### Fixed
91
-
92
- - Corrected `benchmark/gvl_threshold.rb` so each target size uses a valid WKT payload near that size instead of repeatedly benchmarking the same tiny polygon.
93
-
94
- ### Documentation
95
-
96
- - Added docs for Registry, ActiveRecord source helper, additional format coverage, low-level borrowed geometry wrappers, Ractor unsupported-boundary status, grouped full TG API coverage, Auto Strategy postponed status, and Expansion Blocks E–H status.
97
-
98
- ### OPEN QUESTION
99
-
100
- - Expansion Block F callback/search APIs remain blocked until a callback safety contract, exception semantics, GVL rules, and callback overhead benchmarks are approved.
101
- - Expansion Block G no-allocation point query optimization remains blocked until boundary/hole-boundary equivalence tests and benchmarks prove it preserves `:covers` / `:contains` semantics.
102
- - Expansion Block H geodesic/projection helpers remain blocked until an explicit optional dependency/API decision is approved.
103
- - Remaining Expansion Block J scope such as Line/Ring/Polygon constructors, callback/search APIs, nearest segment APIs, global environment configuration, and allocator override APIs remains blocked until separate ownership/thread-safety contracts are approved.
42
+ - Suppressed known GCC diagnostics from vendored tidwall/tg wrapper on Linux CI without muting warnings from tg_geometry's own C code.
data/README.md CHANGED
@@ -1,39 +1,17 @@
1
1
  # tg_geometry
2
2
 
3
- `tg_geometry` is a Ruby C extension around the vendored `tidwall/tg` geometry
4
- library and `tidwall/rtree.c`.
3
+ `tg_geometry` is a Ruby C extension around vendored `tidwall/tg`, `tidwall/rtree.c`, and pinned `tidwall/json.c`.
5
4
 
6
- It exposes the public Ruby namespace `TG::Geometry` and the canonical require
7
- path:
5
+ It exposes the public Ruby namespace `TG::Geometry`:
8
6
 
9
7
  ```ruby
10
8
  require "tg/geometry"
11
9
  ```
12
10
 
13
- The gem targets fast in-process planar geometry parsing, predicates,
14
- format conversion, and geofencing-oriented immutable indexes. It does not try
15
- to be a full GIS system.
16
-
17
- ## Status
18
-
19
- This repository is prepared as a first public release candidate with an
20
- expanded API surface:
21
-
22
- - release-core `Geom`, `Rect`, and immutable `Index` APIs;
23
- - expanded format coverage for Hex and GeoBIN;
24
- - read-only borrowed wrappers for lower-level TG geometry components;
25
- - `Registry` reload/swap sugar;
26
- - optional ActiveRecord-style source helpers that do not add a Rails runtime
27
- dependency.
28
-
29
- `strategy: :auto`, Ractor support, callback/search APIs, no-allocation point
30
- query optimization, geodesic helpers, projections, and no-GVL execution are not
31
- claimed in this release.
11
+ The gem is focused on fast in-process planar geometry parsing, predicates, format conversion, GeoJSON FeatureCollection imports, and immutable geofencing indexes. It is not a full GIS system.
32
12
 
33
13
  ## Installation
34
14
 
35
- Add this line to your application's Gemfile:
36
-
37
15
  ```ruby
38
16
  gem "tg_geometry"
39
17
  ```
@@ -44,17 +22,13 @@ Then run:
44
22
  bundle install
45
23
  ```
46
24
 
47
- The extension is built from vendored C sources. There is no GEOS, PostGIS,
48
- PROJ, GDAL, system TG, or system rtree dependency.
25
+ The extension builds from vendored C sources. It does not require GEOS, PostGIS, PROJ, GDAL, system TG, or system rtree.
49
26
 
50
- Supported first-release platforms are Linux and macOS on x86_64/aarch64.
51
- Windows is not supported in this release.
27
+ Supported platforms: Linux and macOS on x86_64/aarch64. Windows is not supported for the first release.
52
28
 
53
- ## Basic parsing and predicates
29
+ ## Parsing and predicates
54
30
 
55
31
  ```ruby
56
- require "tg/geometry"
57
-
58
32
  zone = TG::Geometry.parse_geojson(<<~JSON)
59
33
  {
60
34
  "type": "Polygon",
@@ -67,16 +41,11 @@ zone.type # => :polygon
67
41
  zone.covers_xy?(5, 5) # => true
68
42
  zone.covers_xy?(0, 0) # => true, boundary is covered
69
43
  zone.bbox # => #<TG::Geometry::Rect ...>
70
-
71
- wkt = zone.to_wkt
72
- wkb = zone.to_wkb
44
+ zone.to_wkt
45
+ zone.to_wkb
73
46
  ```
74
47
 
75
- `TG::Geometry::Geom` objects are immutable. They cannot be manually allocated or
76
- manually freed from Ruby. Native memory is released by Ruby GC through the typed
77
- data wrapper.
78
-
79
- ## Parse API
48
+ Parse shortcuts:
80
49
 
81
50
  ```ruby
82
51
  TG::Geometry.parse(str, format: :auto, index: :ystripes)
@@ -87,62 +56,12 @@ TG::Geometry.parse_hex(str, index: :ystripes)
87
56
  TG::Geometry.parse_geobin(bytes, index: :ystripes)
88
57
  ```
89
58
 
90
- Accepted `format:` values for `parse` are:
91
-
92
- - `:auto`
93
- - `:geojson`
94
- - `:wkt`
95
- - `:wkb`
96
- - `:hex`
97
- - `:geobin`
98
-
99
- Accepted TG internal polygon index values are:
100
-
101
- - `:default`
102
- - `:none`
103
- - `:natural`
104
- - `:ystripes`
105
-
106
- Parse failures raise `TG::Geometry::ParseError`. Invalid options raise
107
- `TG::Geometry::ArgumentError`, which inherits from Ruby's `::ArgumentError`.
108
-
109
- ## Geom API
110
-
111
- Release-core methods:
112
-
113
- ```ruby
114
- geom.type
115
- geom.bbox
116
- geom.covers_xy?(x, y)
117
- geom.contains?(other_geom)
118
- geom.intersects?(other_geom)
119
- geom.to_geojson
120
- geom.to_wkt
121
- geom.to_wkb
122
- ```
123
-
124
- Expanded methods include additional predicates, format writers, metadata
125
- accessors, and read-only borrowed child wrappers. See:
126
-
127
- - `docs/FORMAT_COVERAGE.md`
128
- - `docs/LOW_LEVEL_GEOMETRY.md`
129
- - `docs/FULL_TG_API_COVERAGE.md`
59
+ `TG::Geometry::Geom` objects are immutable and cannot be manually allocated or manually freed from Ruby.
130
60
 
131
- For point predicates, this release prioritizes exact `covers` / `contains`
132
- semantics over the fastest possible no-allocation path. Query methods construct
133
- a temporary TG point geometry and free it before returning. A future optimized
134
- point path requires boundary and hole-boundary equivalence tests plus benchmark
135
- proof.
136
-
137
- ## Rect API
61
+ ## Rect
138
62
 
139
63
  ```ruby
140
64
  rect = TG::Geometry::Rect.new(0, 0, 10, 10)
141
-
142
- rect.min_x
143
- rect.min_y
144
- rect.max_x
145
- rect.max_y
146
65
  rect.center # => [5.0, 5.0]
147
66
  rect.contains_point?(5, 5) # => true
148
67
  rect.intersects?(other_rect)
@@ -150,16 +69,10 @@ rect.expand_to_include(other_rect)
150
69
  rect.expand_to_include_point(x, y)
151
70
  ```
152
71
 
153
- `Rect` rejects non-finite coordinates and invalid coordinate order. It is frozen
154
- after construction.
155
-
156
- There is intentionally no first-release `Rect#contains?` method because the name
157
- is ambiguous. Use `contains_point?`.
72
+ `Rect` rejects non-finite coordinates and invalid coordinate order. It is frozen after construction.
158
73
 
159
74
  ## Immutable Index
160
75
 
161
- `TG::Geometry::Index` is built once and then read-only forever.
162
-
163
76
  ```ruby
164
77
  entries = [
165
78
  [:zone_a, '{"type":"Polygon","coordinates":[[[0,0],[10,0],[10,10],[0,10],[0,0]]]}'],
@@ -183,7 +96,7 @@ index.covering_ids(5, 5) # => [:zone_a]
183
96
  index.intersecting_rect(0, 0, 25, 25)
184
97
  ```
185
98
 
186
- Accepted input format:
99
+ Accepted input shape:
187
100
 
188
101
  ```ruby
189
102
  [[id1, object1], [id2, object2], ...]
@@ -194,17 +107,13 @@ Rules:
194
107
  - `entries` must be an Array.
195
108
  - Every entry must be a two-element Array.
196
109
  - `id` may be any Ruby object except `nil`.
197
- - `false` ids are accepted, but discouraged because `find_covering` uses `nil`
198
- for no match.
199
110
  - Duplicate ids are allowed.
200
- - Returned ids are the same Ruby objects stored in the index; they are not
201
- copied, frozen, stringified, or duplicated.
111
+ - Returned ids are the same Ruby objects stored in the index.
202
112
  - Result order is insertion order for both `:flat` and `:rtree`.
203
113
 
204
114
  Accepted `via:` modes:
205
115
 
206
- - `:geom` — borrow an existing `TG::Geometry::Geom`; the index marks the owner
207
- wrapper so the borrowed native pointer remains valid.
116
+ - `:geom` — borrow an existing `TG::Geometry::Geom` and keep its owner alive.
208
117
  - `:geojson` — parse and own native TG geometries inside the index.
209
118
  - `:wkb` — parse and own native TG geometries inside the index.
210
119
 
@@ -213,21 +122,61 @@ Accepted strategies:
213
122
  - `:flat`
214
123
  - `:rtree`
215
124
 
216
- `strategy: :auto` is not exposed in this release. The benchmark output does not
217
- support a single universal threshold: flat scan may win for early insertion-order
218
- hits or heavily overlapping datasets, while rtree may win for misses, later hits,
219
- or selective rectangle queries. Choose the strategy explicitly and benchmark on
220
- your own data.
221
-
222
125
  Accepted predicates:
223
126
 
224
127
  - `:covers` — default for geofencing; boundary points are included.
225
- - `:contains` — stricter OGC-style containment semantics.
128
+ - `:contains` — stricter containment semantics.
226
129
 
227
- ## Packed batch point queries
130
+ `strategy: :auto` is intentionally not exposed. Choose the strategy explicitly and benchmark on your own data.
131
+
132
+ ## GeoJSON FeatureSource
133
+
134
+ `TG::Geometry::FeatureSource` reads GeoJSON `FeatureCollection` sources without `JSON.parse` of the whole document into Ruby Hash/Array objects.
135
+
136
+ ```ruby
137
+ entries = TG::Geometry::FeatureSource.read_entries_file(
138
+ "zones.geojson",
139
+ id: ["properties", "@id"],
140
+ only: [:polygon, :multipolygon]
141
+ )
142
+
143
+ index = TG::Geometry::Index.build(
144
+ entries,
145
+ via: :geojson,
146
+ strategy: :rtree,
147
+ predicate: :covers
148
+ )
149
+ ```
150
+
151
+ For imports that also need raw properties JSON:
152
+
153
+ ```ruby
154
+ features = TG::Geometry::FeatureSource.read_features_file(
155
+ "zones.geojson",
156
+ id: ["properties", "@id"],
157
+ report: true,
158
+ on_invalid: :skip
159
+ )
160
+
161
+ features[:features].each do |id, geometry_json, properties_json|
162
+ # Store geometry_json and parse properties_json in application code if needed.
163
+ end
164
+ ```
165
+
166
+ For direct file-to-index loading:
167
+
168
+ ```ruby
169
+ index = TG::Geometry::FeatureSource.build_index_file(
170
+ "zones.geojson",
171
+ id: ["properties", "@id"],
172
+ strategy: :rtree,
173
+ predicate: :covers
174
+ )
175
+ ```
228
176
 
229
- For high-throughput same-process point lookups, the index supports a packed
230
- native-endian double input format:
177
+ FeatureSource methods are explicit: use `_file` for paths, `_json` for raw content strings, and `_io` for IO objects. There is no path/content auto-detection.
178
+
179
+ ## Packed batch point queries
231
180
 
232
181
  ```ruby
233
182
  points = [5.0, 5.0, 25.0, 25.0].pack("d*")
@@ -235,18 +184,9 @@ index.covering_ids_batch_packed(points)
235
184
  # => [:zone_a, :zone_b]
236
185
  ```
237
186
 
238
- Input format:
239
-
240
- - Ruby String treated as raw bytes.
241
- - Native-endian doubles.
242
- - Pairs of `lon, lat`.
243
- - Length must be a multiple of 16 bytes.
244
- - Empty string returns `[]`.
187
+ Input is a Ruby String containing native-endian doubles in `lon, lat` pairs. Length must be a multiple of 16 bytes. Empty string returns `[]`.
245
188
 
246
- This format is intentionally native-endian for same-process speed and simplicity.
247
- Do not use it as a cross-platform serialized file format.
248
-
249
- ## Registry reload pattern
189
+ ## Registry helper
250
190
 
251
191
  `Registry` is Ruby-level sugar over immutable indexes:
252
192
 
@@ -266,53 +206,15 @@ registry.reload!
266
206
  registry.find_covering(5, 5)
267
207
  ```
268
208
 
269
- Reload builds a new full immutable index first and swaps the reference only after
270
- successful build:
271
-
272
- ```ruby
273
- new_index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
274
- @index = new_index
275
- ```
276
-
277
- Old indexes remain alive while existing readers hold references to them. There
278
- is no in-place mutation, no public `add`, `delete`, `clear`, or `rebuild!` API on
279
- `Index`.
280
-
281
- See `docs/REGISTRY.md` and `docs/ACTIVE_RECORD.md` for the expanded helpers.
209
+ Reload builds a new immutable index first and swaps the reference only after a successful build. Existing readers keep using the previous index safely.
282
210
 
283
- ## Memory ownership model
211
+ ## Memory and concurrency
284
212
 
285
- The implementation uses explicit allocator pairs and GC accounting:
213
+ The implementation uses explicit allocator pairs and Ruby GC accounting for native memory. `ObjectSpace.memsize_of(index)` includes entries, owned TG geometries, and exact rtree allocation bytes. Borrowed geometries are not double-counted by the index.
286
214
 
287
- | Resource | Allocator | Deallocator | Owner |
288
- |---|---|---|---|
289
- | `tg_geom_wrapper_t` | `TypedData_Make_Struct` / Ruby allocator | `ruby_xfree` | Ruby `Geom` object |
290
- | TG geometry in `Geom` | TG parser/constructor | `tg_geom_free` | `Geom` wrapper |
291
- | `tg_index_t` | `TypedData_Make_Struct` / Ruby allocator | `ruby_xfree` | Ruby `Index` object |
292
- | Index entries array | `calloc` | `free` | `Index` |
293
- | TG geometry via `:geojson` / `:wkb` | TG parser | `tg_geom_free` | `Index` |
294
- | TG geometry via `:geom` | Existing `Geom` wrapper | Existing `Geom` wrapper | Borrowed by `Index` through `geom_owner` |
295
- | rtree internals | custom `tg_rtree_malloc` with header | custom `tg_rtree_free` | rtree / `Index` accounting |
296
- | Ruby ids | Ruby VM | Ruby GC | Marked and compacted by `Index` |
215
+ `Index` and `Geom` are immutable after construction. Concurrent read-only use from normal Ruby threads is supported. Short query/parse/write paths keep the GVL. FeatureSource bulk loading uses a C-only no-GVL heavy phase for file read, JSON traversal, and TG geometry parsing, then reacquires the GVL to create Ruby objects or transfer ownership into the final Index. On Ruby versions that expose `RB_NOGVL_OFFLOAD_SAFE`, that no-GVL phase is marked offload-safe for the Ruby VM. On older Rubies it still releases the GVL for other Ruby threads, but no explicit Fiber scheduler friendliness is claimed.
297
216
 
298
- `ObjectSpace.memsize_of(index)` includes entries, owned TG geometries, and exact
299
- rtree allocation bytes. Borrowed geometries are not double-counted by the index.
300
-
301
- See `docs/MEMORY_OWNERSHIP.md` for the full table and cleanup rules.
302
-
303
- ## Concurrency model
304
-
305
- `Index` and `Geom` are immutable after construction. Concurrent read-only use
306
- from normal Ruby threads is supported by design and covered by tests.
307
-
308
- The first release keeps GVL for parse, write, query, batch, and rtree build/free
309
- paths. This is intentional: the rtree allocator calls Ruby GC accounting APIs,
310
- and no-GVL execution would require separate input-copying and allocator-accounting
311
- design.
312
-
313
- No Ractor support is claimed.
314
-
315
- See `docs/CONCURRENCY.md` and `docs/RACTOR.md`.
217
+ No Ractor support and no universal performance claim are advertised.
316
218
 
317
219
  ## Benchmarks
318
220
 
@@ -326,19 +228,16 @@ bundle exec ruby benchmark/objectspace_memsize.rb
326
228
  bundle exec ruby benchmark/rss_stability.rb
327
229
  bundle exec ruby benchmark/gvl_threshold.rb
328
230
  bundle exec ruby benchmark/falcon_concurrency.rb
231
+ bundle exec ruby benchmark/feature_source.rb
329
232
  ```
330
233
 
331
- By default, benchmarks use a fast local matrix. Set `TGEOMETRY_BENCH_FULL=1` for
332
- the larger matrix where supported.
333
-
334
- The repository benchmarks are engineering tools, not universal marketing claims.
335
- Do not copy upstream TG C benchmark numbers as Ruby gem performance claims.
234
+ The benchmarks are engineering tools, not marketing claims.
336
235
 
337
236
  ## Limitations
338
237
 
339
238
  `tg_geometry` is not a full GIS system.
340
239
 
341
- Not included in this release:
240
+ Not included:
342
241
 
343
242
  - geocoding;
344
243
  - routing;
@@ -346,17 +245,14 @@ Not included in this release:
346
245
  - geodesic distance/area;
347
246
  - buffer / union / difference / overlay result geometry operations;
348
247
  - nearest POI index;
349
- - Rails dependency in the core extension;
248
+ - Rails dependency in the native extension;
350
249
  - Redis or external service dependency;
351
250
  - public callback/search APIs;
352
251
  - Ractor support claim;
353
252
  - no-GVL execution claim;
354
253
  - universal `:auto` strategy.
355
254
 
356
- TG works in planar XY coordinates. If lon/lat coordinates are passed in, length,
357
- area, and perimeter-style values are in input coordinate units, not meters.
358
- Use PostGIS, GEOS, PROJ, or other GIS tooling when full GIS functionality is
359
- needed.
255
+ TG works in planar XY coordinates. If lon/lat coordinates are passed in, length, area, and perimeter-style values are in input coordinate units, not meters.
360
256
 
361
257
  ## Development
362
258
 
@@ -369,17 +265,12 @@ bundle exec rake spec
369
265
  Useful targeted checks:
370
266
 
371
267
  ```bash
372
- bundle exec rspec spec/block_12_batch_packed_spec.rb
373
- bundle exec rspec spec/block_14_memory_gc_hardening_spec.rb
374
- bundle exec rspec spec/block_20_concurrency_spec.rb
375
- bundle exec rspec spec/block_20_fuzz_spec.rb
268
+ bundle exec rspec spec/batch_packed_spec.rb
269
+ bundle exec rspec spec/memory_gc_spec.rb
270
+ bundle exec rspec spec/concurrency_spec.rb
271
+ bundle exec rspec spec/fuzz_spec.rb
376
272
  ```
377
273
 
378
- Memory-tool CI jobs for ASAN and Valgrind are intentionally left as OPEN QUESTION
379
- placeholders until the exact setup is approved. Do not replace them with guessed
380
- configuration.
381
-
382
274
  ## License
383
275
 
384
- MIT. Vendored upstream license files for `tidwall/tg` and `tidwall/rtree.c` are
385
- included under `ext/tg_geometry/vendor/`.
276
+ MIT. Vendored upstream license files for `tidwall/tg`, `tidwall/rtree.c`, and `tidwall/json.c` are included under `ext/tg_geometry/vendor/`.
data/Rakefile CHANGED
@@ -82,7 +82,7 @@ begin
82
82
  end
83
83
 
84
84
  RSpec::Core::RakeTask.new(:gc_compact => "compile:test") do |task|
85
- task.pattern = "spec/block_3_geom_parse_spec.rb spec/block_6_index_build_spec.rb spec/block_8_index_borrowed_geometry_spec.rb spec/block_14_memory_gc_hardening_spec.rb"
85
+ task.pattern = "spec/geom_parse_spec.rb spec/index_build_spec.rb spec/index_borrowed_geometry_spec.rb spec/memory_gc_spec.rb"
86
86
  end
87
87
  end
88
88
  rescue LoadError
@@ -109,12 +109,12 @@ namespace :benchmark do
109
109
  end
110
110
 
111
111
  namespace :vendor do
112
- desc "Sync vendored tidwall/tg and rtree.c sources to pinned commits"
112
+ desc "Sync vendored C sources to pinned commits"
113
113
  task :sync do
114
114
  ruby "script/vendor_libs.rb", "--sync"
115
115
  end
116
116
 
117
- desc "Verify vendored tidwall/tg and rtree.c sources against pinned tree SHA256"
117
+ desc "Verify vendored C sources against pinned tree SHA256"
118
118
  task :verify do
119
119
  ruby "script/vendor_libs.rb", "--verify"
120
120
  end
data/benchmark/falcon_concurrency.rb CHANGED
@@ -4,7 +4,7 @@ require_relative "_support"
4
4
 
5
5
  TGGeometryBench.say_header("falcon_concurrency")
6
6
  puts "No Falcon dependency is used here. This is a thread-read baseline for the immutable Index model."
7
- puts "Falcon/Async behavior remains an OPEN QUESTION until Roman approves a dedicated dependency/setup."
7
+ puts "Falcon/Async behavior remains an Pending decision until Roman approves a dedicated dependency/setup."
8
8
 
9
9
  entries = TGGeometryBench.compact_entries(1_000)
10
10
  index = TGGeometryBench.build_index(entries, strategy: :rtree)
data/benchmark/feature_source.rb ADDED
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+ require "json"
5
+ require "tempfile"
6
+
7
+ module FeatureSourceBenchData
8
+ module_function
9
+
10
+ def feature_collection(count)
11
+ features = Array.new(count) do |i|
12
+ x = i % 250
13
+ y = i / 250
14
+ <<~JSON.chomp
15
+ {"type":"Feature","properties":{"@id":"zone/#{i}","name":"Zone #{i}"},"geometry":#{TGGeometryBench.box_geojson(x, y, x + 0.8, y + 0.8)}}
16
+ JSON
17
+ end
18
+
19
+ %({"type":"FeatureCollection","features":[#{features.join(",")}]} )
20
+ end
21
+
22
+ def write_temp_geojson(json)
23
+ file = Tempfile.new(["tg_geometry_feature_source", ".geojson"])
24
+ file.binmode
25
+ file.write(json)
26
+ file.flush
27
+ file
28
+ end
29
+
30
+ def ruby_json_parse_entries(path)
31
+ parsed = JSON.parse(File.binread(path))
32
+ parsed.fetch("features").filter_map do |feature|
33
+ geometry = feature["geometry"]
34
+ next unless %w[Polygon MultiPolygon].include?(geometry["type"])
35
+
36
+ [feature.fetch("properties").fetch("@id"), JSON.generate(geometry)]
37
+ end
38
+ end
39
+ end
40
+
41
+ TGGeometryBench.say_header("feature_source")
42
+
43
+ sizes = ENV["TGEOMETRY_BENCH_FULL"] == "1" ? [100, 1_000, 10_000, 50_000] : [100, 1_000]
44
+
45
+ sizes.each do |size|
46
+ json = FeatureSourceBenchData.feature_collection(size)
47
+ file = FeatureSourceBenchData.write_temp_geojson(json)
48
+ path = file.path
49
+
50
+ begin
51
+ ruby_entries = nil
52
+ feature_entries = nil
53
+ feature_rows = nil
54
+ direct_index = nil
55
+ roundtrip_index = nil
56
+
57
+ ruby_time = Benchmark.realtime do
58
+ ruby_entries = FeatureSourceBenchData.ruby_json_parse_entries(path)
59
+ end
60
+
61
+ read_entries_time = Benchmark.realtime do
62
+ feature_entries = TG::Geometry::FeatureSource.read_entries_file(path, id: ["properties", "@id"])
63
+ end
64
+
65
+ read_features_time = Benchmark.realtime do
66
+ feature_rows = TG::Geometry::FeatureSource.read_features_file(path, id: ["properties", "@id"])
67
+ end
68
+
69
+ direct_index_time = Benchmark.realtime do
70
+ direct_index = TG::Geometry::FeatureSource.build_index_file(path, id: ["properties", "@id"], strategy: :rtree)
71
+ end
72
+
73
+ roundtrip_index_time = Benchmark.realtime do
74
+ roundtrip_index = TG::Geometry::Index.build(feature_entries, via: :geojson, strategy: :rtree)
75
+ end
76
+
77
+ puts "n=#{size} ruby_json_parse_sec=%.6f read_entries_sec=%.6f read_features_sec=%.6f build_index_direct_sec=%.6f build_index_from_entries_sec=%.6f entries=%d features=%d direct_size=%d roundtrip_size=%d rss_kb=%d" % [
78
+ ruby_time,
79
+ read_entries_time,
80
+ read_features_time,
81
+ direct_index_time,
82
+ roundtrip_index_time,
83
+ ruby_entries.length,
84
+ feature_rows.length,
85
+ direct_index.size,
86
+ roundtrip_index.size,
87
+ TGGeometryBench.rss_kb
88
+ ]
89
+ ensure
90
+ file.close!
91
+ end
92
+ end