tg_geometry 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +103 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE.txt +21 -0
  5. data/README.md +385 -0
  6. data/Rakefile +129 -0
  7. data/benchmark/_support.rb +115 -0
  8. data/benchmark/batch_packed_vs_loop.rb +27 -0
  9. data/benchmark/falcon_concurrency.rb +25 -0
  10. data/benchmark/flat_vs_rtree.rb +27 -0
  11. data/benchmark/gvl_threshold.rb +41 -0
  12. data/benchmark/objectspace_memsize.rb +17 -0
  13. data/benchmark/parse_throughput.rb +38 -0
  14. data/benchmark/rss_stability.rb +70 -0
  15. data/docs/ACTIVE_RECORD.md +26 -0
  16. data/docs/ARCHITECTURE.md +130 -0
  17. data/docs/AUTO_STRATEGY.md +15 -0
  18. data/docs/BENCHMARKING.md +75 -0
  19. data/docs/CASUAL_EXAMPLE.md +618 -0
  20. data/docs/CONCURRENCY.md +65 -0
  21. data/docs/ERROR_HANDLING.md +55 -0
  22. data/docs/EXPANSION_E_TO_H_STATUS.md +51 -0
  23. data/docs/FORMAT_COVERAGE.md +23 -0
  24. data/docs/FULL_TG_API_COVERAGE.md +109 -0
  25. data/docs/LIMITATIONS.md +61 -0
  26. data/docs/LOW_LEVEL_GEOMETRY.md +121 -0
  27. data/docs/MEMORY_OWNERSHIP.md +94 -0
  28. data/docs/RACTOR.md +40 -0
  29. data/docs/REGISTRY.md +37 -0
  30. data/docs/RELEASE_CHECKLIST.md +39 -0
  31. data/ext/tg_geometry/extconf.rb +91 -0
  32. data/ext/tg_geometry/tg_geometry_ext.c +3054 -0
  33. data/ext/tg_geometry/tg_geometry_vendor_rtree.c +1 -0
  34. data/ext/tg_geometry/tg_geometry_vendor_tg.c +24 -0
  35. data/ext/tg_geometry/vendor/.vendored +16 -0
  36. data/ext/tg_geometry/vendor/rtree/LICENSE +20 -0
  37. data/ext/tg_geometry/vendor/rtree/README.md +202 -0
  38. data/ext/tg_geometry/vendor/rtree/VERSION +3 -0
  39. data/ext/tg_geometry/vendor/rtree/rtree.c +840 -0
  40. data/ext/tg_geometry/vendor/rtree/rtree.h +105 -0
  41. data/ext/tg_geometry/vendor/tg/LICENSE +19 -0
  42. data/ext/tg_geometry/vendor/tg/README.md +197 -0
  43. data/ext/tg_geometry/vendor/tg/VERSION +3 -0
  44. data/ext/tg_geometry/vendor/tg/tg.c +16010 -0
  45. data/ext/tg_geometry/vendor/tg/tg.h +359 -0
  46. data/lib/tg/geometry/active_record_source.rb +57 -0
  47. data/lib/tg/geometry/registry.rb +119 -0
  48. data/lib/tg/geometry/version.rb +7 -0
  49. data/lib/tg/geometry.rb +6 -0
  50. data/lib/tg_geometry.rb +3 -0
  51. data/script/vendor_libs.rb +264 -0
  52. data/spec/block_10_rtree_strategy_spec.rb +82 -0
  53. data/spec/block_11_rtree_order_spec.rb +53 -0
  54. data/spec/block_12_batch_packed_spec.rb +55 -0
  55. data/spec/block_13_error_hardening_spec.rb +65 -0
  56. data/spec/block_14_memory_gc_hardening_spec.rb +116 -0
  57. data/spec/block_1_skeleton_spec.rb +45 -0
  58. data/spec/block_20_concurrency_spec.rb +157 -0
  59. data/spec/block_20_fuzz_spec.rb +145 -0
  60. data/spec/block_2_vendor_spec.rb +79 -0
  61. data/spec/block_3_geom_parse_spec.rb +89 -0
  62. data/spec/block_4_geom_api_spec.rb +90 -0
  63. data/spec/block_5_rect_api_spec.rb +96 -0
  64. data/spec/block_6_index_build_spec.rb +111 -0
  65. data/spec/block_7_index_owned_geometry_spec.rb +143 -0
  66. data/spec/block_8_index_borrowed_geometry_spec.rb +106 -0
  67. data/spec/block_9_flat_query_spec.rb +65 -0
  68. data/spec/expansion_a_auto_strategy_spec.rb +14 -0
  69. data/spec/expansion_b_registry_spec.rb +47 -0
  70. data/spec/expansion_c_active_record_source_spec.rb +42 -0
  71. data/spec/expansion_d_format_coverage_spec.rb +30 -0
  72. data/spec/expansion_e_low_level_geometry_spec.rb +82 -0
  73. data/spec/expansion_i_ractor_spec.rb +25 -0
  74. data/spec/expansion_j_full_tg_api_coverage_spec.rb +114 -0
  75. data/spec/spec_helper.rb +15 -0
  76. metadata +157 -0
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "rbconfig"
5
+
6
+ ROOT = File.expand_path("..", __dir__)
7
+ EXT_DIR = File.join(ROOT, "ext", "tg_geometry")
8
+ LIB_DIR = File.join(ROOT, "lib")
9
+
10
+ $LOAD_PATH.unshift(LIB_DIR) unless $LOAD_PATH.include?(LIB_DIR)
11
+ $LOAD_PATH.unshift(EXT_DIR) unless $LOAD_PATH.include?(EXT_DIR)
12
+
13
+ begin
14
+ require "tg/geometry"
15
+ rescue LoadError
16
+ warn "Native extension is not built. Run: bundle exec rake compile"
17
+ raise
18
+ end
19
+
20
# Shared helpers for the benchmark scripts: synthetic entry generators, query
# fixtures per scenario kind, index construction, and process RSS sampling.
#
# All methods are module functions, e.g. `TGGeometryBench.sizes`.
module TGGeometryBench
  module_function

  # Entry counts used when TGEOMETRY_BENCH_FULL=1.
  SIZES = [100, 500, 1_000, 5_000, 50_000].freeze
  # Reduced entry counts used by default.
  FAST_SIZES = [100, 500, 1_000].freeze

  # True when the full scenario matrix was requested with TGEOMETRY_BENCH_FULL=1.
  def full?
    ENV["TGEOMETRY_BENCH_FULL"] == "1"
  end

  # Entry counts to benchmark: SIZES in full mode, FAST_SIZES otherwise.
  def sizes
    full? ? SIZES : FAST_SIZES
  end

  # Iteration count for a timed loop.
  #
  # Reads TGEOMETRY_BENCH_ITERATIONS and falls back to +default+.
  # Raises ArgumentError when the value is not a positive integer: zero or
  # negative counts would produce empty timed loops and meaningless rates.
  def iterations(default)
    raw = ENV.fetch("TGEOMETRY_BENCH_ITERATIONS", default.to_s)
    count = Integer(raw, exception: false)
    return count if count&.positive?

    raise ArgumentError, "TGEOMETRY_BENCH_ITERATIONS must be a positive integer, got #{raw.inspect}"
  end

  # WKT polygon for the axis-aligned box (min_x, min_y)-(max_x, max_y).
  def box_wkt(min_x, min_y, max_x, max_y)
    "POLYGON ((#{min_x} #{min_y}, #{max_x} #{min_y}, #{max_x} #{max_y}, #{min_x} #{max_y}, #{min_x} #{min_y}))"
  end

  # GeoJSON Polygon string for the axis-aligned box (min_x, min_y)-(max_x, max_y).
  def box_geojson(min_x, min_y, max_x, max_y)
    %({"type":"Polygon","coordinates":[[[#{min_x},#{min_y}],[#{max_x},#{min_y}],[#{max_x},#{max_y}],[#{min_x},#{max_y}],[#{min_x},#{min_y}]]]})
  end

  # +count+ entries `[id, geojson]` laid out on a unit grid, 250 boxes per row,
  # each box 0.8 x 0.8.
  def compact_entries(count)
    Array.new(count) do |i|
      x = i % 250
      y = i / 250
      [i, box_geojson(x, y, x + 0.8, y + 0.8)]
    end
  end

  # +count+ entries `[id, geojson]` whose boxes are 1000 wide and 0.05 tall,
  # one per integer y with a small x shift per entry.
  def long_thin_entries(count)
    Array.new(count) do |i|
      [i, box_geojson(i * 0.01, i, i * 0.01 + 1_000.0, i + 0.05)]
    end
  end

  # +count+ entries `[id, geojson]` of 100 x 100 boxes, each shifted diagonally
  # by 0.001 from the previous one.
  def overlapping_entries(count)
    Array.new(count) do |i|
      offset = i * 0.001
      [i, box_geojson(offset, offset, 100.0 + offset, 100.0 + offset)]
    end
  end

  # Dispatches to the generator for +kind+ (:compact, :long_thin, :overlapping).
  # Raises ArgumentError for any other kind.
  def entries_for(kind, count)
    case kind
    when :compact then compact_entries(count)
    when :long_thin then long_thin_entries(count)
    when :overlapping then overlapping_entries(count)
    else raise ArgumentError, "unknown benchmark kind: #{kind.inspect}"
    end
  end

  # Builds a TG::Geometry::Index from GeoJSON entries with the given strategy.
  def build_index(entries, strategy:)
    TG::Geometry::Index.build(entries, via: :geojson, strategy: strategy)
  end

  # Sample `[lon, lat]` query points for +kind+. A new array is returned on
  # every call, so hoist the result out of timed loops.
  def points_for(kind)
    case kind
    when :compact then [[0.4, 0.4], [10.4, 2.4], [10_000.0, 10_000.0]]
    when :long_thin then [[1.0, 10.02], [500.0, 500.02], [-1.0, -1.0]]
    when :overlapping then [[50.0, 50.0], [0.0, 0.0], [200.0, 200.0]]
    else raise ArgumentError, "unknown benchmark kind: #{kind.inspect}"
    end
  end

  # Sample `[min_x, min_y, max_x, max_y]` query rectangles for +kind+.
  def rects_for(kind)
    case kind
    when :compact then [[0, 0, 10, 10], [100, 100, 105, 105]]
    when :long_thin then [[0, 10, 1_000, 10.05], [2_000, 2_000, 2_001, 2_001]]
    when :overlapping then [[25, 25, 75, 75], [200, 200, 201, 201]]
    else raise ArgumentError, "unknown benchmark kind: #{kind.inspect}"
    end
  end

  # Flattens `[[x, y], ...]` and packs the numbers as native-format doubles.
  def packed_points(points)
    points.flatten.pack("d*")
  end

  # Best-effort resident set size of this process in kB.
  #
  # Linux reads VmRSS from /proc/self/status, macOS shells out to `ps`.
  # Returns 0 on other platforms or when sampling fails, so callers never crash
  # because RSS is unavailable.
  def rss_kb
    case RbConfig::CONFIG["host_os"]
    when /linux/i
      File.read("/proc/self/status")[/^VmRSS:\s+(\d+)\s+kB/, 1].to_i
    when /darwin/i
      Integer(`ps -o rss= -p #{Process.pid}`)
    else
      0
    end
  rescue StandardError
    0
  end

  # Prints the benchmark title followed by the Ruby version, platform and
  # whether full mode is enabled.
  def say_header(title)
    puts "\n== #{title} =="
    puts "ruby=#{RUBY_VERSION} platform=#{RUBY_PLATFORM} full=#{full?}"
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+
5
# Compares 1,000 scalar `find_covering` calls against one packed batch call
# for every scenario kind, entry count and strategy.
TGGeometryBench.say_header("batch_packed_vs_loop")
iterations = TGGeometryBench.iterations(500)

%i[compact long_thin overlapping].each do |kind|
  # The query batch depends only on the scenario kind, so build it once per
  # kind instead of re-deriving it (and re-calling points_for) for every size.
  sample_points = TGGeometryBench.points_for(kind)
  points = Array.new(1_000) { |i| sample_points[i % sample_points.length] }
  packed = TGGeometryBench.packed_points(points)

  TGGeometryBench.sizes.each do |size|
    entries = TGGeometryBench.entries_for(kind, size)

    %i[flat rtree].each do |strategy|
      index = TGGeometryBench.build_index(entries, strategy: strategy)

      scalar_time = Benchmark.realtime do
        iterations.times { points.map { |lon, lat| index.find_covering(lon, lat) } }
      end
      batch_time = Benchmark.realtime do
        iterations.times { index.covering_ids_batch_packed(packed) }
      end

      puts "kind=#{kind} n=#{size} strategy=#{strategy} points=#{points.length} scalar_sec=%.6f batch_sec=%.6f scalar_batches_per_sec=%.2f batch_batches_per_sec=%.2f" % [scalar_time, batch_time, iterations / scalar_time, iterations / batch_time]
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+
5
# Thread-read baseline: several threads query one shared rtree Index.
TGGeometryBench.say_header("falcon_concurrency")
puts "No Falcon dependency is used here. This is a thread-read baseline for the immutable Index model."
puts "Falcon/Async behavior remains an OPEN QUESTION until Roman approves a dedicated dependency/setup."

entries = TGGeometryBench.compact_entries(1_000)
index = TGGeometryBench.build_index(entries, strategy: :rtree)
threads = Integer(ENV.fetch("TGEOMETRY_BENCH_THREADS", "4"))
raise ArgumentError, "TGEOMETRY_BENCH_THREADS must be a positive integer, got #{threads}" unless threads.positive?

iterations = TGGeometryBench.iterations(10_000)

# Fetch the query points once, outside the timed section. points_for builds a
# new nested array on every call, so calling it per iteration would measure
# allocation and GC instead of index reads.
points = TGGeometryBench.points_for(:compact)

elapsed = Benchmark.realtime do
  workers = threads.times.map do
    Thread.new do
      iterations.times do |i|
        lon, lat = points[i % points.length]
        index.find_covering(lon, lat)
      end
    end
  end
  workers.each(&:join)
end

puts "threads=#{threads} iterations_per_thread=#{iterations} total_queries=#{threads * iterations} seconds=%.6f qps=%.2f" % [elapsed, (threads * iterations) / elapsed]
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+
5
# Compares build time and point/rect query time of the :flat and :rtree
# strategies for every scenario kind and entry count.
TGGeometryBench.say_header("flat_vs_rtree")
iterations = TGGeometryBench.iterations(1_000)

%i[compact long_thin overlapping].each do |kind|
  TGGeometryBench.sizes.each do |size|
    entries = TGGeometryBench.entries_for(kind, size)

    # Block-local variables (instead of top-level @ivars on `main`) keep each
    # index scoped to this iteration so it can be collected afterwards.
    flat = nil
    rtree = nil
    build_flat = Benchmark.realtime { flat = TGGeometryBench.build_index(entries, strategy: :flat) }
    build_rtree = Benchmark.realtime { rtree = TGGeometryBench.build_index(entries, strategy: :rtree) }

    TGGeometryBench.points_for(kind).each do |lon, lat|
      flat_time = Benchmark.realtime { iterations.times { flat.find_covering(lon, lat) } }
      rtree_time = Benchmark.realtime { iterations.times { rtree.find_covering(lon, lat) } }
      puts "kind=#{kind} n=#{size} query=point lon=#{lon} lat=#{lat} flat_sec=%.6f rtree_sec=%.6f flat_qps=%.2f rtree_qps=%.2f build_flat=%.6f build_rtree=%.6f" % [flat_time, rtree_time, iterations / flat_time, iterations / rtree_time, build_flat, build_rtree]
    end

    TGGeometryBench.rects_for(kind).each do |rect|
      flat_time = Benchmark.realtime { iterations.times { flat.intersecting_rect(*rect) } }
      rtree_time = Benchmark.realtime { iterations.times { rtree.intersecting_rect(*rect) } }
      puts "kind=#{kind} n=#{size} query=rect rect=#{rect.join(',')} flat_sec=%.6f rtree_sec=%.6f flat_qps=%.2f rtree_qps=%.2f" % [flat_time, rtree_time, iterations / flat_time, iterations / rtree_time]
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+
5
# Banner: this harness only records parse wall time with the GVL held.
TGGeometryBench.say_header("gvl_threshold")
puts(
  "First release intentionally performs parse/write/batch/query with GVL held.",
  "This harness records baseline parse wall time only; it does not enable no-GVL execution."
)
8
+
9
# Returns a closed WKT polygon (vertices on a circle of radius 10) whose
# serialized size is at least +target_bytes+ bytes.
#
# Starts from max(4, target_bytes / 38) vertices and grows the vertex count by
# 25% per attempt until the payload is large enough.
def polygon_wkt_at_least(target_bytes)
  vertex_count = [4, target_bytes / 38].max

  loop do
    ring = Array.new(vertex_count) do |i|
      angle = (2.0 * Math::PI * i) / vertex_count
      "#{Math.cos(angle) * 10.0} #{Math.sin(angle) * 10.0}"
    end
    # Repeat the first vertex so the ring is closed.
    ring.push(ring.first)

    wkt = "POLYGON ((#{ring.join(', ')}))"
    return wkt if wkt.bytesize >= target_bytes

    vertex_count = (vertex_count * 1.25).ceil
  end
end
29
+
30
# Times WKT parsing for payloads of increasing size and prints one record per
# target size.
iterations = TGGeometryBench.iterations(2_000)

[128, 1_024, 16_384, 262_144].each do |target_bytes|
  payload = polygon_wkt_at_least(target_bytes)
  seconds = Benchmark.realtime { iterations.times { TG::Geometry.parse_wkt(payload) } }

  puts format(
    "target_bytes=#{target_bytes} payload_bytes=#{payload.bytesize} iterations=#{iterations} seconds=%.6f ops_per_sec=%.2f",
    seconds, iterations / seconds
  )
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+ require "objspace"
5
+
6
# Prints ObjectSpace.memsize_of for one parsed geometry and for a flat and an
# rtree index, per scenario kind and entry count.
TGGeometryBench.say_header("objectspace_memsize")

%i[compact long_thin overlapping].each do |kind|
  TGGeometryBench.sizes.each do |size|
    entries = TGGeometryBench.entries_for(kind, size)
    flat = TGGeometryBench.build_index(entries, strategy: :flat)
    rtree = TGGeometryBench.build_index(entries, strategy: :rtree)

    # Entries are [id, geojson]; parse the first entry's GeoJSON as the sample geometry.
    _first_id, first_geojson = entries.first
    geom = TG::Geometry.parse_geojson(first_geojson)

    geom_bytes = ObjectSpace.memsize_of(geom)
    flat_bytes = ObjectSpace.memsize_of(flat)
    rtree_bytes = ObjectSpace.memsize_of(rtree)

    puts "kind=#{kind} n=#{size} geom_memsize=#{geom_bytes} flat_memsize=#{flat_bytes} rtree_memsize=#{rtree_bytes}"
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+
5
+ TGGeometryBench.say_header("parse_throughput")
6
+
7
# Returns a closed WKT polygon with +count+ distinct vertices placed evenly on
# a circle of radius 10 (the first vertex is repeated at the end).
#
# Raises ArgumentError when +count+ is not an Integer >= 3: fewer vertices
# cannot form a polygon ring, and 0 would otherwise yield "POLYGON (( ))".
def polygon_with_points(count)
  unless count.is_a?(Integer) && count >= 3
    raise ArgumentError, "polygon needs at least 3 points, got #{count.inspect}"
  end

  points = Array.new(count) do |i|
    angle = (2.0 * Math::PI * i) / count
    [Math.cos(angle) * 10.0, Math.sin(angle) * 10.0]
  end
  # Close the ring by repeating the first vertex.
  points << points.first
  coordinates = points.map { |x, y| "#{x} #{y}" }.join(", ")
  "POLYGON ((#{coordinates}))"
end
16
+
17
# Payloads: the same 10 x 10 box as WKT, GeoJSON and WKB, plus medium and
# large WKT polygons.
small = TGGeometryBench.box_wkt(0, 0, 10, 10)
medium = polygon_with_points(250)
large = polygon_with_points(2_500)
# Use the shared helper so the GeoJSON payload stays in sync with the WKT box.
geojson = TGGeometryBench.box_geojson(0, 0, 10, 10)
wkb = TG::Geometry.parse_wkt(small).to_wkb

# Case name => [payload, format passed to TG::Geometry.parse].
cases = {
  "wkt:small" => [small, :wkt],
  "geojson:small" => [geojson, :geojson],
  "wkb:small" => [wkb, :wkb],
  "wkt:medium" => [medium, :wkt],
  "wkt:large" => [large, :wkt]
}

iterations = TGGeometryBench.iterations(10_000)

cases.each do |name, (payload, format)|
  time = Benchmark.realtime do
    iterations.times { TG::Geometry.parse(payload, format: format) }
  end
  puts "%s iterations=%d seconds=%.6f ops_per_sec=%.2f" % [name, iterations, time, iterations / time]
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "_support"
4
+
5
# Repeatedly builds, queries and releases indexes while sampling process RSS,
# then exits 1 when the start-to-finish drift exceeds the configured limit.
TGGeometryBench.say_header("rss_stability")

# Reads an Integer setting that must be >= 1. Zero rebuilds would divide by
# zero below, and zero queries would make the run measure nothing.
positive_setting = lambda do |name, default|
  value = Integer(ENV.fetch(name, default))
  raise ArgumentError, "#{name} must be a positive integer, got #{value}" unless value.positive?

  value
end

total_queries = positive_setting.call("TGEOMETRY_RSS_QUERIES", "10_000_000")
rebuilds = positive_setting.call("TGEOMETRY_RSS_REBUILDS", "100")
entries_count = Integer(ENV.fetch("TGEOMETRY_RSS_ENTRIES", "1_000"))
# A non-positive limit disables the drift check.
max_drift_kb = Integer(ENV.fetch("TGEOMETRY_RSS_MAX_DRIFT_KB", "51_200"))

# At least one query per rebuild, even when rebuilds > total_queries.
queries_per_rebuild = [total_queries / rebuilds, 1].max
entries = TGGeometryBench.compact_entries(entries_count)
points = TGGeometryBench.points_for(:compact)
batch_size = points.length * 10
packed_batch = TGGeometryBench.packed_points(points * 10)

GC.start
GC.start
start_rss = TGGeometryBench.rss_kb
peak_rss = start_rss
samples = []

queries_executed = 0
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

rebuilds.times do |cycle|
  # Alternate strategies so both native index layouts are built and released.
  index = TGGeometryBench.build_index(entries, strategy: (cycle.even? ? :flat : :rtree))

  queries_per_rebuild.times do |q|
    lon, lat = points[(q + cycle) % points.length]
    index.find_covering(lon, lat)
    queries_executed += 1

    # Every 256th query also exercises the packed batch path.
    if (q & 0xff).zero?
      index.covering_ids_batch_packed(packed_batch)
      queries_executed += batch_size
    end
  end

  # Drop the only reference so the index is collectable before sampling.
  index = nil

  if (cycle % 10).zero?
    GC.start
    rss = TGGeometryBench.rss_kb
    peak_rss = [peak_rss, rss].max
    samples << [cycle, queries_executed, rss]
  end
end

GC.start
GC.start
finish_rss = TGGeometryBench.rss_kb
# The final sample counts towards the peak, so the reported peak is never
# lower than the reported finish value.
peak_rss = [peak_rss, finish_rss].max
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
drift_kb = finish_rss - start_rss

puts format(
  "queries=%d rebuilds=%d entries=%d elapsed_s=%.2f start_rss_kb=%d peak_rss_kb=%d finish_rss_kb=%d drift_kb=%d",
  queries_executed, rebuilds, entries_count, elapsed,
  start_rss, peak_rss, finish_rss, drift_kb
)

if samples.length > 1
  puts "samples (cycle, queries, rss_kb):"
  samples.each { |row| puts "  #{row.inspect}" }
end

if max_drift_kb.positive? && drift_kb > max_drift_kb
  warn "[rss_stability] RSS drift #{drift_kb} KB exceeds threshold #{max_drift_kb} KB"
  exit 1
end
@@ -0,0 +1,26 @@
1
+ # ActiveRecord source helper
2
+
3
+ `TG::Geometry::ActiveRecordSource` is an optional Ruby helper. It does not require Rails or ActiveRecord, and it ships inside this gem rather than in a separate integration gem.
4
+
5
+ It accepts any object that responds to `find_each`, or any Enumerable:
6
+
7
+ ```ruby
8
+ entries = TG::Geometry::ActiveRecordSource.call(
9
+ Zone.where(active: true),
10
+ id: :id,
11
+ geometry: :geojson,
12
+ batch_size: 1_000
13
+ )
14
+
15
+ index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
16
+ ```
17
+
18
+ It can also feed a Registry:
19
+
20
+ ```ruby
21
+ class DeliveryZones < TG::Geometry::Registry
22
+ active_record_source Zone.where(active: true), id: :id, geometry: :geojson
23
+ end
24
+ ```
25
+
26
+ Field readers may be Symbols, Strings, or Procs. The helper only converts application records into `[[id, object], ...]`; it does not mutate records, keep database connections, create background jobs, install reload hooks, or add a Rails dependency to the native extension.
@@ -0,0 +1,130 @@
1
+ # tg_geometry architecture
2
+
3
+ `tg_geometry` is a Ruby C extension for the public namespace `TG::Geometry`. It vendors the upstream `tidwall/tg` geometry engine and `tidwall/rtree.c`; it does not depend on GEOS, PostGIS, PROJ, GDAL, system TG, or system rtree libraries.
4
+
5
+ The gem targets a small production-grade core:
6
+
7
+ - parsing and writing TG geometries from Ruby;
8
+ - exact planar geometry predicates;
9
+ - immutable rectangles for bounding boxes and query windows;
10
+ - immutable geofencing-oriented indexes that return user ids;
11
+ - flat and rtree collection-level search strategies;
12
+ - native-endian packed point batches for high-throughput same-process calls;
13
+ - read-only borrowed low-level Line/Ring/Polygon wrappers;
14
+ - grouped TG API coverage for predicates, accessors, point/empty constructors, Segment values, and GeometryCollection children.
15
+
16
+ The gem is not a full GIS system. See `docs/LIMITATIONS.md`.
17
+
18
+ ## Public namespace
19
+
20
+ The canonical require path is:
21
+
22
+ ```ruby
23
+ require "tg/geometry"
24
+ ```
25
+
26
+ The public API lives under `TG::Geometry`. The top-level `TG` module is only a namespace container, not the public gem API.
27
+
28
+ ## Native extension shape
29
+
30
+ The current extension is built by `ext/tg_geometry/extconf.rb` and loaded as `tg_geometry_ext`. The build requires Ruby >= 3.0 and a C11 compiler. Vendor sources are compiled into the extension through small wrapper files:
31
+
32
+ - `ext/tg_geometry/tg_geometry_vendor_tg.c` includes `vendor/tg/tg.c`;
33
+ - `ext/tg_geometry/tg_geometry_vendor_rtree.c` includes `vendor/rtree/rtree.c`.
34
+
35
+ Visibility-hiding compiler flags are only enabled when the Init symbol is explicitly exported. The Init function is exported with `RUBY_FUNC_EXPORTED`.
36
+
37
+ ## Immutable `TG::Geometry::Geom`
38
+
39
+ `TG::Geometry::Geom` usually wraps one owned `struct tg_geom *`. Expansion Block J also allows internal borrowed `TG::Geometry::Geom` wrappers for GeometryCollection children. Borrowed wrappers keep a parent `geom_owner`, report no owned native bytes, and do not call `tg_geom_free` on the borrowed child pointer.
40
+
41
+ Rules:
42
+
43
+ - public `.allocate` is disabled;
44
+ - objects are created only by parse APIs, safe constructors, or internal borrowed child wrappers;
45
+ - the native pointer is never replaced;
46
+ - there is no `close!`, `free!`, `detach!`, or mutation API;
47
+ - parsed objects are frozen before being returned.
48
+
49
+ This immutability is required because `TG::Geometry::Index` can borrow a native geometry pointer from a `TG::Geometry::Geom` and keep the Ruby owner alive.
50
+
51
+ ## Immutable `TG::Geometry::Rect`
52
+
53
+ `TG::Geometry::Rect` is a small Ruby object around four finite coordinates. It is constructible from Ruby and frozen after initialization. The first release exposes only unambiguous rectangle APIs:
54
+
55
+ - coordinate readers;
56
+ - `center`;
57
+ - `intersects?`;
58
+ - `contains_point?`;
59
+ - expansion methods returning new Rect objects.
60
+
61
+ `Rect#contains?` is intentionally not exposed because the name is ambiguous.
62
+
63
+ ## Immutable `TG::Geometry::Index`
64
+
65
+ `TG::Geometry::Index` is built once and read-only afterwards.
66
+
67
+ ```ruby
68
+ index = TG::Geometry::Index.build(
69
+ [[id1, object1], [id2, object2]],
70
+ via: :geojson,
71
+ strategy: :rtree,
72
+ predicate: :covers,
73
+ geometry_index: :ystripes
74
+ )
75
+ ```
76
+
77
+ Accepted `via:` modes:
78
+
79
+ - `:geom` borrows native geometry from existing `TG::Geometry::Geom` wrappers and marks the owner Ruby objects;
80
+ - `:geojson` parses entry strings into Index-owned TG geometries;
81
+ - `:wkb` parses entry strings as raw WKB bytes into Index-owned TG geometries.
82
+
83
+ Accepted strategies:
84
+
85
+ - `:flat` scans entries in insertion order with bbox prefiltering and exact TG geometry filtering;
86
+ - `:rtree` uses vendored `rtree.c` over entry bboxes, then applies exact TG geometry filtering.
87
+
88
+ `strategy: :auto` is not implemented in the first public release. Use explicit `:flat` or `:rtree` and validate with repository benchmarks.
89
+
90
+ ## Result order
91
+
92
+ Insertion order is public behavior.
93
+
94
+ Each entry stores a unique `ordinal`. Flat strategy naturally scans entries in ordinal order. Rtree strategy uses rtree only as a candidate prefilter; candidate marks are local to the query and results are emitted by scanning entries in ordinal order. Rtree traversal order never leaks into Ruby results.
95
+
96
+ ## Point predicate implementation
97
+
98
+ Point queries allocate a temporary TG point geometry and use exact TG predicates:
99
+
100
+ - `:covers` calls `tg_geom_covers(entry_geom, point_geom)`;
101
+ - `:contains` calls `tg_geom_contains(entry_geom, point_geom)`.
102
+
103
+ This is intentionally not the fastest possible point path. The first release chooses exact `covers` / `contains` semantics over a no-allocation shortcut. A faster path such as `tg_geom_intersects_xy` can only replace it after boundary and hole-boundary equivalence tests plus benchmarks are added.
104
+
105
+ ## Reload pattern
106
+
107
+ The intended application reload pattern is atomic reference replacement:
108
+
109
+ ```ruby
110
+ new_index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
111
+ @index = new_index
112
+ ```
113
+
114
+ Old readers keep using the old immutable object until they release it. New readers see the new object after the Ruby reference swap. There is no in-place reload, mutation, add, delete, or builder API in the first release.
115
+
116
+ ## Expansion Blocks A-E and I-J
117
+
118
+ Expansion Block A (`strategy: :auto`) is not enabled in the first public release. The native Index stores only explicit concrete strategies (`:flat` or `:rtree`).
119
+
120
+ Expansion Block B adds `TG::Geometry::Registry` in Ruby. Registry wraps an immutable Index reference and reloads by building a new Index before swapping the reference.
121
+
122
+ Expansion Block C adds `TG::Geometry::ActiveRecordSource` as optional Ruby-only source sugar. The native extension does not depend on Rails or ActiveRecord.
123
+
124
+ Expansion Block D adds Hex/GeoBIN parse/write helpers and raw `extra_json` copying. It does not parse properties into Ruby Hashes.
125
+
126
+ Expansion Block E adds read-only borrowed wrappers for `TG::Geometry::Line`, `TG::Geometry::Ring`, and `TG::Geometry::Polygon`. These wrappers keep the parent `TG::Geometry::Geom` alive through `geom_owner`, mark it for GC, update it during compaction, and never free borrowed TG child pointers directly.
127
+
128
+ Expansion Block I documents and tests the current Ractor boundary: native wrappers are not advertised as Ractor-shareable objects. Normal thread read-only access remains the supported concurrency model.
129
+
130
+ Expansion Block J adds grouped TG API coverage without exposing global mutable environment settings or callback-based APIs. Implemented groups are additional predicates, geometry metadata/collection accessors, point and empty geometry constructors, value `TG::Geometry::Segment`, and borrowed GeometryCollection child `Geom` wrappers. See `docs/FULL_TG_API_COVERAGE.md`.
@@ -0,0 +1,15 @@
1
+ # Auto strategy status
2
+
3
+ `strategy: :auto` is not exposed in the first public release.
4
+
5
+ The release-core contract only enables explicit `strategy: :flat` and `strategy: :rtree`. Automatic threshold selection requires a complete project-owned benchmark matrix and explicit approval before it can become public API.
6
+
7
+ Use `benchmark/flat_vs_rtree.rb` to compare strategies for a workload, then pass the chosen strategy explicitly:
8
+
9
+ ```ruby
10
+ index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
11
+ # or
12
+ index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :flat)
13
+ ```
14
+
15
+ Do not infer a universal crossover from rtree internals or from a partial benchmark run.
@@ -0,0 +1,75 @@
1
+ # Benchmarking
2
+
3
+ Benchmarks are engineering tools for this gem. They are not marketing claims.
4
+
5
+ Do not copy upstream TG C benchmark numbers into `tg_geometry` docs. Ruby C extension boundary cost, Ruby object handling, Index construction, batch result arrays, and GC behavior must be measured in this project.
6
+
7
+ ## Required scripts
8
+
9
+ The repository includes these benchmark entry points:
10
+
11
+ - `benchmark/parse_throughput.rb`
12
+ - `benchmark/gvl_threshold.rb`
13
+ - `benchmark/flat_vs_rtree.rb`
14
+ - `benchmark/batch_packed_vs_loop.rb`
15
+ - `benchmark/falcon_concurrency.rb`
16
+ - `benchmark/objectspace_memsize.rb`
17
+ - `benchmark/rss_stability.rb`
18
+
19
+ Run after compiling the extension:
20
+
21
+ ```sh
22
+ bundle exec rake compile
23
+ ruby benchmark/flat_vs_rtree.rb
24
+ ```
25
+
26
+ By default, scripts use a reduced local set of entry sizes so they can be run quickly while developing. Full first-release benchmark scenarios are enabled with:
27
+
28
+ ```sh
29
+ TGEOMETRY_BENCH_FULL=1 ruby benchmark/flat_vs_rtree.rb
30
+ ```
31
+
32
+ ## Scenarios
33
+
34
+ Benchmark generators cover:
35
+
36
+ - entry counts: 100, 500, 1K, 5K, 50K;
37
+ - compact bboxes;
38
+ - long thin bboxes;
39
+ - overlapping zones;
40
+ - point queries;
41
+ - viewport rect queries;
42
+ - flat vs rtree;
43
+ - scalar vs packed batch;
44
+ - parse small/medium/large geometry strings;
45
+ - RSS stability over repeated build/query/free.
46
+
47
+ ## Output format
48
+
49
+ Scripts print line-oriented key/value records such as:
50
+
51
+ ```text
52
+ kind=compact n=1000 query=point lon=0.4 lat=0.4 flat_sec=... rtree_sec=... flat_qps=... rtree_qps=...
53
+ ```
54
+
55
+ These records are intentionally plain text so they can be redirected to files and compared across machines.
56
+
57
+ ## No `:auto` strategy yet
58
+
59
+ The first release does not expose `strategy: :auto`. Choosing a threshold requires project-owned benchmark output across the required scenario matrix. Internal rtree constants such as leaf capacity are not a flat-vs-rtree crossover threshold.
60
+
61
+ ## GVL threshold
62
+
63
+ `benchmark/gvl_threshold.rb` records baseline parse wall time for several valid WKT payload sizes while the first release keeps the GVL. It does not enable no-GVL execution. A future no-GVL implementation requires a separate design because Ruby C API calls and `RSTRING_PTR` lifetimes are not valid outside the GVL.
64
+
65
+ ## RSS stability
66
+
67
+ `benchmark/rss_stability.rb` reports start, peak, finish, and delta RSS while repeatedly building, querying, and releasing indexes. CI thresholds should be chosen from observed baseline data on the target CI image, not guessed.
68
+
69
+ ## Falcon / Async
70
+
71
+ The first release does not claim Falcon or Async behavior. A dedicated Falcon/Async benchmark remains an open setup item until the dependency and scenario are approved.
72
+
73
+ ## Expansion Block A: auto strategy threshold
74
+
75
+ `strategy: :auto` remains postponed for the first public release. A future implementation must use a complete project-owned benchmark matrix and document the selected threshold before exposing the public option.