tg_geometry 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +103 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +21 -0
- data/README.md +385 -0
- data/Rakefile +129 -0
- data/benchmark/_support.rb +115 -0
- data/benchmark/batch_packed_vs_loop.rb +27 -0
- data/benchmark/falcon_concurrency.rb +25 -0
- data/benchmark/flat_vs_rtree.rb +27 -0
- data/benchmark/gvl_threshold.rb +41 -0
- data/benchmark/objectspace_memsize.rb +17 -0
- data/benchmark/parse_throughput.rb +38 -0
- data/benchmark/rss_stability.rb +70 -0
- data/docs/ACTIVE_RECORD.md +26 -0
- data/docs/ARCHITECTURE.md +130 -0
- data/docs/AUTO_STRATEGY.md +15 -0
- data/docs/BENCHMARKING.md +75 -0
- data/docs/CASUAL_EXAMPLE.md +618 -0
- data/docs/CONCURRENCY.md +65 -0
- data/docs/ERROR_HANDLING.md +55 -0
- data/docs/EXPANSION_E_TO_H_STATUS.md +51 -0
- data/docs/FORMAT_COVERAGE.md +23 -0
- data/docs/FULL_TG_API_COVERAGE.md +109 -0
- data/docs/LIMITATIONS.md +61 -0
- data/docs/LOW_LEVEL_GEOMETRY.md +121 -0
- data/docs/MEMORY_OWNERSHIP.md +94 -0
- data/docs/RACTOR.md +40 -0
- data/docs/REGISTRY.md +37 -0
- data/docs/RELEASE_CHECKLIST.md +39 -0
- data/ext/tg_geometry/extconf.rb +91 -0
- data/ext/tg_geometry/tg_geometry_ext.c +3054 -0
- data/ext/tg_geometry/tg_geometry_vendor_rtree.c +1 -0
- data/ext/tg_geometry/tg_geometry_vendor_tg.c +24 -0
- data/ext/tg_geometry/vendor/.vendored +16 -0
- data/ext/tg_geometry/vendor/rtree/LICENSE +20 -0
- data/ext/tg_geometry/vendor/rtree/README.md +202 -0
- data/ext/tg_geometry/vendor/rtree/VERSION +3 -0
- data/ext/tg_geometry/vendor/rtree/rtree.c +840 -0
- data/ext/tg_geometry/vendor/rtree/rtree.h +105 -0
- data/ext/tg_geometry/vendor/tg/LICENSE +19 -0
- data/ext/tg_geometry/vendor/tg/README.md +197 -0
- data/ext/tg_geometry/vendor/tg/VERSION +3 -0
- data/ext/tg_geometry/vendor/tg/tg.c +16010 -0
- data/ext/tg_geometry/vendor/tg/tg.h +359 -0
- data/lib/tg/geometry/active_record_source.rb +57 -0
- data/lib/tg/geometry/registry.rb +119 -0
- data/lib/tg/geometry/version.rb +7 -0
- data/lib/tg/geometry.rb +6 -0
- data/lib/tg_geometry.rb +3 -0
- data/script/vendor_libs.rb +264 -0
- data/spec/block_10_rtree_strategy_spec.rb +82 -0
- data/spec/block_11_rtree_order_spec.rb +53 -0
- data/spec/block_12_batch_packed_spec.rb +55 -0
- data/spec/block_13_error_hardening_spec.rb +65 -0
- data/spec/block_14_memory_gc_hardening_spec.rb +116 -0
- data/spec/block_1_skeleton_spec.rb +45 -0
- data/spec/block_20_concurrency_spec.rb +157 -0
- data/spec/block_20_fuzz_spec.rb +145 -0
- data/spec/block_2_vendor_spec.rb +79 -0
- data/spec/block_3_geom_parse_spec.rb +89 -0
- data/spec/block_4_geom_api_spec.rb +90 -0
- data/spec/block_5_rect_api_spec.rb +96 -0
- data/spec/block_6_index_build_spec.rb +111 -0
- data/spec/block_7_index_owned_geometry_spec.rb +143 -0
- data/spec/block_8_index_borrowed_geometry_spec.rb +106 -0
- data/spec/block_9_flat_query_spec.rb +65 -0
- data/spec/expansion_a_auto_strategy_spec.rb +14 -0
- data/spec/expansion_b_registry_spec.rb +47 -0
- data/spec/expansion_c_active_record_source_spec.rb +42 -0
- data/spec/expansion_d_format_coverage_spec.rb +30 -0
- data/spec/expansion_e_low_level_geometry_spec.rb +82 -0
- data/spec/expansion_i_ractor_spec.rb +25 -0
- data/spec/expansion_j_full_tg_api_coverage_spec.rb +114 -0
- data/spec/spec_helper.rb +15 -0
- metadata +157 -0
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "benchmark"
|
|
4
|
+
require "rbconfig"
|
|
5
|
+
|
|
6
|
+
# Repository layout, resolved relative to this benchmark directory.
ROOT = File.expand_path("..", __dir__)
EXT_DIR = File.join(ROOT, "ext", "tg_geometry")
LIB_DIR = File.join(ROOT, "lib")

# Prepend lib/ and then ext/tg_geometry/ to the load path (so the extension
# directory ends up first), skipping directories that are already listed.
[LIB_DIR, EXT_DIR].each do |dir|
  $LOAD_PATH.unshift(dir) unless $LOAD_PATH.include?(dir)
end

begin
  require "tg/geometry"
rescue LoadError
  # Print a build hint, then re-raise the original LoadError unchanged.
  warn "Native extension is not built. Run: bundle exec rake compile"
  raise
end
|
|
19
|
+
|
|
20
|
+
# Shared helpers for the benchmark scripts in this directory: deterministic
# fixture generators, index construction, query fixtures, and small reporting
# utilities. All helpers are module functions, so scripts call them as
# `TGGeometryBench.<name>`.
module TGGeometryBench
  module_function

  # Entry counts used when TGEOMETRY_BENCH_FULL=1.
  SIZES = [100, 500, 1_000, 5_000, 50_000].freeze
  # Reduced entry counts used by default.
  FAST_SIZES = [100, 500, 1_000].freeze

  # Query points per fixture kind. Kept as frozen constants so that callers
  # fetching them inside timed loops do not allocate fresh arrays per call.
  POINTS = {
    compact: [[0.4, 0.4], [10.4, 2.4], [10_000.0, 10_000.0]],
    long_thin: [[1.0, 10.02], [500.0, 500.02], [-1.0, -1.0]],
    overlapping: [[50.0, 50.0], [0.0, 0.0], [200.0, 200.0]]
  }.each_value { |list| list.each(&:freeze).freeze }.freeze

  # Query rectangles ([min_x, min_y, max_x, max_y]) per fixture kind.
  RECTS = {
    compact: [[0, 0, 10, 10], [100, 100, 105, 105]],
    long_thin: [[0, 10, 1_000, 10.05], [2_000, 2_000, 2_001, 2_001]],
    overlapping: [[25, 25, 75, 75], [200, 200, 201, 201]]
  }.each_value { |list| list.each(&:freeze).freeze }.freeze

  # True when the full scenario matrix was requested via TGEOMETRY_BENCH_FULL=1.
  def full?
    ENV["TGEOMETRY_BENCH_FULL"] == "1"
  end

  # Entry counts to benchmark: SIZES in full mode, FAST_SIZES otherwise.
  def sizes
    full? ? SIZES : FAST_SIZES
  end

  # Iteration count for a script: TGEOMETRY_BENCH_ITERATIONS if set, otherwise
  # +default+. Raises ArgumentError (from Kernel#Integer) on a non-integer value.
  def iterations(default)
    Integer(ENV.fetch("TGEOMETRY_BENCH_ITERATIONS", default.to_s))
  end

  # WKT text for an axis-aligned box polygon with a closed ring.
  def box_wkt(min_x, min_y, max_x, max_y)
    "POLYGON ((#{min_x} #{min_y}, #{max_x} #{min_y}, #{max_x} #{max_y}, #{min_x} #{max_y}, #{min_x} #{min_y}))"
  end

  # GeoJSON text for an axis-aligned box polygon with a closed ring.
  def box_geojson(min_x, min_y, max_x, max_y)
    %({"type":"Polygon","coordinates":[[[#{min_x},#{min_y}],[#{max_x},#{min_y}],[#{max_x},#{max_y}],[#{min_x},#{max_y}],[#{min_x},#{min_y}]]]})
  end

  # +count+ [id, geojson] pairs: 0.8 x 0.8 boxes laid out on a grid that is
  # 250 cells wide.
  def compact_entries(count)
    Array.new(count) do |i|
      x = i % 250
      y = i / 250
      [i, box_geojson(x, y, x + 0.8, y + 0.8)]
    end
  end

  # +count+ [id, geojson] pairs: boxes 1000 wide and 0.05 tall, one per unit
  # of y, each shifted 0.01 further along x.
  def long_thin_entries(count)
    Array.new(count) do |i|
      [i, box_geojson(i * 0.01, i, i * 0.01 + 1_000.0, i + 0.05)]
    end
  end

  # +count+ [id, geojson] pairs: 100 x 100 boxes, each shifted by 0.001 on
  # both axes.
  def overlapping_entries(count)
    Array.new(count) do |i|
      offset = i * 0.001
      [i, box_geojson(offset, offset, 100.0 + offset, 100.0 + offset)]
    end
  end

  # Dispatches to the generator for +kind+ (:compact, :long_thin, :overlapping).
  # Raises ArgumentError for any other kind.
  def entries_for(kind, count)
    case kind
    when :compact then compact_entries(count)
    when :long_thin then long_thin_entries(count)
    when :overlapping then overlapping_entries(count)
    else unknown_kind!(kind)
    end
  end

  # Builds a TG::Geometry::Index from GeoJSON entries with the given strategy.
  def build_index(entries, strategy:)
    TG::Geometry::Index.build(entries, via: :geojson, strategy: strategy)
  end

  # Frozen list of [lon, lat] query points for +kind+.
  # Raises ArgumentError for an unknown kind.
  def points_for(kind)
    POINTS.fetch(kind) { unknown_kind!(kind) }
  end

  # Frozen list of [min_x, min_y, max_x, max_y] query rectangles for +kind+.
  # Raises ArgumentError for an unknown kind.
  def rects_for(kind)
    RECTS.fetch(kind) { unknown_kind!(kind) }
  end

  # Packs [[x, y], ...] into one binary String of native-endian doubles.
  def packed_points(points)
    points.flatten.pack("d*")
  end

  # Resident set size of this process in kB. Reads /proc/self/status on Linux
  # and shells out to `ps` on macOS. Returns 0 on any other platform and on
  # any StandardError, so the benchmarks keep running where RSS is unavailable.
  def rss_kb
    case RbConfig::CONFIG["host_os"]
    when /linux/i
      File.read("/proc/self/status")[/^VmRSS:\s+(\d+)\s+kB/, 1].to_i
    when /darwin/i
      Integer(`ps -o rss= -p #{Process.pid}`)
    else
      0
    end
  rescue StandardError
    0
  end

  # Prints the benchmark title followed by a Ruby/platform/mode summary line.
  def say_header(title)
    puts "\n== #{title} =="
    puts "ruby=#{RUBY_VERSION} platform=#{RUBY_PLATFORM} full=#{full?}"
  end

  # Single place for the unknown-kind error shared by the kind lookups.
  def unknown_kind!(kind)
    raise ArgumentError, "unknown benchmark kind: #{kind.inspect}"
  end
  private_class_method :unknown_kind!
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
|
|
5
|
+
# Compares 1,000 scalar find_covering calls against one packed batch call,
# for every fixture kind, entry count, and index strategy.
TGGeometryBench.say_header("batch_packed_vs_loop")
iterations = TGGeometryBench.iterations(500)

%i[compact long_thin overlapping].each do |kind|
  # The query batch depends only on the fixture kind, so build and pack it
  # once per kind instead of once per entry count.
  kind_points = TGGeometryBench.points_for(kind)
  points = Array.new(1_000) { |i| kind_points[i % kind_points.length] }
  packed = TGGeometryBench.packed_points(points)

  TGGeometryBench.sizes.each do |size|
    entries = TGGeometryBench.entries_for(kind, size)

    %i[flat rtree].each do |strategy|
      index = TGGeometryBench.build_index(entries, strategy: strategy)

      # One "batch" on the scalar side is a full pass over all points.
      scalar_time = Benchmark.realtime do
        iterations.times { points.map { |lon, lat| index.find_covering(lon, lat) } }
      end
      batch_time = Benchmark.realtime do
        iterations.times { index.covering_ids_batch_packed(packed) }
      end

      puts "kind=#{kind} n=#{size} strategy=#{strategy} points=#{points.length} scalar_sec=%.6f batch_sec=%.6f scalar_batches_per_sec=%.2f batch_batches_per_sec=%.2f" % [scalar_time, batch_time, iterations / scalar_time, iterations / batch_time]
    end
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
|
|
5
|
+
# Thread-read baseline: several threads query one shared, already-built index.
TGGeometryBench.say_header("falcon_concurrency")
puts "No Falcon dependency is used here. This is a thread-read baseline for the immutable Index model."
puts "Falcon/Async behavior remains an OPEN QUESTION until Roman approves a dedicated dependency/setup."

entries = TGGeometryBench.compact_entries(1_000)
index = TGGeometryBench.build_index(entries, strategy: :rtree)
threads = Integer(ENV.fetch("TGEOMETRY_BENCH_THREADS", "4"))
iterations = TGGeometryBench.iterations(10_000)
# Fetch the query points once, outside the timed region, so the hot loop
# measures index lookups rather than fixture construction.
points = TGGeometryBench.points_for(:compact)

elapsed = Benchmark.realtime do
  workers = threads.times.map do
    Thread.new do
      iterations.times do |i|
        lon, lat = points[i % points.length]
        index.find_covering(lon, lat)
      end
    end
  end
  workers.each(&:join)
end

puts "threads=#{threads} iterations_per_thread=#{iterations} total_queries=#{threads * iterations} seconds=%.6f qps=%.2f" % [elapsed, (threads * iterations) / elapsed]
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
|
|
5
|
+
# Compares flat and rtree strategies on build time, point queries, and
# rectangle queries for every fixture kind and entry count.
TGGeometryBench.say_header("flat_vs_rtree")
iterations = TGGeometryBench.iterations(1_000)

%i[compact long_thin overlapping].each do |kind|
  TGGeometryBench.sizes.each do |size|
    entries = TGGeometryBench.entries_for(kind, size)

    # Declare the locals before the timing blocks so the blocks assign to them;
    # this avoids keeping the indexes in instance variables on `main`.
    flat = nil
    rtree = nil
    build_flat = Benchmark.realtime { flat = TGGeometryBench.build_index(entries, strategy: :flat) }
    build_rtree = Benchmark.realtime { rtree = TGGeometryBench.build_index(entries, strategy: :rtree) }

    TGGeometryBench.points_for(kind).each do |lon, lat|
      flat_time = Benchmark.realtime { iterations.times { flat.find_covering(lon, lat) } }
      rtree_time = Benchmark.realtime { iterations.times { rtree.find_covering(lon, lat) } }
      puts "kind=#{kind} n=#{size} query=point lon=#{lon} lat=#{lat} flat_sec=%.6f rtree_sec=%.6f flat_qps=%.2f rtree_qps=%.2f build_flat=%.6f build_rtree=%.6f" % [flat_time, rtree_time, iterations / flat_time, iterations / rtree_time, build_flat, build_rtree]
    end

    TGGeometryBench.rects_for(kind).each do |rect|
      flat_time = Benchmark.realtime { iterations.times { flat.intersecting_rect(*rect) } }
      rtree_time = Benchmark.realtime { iterations.times { rtree.intersecting_rect(*rect) } }
      puts "kind=#{kind} n=#{size} query=rect rect=#{rect.join(',')} flat_sec=%.6f rtree_sec=%.6f flat_qps=%.2f rtree_qps=%.2f" % [flat_time, rtree_time, iterations / flat_time, iterations / rtree_time]
    end
  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
|
|
5
|
+
# Banner plus two notes on what this harness does and does not measure.
TGGeometryBench.say_header("gvl_threshold")
puts(
  "First release intentionally performs parse/write/batch/query with GVL held.",
  "This harness records baseline parse wall time only; it does not enable no-GVL execution."
)
|
|
8
|
+
|
|
9
|
+
# Returns the WKT of a single closed polygon whose vertices lie on a circle
# of radius 10, using enough vertices that the text is at least +target_bytes+
# long. The vertex count starts at max(4, target_bytes / 38) and grows by 25%
# per attempt until the payload is large enough.
def polygon_wkt_at_least(target_bytes)
  render = lambda do |vertex_count|
    vertices = Array.new(vertex_count) do |k|
      theta = (2.0 * Math::PI * k) / vertex_count
      "#{Math.cos(theta) * 10.0} #{Math.sin(theta) * 10.0}"
    end
    # Repeat the first vertex at the end to close the ring.
    "POLYGON ((#{(vertices + [vertices.first]).join(', ')}))"
  end

  vertex_count = [target_bytes / 38, 4].max
  wkt = render.call(vertex_count)
  until wkt.bytesize >= target_bytes
    vertex_count = (vertex_count * 1.25).ceil
    wkt = render.call(vertex_count)
  end
  wkt
end
|
|
29
|
+
|
|
30
|
+
# Time repeated WKT parsing for payloads of increasing size.
iterations = TGGeometryBench.iterations(2_000)

[128, 1_024, 16_384, 262_144].each do |target_bytes|
  payload = polygon_wkt_at_least(target_bytes)
  elapsed = Benchmark.realtime { iterations.times { TG::Geometry.parse_wkt(payload) } }

  puts "target_bytes=#{target_bytes} payload_bytes=#{payload.bytesize} iterations=#{iterations} seconds=%.6f ops_per_sec=%.2f" % [elapsed, iterations / elapsed]
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
require "objspace"
|
|
5
|
+
|
|
6
|
+
# Reports ObjectSpace.memsize_of for one parsed geometry and for flat/rtree
# indexes built from the same entries, per fixture kind and entry count.
TGGeometryBench.say_header("objectspace_memsize")

%i[compact long_thin overlapping].each do |kind|
  TGGeometryBench.sizes.each do |size|
    entries = TGGeometryBench.entries_for(kind, size)
    flat_index = TGGeometryBench.build_index(entries, strategy: :flat)
    rtree_index = TGGeometryBench.build_index(entries, strategy: :rtree)
    # entries are [id, geojson] pairs; parse the first entry's geometry text.
    first_geom = TG::Geometry.parse_geojson(entries.first.last)

    geom_bytes = ObjectSpace.memsize_of(first_geom)
    flat_bytes = ObjectSpace.memsize_of(flat_index)
    rtree_bytes = ObjectSpace.memsize_of(rtree_index)

    puts "kind=#{kind} n=#{size} geom_memsize=#{geom_bytes} flat_memsize=#{flat_bytes} rtree_memsize=#{rtree_bytes}"
  end
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
|
|
5
|
+
TGGeometryBench.say_header("parse_throughput")
|
|
6
|
+
|
|
7
|
+
# Returns the WKT of a closed polygon with +count+ distinct vertices evenly
# spaced on a circle of radius 10; the first vertex is repeated at the end to
# close the ring.
#
# Raises ArgumentError when +count+ is below 3, because fewer vertices cannot
# form a polygon ring (0 would otherwise render the malformed "POLYGON (( ))").
def polygon_with_points(count)
  raise ArgumentError, "polygon needs at least 3 points (got #{count.inspect})" if count < 3

  points = Array.new(count) do |i|
    angle = (2.0 * Math::PI * i) / count
    [Math.cos(angle) * 10.0, Math.sin(angle) * 10.0]
  end
  points << points.first
  coordinates = points.map { |x, y| "#{x} #{y}" }.join(", ")
  "POLYGON ((#{coordinates}))"
end
|
|
16
|
+
|
|
17
|
+
# Fixtures: one small box in three formats, plus medium and large WKT rings.
small_wkt = TGGeometryBench.box_wkt(0, 0, 10, 10)
medium_wkt = polygon_with_points(250)
large_wkt = polygon_with_points(2_500)
small_geojson = '{"type":"Polygon","coordinates":[[[0,0],[10,0],[10,10],[0,10],[0,0]]]}'
# The WKB payload is produced by round-tripping the small box through the parser.
small_wkb = TG::Geometry.parse_wkt(small_wkt).to_wkb

# label => [payload, format passed to TG::Geometry.parse]
cases = {
  "wkt:small" => [small_wkt, :wkt],
  "geojson:small" => [small_geojson, :geojson],
  "wkb:small" => [small_wkb, :wkb],
  "wkt:medium" => [medium_wkt, :wkt],
  "wkt:large" => [large_wkt, :wkt]
}

iterations = TGGeometryBench.iterations(10_000)

cases.each_pair do |label, (payload, fmt)|
  elapsed = Benchmark.realtime { iterations.times { TG::Geometry.parse(payload, format: fmt) } }
  puts "%s iterations=%d seconds=%.6f ops_per_sec=%.2f" % [label, iterations, elapsed, iterations / elapsed]
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "_support"
|
|
4
|
+
|
|
5
|
+
# Repeatedly builds, queries, and releases indexes while sampling process RSS,
# then exits 1 if RSS grew by more than TGEOMETRY_RSS_MAX_DRIFT_KB.
TGGeometryBench.say_header("rss_stability")

total_queries = Integer(ENV.fetch("TGEOMETRY_RSS_QUERIES", "10_000_000"))
rebuilds = Integer(ENV.fetch("TGEOMETRY_RSS_REBUILDS", "100"))
entries_count = Integer(ENV.fetch("TGEOMETRY_RSS_ENTRIES", "1_000"))
max_drift_kb = Integer(ENV.fetch("TGEOMETRY_RSS_MAX_DRIFT_KB", "51_200"))

# Reject values that would otherwise surface as an unrelated ZeroDivisionError
# (rebuilds = 0) or a clamp ArgumentError (queries = 0) further down.
{ "TGEOMETRY_RSS_QUERIES" => total_queries, "TGEOMETRY_RSS_REBUILDS" => rebuilds }.each do |name, value|
  abort "[rss_stability] #{name} must be a positive integer (got #{value})" unless value.positive?
end

# At least one scalar query per rebuild, even when rebuilds > total_queries.
queries_per_rebuild = [total_queries / rebuilds, 1].max
entries = TGGeometryBench.compact_entries(entries_count)
points = TGGeometryBench.points_for(:compact)
packed_batch = TGGeometryBench.packed_points(points * 10)

# Collect twice before taking the baseline sample.
GC.start
GC.start
start_rss = TGGeometryBench.rss_kb
peak_rss = start_rss
samples = []

queries_executed = 0
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

rebuilds.times do |cycle|
  # Alternate strategies so both index layouts are built and released.
  index = TGGeometryBench.build_index(entries, strategy: (cycle.even? ? :flat : :rtree))

  queries_per_rebuild.times do |q|
    lon, lat = points[(q + cycle) % points.length]
    index.find_covering(lon, lat)
    queries_executed += 1

    # Every 256th scalar query also runs one packed batch.
    if (q & 0xff).zero?
      index.covering_ids_batch_packed(packed_batch)
      queries_executed += points.length * 10
    end
  end

  # Drop the only reference so the index can be collected.
  index = nil

  if (cycle % 10).zero?
    GC.start
    rss = TGGeometryBench.rss_kb
    peak_rss = [peak_rss, rss].max
    samples << [cycle, queries_executed, rss]
  end
end

GC.start
GC.start
finish_rss = TGGeometryBench.rss_kb
# The final sample counts toward the peak too.
peak_rss = [peak_rss, finish_rss].max
elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
drift_kb = finish_rss - start_rss

puts format(
  "queries=%d rebuilds=%d entries=%d elapsed_s=%.2f start_rss_kb=%d peak_rss_kb=%d finish_rss_kb=%d drift_kb=%d",
  queries_executed, rebuilds, entries_count, elapsed,
  start_rss, peak_rss, finish_rss, drift_kb
)

if samples.length > 1
  puts "samples (cycle, queries, rss_kb):"
  samples.each { |row| puts "  #{row.inspect}" }
end

# rss_kb reports 0 when RSS cannot be read; say so instead of implying that
# memory was stable.
if start_rss.zero? && finish_rss.zero?
  warn "[rss_stability] RSS could not be read on this platform; drift check is not meaningful"
end

if max_drift_kb.positive? && drift_kb > max_drift_kb
  warn "[rss_stability] RSS drift #{drift_kb} KB exceeds threshold #{max_drift_kb} KB"
  exit 1
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# ActiveRecord source helper
|
|
2
|
+
|
|
3
|
+
`TG::Geometry::ActiveRecordSource` is an optional Ruby helper. It does not require Rails or ActiveRecord and is not loaded from a separate integration gem.
|
|
4
|
+
|
|
5
|
+
It accepts any object that responds to `find_each`, or any Enumerable:
|
|
6
|
+
|
|
7
|
+
```ruby
|
|
8
|
+
entries = TG::Geometry::ActiveRecordSource.call(
|
|
9
|
+
Zone.where(active: true),
|
|
10
|
+
id: :id,
|
|
11
|
+
geometry: :geojson,
|
|
12
|
+
batch_size: 1_000
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
It can also feed a Registry:
|
|
19
|
+
|
|
20
|
+
```ruby
|
|
21
|
+
class DeliveryZones < TG::Geometry::Registry
|
|
22
|
+
active_record_source Zone.where(active: true), id: :id, geometry: :geojson
|
|
23
|
+
end
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Field readers may be Symbols, Strings, or Procs. The helper only converts application records into `[[id, object], ...]`; it does not mutate records, keep database connections, create background jobs, install reload hooks, or add a Rails dependency to the native extension.
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# tg_geometry architecture
|
|
2
|
+
|
|
3
|
+
`tg_geometry` is a Ruby C extension for the public namespace `TG::Geometry`. It vendors the upstream `tidwall/tg` geometry engine and `tidwall/rtree.c`; it does not depend on GEOS, PostGIS, PROJ, GDAL, system TG, or system rtree libraries.
|
|
4
|
+
|
|
5
|
+
The gem targets a small production-grade core:
|
|
6
|
+
|
|
7
|
+
- parsing and writing TG geometries from Ruby;
|
|
8
|
+
- exact planar geometry predicates;
|
|
9
|
+
- immutable rectangles for bounding boxes and query windows;
|
|
10
|
+
- immutable geofencing-oriented indexes that return user ids;
|
|
11
|
+
- flat and rtree collection-level search strategies;
|
|
12
|
+
- native-endian packed point batches for high-throughput same-process calls;
|
|
13
|
+
- read-only borrowed low-level Line/Ring/Polygon wrappers;
|
|
14
|
+
- grouped TG API coverage for predicates, accessors, point/empty constructors, Segment values, and GeometryCollection children.
|
|
15
|
+
|
|
16
|
+
The gem is not a full GIS system. See `docs/LIMITATIONS.md`.
|
|
17
|
+
|
|
18
|
+
## Public namespace
|
|
19
|
+
|
|
20
|
+
The canonical require path is:
|
|
21
|
+
|
|
22
|
+
```ruby
|
|
23
|
+
require "tg/geometry"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
The public API lives under `TG::Geometry`. The top-level `TG` module is only a namespace container, not the public gem API.
|
|
27
|
+
|
|
28
|
+
## Native extension shape
|
|
29
|
+
|
|
30
|
+
The current extension is built by `ext/tg_geometry/extconf.rb` and loaded as `tg_geometry_ext`. The build requires Ruby >= 3.0 and a C11 compiler. Vendor sources are compiled into the extension through small wrapper files:
|
|
31
|
+
|
|
32
|
+
- `ext/tg_geometry/tg_geometry_vendor_tg.c` includes `vendor/tg/tg.c`;
|
|
33
|
+
- `ext/tg_geometry/tg_geometry_vendor_rtree.c` includes `vendor/rtree/rtree.c`.
|
|
34
|
+
|
|
35
|
+
No visibility-hiding flag is enabled unless the Init symbol is explicitly exported. The Init function is exported with `RUBY_FUNC_EXPORTED`.
|
|
36
|
+
|
|
37
|
+
## Immutable `TG::Geometry::Geom`
|
|
38
|
+
|
|
39
|
+
`TG::Geometry::Geom` usually wraps one owned `struct tg_geom *`. Expansion Block J also allows internal borrowed `TG::Geometry::Geom` wrappers for GeometryCollection children. Borrowed wrappers keep a parent `geom_owner`, report no owned native bytes, and do not call `tg_geom_free` on the borrowed child pointer.
|
|
40
|
+
|
|
41
|
+
Rules:
|
|
42
|
+
|
|
43
|
+
- public `.allocate` is disabled;
|
|
44
|
+
- objects are created only by parse APIs, safe constructors, or internal borrowed child wrappers;
|
|
45
|
+
- the native pointer is never replaced;
|
|
46
|
+
- there is no `close!`, `free!`, `detach!`, or mutation API;
|
|
47
|
+
- parsed objects are frozen before being returned.
|
|
48
|
+
|
|
49
|
+
This immutability is required because `TG::Geometry::Index` can borrow a native geometry pointer from a `TG::Geometry::Geom` and keep the Ruby owner alive.
|
|
50
|
+
|
|
51
|
+
## Immutable `TG::Geometry::Rect`
|
|
52
|
+
|
|
53
|
+
`TG::Geometry::Rect` is a small Ruby object around four finite coordinates. It is constructible from Ruby and frozen after initialization. The first release exposes only unambiguous rectangle APIs:
|
|
54
|
+
|
|
55
|
+
- coordinate readers;
|
|
56
|
+
- `center`;
|
|
57
|
+
- `intersects?`;
|
|
58
|
+
- `contains_point?`;
|
|
59
|
+
- expansion methods returning new Rect objects.
|
|
60
|
+
|
|
61
|
+
`Rect#contains?` is intentionally not exposed because the name is ambiguous.
|
|
62
|
+
|
|
63
|
+
## Immutable `TG::Geometry::Index`
|
|
64
|
+
|
|
65
|
+
`TG::Geometry::Index` is built once and read-only afterwards.
|
|
66
|
+
|
|
67
|
+
```ruby
|
|
68
|
+
index = TG::Geometry::Index.build(
|
|
69
|
+
[[id1, object1], [id2, object2]],
|
|
70
|
+
via: :geojson,
|
|
71
|
+
strategy: :rtree,
|
|
72
|
+
predicate: :covers,
|
|
73
|
+
geometry_index: :ystripes
|
|
74
|
+
)
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Accepted `via:` modes:
|
|
78
|
+
|
|
79
|
+
- `:geom` borrows native geometry from existing `TG::Geometry::Geom` wrappers and marks the owner Ruby objects;
|
|
80
|
+
- `:geojson` parses entry strings into Index-owned TG geometries;
|
|
81
|
+
- `:wkb` parses entry strings as raw WKB bytes into Index-owned TG geometries.
|
|
82
|
+
|
|
83
|
+
Accepted strategies:
|
|
84
|
+
|
|
85
|
+
- `:flat` scans entries in insertion order with bbox prefiltering and exact TG geometry filtering;
|
|
86
|
+
- `:rtree` uses vendored `rtree.c` over entry bboxes, then applies exact TG geometry filtering.
|
|
87
|
+
|
|
88
|
+
`strategy: :auto` is not implemented in the first public release. Use explicit `:flat` or `:rtree` and validate with repository benchmarks.
|
|
89
|
+
|
|
90
|
+
## Result order
|
|
91
|
+
|
|
92
|
+
Insertion order is public behavior.
|
|
93
|
+
|
|
94
|
+
Each entry stores a unique `ordinal`. Flat strategy naturally scans entries in ordinal order. Rtree strategy uses rtree only as a candidate prefilter; candidate marks are local to the query and results are emitted by scanning entries in ordinal order. Rtree traversal order never leaks into Ruby results.
|
|
95
|
+
|
|
96
|
+
## Point predicate implementation
|
|
97
|
+
|
|
98
|
+
Point queries allocate a temporary TG point geometry and use exact TG predicates:
|
|
99
|
+
|
|
100
|
+
- `:covers` calls `tg_geom_covers(entry_geom, point_geom)`;
|
|
101
|
+
- `:contains` calls `tg_geom_contains(entry_geom, point_geom)`.
|
|
102
|
+
|
|
103
|
+
This is intentionally not the fastest possible point path. The first release chooses exact `covers` / `contains` semantics over a no-allocation shortcut. A faster path such as `tg_geom_intersects_xy` can only replace it after boundary and hole-boundary equivalence tests plus benchmarks are added.
|
|
104
|
+
|
|
105
|
+
## Reload pattern
|
|
106
|
+
|
|
107
|
+
The intended application reload pattern is atomic reference replacement:
|
|
108
|
+
|
|
109
|
+
```ruby
|
|
110
|
+
new_index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
|
|
111
|
+
@index = new_index
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Old readers keep using the old immutable object until they release it. New readers see the new object after the Ruby reference swap. There is no in-place reload, mutation, add, delete, or builder API in the first release.
|
|
115
|
+
|
|
116
|
+
## Expansion Blocks A-E and I-J
|
|
117
|
+
|
|
118
|
+
Expansion Block A (`strategy: :auto`) is not enabled in the first public release. The native Index stores only explicit concrete strategies (`:flat` or `:rtree`).
|
|
119
|
+
|
|
120
|
+
Expansion Block B adds `TG::Geometry::Registry` in Ruby. Registry wraps an immutable Index reference and reloads by building a new Index before swapping the reference.
|
|
121
|
+
|
|
122
|
+
Expansion Block C adds `TG::Geometry::ActiveRecordSource` as optional Ruby-only source sugar. The native extension does not depend on Rails or ActiveRecord.
|
|
123
|
+
|
|
124
|
+
Expansion Block D adds Hex/GeoBIN parse/write helpers and raw `extra_json` copying. It does not parse properties into Ruby Hashes.
|
|
125
|
+
|
|
126
|
+
Expansion Block E adds read-only borrowed wrappers for `TG::Geometry::Line`, `TG::Geometry::Ring`, and `TG::Geometry::Polygon`. These wrappers keep the parent `TG::Geometry::Geom` alive through `geom_owner`, mark it for GC, update it during compaction, and never free borrowed TG child pointers directly.
|
|
127
|
+
|
|
128
|
+
Expansion Block I documents and tests the current Ractor boundary: native wrappers are not advertised as Ractor-shareable objects. Normal thread read-only access remains the supported concurrency model.
|
|
129
|
+
|
|
130
|
+
Expansion Block J adds grouped TG API coverage without exposing global mutable environment settings or callback-based APIs. Implemented groups are additional predicates, geometry metadata/collection accessors, point and empty geometry constructors, value `TG::Geometry::Segment`, and borrowed GeometryCollection child `Geom` wrappers. See `docs/FULL_TG_API_COVERAGE.md`.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Auto strategy status
|
|
2
|
+
|
|
3
|
+
`strategy: :auto` is not exposed in the first public release.
|
|
4
|
+
|
|
5
|
+
The release-core contract only enables explicit `strategy: :flat` and `strategy: :rtree`. Automatic threshold selection requires a complete project-owned benchmark matrix and explicit approval before it can become public API.
|
|
6
|
+
|
|
7
|
+
Use `benchmark/flat_vs_rtree.rb` to compare strategies for a workload, then pass the chosen strategy explicitly:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :rtree)
|
|
11
|
+
# or
|
|
12
|
+
index = TG::Geometry::Index.build(entries, via: :geojson, strategy: :flat)
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
Do not infer a universal crossover from rtree internals or from a partial benchmark run.
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Benchmarking
|
|
2
|
+
|
|
3
|
+
Benchmarks are engineering tools for this gem. They are not marketing claims.
|
|
4
|
+
|
|
5
|
+
Do not copy upstream TG C benchmark numbers into `tg_geometry` docs. Ruby C extension boundary cost, Ruby object handling, Index construction, batch result arrays, and GC behavior must be measured in this project.
|
|
6
|
+
|
|
7
|
+
## Required scripts
|
|
8
|
+
|
|
9
|
+
The repository includes these benchmark entry points:
|
|
10
|
+
|
|
11
|
+
- `benchmark/parse_throughput.rb`
|
|
12
|
+
- `benchmark/gvl_threshold.rb`
|
|
13
|
+
- `benchmark/flat_vs_rtree.rb`
|
|
14
|
+
- `benchmark/batch_packed_vs_loop.rb`
|
|
15
|
+
- `benchmark/falcon_concurrency.rb`
|
|
16
|
+
- `benchmark/objectspace_memsize.rb`
|
|
17
|
+
- `benchmark/rss_stability.rb`
|
|
18
|
+
|
|
19
|
+
Run after compiling the extension:
|
|
20
|
+
|
|
21
|
+
```sh
|
|
22
|
+
bundle exec rake compile
|
|
23
|
+
ruby benchmark/flat_vs_rtree.rb
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
By default, scripts use a reduced local set of entry sizes so they can be run quickly while developing. Full first-release benchmark scenarios are enabled with:
|
|
27
|
+
|
|
28
|
+
```sh
|
|
29
|
+
TGEOMETRY_BENCH_FULL=1 ruby benchmark/flat_vs_rtree.rb
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Scenarios
|
|
33
|
+
|
|
34
|
+
Benchmark generators cover:
|
|
35
|
+
|
|
36
|
+
- entry counts: 100, 500, 1K, 5K, 50K;
|
|
37
|
+
- compact bboxes;
|
|
38
|
+
- long thin bboxes;
|
|
39
|
+
- overlapping zones;
|
|
40
|
+
- point queries;
|
|
41
|
+
- viewport rect queries;
|
|
42
|
+
- flat vs rtree;
|
|
43
|
+
- scalar vs packed batch;
|
|
44
|
+
- parse small/medium/large geometry strings;
|
|
45
|
+
- RSS stability over repeated build/query/free.
|
|
46
|
+
|
|
47
|
+
## Output format
|
|
48
|
+
|
|
49
|
+
Scripts print line-oriented key/value records such as:
|
|
50
|
+
|
|
51
|
+
```text
|
|
52
|
+
kind=compact n=1000 query=point lon=0.4 lat=0.4 flat_sec=... rtree_sec=... flat_qps=... rtree_qps=...
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
These records are intentionally plain text so they can be redirected to files and compared across machines.
|
|
56
|
+
|
|
57
|
+
## No `:auto` strategy yet
|
|
58
|
+
|
|
59
|
+
The first release does not expose `strategy: :auto`. Choosing a threshold requires project-owned benchmark output across the required scenario matrix. Internal rtree constants such as leaf capacity are not a flat-vs-rtree crossover threshold.
|
|
60
|
+
|
|
61
|
+
## GVL threshold
|
|
62
|
+
|
|
63
|
+
`benchmark/gvl_threshold.rb` records baseline parse wall time for several valid WKT payload sizes while the first release keeps the GVL. It does not enable no-GVL execution. A future no-GVL implementation requires a separate design because Ruby C API calls and `RSTRING_PTR` lifetimes are not valid outside the GVL.
|
|
64
|
+
|
|
65
|
+
## RSS stability
|
|
66
|
+
|
|
67
|
+
`benchmark/rss_stability.rb` reports start, peak, finish, and delta RSS while repeatedly building, querying, and releasing indexes. CI thresholds should be chosen from observed baseline data on the target CI image, not guessed.
|
|
68
|
+
|
|
69
|
+
## Falcon / Async
|
|
70
|
+
|
|
71
|
+
The first release does not claim Falcon or Async behavior. A dedicated Falcon/Async benchmark remains an open setup item until the dependency and scenario are approved.
|
|
72
|
+
|
|
73
|
+
## Expansion Block A: auto strategy threshold
|
|
74
|
+
|
|
75
|
+
`strategy: :auto` remains postponed for the first public release. A future implementation must use a complete project-owned benchmark matrix and document the selected threshold before exposing the public option.
|