nyxis 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/nxs.rb +538 -6
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 21f182240eeb38df31bb91d81c52a41df156a234b82e61454b49c34f49de5ac8
|
|
4
|
+
data.tar.gz: ce6fca2da3243e57488f081ee10302cf780575591b2947d11a7becc85ad4feea
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 27d92d5aad9aa3f661c610666361cd420e337b1955891abd006d02ec899f4b94e7040c5baeb4614b2d71efe457bb6728806a2c7e7f0e6ccda09075416a0286d7
|
|
7
|
+
data.tar.gz: d5f7df6269b0fe00e8ae6d6512cc67eed7ba33891654b51833f8519a627ac29136f90cd6e0bcceae3cf01db0ef025166bbc84e836a00cf6bf489eafb744ea4d5
|
data/nxs.rb
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative 'pattern'
|
|
4
|
+
|
|
3
5
|
# NXS Reader — .nxb parser (Ruby 3.x, stdlib only).
|
|
4
6
|
#
|
|
5
7
|
# Implements Nyxis v1.1 binary wire format.
|
|
@@ -35,6 +37,221 @@ module Nxs
|
|
|
35
37
|
COL_TAIL_ENTRY_BYTES = 20
|
|
36
38
|
PAX_TAIL_ENTRY_BYTES = 28
|
|
37
39
|
|
|
40
|
+
# Adaptive prefetch (phase 1) — spec §6–§8.4
|
|
41
|
+
DEFAULT_PAGE_SIZE = 65_536
|
|
42
|
+
DEFAULT_MAX_PAGES = 64
|
|
43
|
+
DEFAULT_COALESCE_GAP_PAGES = 1
|
|
44
|
+
DEFAULT_PREFETCH_DEPTH = 4
|
|
45
|
+
EAGER_THRESHOLD_MB = 10
|
|
46
|
+
LAZY_THRESHOLD_MB = 50
|
|
47
|
+
|
|
48
|
+
HINT_UNKNOWN = 0
|
|
49
|
+
HINT_SEQUENTIAL = 1
|
|
50
|
+
HINT_RANDOM = 2
|
|
51
|
+
HINT_FULL = 3
|
|
52
|
+
HINT_PARTIAL = 4
|
|
53
|
+
|
|
54
|
+
HINT_SYMBOLS = {
|
|
55
|
+
unknown: HINT_UNKNOWN,
|
|
56
|
+
sequential: HINT_SEQUENTIAL,
|
|
57
|
+
random: HINT_RANDOM,
|
|
58
|
+
full: HINT_FULL,
|
|
59
|
+
partial: HINT_PARTIAL
|
|
60
|
+
}.freeze
|
|
61
|
+
|
|
62
|
+
def self.normalize_hint(hint)
|
|
63
|
+
return hint if hint.is_a?(Integer)
|
|
64
|
+
|
|
65
|
+
HINT_SYMBOLS.fetch(hint) { HINT_UNKNOWN }
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Initial prefetch strategy from open hint and file size (spec §5.1).
|
|
69
|
+
def self.initial_strategy(hint, file_size)
|
|
70
|
+
hint = normalize_hint(hint)
|
|
71
|
+
file_size_mb = file_size / (1024 * 1024)
|
|
72
|
+
return 'eager' if hint == HINT_FULL && file_size_mb <= EAGER_THRESHOLD_MB
|
|
73
|
+
return 'lazy' if file_size_mb > LAZY_THRESHOLD_MB
|
|
74
|
+
|
|
75
|
+
'adaptive'
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
# Row-layout data sector byte range [start, length).
|
|
79
|
+
def self.row_data_sector(tail_start, file_size)
|
|
80
|
+
sector_start = 32
|
|
81
|
+
if tail_start > sector_start && tail_start <= file_size
|
|
82
|
+
[sector_start, tail_start - sector_start]
|
|
83
|
+
else
|
|
84
|
+
[sector_start, 0]
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Merge sorted unique page indices when gap <= gap_pages (inclusive).
|
|
89
|
+
def self.coalesce_page_indices(indices, gap_pages, page_size = DEFAULT_PAGE_SIZE)
|
|
90
|
+
return [] if indices.empty?
|
|
91
|
+
|
|
92
|
+
uniq = indices.uniq.sort
|
|
93
|
+
spans = []
|
|
94
|
+
start = uniq[0]
|
|
95
|
+
end_ = uniq[0]
|
|
96
|
+
uniq.each_cons(2) do |_a, b|
|
|
97
|
+
if b - end_ <= gap_pages
|
|
98
|
+
end_ = b
|
|
99
|
+
else
|
|
100
|
+
spans << [start, end_]
|
|
101
|
+
start = end_ = b
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
spans << [start, end_]
|
|
105
|
+
spans.map do |a, b|
|
|
106
|
+
{ page_start: a, page_end: b, byte_start: a * page_size, byte_length: (b - a + 1) * page_size }
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def self.clamp_page_ranges(ranges, file_size)
|
|
111
|
+
ranges.filter_map do |r|
|
|
112
|
+
len = r[:byte_length]
|
|
113
|
+
len = file_size - r[:byte_start] if r[:byte_start] + len > file_size
|
|
114
|
+
next nil if len <= 0
|
|
115
|
+
|
|
116
|
+
r.merge(byte_length: len)
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
def self.page_indices_for_viewport(start_index, end_index, page_size, &record_offset)
|
|
121
|
+
(start_index..end_index).map { |i| record_offset.call(i) / page_size }
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# LRU page cache with optional pinning (spec §6).
#
# Pages are keyed by page index and stored as
# `{ data:, last_used:, pinned: }`. Recency is tracked with a logical clock
# that is bumped on every hit and every insert; eviction removes the unpinned
# entry with the smallest `last_used`. Pinned entries are never evicted.
#
# The class performs no locking of its own.
class PageCache
  attr_reader :max_pages, :page_size, :hits, :misses

  # max_pages: capacity in pages; a value <= 0 disables caching (#set is a no-op).
  # page_size: nominal page size in bytes (stored and exposed, not enforced here).
  def initialize(max_pages = DEFAULT_MAX_PAGES, page_size = DEFAULT_PAGE_SIZE)
    @max_pages = max_pages
    @page_size = page_size
    @pages = {}
    @clock = 0
    @hits = 0
    @misses = 0
  end

  # True when the page is resident. Does not touch recency or hit/miss counters.
  def has?(page_index)
    @pages.key?(page_index)
  end

  # Returns the cached data for page_index, or nil on a miss.
  # A hit refreshes the entry's recency; both outcomes update the counters.
  def get(page_index)
    entry = @pages[page_index]
    unless entry
      @misses += 1
      return nil
    end
    @clock += 1
    entry[:last_used] = @clock
    @hits += 1
    entry[:data]
  end

  # Stores data under page_index, evicting least-recently-used unpinned pages
  # to make room. Returns nil (and stores nothing) when max_pages <= 0.
  #
  # If the cache is full and every resident page is pinned, nothing can be
  # evicted; the page is stored anyway and the cache exceeds max_pages until
  # pages are unpinned and later inserts evict them.
  def set(page_index, data, pinned: false)
    return if @max_pages <= 0

    # Overwriting a resident page does not grow the cache, so only make room
    # when a new index is being added.
    unless @pages.key?(page_index)
      while @pages.size >= @max_pages
        # Bail out when nothing is evictable; retrying would spin forever.
        break unless evict_one?
      end
    end
    @clock += 1
    @pages[page_index] = { data: data, last_used: @clock, pinned: pinned }
  end

  # Marks every resident page in page_indices as pinned; absent pages are ignored.
  def pin_pages(page_indices)
    page_indices.each do |p|
      entry = @pages[p]
      entry[:pinned] = true if entry
    end
  end

  # Clears the pinned flag on every resident page.
  def unpin_all
    @pages.each_value { |entry| entry[:pinned] = false }
  end

  # Returns a Hash with :pages_cached, :pages_max, :memory_used_bytes
  # (sum of cached data bytesizes), :cache_hits and :cache_misses.
  def stats
    bytes = @pages.values.sum { |e| e[:data].bytesize }
    {
      pages_cached: @pages.size,
      pages_max: @max_pages,
      memory_used_bytes: bytes,
      cache_hits: @hits,
      cache_misses: @misses
    }
  end

  private

  # Removes the least-recently-used unpinned page.
  # Returns true when a page was evicted, false when every page is pinned
  # (or the cache is empty).
  def evict_one?
    victim = nil
    oldest = nil
    @pages.each do |idx, entry|
      next if entry[:pinned]

      if oldest.nil? || entry[:last_used] < oldest
        oldest = entry[:last_used]
        victim = idx
      end
    end
    return false if victim.nil?

    @pages.delete(victim)
    true
  end
end
|
|
202
|
+
|
|
203
|
+
# In-flight page fetch deduplication for concurrent prefetch_viewport calls.
#
# The first caller of #with for a page index (the "leader") runs the block.
# Callers that arrive for the same index while it is running ("followers")
# block until the leader finishes, then receive the leader's result or have
# the leader's StandardError re-raised in their own thread.
class InFlightMap
  # queue is used purely as a latch: the leader closes it once data/error are
  # set, which releases every thread blocked in Queue#pop (a single pushed
  # token would release only one of them).
  Entry = Struct.new(:queue, :data, :error)

  def initialize
    @mu = Mutex.new
    @map = {}
  end

  # True while a fetch for page_index is registered as in flight.
  def has?(page_index)
    @mu.synchronize { @map.key?(page_index) }
  end

  # Blocks until the in-flight fetch for page_index completes and returns its
  # data (re-raising its error, if any). Returns nil immediately when no fetch
  # is registered for that index.
  def wait(page_index)
    entry = @mu.synchronize { @map[page_index] }
    return nil unless entry

    await(entry)
  end

  # Runs the block at most once per concurrently requested page_index.
  # Returns the block's value to the leader and to every follower.
  def with(page_index)
    leader = false
    entry = @mu.synchronize do
      @map[page_index] || begin
        leader = true
        @map[page_index] = Entry.new(Queue.new)
      end
    end
    # Followers wait on the entry they observed under the lock. Looking the
    # entry up again could miss it if the leader has already finished.
    return await(entry) unless leader

    begin
      entry.data = yield
    rescue StandardError => e
      entry.error = e
      raise
    ensure
      # Runs on every exit path so waiters are never left blocked. If the block
      # exits with a non-StandardError, followers are released with nil data.
      entry.queue.close
      @mu.synchronize { @map.delete(page_index) if @map[page_index].equal?(entry) }
    end
  end

  private

  # Blocks until the entry's latch is closed, then returns data or raises error.
  def await(entry)
    entry.queue.pop # returns nil once the queue is closed and empty
    raise entry.error if entry.error

    entry.data
  end
end
|
|
254
|
+
|
|
38
255
|
class NxsError < StandardError
|
|
39
256
|
attr_reader :code
|
|
40
257
|
|
|
@@ -49,7 +266,13 @@ module Nxs
|
|
|
49
266
|
class Reader
|
|
50
267
|
attr_reader :keys, :record_count, :layout
|
|
51
268
|
|
|
52
|
-
def initialize(bytes)
|
|
269
|
+
def initialize(bytes, **options)
|
|
270
|
+
hint = options.fetch(:hint, HINT_UNKNOWN)
|
|
271
|
+
max_pages = options.fetch(:max_pages, DEFAULT_MAX_PAGES)
|
|
272
|
+
page_size = options.fetch(:page_size, DEFAULT_PAGE_SIZE)
|
|
273
|
+
coalesce_gap_pages = options.fetch(:coalesce_gap_pages, DEFAULT_COALESCE_GAP_PAGES)
|
|
274
|
+
prefetch_depth = options.fetch(:prefetch_depth, DEFAULT_PREFETCH_DEPTH)
|
|
275
|
+
fetch_range = options.fetch(:fetch_range, nil)
|
|
53
276
|
@data = bytes.b # force binary encoding
|
|
54
277
|
sz = @data.bytesize
|
|
55
278
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'file too small') if sz < 32
|
|
@@ -86,6 +309,15 @@ module Nxs
|
|
|
86
309
|
@col_buf_off = []
|
|
87
310
|
@col_buf_len = []
|
|
88
311
|
parse_layout_tail!(preamble_tail)
|
|
312
|
+
init_column_prefetch!(fetch_range: fetch_range)
|
|
313
|
+
init_prefetch!(
|
|
314
|
+
hint: hint,
|
|
315
|
+
max_pages: max_pages,
|
|
316
|
+
page_size: page_size,
|
|
317
|
+
coalesce_gap_pages: coalesce_gap_pages,
|
|
318
|
+
prefetch_depth: prefetch_depth,
|
|
319
|
+
fetch_range: fetch_range
|
|
320
|
+
)
|
|
89
321
|
end
|
|
90
322
|
|
|
91
323
|
# O(1) record lookup — row tail-index or columnar/PAX record index.
|
|
@@ -96,10 +328,41 @@ module Nxs
|
|
|
96
328
|
|
|
97
329
|
return Object.new(self, i, i) if @layout != :row
|
|
98
330
|
|
|
331
|
+
on_access(i)
|
|
99
332
|
abs_offset = @data.unpack1("@#{@tail_start + i * 10 + 2}Q<")
|
|
100
333
|
Object.new(self, abs_offset)
|
|
101
334
|
end
|
|
102
335
|
|
|
336
|
+
# Prefetch one column buffer (columnar layout only; §7.4).
|
|
337
|
+
def prefetch_column(key)
|
|
338
|
+
raise NxsError.new('ERR_LAYOUT', 'prefetch_column requires columnar layout') unless @layout == :columnar
|
|
339
|
+
|
|
340
|
+
slot = @key_index[key]
|
|
341
|
+
raise NxsError.new('ERR_KEY_NOT_FOUND', "key #{key.inspect} not in schema") unless slot
|
|
342
|
+
|
|
343
|
+
off = nil
|
|
344
|
+
length = nil
|
|
345
|
+
fetch = nil
|
|
346
|
+
@col_mu.synchronize do
|
|
347
|
+
return if @col_warmed[slot]
|
|
348
|
+
|
|
349
|
+
off = @col_buf_off[slot].to_i
|
|
350
|
+
length = @col_buf_len[slot].to_i
|
|
351
|
+
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if off.negative? || length.negative?
|
|
352
|
+
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if !@col_remote_fetch && off + length > @data.bytesize
|
|
353
|
+
|
|
354
|
+
fetch = @col_fetch_range
|
|
355
|
+
end
|
|
356
|
+
blob = fetch.call(off, length)
|
|
357
|
+
@col_mu.synchronize do
|
|
358
|
+
return if @col_warmed[slot]
|
|
359
|
+
|
|
360
|
+
@col_overlay[slot] = blob if off + blob.bytesize > @data.bytesize
|
|
361
|
+
@col_warmed[slot] = true
|
|
362
|
+
@col_fetches += 1
|
|
363
|
+
end
|
|
364
|
+
end
|
|
365
|
+
|
|
103
366
|
# Sum f64 column — columnar/PAX buffer path or row scan.
|
|
104
367
|
def sum_f64(key)
|
|
105
368
|
return col_sum_f64(key) if @layout != :row
|
|
@@ -282,8 +545,255 @@ module Nxs
|
|
|
282
545
|
end
|
|
283
546
|
end
|
|
284
547
|
|
|
548
|
+
# rubocop:disable Metrics/ParameterLists -- prefetch open options mirror Go OpenOptions
|
|
549
|
+
def init_prefetch!(hint:, max_pages:, page_size:, coalesce_gap_pages:, prefetch_depth:, fetch_range:)
|
|
550
|
+
@prefetch_mu = Mutex.new
|
|
551
|
+
@cache_mu = Mutex.new
|
|
552
|
+
@prefetch_hint = Nxs.normalize_hint(hint)
|
|
553
|
+
@prefetch_page_size = page_size
|
|
554
|
+
@prefetch_depth = prefetch_depth.positive? ? prefetch_depth : DEFAULT_PREFETCH_DEPTH
|
|
555
|
+
@coalesce_gap_pages = coalesce_gap_pages
|
|
556
|
+
@page_cache = PageCache.new(max_pages, page_size)
|
|
557
|
+
@in_flight = InFlightMap.new
|
|
558
|
+
@fetches_issued = 0
|
|
559
|
+
@detector = AccessPatternDetector.new
|
|
560
|
+
@prefetch_strategy = Nxs.initial_strategy(@prefetch_hint, @data.bytesize)
|
|
561
|
+
@prefetch_pattern = PATTERN_UNKNOWN
|
|
562
|
+
@eager_started = false
|
|
563
|
+
@eager_complete = false
|
|
564
|
+
@eager_cancel = false
|
|
565
|
+
@eager_thread = nil
|
|
566
|
+
@closed = false
|
|
567
|
+
@prefetch_paused = false
|
|
568
|
+
@fetch_range = fetch_range || lambda do |byte_start, byte_length|
|
|
569
|
+
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'fetch range out of bounds') if byte_start.negative?
|
|
570
|
+
|
|
571
|
+
end_ = byte_start + byte_length
|
|
572
|
+
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'fetch range out of bounds') if end_ > @data.bytesize
|
|
573
|
+
|
|
574
|
+
@data[byte_start, byte_length]
|
|
575
|
+
end
|
|
576
|
+
start_eager_background! if @layout == :row && @prefetch_strategy == 'eager'
|
|
577
|
+
end
|
|
578
|
+
# rubocop:enable Metrics/ParameterLists
|
|
579
|
+
|
|
580
|
+
# Block until eager / background prefetch completes (spec §8).
|
|
581
|
+
def warmup
|
|
582
|
+
t = @prefetch_mu.synchronize { @eager_thread }
|
|
583
|
+
t&.join
|
|
584
|
+
end
|
|
585
|
+
|
|
586
|
+
# Stop scheduling speculative and eager prefetch (§8.1).
|
|
587
|
+
def pause_prefetch
|
|
588
|
+
@prefetch_mu.synchronize { @prefetch_paused = true }
|
|
589
|
+
end
|
|
590
|
+
|
|
591
|
+
# Re-enable speculative prefetch after pause_prefetch.
|
|
592
|
+
def resume_prefetch
|
|
593
|
+
@prefetch_mu.synchronize { @prefetch_paused = false }
|
|
594
|
+
end
|
|
595
|
+
|
|
596
|
+
# Cancel in-flight eager prefetch and wait for the background thread.
|
|
597
|
+
def close
|
|
598
|
+
t = nil
|
|
599
|
+
@prefetch_mu.synchronize do
|
|
600
|
+
@closed = true
|
|
601
|
+
@eager_cancel = true
|
|
602
|
+
t = @eager_thread
|
|
603
|
+
end
|
|
604
|
+
t&.join
|
|
605
|
+
end
|
|
606
|
+
|
|
607
|
+
def on_access(index)
|
|
608
|
+
return unless @layout == :row
|
|
609
|
+
return if @record_count.zero?
|
|
610
|
+
|
|
611
|
+
adaptive_seq = false
|
|
612
|
+
skip_spec = false
|
|
613
|
+
start_eager = false
|
|
614
|
+
@prefetch_mu.synchronize do
|
|
615
|
+
return if @closed || @prefetch_paused
|
|
616
|
+
|
|
617
|
+
@detector.observe(index)
|
|
618
|
+
@prefetch_pattern = @detector.pattern
|
|
619
|
+
start_eager = maybe_upgrade_to_eager!
|
|
620
|
+
if eager_complete? || @prefetch_strategy == 'eager'
|
|
621
|
+
skip_spec = true
|
|
622
|
+
next
|
|
623
|
+
end
|
|
624
|
+
page_index = record_byte_offset(index) / @prefetch_page_size
|
|
625
|
+
@cache_mu.synchronize { @page_cache.get(page_index) }
|
|
626
|
+
adaptive_seq = @prefetch_strategy == 'adaptive' && @detector.pattern == PATTERN_SEQUENTIAL
|
|
627
|
+
end
|
|
628
|
+
start_eager_background! if start_eager
|
|
629
|
+
return if skip_spec
|
|
630
|
+
|
|
631
|
+
speculative_prefetch! if adaptive_seq
|
|
632
|
+
end
|
|
633
|
+
|
|
634
|
+
def record_byte_offset(i)
|
|
635
|
+
@data.unpack1("@#{@tail_start + i * 10 + 2}Q<")
|
|
636
|
+
end
|
|
637
|
+
|
|
638
|
+
# Prefetch pages for records [start_index, end_index] (row layout only).
|
|
639
|
+
def prefetch_viewport(start_index, end_index)
|
|
640
|
+
return self if @layout != :row
|
|
641
|
+
|
|
642
|
+
n = @record_count
|
|
643
|
+
unless start_index.between?(0, end_index) && end_index < n
|
|
644
|
+
raise NxsError.new(
|
|
645
|
+
'ERR_OUT_OF_BOUNDS',
|
|
646
|
+
"prefetch_viewport [#{start_index}, #{end_index}] out of [0, #{n})"
|
|
647
|
+
)
|
|
648
|
+
end
|
|
649
|
+
|
|
650
|
+
@cache_mu.synchronize do
|
|
651
|
+
page_size = @prefetch_page_size
|
|
652
|
+
indices = Nxs.page_indices_for_viewport(start_index, end_index, page_size) do |i|
|
|
653
|
+
record_byte_offset(i)
|
|
654
|
+
end
|
|
655
|
+
missing = indices.uniq.select { |p| !@page_cache.has?(p) && !@in_flight.has?(p) }
|
|
656
|
+
if missing.empty?
|
|
657
|
+
@page_cache.pin_pages(indices)
|
|
658
|
+
@page_cache.unpin_all
|
|
659
|
+
return self
|
|
660
|
+
end
|
|
661
|
+
|
|
662
|
+
ranges = Nxs.clamp_page_ranges(
|
|
663
|
+
Nxs.coalesce_page_indices(missing, @coalesce_gap_pages, page_size),
|
|
664
|
+
@data.bytesize
|
|
665
|
+
)
|
|
666
|
+
ranges.each { |r| fetch_coalesced_range_unlocked!(r) }
|
|
667
|
+
@page_cache.pin_pages(indices)
|
|
668
|
+
@page_cache.unpin_all
|
|
669
|
+
end
|
|
670
|
+
self
|
|
671
|
+
end
|
|
672
|
+
|
|
673
|
+
# Snapshot of page-cache and prefetch counters.
#
# Returns the page cache's stats Hash merged with :fetches_issued,
# :column_fetches_issued, :strategy and :pattern.
def cache_stats
  stats = @page_cache.stats
  # Column prefetch state (@col_mu / @col_fetches) is only set up for the
  # columnar layout. Report zero column fetches for other layouts instead of
  # calling #synchronize on nil.
  col_fetches = @col_mu ? @col_mu.synchronize { @col_fetches } : 0
  strategy, pattern = @prefetch_mu.synchronize do
    [@prefetch_strategy, @detector.pattern]
  end
  stats.merge(
    fetches_issued: @fetches_issued,
    column_fetches_issued: col_fetches,
    strategy: strategy,
    pattern: pattern
  )
end
|
|
686
|
+
|
|
285
687
|
private
|
|
286
688
|
|
|
689
|
+
def eager_complete?
|
|
690
|
+
@prefetch_strategy == 'eager' && @eager_complete
|
|
691
|
+
end
|
|
692
|
+
|
|
693
|
+
def maybe_upgrade_to_eager!
|
|
694
|
+
return if @prefetch_paused
|
|
695
|
+
return unless @prefetch_strategy == 'adaptive'
|
|
696
|
+
return unless @detector.pattern == PATTERN_SEQUENTIAL
|
|
697
|
+
return if @detector.sequential_runs < UPGRADE_SEQUENTIAL_THRESHOLD
|
|
698
|
+
return if @data.bytesize / (1024 * 1024) > EAGER_THRESHOLD_MB
|
|
699
|
+
|
|
700
|
+
@prefetch_strategy = 'eager'
|
|
701
|
+
true
|
|
702
|
+
end
|
|
703
|
+
|
|
704
|
+
def speculative_prefetch!
|
|
705
|
+
return if @prefetch_mu.synchronize { @prefetch_paused }
|
|
706
|
+
|
|
707
|
+
predicted = @prefetch_mu.synchronize { @detector.predict_next(@prefetch_depth, @record_count) }
|
|
708
|
+
return if predicted.empty?
|
|
709
|
+
|
|
710
|
+
page_size = @prefetch_page_size
|
|
711
|
+
missing = @cache_mu.synchronize do
|
|
712
|
+
predicted.filter_map do |idx|
|
|
713
|
+
off = record_byte_offset(idx)
|
|
714
|
+
p = off / page_size
|
|
715
|
+
p unless @page_cache.has?(p) || @in_flight.has?(p)
|
|
716
|
+
end.uniq
|
|
717
|
+
end
|
|
718
|
+
return if missing.empty?
|
|
719
|
+
|
|
720
|
+
ranges = Nxs.clamp_page_ranges(
|
|
721
|
+
Nxs.coalesce_page_indices(missing, @coalesce_gap_pages, page_size),
|
|
722
|
+
@data.bytesize
|
|
723
|
+
)
|
|
724
|
+
ranges.each { |r| fetch_coalesced_range!(r) }
|
|
725
|
+
end
|
|
726
|
+
|
|
727
|
+
def start_eager_background!
|
|
728
|
+
return unless @prefetch_strategy == 'eager'
|
|
729
|
+
|
|
730
|
+
@prefetch_mu.synchronize do
|
|
731
|
+
return if @prefetch_paused || @eager_started
|
|
732
|
+
|
|
733
|
+
@eager_started = true
|
|
734
|
+
sector_start, sector_len = Nxs.row_data_sector(@tail_start, @data.bytesize)
|
|
735
|
+
if sector_len.zero?
|
|
736
|
+
@eager_complete = true
|
|
737
|
+
next
|
|
738
|
+
end
|
|
739
|
+
@eager_thread = Thread.new { run_eager_background(sector_start, sector_len) }
|
|
740
|
+
end
|
|
741
|
+
end
|
|
742
|
+
|
|
743
|
+
def run_eager_background(sector_start, sector_len)
|
|
744
|
+
end_byte = [sector_start + sector_len, @data.bytesize].min
|
|
745
|
+
return if sector_start >= end_byte
|
|
746
|
+
|
|
747
|
+
page_size = @prefetch_page_size
|
|
748
|
+
first_page = sector_start / page_size
|
|
749
|
+
last_page = (end_byte - 1) / page_size
|
|
750
|
+
indices = (first_page..last_page).to_a
|
|
751
|
+
eager_cancelled = @prefetch_mu.synchronize { @eager_cancel }
|
|
752
|
+
return if eager_cancelled
|
|
753
|
+
|
|
754
|
+
missing = @cache_mu.synchronize do
|
|
755
|
+
indices.select { |p| !@page_cache.has?(p) && !@in_flight.has?(p) }
|
|
756
|
+
end
|
|
757
|
+
if missing.empty?
|
|
758
|
+
@prefetch_mu.synchronize { @eager_complete = true unless @eager_cancel }
|
|
759
|
+
return
|
|
760
|
+
end
|
|
761
|
+
|
|
762
|
+
ranges = Nxs.clamp_page_ranges(
|
|
763
|
+
Nxs.coalesce_page_indices(missing, @coalesce_gap_pages, page_size),
|
|
764
|
+
@data.bytesize
|
|
765
|
+
)
|
|
766
|
+
ranges.each do |r|
|
|
767
|
+
break if @prefetch_mu.synchronize { @eager_cancel }
|
|
768
|
+
|
|
769
|
+
fetch_coalesced_range!(r)
|
|
770
|
+
end
|
|
771
|
+
@prefetch_mu.synchronize { @eager_complete = true unless @eager_cancel }
|
|
772
|
+
end
|
|
773
|
+
|
|
774
|
+
def fetch_coalesced_range!(page_range)
|
|
775
|
+
@cache_mu.synchronize { fetch_coalesced_range_unlocked!(page_range) }
|
|
776
|
+
end
|
|
777
|
+
|
|
778
|
+
def fetch_coalesced_range_unlocked!(page_range)
|
|
779
|
+
blob = fetch_range_bytes!(page_range[:byte_start], page_range[:byte_length])
|
|
780
|
+
page_size = @prefetch_page_size
|
|
781
|
+
(page_range[:page_start]..page_range[:page_end]).each do |p|
|
|
782
|
+
next if @page_cache.has?(p)
|
|
783
|
+
|
|
784
|
+
page_off = p * page_size - page_range[:byte_start]
|
|
785
|
+
page_len = [page_size, blob.bytesize - page_off].min
|
|
786
|
+
next if page_len <= 0
|
|
787
|
+
|
|
788
|
+
@page_cache.set(p, blob[page_off, page_len])
|
|
789
|
+
end
|
|
790
|
+
end
|
|
791
|
+
|
|
792
|
+
def fetch_range_bytes!(byte_start, byte_length)
|
|
793
|
+
@fetches_issued += 1
|
|
794
|
+
@fetch_range.call(byte_start, byte_length)
|
|
795
|
+
end
|
|
796
|
+
|
|
287
797
|
def parse_layout_tail!(preamble_tail)
|
|
288
798
|
if (@flags & FLAG_COLUMNAR != 0) && (@flags & FLAG_PAX != 0)
|
|
289
799
|
raise NxsError.new('ERR_INVALID_FLAGS', 'columnar and PAX both set')
|
|
@@ -422,17 +932,39 @@ module Nxs
|
|
|
422
932
|
end
|
|
423
933
|
|
|
424
934
|
def col_field_parts(slot)
|
|
935
|
+
sector = column_sector(slot)
|
|
936
|
+
bm_len = null_bitmap_bytes(@record_count)
|
|
937
|
+
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'null bitmap') if sector.bytesize < bm_len
|
|
938
|
+
|
|
939
|
+
[sector[0, bm_len], sector[bm_len..]]
|
|
940
|
+
end
|
|
941
|
+
|
|
942
|
+
def init_column_prefetch!(fetch_range: nil)
|
|
943
|
+
return unless @layout == :columnar
|
|
944
|
+
|
|
945
|
+
@col_mu = Mutex.new
|
|
946
|
+
@col_warmed = {}
|
|
947
|
+
@col_overlay = {}
|
|
948
|
+
@col_fetches = 0
|
|
949
|
+
@col_remote_fetch = !fetch_range.nil?
|
|
950
|
+
data = @data
|
|
951
|
+
@col_fetch_range = fetch_range || ->(off, len) { data[off, len] }
|
|
952
|
+
end
|
|
953
|
+
|
|
954
|
+
def column_sector(slot)
|
|
425
955
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', "key slot #{slot}") if slot.negative? || slot >= @col_buf_off.length
|
|
426
956
|
|
|
427
957
|
off = @col_buf_off[slot].to_i
|
|
428
958
|
length = @col_buf_len[slot].to_i
|
|
959
|
+
if @col_warmed
|
|
960
|
+
@col_mu.synchronize do
|
|
961
|
+
overlay = @col_overlay[slot]
|
|
962
|
+
return overlay[0, length] if @col_warmed[slot] && overlay && !overlay.empty?
|
|
963
|
+
end
|
|
964
|
+
end
|
|
429
965
|
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if off + length > @data.bytesize
|
|
430
966
|
|
|
431
|
-
|
|
432
|
-
raise NxsError.new('ERR_OUT_OF_BOUNDS', 'null bitmap') if length < bm_len
|
|
433
|
-
|
|
434
|
-
sector = @data[off, length]
|
|
435
|
-
[sector[0, bm_len], sector[bm_len..]]
|
|
967
|
+
@data[off, length]
|
|
436
968
|
end
|
|
437
969
|
|
|
438
970
|
def col_var_parts(slot)
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: nyxis
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.0
|
|
4
|
+
version: 1.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Micael Malta
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
Pure-Ruby reader for NXB files produced by the NXS compiler. Provides
|