nyxis 1.0.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/nxs.rb +959 -18
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ebcbfcd1dfc34288e738e81acc006a8cabda0fdc50e9e851c73e55d422dadb2
4
- data.tar.gz: d1eb4b53469a62e382bbf385cf039307af2a5be59b62ba0db0d6f0abe3dffe73
3
+ metadata.gz: 21f182240eeb38df31bb91d81c52a41df156a234b82e61454b49c34f49de5ac8
4
+ data.tar.gz: ce6fca2da3243e57488f081ee10302cf780575591b2947d11a7becc85ad4feea
5
5
  SHA512:
6
- metadata.gz: 74dcb43ae96b59f6a56b52dd64d03efb2074c8f3789a99fbe1889b26ed931cb9280fd418036d837702a28cf040d79722f7d51a120d43788aa239f1ec6dbb4d35
7
- data.tar.gz: 339bf6357eebf0557ea3d114dbe970eb42f9f796f6665db9ca6369a59c2c63237135b2ff4681b5305bea05a15f1fc6b09c8347e8a836244ae53afd9f354966c6
6
+ metadata.gz: 27d92d5aad9aa3f661c610666361cd420e337b1955891abd006d02ec899f4b94e7040c5baeb4614b2d71efe457bb6728806a2c7e7f0e6ccda09075416a0286d7
7
+ data.tar.gz: d5f7df6269b0fe00e8ae6d6512cc67eed7ba33891654b51833f8519a627ac29136f90cd6e0bcceae3cf01db0ef025166bbc84e836a00cf6bf489eafb744ea4d5
data/nxs.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative 'pattern'
4
+
3
5
  # NXS Reader — .nxb parser (Ruby 3.x, stdlib only).
4
6
  #
5
7
  # Implements Nyxis v1.1 binary wire format.
@@ -22,8 +24,233 @@
22
24
  module Nxs
23
25
  MAGIC_FILE = 0x4E595842 # NYXB
24
26
  MAGIC_OBJ = 0x4E59584F # NYXO
27
+ MAGIC_LIST = 0x4E59584C # NYXL
28
+ MAGIC_PAGE = 0x4E585350 # NXSP
25
29
  MAGIC_FOOTER = 0x2153584E # NXS!
26
- FLAG_SCHEMA = 0x0002
30
+ FLAG_COLUMNAR = 0x0001
31
+ FLAG_PAX = 0x0004
32
+ FLAG_SCHEMA = 0x0002
33
+
34
+ FOOTER_ROW_BYTES = 12
35
+ FOOTER_COL_BYTES = 20
36
+ FOOTER_PAX_BYTES = 28
37
+ COL_TAIL_ENTRY_BYTES = 20
38
+ PAX_TAIL_ENTRY_BYTES = 28
39
+
40
+ # Adaptive prefetch (phase 1) — spec §6–§8.4
41
+ DEFAULT_PAGE_SIZE = 65_536
42
+ DEFAULT_MAX_PAGES = 64
43
+ DEFAULT_COALESCE_GAP_PAGES = 1
44
+ DEFAULT_PREFETCH_DEPTH = 4
45
+ EAGER_THRESHOLD_MB = 10
46
+ LAZY_THRESHOLD_MB = 50
47
+
48
+ HINT_UNKNOWN = 0
49
+ HINT_SEQUENTIAL = 1
50
+ HINT_RANDOM = 2
51
+ HINT_FULL = 3
52
+ HINT_PARTIAL = 4
53
+
54
+ HINT_SYMBOLS = {
55
+ unknown: HINT_UNKNOWN,
56
+ sequential: HINT_SEQUENTIAL,
57
+ random: HINT_RANDOM,
58
+ full: HINT_FULL,
59
+ partial: HINT_PARTIAL
60
+ }.freeze
61
+
62
+ def self.normalize_hint(hint)
63
+ return hint if hint.is_a?(Integer)
64
+
65
+ HINT_SYMBOLS.fetch(hint) { HINT_UNKNOWN }
66
+ end
67
+
68
+ # Initial prefetch strategy from open hint and file size (spec §5.1).
69
+ def self.initial_strategy(hint, file_size)
70
+ hint = normalize_hint(hint)
71
+ file_size_mb = file_size / (1024 * 1024)
72
+ return 'eager' if hint == HINT_FULL && file_size_mb <= EAGER_THRESHOLD_MB
73
+ return 'lazy' if file_size_mb > LAZY_THRESHOLD_MB
74
+
75
+ 'adaptive'
76
+ end
77
+
78
+ # Row-layout data sector as a [start, length] pair; length is 0 unless 32 < tail_start <= file_size.
79
+ def self.row_data_sector(tail_start, file_size)
80
+ sector_start = 32
81
+ if tail_start > sector_start && tail_start <= file_size
82
+ [sector_start, tail_start - sector_start]
83
+ else
84
+ [sector_start, 0]
85
+ end
86
+ end
87
+
88
+ # Coalesce page indices (deduplicated and sorted here) into spans, merging neighbours whose index difference is <= gap_pages (inclusive).
89
+ def self.coalesce_page_indices(indices, gap_pages, page_size = DEFAULT_PAGE_SIZE)
90
+ return [] if indices.empty?
91
+
92
+ uniq = indices.uniq.sort
93
+ spans = []
94
+ start = uniq[0]
95
+ end_ = uniq[0]
96
+ uniq.each_cons(2) do |_a, b|
97
+ if b - end_ <= gap_pages
98
+ end_ = b
99
+ else
100
+ spans << [start, end_]
101
+ start = end_ = b
102
+ end
103
+ end
104
+ spans << [start, end_]
105
+ spans.map do |a, b|
106
+ { page_start: a, page_end: b, byte_start: a * page_size, byte_length: (b - a + 1) * page_size }
107
+ end
108
+ end
109
+
110
+ def self.clamp_page_ranges(ranges, file_size)
111
+ ranges.filter_map do |r|
112
+ len = r[:byte_length]
113
+ len = file_size - r[:byte_start] if r[:byte_start] + len > file_size
114
+ next nil if len <= 0
115
+
116
+ r.merge(byte_length: len)
117
+ end
118
+ end
119
+
120
+ def self.page_indices_for_viewport(start_index, end_index, page_size, &record_offset)
121
+ (start_index..end_index).map { |i| record_offset.call(i) / page_size }
122
+ end
123
+
124
+ # LRU page cache with optional pinning (spec §6).
125
+ class PageCache
126
+ attr_reader :max_pages, :page_size, :hits, :misses
127
+
128
+ def initialize(max_pages = DEFAULT_MAX_PAGES, page_size = DEFAULT_PAGE_SIZE)
129
+ @max_pages = max_pages
130
+ @page_size = page_size
131
+ @pages = {}
132
+ @clock = 0
133
+ @hits = 0
134
+ @misses = 0
135
+ end
136
+
137
+ def has?(page_index)
138
+ @pages.key?(page_index)
139
+ end
140
+
141
+ def get(page_index)
142
+ entry = @pages[page_index]
143
+ unless entry
144
+ @misses += 1
145
+ return nil
146
+ end
147
+ @clock += 1
148
+ entry[:last_used] = @clock
149
+ @hits += 1
150
+ entry[:data]
151
+ end
152
+
153
+ def set(page_index, data, pinned: false)
154
+ return if @max_pages <= 0
155
+
156
+ while @pages.size >= @max_pages && !evict_one?; end
157
+ @clock += 1
158
+ @pages[page_index] = { data: data, last_used: @clock, pinned: pinned }
159
+ end
160
+
161
+ def pin_pages(page_indices)
162
+ page_indices.each do |p|
163
+ entry = @pages[p]
164
+ entry[:pinned] = true if entry
165
+ end
166
+ end
167
+
168
+ def unpin_all
169
+ @pages.each_value { |entry| entry[:pinned] = false }
170
+ end
171
+
172
+ def stats
173
+ bytes = @pages.values.sum { |e| e[:data].bytesize }
174
+ {
175
+ pages_cached: @pages.size,
176
+ pages_max: @max_pages,
177
+ memory_used_bytes: bytes,
178
+ cache_hits: @hits,
179
+ cache_misses: @misses
180
+ }
181
+ end
182
+
183
+ private
184
+
185
+ def evict_one?
186
+ victim = nil
187
+ oldest = nil
188
+ @pages.each do |idx, entry|
189
+ next if entry[:pinned]
190
+
191
+ if oldest.nil? || entry[:last_used] < oldest
192
+ oldest = entry[:last_used]
193
+ victim = idx
194
+ end
195
+ end
196
+ return false unless victim
197
+
198
+ @pages.delete(victim)
199
+ true
200
+ end
201
+ end
202
+
203
+ # In-flight page fetch deduplication for concurrent prefetch_viewport calls.
204
+ class InFlightMap
205
+ Entry = Struct.new(:queue, :data, :error)
206
+
207
+ def initialize
208
+ @mu = Mutex.new
209
+ @map = {}
210
+ end
211
+
212
+ def has?(page_index)
213
+ @mu.synchronize { @map.key?(page_index) }
214
+ end
215
+
216
+ def wait(page_index)
217
+ entry = @mu.synchronize { @map[page_index] }
218
+ return nil unless entry
219
+
220
+ entry.queue.pop
221
+ raise entry.error if entry.error
222
+
223
+ entry.data
224
+ end
225
+
226
+ def with(page_index)
227
+ entry = nil
228
+ leader = @mu.synchronize do
229
+ existing = @map[page_index]
230
+ if existing
231
+ false
232
+ else
233
+ entry = Entry.new(Queue.new)
234
+ @map[page_index] = entry
235
+ true
236
+ end
237
+ end
238
+ return wait(page_index) unless leader
239
+
240
+ begin
241
+ data = yield
242
+ entry.data = data
243
+ entry.queue << true
244
+ data
245
+ rescue StandardError => e
246
+ entry.error = e
247
+ entry.queue << true
248
+ raise
249
+ ensure
250
+ @mu.synchronize { @map.delete(page_index) if @map[page_index] == entry }
251
+ end
252
+ end
253
+ end
27
254
 
28
255
  class NxsError < StandardError
29
256
  attr_reader :code
@@ -37,9 +264,15 @@ module Nxs
37
264
  # ── Reader ──────────────────────────────────────────────────────────────────
38
265
 
39
266
  class Reader
40
- attr_reader :keys, :record_count
41
-
42
- def initialize(bytes)
267
+ attr_reader :keys, :record_count, :layout
268
+
269
+ def initialize(bytes, **options)
270
+ hint = options.fetch(:hint, HINT_UNKNOWN)
271
+ max_pages = options.fetch(:max_pages, DEFAULT_MAX_PAGES)
272
+ page_size = options.fetch(:page_size, DEFAULT_PAGE_SIZE)
273
+ coalesce_gap_pages = options.fetch(:coalesce_gap_pages, DEFAULT_COALESCE_GAP_PAGES)
274
+ prefetch_depth = options.fetch(:prefetch_depth, DEFAULT_PREFETCH_DEPTH)
275
+ fetch_range = options.fetch(:fetch_range, nil)
43
276
  @data = bytes.b # force binary encoding
44
277
  sz = @data.bytesize
45
278
  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'file too small') if sz < 32
@@ -51,12 +284,14 @@ module Nxs
51
284
  raise NxsError.new('ERR_BAD_MAGIC', 'footer magic mismatch') if footer != MAGIC_FOOTER
52
285
 
53
286
  # Preamble: Version(2) + Flags(2) + DictHash(8) + TailPtr(8) + Reserved(8)
54
- @flags = @data.unpack1('@6 S<')
55
- @tail_ptr = @data.unpack1('@16 Q<')
56
- if @tail_ptr.zero?
287
+ @flags = @data.unpack1('@6 S<')
288
+ preamble_tail = @data.unpack1('@16 Q<')
289
+ @tail_ptr = preamble_tail
290
+ layout_flags = @flags & (FLAG_COLUMNAR | FLAG_PAX)
291
+ if @tail_ptr.zero? && layout_flags.zero?
57
292
  raise NxsError.new('ERR_OUT_OF_BOUNDS', 'stream footer') if sz < 44
58
293
 
59
- @tail_ptr = @data.unpack1("@#{sz - 12}Q<")
294
+ @tail_ptr = @data.unpack1("@#{sz - FOOTER_ROW_BYTES}Q<")
60
295
  end
61
296
 
62
297
  @dict_hash = @data.unpack1('@8 Q<')
@@ -71,24 +306,67 @@ module Nxs
71
306
  raise NxsError.new('ERR_DICT_MISMATCH', 'schema hash mismatch') if computed != @dict_hash
72
307
  end
73
308
 
74
- # Tail-index: u32 EntryCount followed by records
75
- @record_count = @data.unpack1("@#{@tail_ptr}L<")
76
- @tail_start = @tail_ptr + 4
309
+ @col_buf_off = []
310
+ @col_buf_len = []
311
+ parse_layout_tail!(preamble_tail)
312
+ init_column_prefetch!(fetch_range: fetch_range)
313
+ init_prefetch!(
314
+ hint: hint,
315
+ max_pages: max_pages,
316
+ page_size: page_size,
317
+ coalesce_gap_pages: coalesce_gap_pages,
318
+ prefetch_depth: prefetch_depth,
319
+ fetch_range: fetch_range
320
+ )
77
321
  end
78
322
 
79
- # O(1) record lookup — reads one 10-byte tail-index entry.
323
+ # O(1) record lookup — row tail-index or columnar/PAX record index.
80
324
  def record(i)
81
325
  unless i >= 0 && i < @record_count
82
326
  raise NxsError.new('ERR_OUT_OF_BOUNDS', "record #{i} out of [0, #{@record_count})")
83
327
  end
84
328
 
85
- # Each tail-index entry: u16 KeyID + u64 AbsoluteOffset = 10 bytes
329
+ return Object.new(self, i, i) if @layout != :row
330
+
331
+ on_access(i)
86
332
  abs_offset = @data.unpack1("@#{@tail_start + i * 10 + 2}Q<")
87
333
  Object.new(self, abs_offset)
88
334
  end
89
335
 
90
- # Tight allocation-free sum loop.
336
+ # Prefetch one column buffer (columnar layout only; §7.4).
337
+ def prefetch_column(key)
338
+ raise NxsError.new('ERR_LAYOUT', 'prefetch_column requires columnar layout') unless @layout == :columnar
339
+
340
+ slot = @key_index[key]
341
+ raise NxsError.new('ERR_KEY_NOT_FOUND', "key #{key.inspect} not in schema") unless slot
342
+
343
+ off = nil
344
+ length = nil
345
+ fetch = nil
346
+ @col_mu.synchronize do
347
+ return if @col_warmed[slot]
348
+
349
+ off = @col_buf_off[slot].to_i
350
+ length = @col_buf_len[slot].to_i
351
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if off.negative? || length.negative?
352
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if !@col_remote_fetch && off + length > @data.bytesize
353
+
354
+ fetch = @col_fetch_range
355
+ end
356
+ blob = fetch.call(off, length)
357
+ @col_mu.synchronize do
358
+ return if @col_warmed[slot]
359
+
360
+ @col_overlay[slot] = blob if off + blob.bytesize > @data.bytesize
361
+ @col_warmed[slot] = true
362
+ @col_fetches += 1
363
+ end
364
+ end
365
+
366
+ # Sum f64 column — columnar/PAX buffer path or row scan.
91
367
  def sum_f64(key)
368
+ return col_sum_f64(key) if @layout != :row
369
+
92
370
  slot = @key_index[key]
93
371
  raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot
94
372
 
@@ -106,6 +384,68 @@ module Nxs
106
384
  sum
107
385
  end
108
386
 
387
+ # Columnar/PAX f64 sum (row layout delegates to sum_f64).
388
+ def col_sum_f64(key)
389
+ slot = @key_index[key]
390
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot
391
+
392
+ return sum_f64(key) if @layout == :row
393
+ return pax_sum_f64(slot) if @layout == :pax
394
+
395
+ bm, vals = col_field_parts(slot)
396
+ n = @record_count
397
+ sum = 0.0
398
+ i = 0
399
+ while i < n
400
+ if col_bit(bm, i)
401
+ off = i * 8
402
+ sum += vals.unpack1("@#{off}E") if off + 8 <= vals.bytesize
403
+ end
404
+ i += 1
405
+ end
406
+ sum
407
+ end
408
+
409
+ # Raw value bytes for a fixed-width column (columnar/PAX).
410
+ def col_buffer(key)
411
+ raise NxsError.new('ERR_LAYOUT', 'col_buffer requires columnar or PAX layout') if @layout == :row
412
+
413
+ slot = @key_index[key]
414
+ return nil unless slot
415
+ return nil if var_sigil?(@key_sigils[slot])
416
+
417
+ _bm, vals = col_field_parts(slot)
418
+ vals
419
+ rescue NxsError
420
+ nil
421
+ end
422
+
423
+ # Null bitmap + u32 offsets + values for var-length columns (columnar only).
424
+ def col_var_buffer(key)
425
+ raise NxsError.new('ERR_LAYOUT', 'col_var_buffer is columnar-only') unless @layout == :columnar
426
+
427
+ slot = @key_index[key]
428
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot
429
+ raise NxsError.new('ERR_UNSUPPORTED_FIELD_TYPE', key) unless var_sigil?(@key_sigils[slot])
430
+
431
+ bm, offsets, values = col_var_parts(slot)
432
+ { bitmap: bm, offsets: offsets, values: values, count: @record_count }
433
+ end
434
+
435
+ def col_get_str(key, record_index)
436
+ slot = @key_index[key]
437
+ return nil unless slot && record_index < @record_count && @layout != :row
438
+ return nil unless @key_sigils[slot] == 0x22
439
+
440
+ bm, offsets, values, ok = col_var_parts_at(record_index, slot)
441
+ return nil unless ok
442
+
443
+ bit_idx = @layout == :pax ? pax_find_page(record_index)&.[](:local) : record_index
444
+ return nil if bit_idx.nil? || !col_bit(bm, bit_idx)
445
+
446
+ var_str_at(offsets, values, bit_idx)
447
+ end
448
+
109
449
  def min_f64(key)
110
450
  slot = @key_index[key]
111
451
  raise NxsError.new('ERR_OUT_OF_BOUNDS', "key '#{key}' not in schema") unless slot
@@ -178,6 +518,8 @@ module Nxs
178
518
  t_idx = 0
179
519
 
180
520
  loop do
521
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'bitmask overrun on corrupt input') if p >= data.bytesize
522
+
181
523
  b = data.getbyte(p)
182
524
  p += 1
183
525
  bits = b & 0x7F
@@ -203,8 +545,556 @@ module Nxs
203
545
  end
204
546
  end
205
547
 
548
+ # rubocop:disable Metrics/ParameterLists -- prefetch open options mirror Go OpenOptions
549
+ def init_prefetch!(hint:, max_pages:, page_size:, coalesce_gap_pages:, prefetch_depth:, fetch_range:)
550
+ @prefetch_mu = Mutex.new
551
+ @cache_mu = Mutex.new
552
+ @prefetch_hint = Nxs.normalize_hint(hint)
553
+ @prefetch_page_size = page_size
554
+ @prefetch_depth = prefetch_depth.positive? ? prefetch_depth : DEFAULT_PREFETCH_DEPTH
555
+ @coalesce_gap_pages = coalesce_gap_pages
556
+ @page_cache = PageCache.new(max_pages, page_size)
557
+ @in_flight = InFlightMap.new
558
+ @fetches_issued = 0
559
+ @detector = AccessPatternDetector.new
560
+ @prefetch_strategy = Nxs.initial_strategy(@prefetch_hint, @data.bytesize)
561
+ @prefetch_pattern = PATTERN_UNKNOWN
562
+ @eager_started = false
563
+ @eager_complete = false
564
+ @eager_cancel = false
565
+ @eager_thread = nil
566
+ @closed = false
567
+ @prefetch_paused = false
568
+ @fetch_range = fetch_range || lambda do |byte_start, byte_length|
569
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'fetch range out of bounds') if byte_start.negative?
570
+
571
+ end_ = byte_start + byte_length
572
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'fetch range out of bounds') if end_ > @data.bytesize
573
+
574
+ @data[byte_start, byte_length]
575
+ end
576
+ start_eager_background! if @layout == :row && @prefetch_strategy == 'eager'
577
+ end
578
+ # rubocop:enable Metrics/ParameterLists
579
+
580
+ # Block until eager / background prefetch completes (spec §8).
581
+ def warmup
582
+ t = @prefetch_mu.synchronize { @eager_thread }
583
+ t&.join
584
+ end
585
+
586
+ # Stop scheduling speculative and eager prefetch (§8.1).
587
+ def pause_prefetch
588
+ @prefetch_mu.synchronize { @prefetch_paused = true }
589
+ end
590
+
591
+ # Re-enable speculative prefetch after pause_prefetch.
592
+ def resume_prefetch
593
+ @prefetch_mu.synchronize { @prefetch_paused = false }
594
+ end
595
+
596
+ # Cancel in-flight eager prefetch and wait for the background thread.
597
+ def close
598
+ t = nil
599
+ @prefetch_mu.synchronize do
600
+ @closed = true
601
+ @eager_cancel = true
602
+ t = @eager_thread
603
+ end
604
+ t&.join
605
+ end
606
+
607
+ def on_access(index)
608
+ return unless @layout == :row
609
+ return if @record_count.zero?
610
+
611
+ adaptive_seq = false
612
+ skip_spec = false
613
+ start_eager = false
614
+ @prefetch_mu.synchronize do
615
+ return if @closed || @prefetch_paused
616
+
617
+ @detector.observe(index)
618
+ @prefetch_pattern = @detector.pattern
619
+ start_eager = maybe_upgrade_to_eager!
620
+ if eager_complete? || @prefetch_strategy == 'eager'
621
+ skip_spec = true
622
+ next
623
+ end
624
+ page_index = record_byte_offset(index) / @prefetch_page_size
625
+ @cache_mu.synchronize { @page_cache.get(page_index) }
626
+ adaptive_seq = @prefetch_strategy == 'adaptive' && @detector.pattern == PATTERN_SEQUENTIAL
627
+ end
628
+ start_eager_background! if start_eager
629
+ return if skip_spec
630
+
631
+ speculative_prefetch! if adaptive_seq
632
+ end
633
+
634
+ def record_byte_offset(i)
635
+ @data.unpack1("@#{@tail_start + i * 10 + 2}Q<")
636
+ end
637
+
638
+ # Prefetch pages for records [start_index, end_index] (row layout only).
639
+ def prefetch_viewport(start_index, end_index)
640
+ return self if @layout != :row
641
+
642
+ n = @record_count
643
+ unless start_index.between?(0, end_index) && end_index < n
644
+ raise NxsError.new(
645
+ 'ERR_OUT_OF_BOUNDS',
646
+ "prefetch_viewport [#{start_index}, #{end_index}] out of [0, #{n})"
647
+ )
648
+ end
649
+
650
+ @cache_mu.synchronize do
651
+ page_size = @prefetch_page_size
652
+ indices = Nxs.page_indices_for_viewport(start_index, end_index, page_size) do |i|
653
+ record_byte_offset(i)
654
+ end
655
+ missing = indices.uniq.select { |p| !@page_cache.has?(p) && !@in_flight.has?(p) }
656
+ if missing.empty?
657
+ @page_cache.pin_pages(indices)
658
+ @page_cache.unpin_all
659
+ return self
660
+ end
661
+
662
+ ranges = Nxs.clamp_page_ranges(
663
+ Nxs.coalesce_page_indices(missing, @coalesce_gap_pages, page_size),
664
+ @data.bytesize
665
+ )
666
+ ranges.each { |r| fetch_coalesced_range_unlocked!(r) }
667
+ @page_cache.pin_pages(indices)
668
+ @page_cache.unpin_all
669
+ end
670
+ self
671
+ end
672
+
673
+ def cache_stats
674
+ stats = @page_cache.stats
675
+ col_fetches = @col_mu.synchronize { @col_fetches }
676
+ strategy, pattern = @prefetch_mu.synchronize do
677
+ [@prefetch_strategy, @detector.pattern]
678
+ end
679
+ stats.merge(
680
+ fetches_issued: @fetches_issued,
681
+ column_fetches_issued: col_fetches,
682
+ strategy: strategy,
683
+ pattern: pattern
684
+ )
685
+ end
686
+
206
687
  private
207
688
 
689
+ def eager_complete?
690
+ @prefetch_strategy == 'eager' && @eager_complete
691
+ end
692
+
693
+ def maybe_upgrade_to_eager!
694
+ return if @prefetch_paused
695
+ return unless @prefetch_strategy == 'adaptive'
696
+ return unless @detector.pattern == PATTERN_SEQUENTIAL
697
+ return if @detector.sequential_runs < UPGRADE_SEQUENTIAL_THRESHOLD
698
+ return if @data.bytesize / (1024 * 1024) > EAGER_THRESHOLD_MB
699
+
700
+ @prefetch_strategy = 'eager'
701
+ true
702
+ end
703
+
704
+ def speculative_prefetch!
705
+ return if @prefetch_mu.synchronize { @prefetch_paused }
706
+
707
+ predicted = @prefetch_mu.synchronize { @detector.predict_next(@prefetch_depth, @record_count) }
708
+ return if predicted.empty?
709
+
710
+ page_size = @prefetch_page_size
711
+ missing = @cache_mu.synchronize do
712
+ predicted.filter_map do |idx|
713
+ off = record_byte_offset(idx)
714
+ p = off / page_size
715
+ p unless @page_cache.has?(p) || @in_flight.has?(p)
716
+ end.uniq
717
+ end
718
+ return if missing.empty?
719
+
720
+ ranges = Nxs.clamp_page_ranges(
721
+ Nxs.coalesce_page_indices(missing, @coalesce_gap_pages, page_size),
722
+ @data.bytesize
723
+ )
724
+ ranges.each { |r| fetch_coalesced_range!(r) }
725
+ end
726
+
727
+ def start_eager_background!
728
+ return unless @prefetch_strategy == 'eager'
729
+
730
+ @prefetch_mu.synchronize do
731
+ return if @prefetch_paused || @eager_started
732
+
733
+ @eager_started = true
734
+ sector_start, sector_len = Nxs.row_data_sector(@tail_start, @data.bytesize)
735
+ if sector_len.zero?
736
+ @eager_complete = true
737
+ next
738
+ end
739
+ @eager_thread = Thread.new { run_eager_background(sector_start, sector_len) }
740
+ end
741
+ end
742
+
743
+ def run_eager_background(sector_start, sector_len)
744
+ end_byte = [sector_start + sector_len, @data.bytesize].min
745
+ return if sector_start >= end_byte
746
+
747
+ page_size = @prefetch_page_size
748
+ first_page = sector_start / page_size
749
+ last_page = (end_byte - 1) / page_size
750
+ indices = (first_page..last_page).to_a
751
+ eager_cancelled = @prefetch_mu.synchronize { @eager_cancel }
752
+ return if eager_cancelled
753
+
754
+ missing = @cache_mu.synchronize do
755
+ indices.select { |p| !@page_cache.has?(p) && !@in_flight.has?(p) }
756
+ end
757
+ if missing.empty?
758
+ @prefetch_mu.synchronize { @eager_complete = true unless @eager_cancel }
759
+ return
760
+ end
761
+
762
+ ranges = Nxs.clamp_page_ranges(
763
+ Nxs.coalesce_page_indices(missing, @coalesce_gap_pages, page_size),
764
+ @data.bytesize
765
+ )
766
+ ranges.each do |r|
767
+ break if @prefetch_mu.synchronize { @eager_cancel }
768
+
769
+ fetch_coalesced_range!(r)
770
+ end
771
+ @prefetch_mu.synchronize { @eager_complete = true unless @eager_cancel }
772
+ end
773
+
774
+ def fetch_coalesced_range!(page_range)
775
+ @cache_mu.synchronize { fetch_coalesced_range_unlocked!(page_range) }
776
+ end
777
+
778
+ def fetch_coalesced_range_unlocked!(page_range)
779
+ blob = fetch_range_bytes!(page_range[:byte_start], page_range[:byte_length])
780
+ page_size = @prefetch_page_size
781
+ (page_range[:page_start]..page_range[:page_end]).each do |p|
782
+ next if @page_cache.has?(p)
783
+
784
+ page_off = p * page_size - page_range[:byte_start]
785
+ page_len = [page_size, blob.bytesize - page_off].min
786
+ next if page_len <= 0
787
+
788
+ @page_cache.set(p, blob[page_off, page_len])
789
+ end
790
+ end
791
+
792
+ def fetch_range_bytes!(byte_start, byte_length)
793
+ @fetches_issued += 1
794
+ @fetch_range.call(byte_start, byte_length)
795
+ end
796
+
797
+ def parse_layout_tail!(preamble_tail)
798
+ if (@flags & FLAG_COLUMNAR != 0) && (@flags & FLAG_PAX != 0)
799
+ raise NxsError.new('ERR_INVALID_FLAGS', 'columnar and PAX both set')
800
+ end
801
+ if (@flags & FLAG_COLUMNAR != 0) && preamble_tail.zero?
802
+ raise NxsError.new('ERR_INCOMPATIBLE_FLAGS', 'columnar with TailPtr=0')
803
+ end
804
+
805
+ if (@flags & FLAG_COLUMNAR) != 0
806
+ @layout = :columnar
807
+ parse_columnar_footer!
808
+ return
809
+ end
810
+ if (@flags & FLAG_PAX) != 0
811
+ @layout = :pax
812
+ parse_pax_footer!
813
+ return
814
+ end
815
+
816
+ @layout = :row
817
+ if preamble_tail.zero?
818
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'streamable footer') if @data.bytesize < 44
819
+
820
+ @tail_ptr = @data.unpack1("@#{@data.bytesize - FOOTER_ROW_BYTES}Q<")
821
+ end
822
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'tail index') if @tail_ptr + 4 > @data.bytesize
823
+
824
+ @record_count = @data.unpack1("@#{@tail_ptr}L<")
825
+ @tail_start = @tail_ptr + 4
826
+ end
827
+
828
+ def parse_columnar_footer!
829
+ sz = @data.bytesize
830
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'columnar footer') if sz < FOOTER_COL_BYTES
831
+
832
+ fo = sz - FOOTER_COL_BYTES
833
+ @tail_ptr = @data.unpack1("@#{fo}Q<")
834
+ @record_count = @data.unpack1("@#{fo + 8}Q<")
835
+ @tail_start = @tail_ptr
836
+ kc = @keys.length
837
+ @col_buf_off = Array.new(kc)
838
+ @col_buf_len = Array.new(kc)
839
+ kc.times do |i|
840
+ e = @tail_start + i * COL_TAIL_ENTRY_BYTES
841
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'columnar tail entry') if e + COL_TAIL_ENTRY_BYTES > sz
842
+
843
+ fid = @data.unpack1("@#{e}S<")
844
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', "invalid field ID #{fid}") if fid >= kc
845
+
846
+ @col_buf_off[fid] = @data.unpack1("@#{e + 4}Q<")
847
+ @col_buf_len[fid] = @data.unpack1("@#{e + 12}Q<")
848
+ end
849
+ end
850
+
851
+ def parse_pax_footer!
852
+ sz = @data.bytesize
853
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'PAX footer') if sz < FOOTER_PAX_BYTES
854
+
855
+ fo = sz - FOOTER_PAX_BYTES
856
+ @tail_ptr = @data.unpack1("@#{fo}Q<")
857
+ @record_count = @data.unpack1("@#{fo + 8}Q<")
858
+ @page_count = @data.unpack1("@#{fo + 16}L<")
859
+ @page_size_hint = @data.unpack1("@#{fo + 20}L<")
860
+ @tail_start = @tail_ptr
861
+ @page_index = []
862
+ @page_rec_start = []
863
+ @page_rec_count = []
864
+ @page_offset = []
865
+ @page_length = []
866
+
867
+ @page_count.times do |i|
868
+ e = @tail_start + i * PAX_TAIL_ENTRY_BYTES
869
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'PAX tail entry') if e + PAX_TAIL_ENTRY_BYTES > sz
870
+
871
+ @page_index << @data.unpack1("@#{e}L<")
872
+ @page_rec_start << @data.unpack1("@#{e + 4}Q<")
873
+ @page_rec_count << @data.unpack1("@#{e + 12}L<")
874
+ @page_offset << @data.unpack1("@#{e + 16}Q<")
875
+ @page_length << @data.unpack1("@#{e + 24}L<")
876
+ end
877
+
878
+ @page_count.times do |i|
879
+ poff = @page_offset[i]
880
+ if poff > sz || poff + 4 > sz || @data.unpack1("@#{poff}L<") != MAGIC_PAGE
881
+ raise NxsError.new('ERR_INVALID_PAGE_MAGIC', 'PAX page magic mismatch')
882
+ end
883
+ end
884
+ end
885
+
886
+ def null_bitmap_bytes(n)
887
+ raw = (n + 7) / 8
888
+ (raw + 7) & ~7
889
+ end
890
+
891
+ # rubocop:disable Naming/PredicateMethod -- mirrors C col_bit naming
892
+ def col_bit(bm, rec)
893
+ ((bm.getbyte(rec / 8) >> (rec % 8)) & 1) == 1
894
+ end
895
+ # rubocop:enable Naming/PredicateMethod
896
+
897
+ def var_sigil?(sig)
898
+ [0x22, 0x3C].include?(sig)
899
+ end
900
+
901
+ def var_off_bytes_len(rc)
902
+ off = (rc + 1) * 4
903
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var offsets overflow') if off > @data.bytesize
904
+
905
+ off
906
+ end
907
+
908
+ def field_sector_len(sector_off, rc, sigil)
909
+ bm_len = null_bitmap_bytes(rc)
910
+ return bm_len + rc * 8 unless var_sigil?(sigil)
911
+
912
+ off_bytes = var_off_bytes_len(rc)
913
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var offsets') if sector_off + bm_len + off_bytes > @data.bytesize
914
+
915
+ end_off = @data.unpack1("@#{sector_off + bm_len + rc * 4}L<")
916
+ total = bm_len + off_bytes + end_off
917
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var values') if sector_off + total > @data.bytesize
918
+
919
+ total
920
+ end
921
+
922
+ def var_str_at(offsets, values, record_index)
923
+ need = (record_index + 2) * 4
924
+ return nil if offsets.bytesize < need
925
+
926
+ off = record_index * 4
927
+ start = offsets.unpack1("@#{off}L<")
928
+ end_ = offsets.unpack1("@#{off + 4}L<")
929
+ return nil if end_ < start || end_ > values.bytesize
930
+
931
+ values[start...end_].force_encoding('UTF-8')
932
+ end
933
+
934
+ def col_field_parts(slot)
935
+ sector = column_sector(slot)
936
+ bm_len = null_bitmap_bytes(@record_count)
937
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'null bitmap') if sector.bytesize < bm_len
938
+
939
+ [sector[0, bm_len], sector[bm_len..]]
940
+ end
941
+
942
+ def init_column_prefetch!(fetch_range: nil)
943
+ return unless @layout == :columnar
944
+
945
+ @col_mu = Mutex.new
946
+ @col_warmed = {}
947
+ @col_overlay = {}
948
+ @col_fetches = 0
949
+ @col_remote_fetch = !fetch_range.nil?
950
+ data = @data
951
+ @col_fetch_range = fetch_range || ->(off, len) { data[off, len] }
952
+ end
953
+
954
+ def column_sector(slot)
955
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', "key slot #{slot}") if slot.negative? || slot >= @col_buf_off.length
956
+
957
+ off = @col_buf_off[slot].to_i
958
+ length = @col_buf_len[slot].to_i
959
+ if @col_warmed
960
+ @col_mu.synchronize do
961
+ overlay = @col_overlay[slot]
962
+ return overlay[0, length] if @col_warmed[slot] && overlay && !overlay.empty?
963
+ end
964
+ end
965
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'column buffer') if off + length > @data.bytesize
966
+
967
+ @data[off, length]
968
+ end
969
+
970
+ def col_var_parts(slot)
971
+ bm, tail = col_field_parts(slot)
972
+ off_bytes = var_off_bytes_len(@record_count)
973
+ raise NxsError.new('ERR_OUT_OF_BOUNDS', 'var offsets') if tail.bytesize < off_bytes
974
+
975
+ [bm, tail[0, off_bytes], tail[off_bytes..]]
976
+ end
977
+
978
+ def col_var_parts_at(rec, slot)
979
+ return [nil, nil, nil, false] if slot.negative? || slot >= @key_sigils.length || !var_sigil?(@key_sigils[slot])
980
+
981
+ if @layout == :columnar
982
+ bm, offsets, values = col_var_parts(slot)
983
+ return [bm, offsets, values, true]
984
+ end
985
+ if @layout == :pax
986
+ loc = pax_find_page(rec)
987
+ return [nil, nil, nil, false] unless loc
988
+
989
+ bm, tail = page_field_parts(loc[:page], slot)
990
+ return [nil, nil, nil, false] unless bm
991
+
992
+ rc = @page_rec_count[loc[:page]]
993
+ off_bytes = var_off_bytes_len(rc)
994
+ return [nil, nil, nil, false] if tail.bytesize < off_bytes
995
+
996
+ return [bm, tail[0, off_bytes], tail[off_bytes..], true]
997
+ end
998
+ [nil, nil, nil, false]
999
+ end
1000
+
1001
+ def col_numeric_bytes(rec, slot)
1002
+ return nil if slot >= 0 && slot < @key_sigils.length && var_sigil?(@key_sigils[slot])
1003
+
1004
+ if @layout == :columnar
1005
+ bm, vals = col_field_parts(slot)
1006
+ return nil if rec >= @record_count || !col_bit(bm, rec)
1007
+
1008
+ off = rec * 8
1009
+ return nil if off + 8 > vals.bytesize
1010
+
1011
+ return vals[off, 8]
1012
+ end
1013
+ if @layout == :pax
1014
+ loc = pax_find_page(rec)
1015
+ return nil unless loc
1016
+
1017
+ bm, vals = page_field_parts(loc[:page], slot)
1018
+ return nil unless bm && col_bit(bm, loc[:local])
1019
+
1020
+ off = loc[:local] * 8
1021
+ return nil if off + 8 > vals.bytesize
1022
+
1023
+ return vals[off, 8]
1024
+ end
1025
+ nil
1026
+ end
1027
+
1028
+ def pax_find_page(rec)
1029
+ return nil if @page_count.zero?
1030
+
1031
+ lo = 0
1032
+ hi = @page_count - 1
1033
+ while lo <= hi
1034
+ mid = lo + (hi - lo) / 2
1035
+ start = @page_rec_start[mid]
1036
+ count = @page_rec_count[mid]
1037
+ if rec < start
1038
+ hi = mid - 1
1039
+ elsif rec >= start + count
1040
+ lo = mid + 1
1041
+ else
1042
+ return { page: mid, local: rec - start }
1043
+ end
1044
+ end
1045
+ nil
1046
+ end
1047
+
1048
+ def page_field_sector(pi, slot)
1049
+ poff = @page_offset[pi].to_i
1050
+ return nil if poff + 24 > @data.bytesize || @data.unpack1("@#{poff}L<") != MAGIC_PAGE
1051
+
1052
+ fc = @data.unpack1("@#{poff + 20}S<")
1053
+ return nil if slot.negative? || slot >= fc || fc > @key_sigils.length
1054
+
1055
+ rc = @page_rec_count[pi]
1056
+ body = poff + 24
1057
+ slot.times do |fi|
1058
+ sig = fi < @key_sigils.length ? @key_sigils[fi] : 0x3D
1059
+ flen = field_sector_len(body, rc, sig)
1060
+ body += flen
1061
+ end
1062
+ sig = slot < @key_sigils.length ? @key_sigils[slot] : 0x3D
1063
+ flen = field_sector_len(body, rc, sig)
1064
+ return nil if body + flen > @data.bytesize
1065
+
1066
+ @data[body, flen]
1067
+ end
1068
+
1069
+ def page_field_parts(pi, slot)
1070
+ sector = page_field_sector(pi, slot)
1071
+ return [nil, nil] unless sector
1072
+
1073
+ bm_len = null_bitmap_bytes(@page_rec_count[pi])
1074
+ return [nil, nil] if sector.bytesize < bm_len
1075
+
1076
+ [sector[0, bm_len], sector[bm_len..]]
1077
+ end
1078
+
1079
+ def pax_sum_f64(slot)
1080
+ sum = 0.0
1081
+ @page_count.times do |pi|
1082
+ bm, vals = page_field_parts(pi, slot)
1083
+ next unless bm
1084
+
1085
+ rc = @page_rec_count[pi]
1086
+ i = 0
1087
+ while i < rc
1088
+ if col_bit(bm, i)
1089
+ off = i * 8
1090
+ sum += vals.unpack1("@#{off}E") if off + 8 <= vals.bytesize
1091
+ end
1092
+ i += 1
1093
+ end
1094
+ end
1095
+ sum
1096
+ end
1097
+
208
1098
  def read_schema(offset)
209
1099
  key_count = @data.unpack1("@#{offset}S<")
210
1100
  offset += 2
@@ -438,13 +1328,19 @@ module Nxs
438
1328
  # ── Object ───────────────────────────────────────────────────────────────────
439
1329
 
440
1330
  class Object
441
- def initialize(reader, offset)
442
- @reader = reader
443
- @offset = offset
444
- @parsed = false
1331
+ def initialize(reader, offset, record_index = nil)
1332
+ @reader = reader
1333
+ @offset = offset
1334
+ @record_index = record_index
1335
+ @parsed = false
445
1336
  end
446
1337
 
447
1338
  def get_str(key)
1339
+ slot = @reader.key_index[key]
1340
+ return nil unless slot
1341
+
1342
+ return @reader.col_get_str(key, record_index) if uses_columnar_field_access?
1343
+
448
1344
  off = field_offset(key)
449
1345
  return nil unless off
450
1346
 
@@ -453,6 +1349,16 @@ module Nxs
453
1349
  end
454
1350
 
455
1351
  def get_i64(key)
1352
+ slot = @reader.key_index[key]
1353
+ return nil unless slot
1354
+
1355
+ if uses_columnar_field_access?
1356
+ cell = @reader.send(:col_numeric_bytes, record_index, slot)
1357
+ return nil unless cell
1358
+
1359
+ return cell.unpack1('q<')
1360
+ end
1361
+
456
1362
  off = field_offset(key)
457
1363
  return nil unless off
458
1364
 
@@ -460,6 +1366,16 @@ module Nxs
460
1366
  end
461
1367
 
462
1368
  def get_f64(key)
1369
+ slot = @reader.key_index[key]
1370
+ return nil unless slot
1371
+
1372
+ if uses_columnar_field_access?
1373
+ cell = @reader.send(:col_numeric_bytes, record_index, slot)
1374
+ return nil unless cell
1375
+
1376
+ return cell.unpack1('E')
1377
+ end
1378
+
463
1379
  off = field_offset(key)
464
1380
  return nil unless off
465
1381
 
@@ -467,6 +1383,16 @@ module Nxs
467
1383
  end
468
1384
 
469
1385
  def get_bool(key)
1386
+ slot = @reader.key_index[key]
1387
+ return nil unless slot
1388
+
1389
+ if uses_columnar_field_access?
1390
+ cell = @reader.send(:col_numeric_bytes, record_index, slot)
1391
+ return nil unless cell
1392
+
1393
+ return cell.getbyte(0) != 0
1394
+ end
1395
+
470
1396
  off = field_offset(key)
471
1397
  return nil unless off
472
1398
 
@@ -475,6 +1401,21 @@ module Nxs
475
1401
 
476
1402
  private
477
1403
 
1404
+ def record_index
1405
+ @record_index.nil? ? @offset : @record_index
1406
+ end
1407
+
1408
+ def obj_at_nyxo?
1409
+ return false if @offset + 4 > @reader.data.bytesize
1410
+
1411
+ @reader.data.unpack1("@#{@offset}L<") == MAGIC_OBJ
1412
+ end
1413
+
1414
+ # Columnar/PAX top-level records use record index; nested NYXO blobs use row paths.
1415
+ def uses_columnar_field_access?
1416
+ @reader.layout != :row && !obj_at_nyxo?
1417
+ end
1418
+
478
1419
  # Parse the object header (lazy — only on first field access).
479
1420
  def parse_header
480
1421
  return if @parsed
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nyxis
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Micael Malta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-05-20 00:00:00.000000000 Z
11
+ date: 2026-05-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  Pure-Ruby reader for NXB files produced by the NXS compiler. Provides